Files
readur/tests/migration_integration_tests.rs

359 lines
13 KiB
Rust

use readur::test_utils::TestContext;
use sqlx::Row;
use uuid::Uuid;
#[cfg(test)]
mod migration_integration_tests {
use super::*;
/// End-to-end check of the failed-document migration for one freshly created user.
///
/// Seeds `documents` with failed OCR rows, runs an `INSERT ... SELECT` that simulates the
/// migration into `failed_documents`, and then verifies the migrated row count, the
/// `ocr_failure_reason` -> `failure_reason` mapping, that `ocr_error` was carried over as
/// `error_message`, and that the original rows can be deleted. All reads and deletes are
/// scoped to this test's `user_id`.
///
/// # Panics
///
/// Panics (failing the test) if any query errors or any expectation is not met.
#[tokio::test]
async fn test_full_migration_workflow() {
    let ctx = TestContext::new().await;
    let pool = ctx.state.db.get_pool();

    // Setup: create a dedicated user; the timestamp suffix keeps username/email distinct
    // between runs.
    let user_id = Uuid::new_v4();
    let unique_suffix = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock is set before the Unix epoch")
        .as_nanos();
    let username = format!("test_migration_user_{}", unique_suffix);
    let email = format!("test_migration_{}@example.com", unique_suffix);
    sqlx::query(
        "INSERT INTO users (id, username, email, password_hash, role)
VALUES ($1, $2, $3, $4, $5)",
    )
    .bind(user_id)
    .bind(&username)
    .bind(&email)
    .bind("hash")
    .bind("user")
    .execute(pool)
    .await
    .expect("Failed to insert test user");

    // One table for both input and expectation so the two cannot drift apart:
    // (filename, stored ocr_failure_reason, stored ocr_error, expected failure_reason).
    let test_documents = [
        ("doc1.pdf", Some("low_ocr_confidence"), "Quality below threshold", "low_ocr_confidence"),
        ("doc2.pdf", Some("timeout"), "OCR processing timed out", "ocr_timeout"),
        ("doc3.pdf", Some("memory_limit"), "Out of memory", "ocr_memory_limit"),
        ("doc4.pdf", Some("corrupted"), "File appears corrupted", "file_corrupted"),
        // An unrecognised reason and a NULL reason both match no WHEN arm and hit ELSE.
        ("doc5.pdf", Some("unknown"), "Unknown error occurred", "other"),
        ("doc6.pdf", None, "Generic failure message", "other"),
    ];
    let expected_count = test_documents.len() as i64;

    // Insert the failed documents that the migration will pick up.
    for &(filename, failure_reason, error_msg, _) in &test_documents {
        sqlx::query(
            r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
mime_type, ocr_status, ocr_failure_reason, ocr_error
) VALUES (
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
        )
        .bind(user_id)
        .bind(filename)
        .bind(failure_reason)
        .bind(error_msg)
        .execute(pool)
        .await
        .expect("Failed to insert test document");
    }

    // Sanity check: every seeded document is visible as failed before migrating.
    let before_count: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND user_id = $1",
    )
    .bind(user_id)
    .fetch_one(pool)
    .await
    .expect("Failed to count documents");
    assert_eq!(before_count, expected_count);

    // Simulate the migration logic. `expect` keeps the database error in the panic output.
    sqlx::query(
        r#"
INSERT INTO failed_documents (
user_id, filename, original_filename, file_path, file_size,
mime_type, error_message, failure_reason, failure_stage, ingestion_source,
created_at, updated_at
)
SELECT
d.user_id, d.filename, d.original_filename, d.file_path, d.file_size,
d.mime_type, d.ocr_error,
CASE
WHEN d.ocr_failure_reason = 'low_ocr_confidence' THEN 'low_ocr_confidence'
WHEN d.ocr_failure_reason = 'timeout' THEN 'ocr_timeout'
WHEN d.ocr_failure_reason = 'memory_limit' THEN 'ocr_memory_limit'
WHEN d.ocr_failure_reason = 'pdf_parsing_error' THEN 'pdf_parsing_error'
WHEN d.ocr_failure_reason = 'corrupted' OR d.ocr_failure_reason = 'file_corrupted' THEN 'file_corrupted'
WHEN d.ocr_failure_reason = 'unsupported_format' THEN 'unsupported_format'
WHEN d.ocr_failure_reason = 'access_denied' THEN 'access_denied'
ELSE 'other'
END as failure_reason,
'ocr' as failure_stage,
'migration' as ingestion_source,
d.created_at, d.updated_at
FROM documents d
WHERE d.ocr_status = 'failed' AND d.user_id = $1
"#,
    )
    .bind(user_id)
    .execute(pool)
    .await
    .expect("Migration failed");

    // Every seeded document must have produced exactly one migrated row.
    let migrated_count: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration' AND user_id = $1",
    )
    .bind(user_id)
    .fetch_one(pool)
    .await
    .expect("Failed to count migrated documents");
    assert_eq!(migrated_count, expected_count);

    // Per-file checks: the reason mapping and the error text copied from `ocr_error`.
    for &(filename, _, error_msg, expected_reason) in &test_documents {
        let row = sqlx::query(
            "SELECT failure_reason, error_message FROM failed_documents WHERE filename = $1 AND user_id = $2",
        )
        .bind(filename)
        .bind(user_id)
        .fetch_one(pool)
        .await
        .expect("Failed to fetch migrated document");

        let actual_reason: String = row.get("failure_reason");
        assert_eq!(
            actual_reason, expected_reason,
            "Incorrect mapping for {}",
            filename
        );

        // Decoded as Option so a NULL shows up as a readable assertion failure.
        let actual_error: Option<String> = row.get("error_message");
        assert_eq!(
            actual_error.as_deref(),
            Some(error_msg),
            "Error message not carried over for {}",
            filename
        );
    }

    // Delete the original failed documents, as the migration would after copying them.
    sqlx::query("DELETE FROM documents WHERE ocr_status = 'failed' AND user_id = $1")
        .bind(user_id)
        .execute(pool)
        .await
        .expect("Delete should succeed");

    // Verify cleanup: nothing failed remains in `documents` for this user.
    let remaining_failed: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND user_id = $1",
    )
    .bind(user_id)
    .fetch_one(pool)
    .await
    .expect("Failed to count remaining documents");
    assert_eq!(remaining_failed, 0);

    // Verify `failed_documents` still holds one well-formed row per seeded document.
    let failed_docs = sqlx::query(
        "SELECT filename, failure_reason, failure_stage FROM failed_documents WHERE user_id = $1 ORDER BY filename",
    )
    .bind(user_id)
    .fetch_all(pool)
    .await
    .expect("Failed to fetch failed documents");
    assert_eq!(failed_docs.len(), test_documents.len());

    for doc in &failed_docs {
        let filename: String = doc.get("filename");

        let stage: String = doc.get("failure_stage");
        assert_eq!(stage, "ocr", "Unexpected failure_stage for {}", filename);

        let reason: String = doc.get("failure_reason");
        assert!(
            matches!(
                reason.as_str(),
                "low_ocr_confidence" | "ocr_timeout" | "ocr_memory_limit" | "file_corrupted" | "other"
            ),
            "Unexpected failure_reason '{}' for {}",
            reason,
            filename
        );
    }
}
/// Checks that `ocr_failure_reason` values outside the known mapping (an empty string, the
/// literal text `null`, punctuation, and a very long value) are all migrated with
/// `failure_reason = 'other'`.
///
/// The migrated rows are tagged with `ingestion_source = 'migration_edge_test'` and all
/// queries are scoped to a user created by this test.
///
/// # Panics
///
/// Panics (failing the test) if any query errors, if the number of migrated rows differs
/// from the number of seeded documents, or if any row maps to something other than `other`.
#[tokio::test]
async fn test_migration_with_edge_cases() {
    let ctx = TestContext::new().await;
    let pool = ctx.state.db.get_pool();

    // Create a dedicated user; the timestamp suffix keeps username/email distinct between
    // runs.
    let user_id = Uuid::new_v4();
    let unique_suffix = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock is set before the Unix epoch")
        .as_nanos();
    let username = format!("test_edge_user_{}", unique_suffix);
    let email = format!("test_edge_{}@example.com", unique_suffix);
    sqlx::query(
        "INSERT INTO users (id, username, email, password_hash, role)
VALUES ($1, $2, $3, $4, $5)",
    )
    .bind(user_id)
    .bind(&username)
    .bind(&email)
    .bind("hash")
    .bind("user")
    .execute(pool)
    .await
    .expect("Failed to insert test user");

    // Edge cases that might break migration: (filename, ocr_failure_reason, ocr_error).
    let edge_cases = [
        ("empty.txt", Some(""), "Empty reason"),
        ("null_like.pdf", Some("null"), "Null-like value"),
        ("special_chars.pdf", Some("special!@#$%"), "Special characters"),
        (
            "very_long_reason.pdf",
            Some("this_is_a_very_long_failure_reason_that_might_cause_issues"),
            "Long reason",
        ),
    ];

    for &(filename, failure_reason, error_msg) in &edge_cases {
        sqlx::query(
            r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
mime_type, ocr_status, ocr_failure_reason, ocr_error
) VALUES (
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
        )
        .bind(user_id)
        .bind(filename)
        .bind(failure_reason)
        .bind(error_msg)
        .execute(pool)
        .await
        .expect("Failed to insert edge case document");
    }

    // Run the migration on the edge cases. `expect` keeps the database error in the panic
    // output instead of reducing it to a boolean.
    sqlx::query(
        r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
)
SELECT
d.user_id, d.filename,
CASE
WHEN d.ocr_failure_reason = 'low_ocr_confidence' THEN 'low_ocr_confidence'
WHEN d.ocr_failure_reason = 'timeout' THEN 'ocr_timeout'
WHEN d.ocr_failure_reason = 'memory_limit' THEN 'ocr_memory_limit'
WHEN d.ocr_failure_reason = 'pdf_parsing_error' THEN 'pdf_parsing_error'
WHEN d.ocr_failure_reason = 'corrupted' OR d.ocr_failure_reason = 'file_corrupted' THEN 'file_corrupted'
WHEN d.ocr_failure_reason = 'unsupported_format' THEN 'unsupported_format'
WHEN d.ocr_failure_reason = 'access_denied' THEN 'access_denied'
ELSE 'other'
END as failure_reason,
'ocr' as failure_stage,
'migration_edge_test' as ingestion_source
FROM documents d
WHERE d.ocr_status = 'failed' AND d.user_id = $1
"#,
    )
    .bind(user_id)
    .execute(pool)
    .await
    .expect("Migration should handle edge cases");

    let edge_case_mappings = sqlx::query(
        "SELECT filename, failure_reason FROM failed_documents WHERE ingestion_source = 'migration_edge_test' AND user_id = $1",
    )
    .bind(user_id)
    .fetch_all(pool)
    .await
    .expect("Failed to fetch edge case mappings");

    // Without this check the loop below would pass vacuously if nothing had been migrated.
    assert_eq!(
        edge_case_mappings.len(),
        edge_cases.len(),
        "Every edge case document should have been migrated"
    );

    // None of the edge-case reasons appear in the CASE mapping, so all must be 'other'.
    for mapping in edge_case_mappings {
        let filename: String = mapping.get("filename");
        let failure_reason: String = mapping.get("failure_reason");
        assert_eq!(
            failure_reason, "other",
            "Edge case '{}' should map to 'other'",
            filename
        );
    }
}
#[tokio::test]
async fn test_constraint_enforcement_during_migration() {
let ctx = TestContext::new().await;
let pool = ctx.state.db.get_pool();
// Create a test user first to avoid foreign key constraint violations
let user_id = Uuid::new_v4();
let unique_suffix = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let username = format!("test_constraint_user_{}", unique_suffix);
let email = format!("test_constraint_{}@example.com", unique_suffix);
sqlx::query(
"INSERT INTO users (id, username, email, password_hash, role)
VALUES ($1, $2, $3, $4, $5)"
)
.bind(user_id)
.bind(&username)
.bind(&email)
.bind("hash")
.bind("user")
.execute(pool)
.await
.unwrap();
// Try to insert data that violates constraints
let invalid_insert = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
$1, 'invalid_test.pdf', 'migration_completed', 'migration', 'test'
)
"#
)
.bind(user_id)
.execute(pool)
.await;
// This should fail due to constraint violation
assert!(invalid_insert.is_err(), "Invalid failure_reason should be rejected");
// Verify the specific constraint that caught it
if let Err(sqlx::Error::Database(db_err)) = invalid_insert {
let error_message = db_err.message();
assert!(
error_message.contains("check_failure_reason") ||
error_message.contains("constraint"),
"Error should mention constraint violation: {}",
error_message
);
}
}
}