// readur/tests/latest_migration_tests.rs

use readur::test_utils::TestContext;
use sqlx::{PgPool, Row};
use uuid::Uuid;
use std::path::Path;
use sha2::{Sha256, Digest};
#[cfg(test)]
mod latest_migration_tests {
use super::*;
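// These tests exercise only the most recently added migration: each one brings a
// fresh database up to the previous schema state, seeds it with data, applies the
// latest migration on its own, and then checks data integrity, edge cases, schema
// changes, and runtime.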
#[tokio::test]
async fn test_latest_migration_from_previous_state() {
// Step 1: Get the migration files and identify the latest two
let migration_files = get_sorted_migration_files();
if migration_files.len() < 2 {
println!("✅ Only one or no migrations found - skipping previous state test");
return;
}
let second_to_last = &migration_files[migration_files.len() - 2];
let latest = &migration_files[migration_files.len() - 1];
println!("🔄 Testing migration from second-to-last to latest:");
println!(" Previous: {}", extract_migration_name(second_to_last));
println!(" Latest: {}", extract_migration_name(latest));
// Step 2: Create a fresh database and apply migrations up to second-to-last
let ctx = TestContext::new().await;
let pool = ctx.state.db.get_pool();
// Apply every migration except the latest one
let target_index = migration_files.iter()
.position(|f| f == second_to_last)
.expect("Second-to-last migration not found");
// Apply migrations up to target_index (excluding the latest)
apply_selected_migrations(pool, &migration_files[..target_index+1]).await;
// Step 3: Prefill the database with realistic data in the previous state
let test_data = prefill_database_for_previous_state(pool).await;
// Step 4: Apply the latest migration
apply_single_migration(pool, latest).await;
// Step 5: Validate the migration succeeded and data is intact
validate_latest_migration_success(pool, &test_data, latest).await;
println!("✅ Latest migration successfully applied from previous state");
}
#[tokio::test]
async fn test_latest_migration_with_edge_case_data() {
let migration_files = get_sorted_migration_files();
if migration_files.len() < 2 {
println!("✅ Only one or no migrations found - skipping edge case test");
return;
}
let second_to_last = &migration_files[migration_files.len() - 2];
let latest = &migration_files[migration_files.len() - 1];
println!("🧪 Testing latest migration with edge case data:");
println!(" Testing migration: {}", extract_migration_name(latest));
let ctx = TestContext::new().await;
let pool = ctx.state.db.get_pool();
// Apply migrations up to second-to-last
let target_index = migration_files.iter()
.position(|f| f == second_to_last)
.expect("Second-to-last migration not found");
apply_selected_migrations(pool, &migration_files[..target_index+1]).await;
// Create edge case data that might break the migration
let edge_case_data = create_edge_case_data(pool).await;
// Apply the latest migration
let migration_result = apply_single_migration_safe(pool, latest).await;
match migration_result {
Ok(_) => {
println!("✅ Latest migration handled edge cases successfully");
validate_edge_case_migration(pool, &edge_case_data).await;
}
Err(e) => {
panic!("❌ Latest migration failed with edge case data: {:?}", e);
}
}
}
#[tokio::test]
async fn test_latest_migration_rollback_safety() {
let migration_files = get_sorted_migration_files();
if migration_files.len() < 2 {
println!("✅ Only one or no migrations found - skipping rollback safety test");
return;
}
let second_to_last = &migration_files[migration_files.len() - 2];
let latest = &migration_files[migration_files.len() - 1];
println!("🔒 Testing rollback safety for latest migration:");
let ctx = TestContext::new().await;
let pool = ctx.state.db.get_pool();
// Apply migrations up to second-to-last
let target_index = migration_files.iter()
.position(|f| f == second_to_last)
.expect("Second-to-last migration not found");
apply_selected_migrations(pool, &migration_files[..target_index+1]).await;
// Capture schema snapshot before latest migration
let schema_before = capture_schema_snapshot(pool).await;
// Apply latest migration
apply_single_migration(pool, latest).await;
// Capture schema after latest migration
let schema_after = capture_schema_snapshot(pool).await;
// Validate schema changes are reasonable
validate_schema_changes(&schema_before, &schema_after, latest);
// Test that the migration doesn't break existing functionality
test_basic_database_operations(pool).await;
println!("✅ Latest migration rollback safety verified");
}
#[tokio::test]
async fn test_latest_migration_performance() {
let migration_files = get_sorted_migration_files();
if migration_files.is_empty() {
println!("✅ No migrations found - skipping performance test");
return;
}
let latest = &migration_files[migration_files.len() - 1];
println!("⚡ Testing performance of latest migration:");
println!(" Migration: {}", extract_migration_name(latest));
let ctx = TestContext::new().await;
let pool = ctx.state.db.get_pool();
// Apply all migrations except the latest
if migration_files.len() > 1 {
let second_to_last = &migration_files[migration_files.len() - 2];
let target_index = migration_files.iter()
.position(|f| f == second_to_last)
.expect("Second-to-last migration not found");
apply_selected_migrations(pool, &migration_files[..target_index+1]).await;
}
// Create a substantial amount of data
create_performance_test_data(pool, 1000).await;
// Measure migration time
let start_time = std::time::Instant::now();
apply_single_migration(pool, latest).await;
let migration_duration = start_time.elapsed();
println!("⏱️ Latest migration completed in: {:?}", migration_duration);
// Performance assertion - should complete reasonably fast even with data
assert!(
migration_duration.as_secs() < 10,
"Latest migration took too long: {:?}. Consider optimizing for larger datasets.",
migration_duration
);
// Verify data integrity after migration
verify_data_integrity_after_performance_test(pool).await;
println!("✅ Latest migration performance acceptable");
}
// Helper functions
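/// Rows seeded before the latest migration runs, kept so validation can confirm
/// nothing was lost or corrupted by the migration.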
struct TestData {
users: Vec<TestUser>,
documents: Vec<TestDocument>,
failed_documents: Vec<TestFailedDocument>,
metadata: DatabaseMetadata,
}
struct TestUser {
id: Uuid,
username: String,
email: String,
}
struct TestDocument {
id: Uuid,
user_id: Uuid,
filename: String,
status: String,
}
struct TestFailedDocument {
id: Uuid,
user_id: Uuid,
filename: String,
reason: String,
}
struct DatabaseMetadata {
table_count: usize,
total_records: usize,
schema_version: String,
}
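/// View of the public schema (table names, per-table column names, and
/// constraint names) captured before and after a migration so the two states
/// can be compared.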
struct SchemaSnapshot {
tables: Vec<String>,
columns: std::collections::HashMap<String, Vec<String>>,
constraints: Vec<String>,
}
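/// Lists every `.sql` file under `migrations/` and sorts the paths lexically,
/// which matches chronological order as long as filenames follow sqlx's
/// `<version>_<description>.sql` convention with a numeric version prefix.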
fn get_sorted_migration_files() -> Vec<String> {
let migrations_dir = Path::new("migrations");
let mut files = Vec::new();
if let Ok(entries) = std::fs::read_dir(migrations_dir) {
for entry in entries {
if let Ok(entry) = entry {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("sql") {
files.push(path.to_string_lossy().to_string());
}
}
}
}
files.sort();
files
}
fn extract_migration_name(filepath: &str) -> String {
Path::new(filepath)
.file_name()
.unwrap()
.to_string_lossy()
.to_string()
}
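/// Applies the given migration files in order, skipping any already recorded,
/// and mirrors sqlx's bookkeeping by inserting a row into `_sqlx_migrations` for
/// each. The stored checksum is a SHA-256 of the file contents for test-side
/// tracking only and is not guaranteed to match what the `sqlx migrate` CLI
/// would record.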
async fn apply_selected_migrations(pool: &PgPool, migration_files: &[String]) {
// Create the migrations table if it doesn't exist
sqlx::query(
"CREATE TABLE IF NOT EXISTS _sqlx_migrations (
version BIGINT PRIMARY KEY,
description TEXT NOT NULL,
installed_on TIMESTAMPTZ NOT NULL DEFAULT NOW(),
success BOOLEAN NOT NULL,
checksum BYTEA NOT NULL,
execution_time BIGINT NOT NULL
)"
)
.execute(pool)
.await
.expect("Failed to create migrations table");
for migration_file in migration_files {
let migration_name = extract_migration_name(migration_file);
// Extract version from filename
let version = migration_name
.split('_')
.next()
.and_then(|s| s.parse::<i64>().ok())
.expect(&format!("Failed to parse migration version from {}", migration_name));
// Check if this migration is already applied
let exists = sqlx::query_scalar::<_, bool>(
"SELECT EXISTS(SELECT 1 FROM _sqlx_migrations WHERE version = $1)"
)
.bind(version)
.fetch_one(pool)
.await
.unwrap_or(false);
if exists {
println!(" ⏭️ Skipped (already applied): {}", migration_name);
continue;
}
// Apply this migration
let content = std::fs::read_to_string(migration_file)
.expect(&format!("Failed to read migration file: {}", migration_file));
let start_time = std::time::Instant::now();
// Use raw SQL execution to handle complex PostgreSQL statements including functions
sqlx::raw_sql(&content)
.execute(pool)
.await
.expect(&format!("Failed to apply migration: {}", migration_name));
let execution_time = start_time.elapsed().as_millis() as i64;
let checksum = Sha256::digest(content.as_bytes()).to_vec();
// Record the migration as applied
sqlx::query(
"INSERT INTO _sqlx_migrations (version, description, success, checksum, execution_time)
VALUES ($1, $2, $3, $4, $5)"
)
.bind(version)
.bind(migration_name.clone())
.bind(true)
.bind(checksum)
.bind(execution_time)
.execute(pool)
.await
.expect("Failed to record migration");
println!(" ✓ Applied: {}", migration_name);
}
}
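/// Panicking wrapper around `apply_single_migration_safe` for tests that require
/// the migration to succeed.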
async fn apply_single_migration(pool: &PgPool, migration_file: &str) {
let result = apply_single_migration_safe(pool, migration_file).await;
result.expect(&format!("Failed to apply migration: {}", migration_file));
}
async fn apply_single_migration_safe(pool: &PgPool, migration_file: &str) -> Result<(), sqlx::Error> {
let content = std::fs::read_to_string(migration_file)
.expect(&format!("Failed to read migration file: {}", migration_file));
let migration_name = extract_migration_name(migration_file);
println!(" 🔄 Applying: {}", migration_name);
// Use raw SQL execution to handle complex PostgreSQL statements including functions
sqlx::raw_sql(&content).execute(pool).await?;
println!(" ✅ Applied: {}", migration_name);
Ok(())
}
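/// Seeds users, documents, and failed_documents (the latter two only when the
/// tables exist in the current schema state) so the latest migration runs
/// against realistic data. Column presence is probed via information_schema
/// because the documents table's shape differs between migration states.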
async fn prefill_database_for_previous_state(pool: &PgPool) -> TestData {
let mut users = Vec::new();
let mut documents = Vec::new();
let mut failed_documents = Vec::new();
// Create test users with unique timestamps to avoid conflicts
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
for i in 0..5 {
let user_id = Uuid::new_v4();
let username = format!("previous_state_user_{}_{}", i, timestamp);
let email = format!("previous_{}_{}@test.com", i, timestamp);
sqlx::query(
"INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, $4, $5)"
)
.bind(user_id)
.bind(&username)
.bind(&email)
.bind("test_hash")
.bind("user")
.execute(pool)
.await
.expect("Failed to create test user");
users.push(TestUser { id: user_id, username, email });
}
// Create test documents for each user
for user in &users {
for j in 0..3 {
let doc_id = Uuid::new_v4();
let filename = format!("previous_doc_{}_{}.pdf", user.username, j);
let status = if j == 0 { "completed" } else { "failed" };
// Check if documents table exists before inserting
let table_exists = sqlx::query(
"SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'documents')"
)
.fetch_one(pool)
.await
.unwrap()
.get::<bool, _>(0);
if table_exists {
// Check if original_filename column exists
let original_filename_exists = sqlx::query_scalar::<_, bool>(
"SELECT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'documents' AND column_name = 'original_filename')"
)
.fetch_one(pool)
.await
.unwrap_or(false);
if original_filename_exists {
sqlx::query(
"INSERT INTO documents (id, user_id, filename, original_filename, file_path, file_size, mime_type, ocr_status)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)"
)
.bind(doc_id)
.bind(user.id)
.bind(&filename)
.bind(&filename) // Use same filename for original_filename
.bind(format!("/test/{}", filename))
.bind(1024_i64)
.bind("application/pdf")
.bind(status)
.execute(pool)
.await
.expect("Failed to create test document");
} else {
sqlx::query(
"INSERT INTO documents (id, user_id, filename, file_path, file_size, mime_type, ocr_status)
VALUES ($1, $2, $3, $4, $5, $6, $7)"
)
.bind(doc_id)
.bind(user.id)
.bind(&filename)
.bind(format!("/test/{}", filename))
.bind(1024_i64)
.bind("application/pdf")
.bind(status)
.execute(pool)
.await
.expect("Failed to create test document");
}
}
documents.push(TestDocument {
id: doc_id,
user_id: user.id,
filename,
status: status.to_string(),
});
}
}
// Create failed documents if the table exists
let failed_docs_exists = sqlx::query(
"SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'failed_documents')"
)
.fetch_one(pool)
.await
.unwrap()
.get::<bool, _>(0);
if failed_docs_exists {
for user in &users {
let failed_id = Uuid::new_v4();
let filename = format!("failed_previous_{}.pdf", user.username);
sqlx::query(
"INSERT INTO failed_documents (id, user_id, filename, failure_reason, failure_stage, ingestion_source)
VALUES ($1, $2, $3, $4, $5, $6)"
)
.bind(failed_id)
.bind(user.id)
.bind(&filename)
.bind("other")
.bind("ocr")
.bind("test")
.execute(pool)
.await
.expect("Failed to create test failed document");
failed_documents.push(TestFailedDocument {
id: failed_id,
user_id: user.id,
filename,
reason: "other".to_string(),
});
}
}
let total_records = users.len() + documents.len() + failed_documents.len();
TestData {
users,
documents,
failed_documents,
metadata: DatabaseMetadata {
table_count: get_table_count(pool).await,
total_records,
schema_version: "previous".to_string(),
},
}
}
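/// Inserts users with awkward values (unicode, long, and special-character
/// emails) that are the most likely to trip up constraint or data-type changes
/// introduced by a new migration.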
async fn create_edge_case_data(pool: &PgPool) -> TestData {
let mut users = Vec::new();
let documents = Vec::new();
let failed_documents = Vec::new();
// Create edge case users with unique timestamps
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let long_string_email = format!("{}_{}@test.com", "a".repeat(30), timestamp);
let special_email = format!("user_{}@domain.com", timestamp);
let unicode_email = format!("test_ñäme_{}@tëst.com", timestamp);
let edge_cases = vec![
("edge_empty_", ""),
("edge_special_", special_email.as_str()),
("edge_unicode_", unicode_email.as_str()),
("edge_long_", long_string_email.as_str()),
];
for (prefix, suffix) in edge_cases {
let user_id = Uuid::new_v4();
let username = format!("{}{}_{}", prefix, user_id.to_string().split('-').next().unwrap(), timestamp);
let email = if suffix.is_empty() {
format!("{}_{}@test.com", user_id.to_string().split('-').next().unwrap(), timestamp)
} else {
suffix.to_string()
};
sqlx::query(
"INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, $4, $5)"
)
.bind(user_id)
.bind(&username)
.bind(&email)
.bind("test_hash")
.bind("user")
.execute(pool)
.await
.expect("Failed to create edge case user");
users.push(TestUser { id: user_id, username, email });
}
let total_records = users.len();
TestData {
users,
documents,
failed_documents,
metadata: DatabaseMetadata {
table_count: get_table_count(pool).await,
total_records,
schema_version: "edge_case".to_string(),
},
}
}
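/// Confirms the seeded users survived the latest migration, reports table counts
/// before and after, and re-checks that basic inserts still work.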
async fn validate_latest_migration_success(pool: &PgPool, test_data: &TestData, migration_file: &str) {
let migration_name = extract_migration_name(migration_file);
// Verify that our test data still exists
let user_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM users")
.fetch_one(pool)
.await
.unwrap();
assert!(
user_count >= test_data.users.len() as i64,
"User data lost after migration {}",
migration_name
);
// Check that the migration was applied successfully by verifying the schema
let current_table_count = get_table_count(pool).await;
println!(" 📊 Validation results:");
println!(" - Users preserved: {} / {}", user_count, test_data.users.len());
println!(" - Tables before: {}", test_data.metadata.table_count);
println!(" - Tables after: {}", current_table_count);
// Test basic database operations still work
test_basic_database_operations(pool).await;
}
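/// Verifies, row by row, that every edge case user is still present after the
/// migration.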
async fn validate_edge_case_migration(pool: &PgPool, test_data: &TestData) {
// Verify edge case data survived migration
for user in &test_data.users {
let user_exists = sqlx::query(
"SELECT 1 FROM users WHERE id = $1"
)
.bind(user.id)
.fetch_optional(pool)
.await
.unwrap();
assert!(
user_exists.is_some(),
"Edge case user {} lost during migration",
user.username
);
}
println!(" ✅ All edge case data preserved");
}
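/// Reads table names, per-table column names, and constraint names for the
/// public schema from information_schema.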
async fn capture_schema_snapshot(pool: &PgPool) -> SchemaSnapshot {
// Get all tables
let tables = sqlx::query(
"SELECT table_name FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
)
.fetch_all(pool)
.await
.unwrap();
let table_names: Vec<String> = tables.iter()
.map(|row| row.get("table_name"))
.collect();
// Get columns for each table
let mut columns = std::collections::HashMap::new();
for table in &table_names {
let table_columns = sqlx::query(
"SELECT column_name FROM information_schema.columns
WHERE table_schema = 'public' AND table_name = $1"
)
.bind(table)
.fetch_all(pool)
.await
.unwrap();
let column_names: Vec<String> = table_columns.iter()
.map(|row| row.get("column_name"))
.collect();
columns.insert(table.clone(), column_names);
}
// Get constraints
let constraints = sqlx::query(
"SELECT constraint_name FROM information_schema.table_constraints
WHERE table_schema = 'public'"
)
.fetch_all(pool)
.await
.unwrap();
let constraint_names: Vec<String> = constraints.iter()
.map(|row| row.get("constraint_name"))
.collect();
SchemaSnapshot {
tables: table_names,
columns,
constraints: constraint_names,
}
}
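/// Diffs two schema snapshots and logs added tables, removed tables, and the
/// number of new constraints. Removed tables are only warned about here; add an
/// assertion if table drops should be treated as failures.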
fn validate_schema_changes(before: &SchemaSnapshot, after: &SchemaSnapshot, migration_file: &str) {
let migration_name = extract_migration_name(migration_file);
// Check for new tables
let new_tables: Vec<_> = after.tables.iter()
.filter(|table| !before.tables.contains(table))
.collect();
if !new_tables.is_empty() {
println!(" 📋 New tables added by {}: {:?}", migration_name, new_tables);
}
// Check for removed tables (should be rare and carefully considered)
let removed_tables: Vec<_> = before.tables.iter()
.filter(|table| !after.tables.contains(table))
.collect();
if !removed_tables.is_empty() {
println!(" ⚠️ Tables removed by {}: {:?}", migration_name, removed_tables);
// Note: In production, you might want to assert this is intentional
}
// Check for new constraints
let new_constraints: Vec<_> = after.constraints.iter()
.filter(|constraint| !before.constraints.contains(constraint))
.collect();
if !new_constraints.is_empty() {
println!(" 🔒 New constraints added: {}", new_constraints.len());
}
println!(" ✅ Schema changes validated");
}
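/// Smoke test: inserts and then deletes a throwaway user to prove that basic
/// writes still succeed against the post-migration schema.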
async fn test_basic_database_operations(pool: &PgPool) {
// Test that we can still perform basic operations
// Test user creation
let test_user_id = Uuid::new_v4();
let result = sqlx::query(
"INSERT INTO users (id, username, email, password_hash, role)
VALUES ($1, $2, $3, $4, $5)"
)
.bind(test_user_id)
.bind("operation_test_user")
.bind("operation_test@test.com")
.bind("test_hash")
.bind("user")
.execute(pool)
.await;
assert!(result.is_ok(), "Basic user creation should still work");
// Clean up
sqlx::query("DELETE FROM users WHERE id = $1")
.bind(test_user_id)
.execute(pool)
.await
.unwrap();
println!(" ✅ Basic database operations verified");
}
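/// Bulk-inserts `user_count` users one row at a time. This is deliberately
/// simple; batching the inserts would speed up setup if it ever becomes a
/// bottleneck.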
async fn create_performance_test_data(pool: &PgPool, user_count: usize) {
println!(" 📊 Creating {} users for performance testing...", user_count);
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
for i in 0..user_count {
let user_id = Uuid::new_v4();
let username = format!("perf_user_{}_{}", i, timestamp);
let email = format!("perf_{}_{}@test.com", i, timestamp);
sqlx::query(
"INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, $4, $5)"
)
.bind(user_id)
.bind(&username)
.bind(&email)
.bind("test_hash")
.bind("user")
.execute(pool)
.await
.expect("Failed to create performance test user");
}
println!(" ✅ Performance test data created");
}
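/// Sanity check that the performance fixture rows are still queryable after the
/// migration.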
async fn verify_data_integrity_after_performance_test(pool: &PgPool) {
let user_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM users")
.fetch_one(pool)
.await
.unwrap();
assert!(user_count > 0, "Performance test data should exist after migration");
println!(" ✅ Data integrity verified: {} users", user_count);
}
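/// Counts base tables in the public schema.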
async fn get_table_count(pool: &PgPool) -> usize {
let tables = sqlx::query(
"SELECT COUNT(*) as count FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
)
.fetch_one(pool)
.await
.unwrap();
tables.get::<i64, _>("count") as usize
}
}
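// A minimal sketch (not used by the tests above) of how the full migration chain
// could be applied with sqlx's built-in migrator instead of the manual helpers.
// It assumes sqlx's `migrate` feature is enabled and that the `migrations/`
// directory is reachable from the test working directory.
#[allow(dead_code)]
async fn apply_all_migrations_with_sqlx(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> {
// Load every migration from disk and let sqlx track them in `_sqlx_migrations`.
let migrator = sqlx::migrate::Migrator::new(Path::new("migrations")).await?;
migrator.run(pool).await?;
Ok(())
}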