Files
readur/tests/integration_failed_documents_tests.rs

283 lines
12 KiB
Rust

use readur::db::constraint_validation::ConstraintValidator;
/// Simple unit tests for failed_documents functionality
/// These tests focus on business logic and constraint validation
/// without requiring live database connections during compilation
#[cfg(test)]
mod failed_documents_unit_tests {
use super::*;
/// Checks the failure-reason validator against a list of values that must be
/// accepted and a list of values (including the empty string) that must be rejected.
#[test]
fn test_constraint_validator_failure_reasons() {
    // Reasons the validator is expected to accept.
    let accepted: &[&str] = &[
        "duplicate_content",
        "duplicate_filename",
        "unsupported_format",
        "file_too_large",
        "file_corrupted",
        "access_denied",
        "low_ocr_confidence",
        "ocr_timeout",
        "ocr_memory_limit",
        "pdf_parsing_error",
        "storage_quota_exceeded",
        "network_error",
        "permission_denied",
        "virus_detected",
        "invalid_structure",
        "policy_violation",
        "other",
    ];
    // Values the validator is expected to reject.
    let rejected: &[&str] = &[
        "invalid_reason",
        "unknown",
        "timeout",
        "migration_completed",
        "",
        "random_text",
        "failure",
        "error",
    ];

    for candidate in accepted.iter().copied() {
        let outcome = ConstraintValidator::validate_failure_reason(candidate);
        assert!(outcome.is_ok(), "Expected '{}' to be valid", candidate);
    }

    for candidate in rejected.iter().copied() {
        let outcome = ConstraintValidator::validate_failure_reason(candidate);
        assert!(outcome.is_err(), "Expected '{}' to be invalid", candidate);
    }
}
/// Checks the failure-stage validator: each listed stage must be accepted and each
/// listed bogus value (including the empty string) must be rejected.
#[test]
fn test_constraint_validator_failure_stages() {
    // Stages expected to pass validation.
    let accepted = ["ingestion", "validation", "ocr", "storage", "processing", "sync"];
    accepted.iter().for_each(|&stage| {
        assert!(
            ConstraintValidator::validate_failure_stage(stage).is_ok(),
            "Expected '{}' to be valid",
            stage
        );
    });

    // Values expected to fail validation.
    let rejected = ["invalid_stage", "unknown", "failed", "error", "", "random_text"];
    rejected.iter().for_each(|&stage| {
        assert!(
            ConstraintValidator::validate_failure_stage(stage).is_err(),
            "Expected '{}' to be invalid",
            stage
        );
    });
}
/// Checks that each legacy OCR failure reason is translated to the expected current
/// failure reason. `None`, the empty string, "unknown" and an unrecognised value are
/// all expected to map to "other".
#[test]
fn test_legacy_ocr_failure_mapping() {
    // Each row is (legacy input, expected mapped reason).
    let expectations: &[(Option<&str>, &str)] = &[
        (Some("low_ocr_confidence"), "low_ocr_confidence"),
        (Some("timeout"), "ocr_timeout"),
        (Some("memory_limit"), "ocr_memory_limit"),
        (Some("pdf_parsing_error"), "pdf_parsing_error"),
        (Some("corrupted"), "file_corrupted"),
        (Some("file_corrupted"), "file_corrupted"),
        (Some("unsupported_format"), "unsupported_format"),
        (Some("access_denied"), "access_denied"),
        (Some("unknown"), "other"),
        (None, "other"),
        (Some("unmapped_value"), "other"),
        (Some(""), "other"),
    ];

    for &(legacy, wanted) in expectations {
        let actual = ConstraintValidator::map_legacy_ocr_failure_reason(legacy);
        assert_eq!(
            actual, wanted,
            "Failed for input: {:?}. Expected '{}', got '{}'",
            legacy, wanted, actual
        );
    }
}
/// Checks that whatever the legacy mapper returns, for known inputs, `None` and an
/// unrecognised input alike, is itself accepted by the failure-reason validator.
#[test]
fn test_mapped_legacy_values_are_valid() {
    // Legacy inputs to push through the mapper.
    let legacy_inputs: &[Option<&str>] = &[
        Some("low_ocr_confidence"),
        Some("timeout"),
        Some("memory_limit"),
        Some("pdf_parsing_error"),
        Some("corrupted"),
        Some("file_corrupted"),
        Some("unsupported_format"),
        Some("access_denied"),
        Some("unknown"),
        None,
        Some("random_unmapped_value"),
    ];

    for legacy in legacy_inputs.iter().copied() {
        let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy);
        let verdict = ConstraintValidator::validate_failure_reason(mapped);
        assert!(
            verdict.is_ok(),
            "Mapped value '{}' from legacy '{:?}' should be valid",
            mapped, legacy
        );
    }
}
/// Exercises batch validation of failure reasons with an all-valid batch, a batch
/// containing two invalid entries, and an empty batch.
#[test]
fn test_batch_validation() {
    // A batch made up solely of valid reasons passes.
    let all_good = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"];
    let all_good_outcome = ConstraintValidator::validate_failure_reasons_batch(&all_good);
    assert!(all_good_outcome.is_ok());

    // A batch with two bad entries fails and reports one error per bad entry.
    let mixed = ["other", "invalid_reason", "timeout", "low_ocr_confidence"];
    let mixed_outcome = ConstraintValidator::validate_failure_reasons_batch(&mixed);
    assert!(mixed_outcome.is_err());
    let problems = mixed_outcome.unwrap_err();
    assert_eq!(problems.len(), 2); // Should have 2 invalid reasons
    for offender in ["invalid_reason", "timeout"] {
        // Each offending value must be mentioned by at least one error.
        assert!(problems.iter().any(|message| message.contains(offender)));
    }

    // An empty batch has nothing to reject.
    let no_reasons: &[&str] = &[];
    let empty_outcome = ConstraintValidator::validate_failure_reasons_batch(no_reasons);
    assert!(empty_outcome.is_ok());
}
/// Checks the wording of the validators' error messages: each must name the
/// offending value, introduce the list of valid values, and mention sample valid
/// values.
#[test]
fn test_constraint_error_messages() {
    // Failure-reason validator.
    {
        let outcome = ConstraintValidator::validate_failure_reason("invalid_reason");
        assert!(outcome.is_err());
        let reason_error = outcome.unwrap_err();
        let mentions = |fragment: &str| reason_error.contains(fragment);
        assert!(mentions("Invalid failure_reason 'invalid_reason'"));
        assert!(mentions("Valid values are:"));
        assert!(mentions("low_ocr_confidence"));
        assert!(mentions("other"));
    }

    // Failure-stage validator.
    {
        let outcome = ConstraintValidator::validate_failure_stage("invalid_stage");
        assert!(outcome.is_err());
        let stage_error = outcome.unwrap_err();
        let mentions = |fragment: &str| stage_error.contains(fragment);
        assert!(mentions("Invalid failure_stage 'invalid_stage'"));
        assert!(mentions("Valid values are:"));
        assert!(mentions("ingestion"));
        assert!(mentions("ocr"));
    }
}
/// Walks through the failure reasons by category (OCR, file, duplicate, system,
/// security, fallback) and checks that the validator accepts every one of them.
#[test]
fn test_constraint_validation_comprehensive() {
    // One assertion per reason is kept so a failure names the exact value.
    let accepts = |reason: &str| ConstraintValidator::validate_failure_reason(reason).is_ok();

    // OCR-related failures
    assert!(accepts("low_ocr_confidence"));
    assert!(accepts("ocr_timeout"));
    assert!(accepts("ocr_memory_limit"));
    assert!(accepts("pdf_parsing_error"));

    // File-related failures
    assert!(accepts("file_too_large"));
    assert!(accepts("file_corrupted"));
    assert!(accepts("unsupported_format"));
    assert!(accepts("access_denied"));

    // Duplicate detection
    assert!(accepts("duplicate_content"));
    assert!(accepts("duplicate_filename"));

    // System-related failures
    assert!(accepts("storage_quota_exceeded"));
    assert!(accepts("network_error"));
    assert!(accepts("permission_denied"));

    // Security-related failures
    assert!(accepts("virus_detected"));
    assert!(accepts("policy_violation"));
    assert!(accepts("invalid_structure"));

    // Fallback
    assert!(accepts("other"));
}
/// Checks that the stage validator accepts every stage listed here, grouped as
/// initial, core, and storage/sync stages.
#[test]
fn test_failure_stages_comprehensive() {
    /// True when the stage validator accepts `stage`.
    fn stage_ok(stage: &str) -> bool {
        ConstraintValidator::validate_failure_stage(stage).is_ok()
    }

    // Initial processing stages
    assert!(stage_ok("ingestion"));
    assert!(stage_ok("validation"));

    // Core processing stages
    assert!(stage_ok("ocr"));
    assert!(stage_ok("processing"));

    // Storage and sync stages
    assert!(stage_ok("storage"));
    assert!(stage_ok("sync"));
}
/// Checks that every legacy OCR failure reason maps to a value the validator
/// accepts, and that a reason outside the known legacy set falls back to "other".
#[test]
fn test_legacy_mapping_completeness() {
    // Legacy reasons treated as known; the fallback check below skips these.
    let known_legacy = [
        "low_ocr_confidence",
        "timeout",
        "memory_limit",
        "pdf_parsing_error",
        "corrupted",
        "file_corrupted",
        "unsupported_format",
        "access_denied",
        "unknown",
    ];

    // Inputs under test: all known reasons followed by one unrecognised reason.
    let inputs = known_legacy
        .iter()
        .copied()
        .chain(std::iter::once("some_new_unmapped_reason"));

    for legacy_reason in inputs {
        let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason));

        // Whatever the mapping produced must itself be a valid failure reason.
        let verdict = ConstraintValidator::validate_failure_reason(mapped);
        assert!(
            verdict.is_ok(),
            "Legacy reason '{}' maps to '{}' which should be valid",
            legacy_reason, mapped
        );

        // Anything not in the known set must fall back to "other".
        let is_known = known_legacy.contains(&legacy_reason);
        if !is_known {
            assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'");
        }
    }
}
/// Checks that both validators are case-sensitive: mixed- or upper-case spellings
/// are rejected and only the exact lowercase spellings are accepted.
#[test]
fn test_case_sensitivity() {
    // Failure reasons with the wrong casing must be rejected.
    assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err());
    assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err());

    // Failure stages with the wrong casing must be rejected. "OCR" and "INGESTION"
    // are stage names, so they are checked with the stage validator; sending them to
    // the reason validator would not exercise stage case handling at all.
    assert!(ConstraintValidator::validate_failure_stage("OCR").is_err());
    assert!(ConstraintValidator::validate_failure_stage("INGESTION").is_err());

    // Only exact lowercase matches should work
    assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
    assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
    assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
}
/// Checks that leading or trailing spaces make an otherwise valid value invalid,
/// while the exact unpadded value is still accepted.
#[test]
fn test_whitespace_handling() {
    let reason_rejected = |raw: &str| ConstraintValidator::validate_failure_reason(raw).is_err();
    let stage_rejected = |raw: &str| ConstraintValidator::validate_failure_stage(raw).is_err();

    // Padded reasons: leading, trailing, and both.
    assert!(reason_rejected(" low_ocr_confidence"));
    assert!(reason_rejected("low_ocr_confidence "));
    assert!(reason_rejected(" low_ocr_confidence "));

    // Padded stages: leading and trailing.
    assert!(stage_rejected(" ocr"));
    assert!(stage_rejected("ocr "));

    // The exact, unpadded values remain valid.
    assert!(!reason_rejected("low_ocr_confidence"));
    assert!(!stage_rejected("ocr"));
}
}