// Mirror of https://github.com/readur/readur.git
// Synced 2025-12-30 11:00:17 -06:00
// 283 lines, 12 KiB, Rust
use readur::db::constraint_validation::ConstraintValidator;
/// Simple unit tests for failed_documents functionality.
///
/// These tests focus on business logic and constraint validation
/// without requiring live database connections during compilation.
#[cfg(test)]
mod failed_documents_unit_tests {
    use super::*;

    /// Every listed failure reason must validate, and every unlisted one must be rejected.
    #[test]
    fn test_constraint_validator_failure_reasons() {
        // Test all valid failure reasons
        let valid_reasons = [
            "duplicate_content", "duplicate_filename", "unsupported_format",
            "file_too_large", "file_corrupted", "access_denied",
            "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
            "pdf_parsing_error", "storage_quota_exceeded", "network_error",
            "permission_denied", "virus_detected", "invalid_structure",
            "policy_violation", "other",
        ];

        for reason in valid_reasons {
            assert!(
                ConstraintValidator::validate_failure_reason(reason).is_ok(),
                "Expected '{}' to be valid",
                reason
            );
        }

        // Test invalid failure reasons. Note that the legacy spelling "timeout"
        // and the empty string are both expected to be rejected here.
        let invalid_reasons = [
            "invalid_reason", "unknown", "timeout", "migration_completed",
            "", "random_text", "failure", "error",
        ];

        for reason in invalid_reasons {
            assert!(
                ConstraintValidator::validate_failure_reason(reason).is_err(),
                "Expected '{}' to be invalid",
                reason
            );
        }
    }

    /// Every listed failure stage must validate, and every unlisted one must be rejected.
    #[test]
    fn test_constraint_validator_failure_stages() {
        // Test all valid failure stages
        let valid_stages = [
            "ingestion", "validation", "ocr", "storage", "processing", "sync",
        ];

        for stage in valid_stages {
            assert!(
                ConstraintValidator::validate_failure_stage(stage).is_ok(),
                "Expected '{}' to be valid",
                stage
            );
        }

        // Test invalid failure stages
        let invalid_stages = [
            "invalid_stage", "unknown", "failed", "error", "", "random_text",
        ];

        for stage in invalid_stages {
            assert!(
                ConstraintValidator::validate_failure_stage(stage).is_err(),
                "Expected '{}' to be invalid",
                stage
            );
        }
    }

    /// Pins the exact output of the legacy OCR reason mapping for each
    /// `(legacy input, expected reason)` pair below.
    #[test]
    fn test_legacy_ocr_failure_mapping() {
        let test_cases = [
            (Some("low_ocr_confidence"), "low_ocr_confidence"),
            (Some("timeout"), "ocr_timeout"),
            (Some("memory_limit"), "ocr_memory_limit"),
            (Some("pdf_parsing_error"), "pdf_parsing_error"),
            (Some("corrupted"), "file_corrupted"),
            (Some("file_corrupted"), "file_corrupted"),
            (Some("unsupported_format"), "unsupported_format"),
            (Some("access_denied"), "access_denied"),
            // Everything below is expected to collapse to the "other" fallback.
            (Some("unknown"), "other"),
            (None, "other"),
            (Some("unmapped_value"), "other"),
            (Some(""), "other"),
        ];

        for (input, expected) in test_cases {
            let result = ConstraintValidator::map_legacy_ocr_failure_reason(input);
            assert_eq!(
                result, expected,
                "Failed for input: {:?}. Expected '{}', got '{}'",
                input, expected, result
            );
        }
    }

    /// Whatever the legacy mapping returns must itself pass reason validation.
    #[test]
    fn test_mapped_legacy_values_are_valid() {
        // Ensure all mapped legacy values are actually valid according to our constraints
        let legacy_values = [
            Some("low_ocr_confidence"),
            Some("timeout"),
            Some("memory_limit"),
            Some("pdf_parsing_error"),
            Some("corrupted"),
            Some("file_corrupted"),
            Some("unsupported_format"),
            Some("access_denied"),
            Some("unknown"),
            None,
            Some("random_unmapped_value"),
        ];

        for legacy_value in legacy_values {
            let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy_value);
            assert!(
                ConstraintValidator::validate_failure_reason(mapped).is_ok(),
                "Mapped value '{}' from legacy '{:?}' should be valid",
                mapped, legacy_value
            );
        }
    }

    /// Batch validation accepts all-valid and empty input, and reports one
    /// error per invalid entry otherwise.
    #[test]
    fn test_batch_validation() {
        // Test valid batch
        let valid_batch = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"];
        assert!(ConstraintValidator::validate_failure_reasons_batch(&valid_batch).is_ok());

        // Test invalid batch
        let invalid_batch = ["other", "invalid_reason", "timeout", "low_ocr_confidence"];
        let result = ConstraintValidator::validate_failure_reasons_batch(&invalid_batch);
        assert!(result.is_err());

        let errors = result.unwrap_err();
        assert_eq!(errors.len(), 2); // Should have 2 invalid reasons
        assert!(errors.iter().any(|e| e.contains("invalid_reason")));
        assert!(errors.iter().any(|e| e.contains("timeout")));

        // Test empty batch
        let empty_batch: &[&str] = &[];
        assert!(ConstraintValidator::validate_failure_reasons_batch(empty_batch).is_ok());
    }

    /// Error messages must name the offending value and include the
    /// "Valid values are:" listing.
    #[test]
    fn test_constraint_error_messages() {
        let result = ConstraintValidator::validate_failure_reason("invalid_reason");
        assert!(result.is_err());

        let error_msg = result.unwrap_err();
        assert!(error_msg.contains("Invalid failure_reason 'invalid_reason'"));
        assert!(error_msg.contains("Valid values are:"));
        assert!(error_msg.contains("low_ocr_confidence"));
        assert!(error_msg.contains("other"));

        let stage_result = ConstraintValidator::validate_failure_stage("invalid_stage");
        assert!(stage_result.is_err());

        let stage_error = stage_result.unwrap_err();
        assert!(stage_error.contains("Invalid failure_stage 'invalid_stage'"));
        assert!(stage_error.contains("Valid values are:"));
        assert!(stage_error.contains("ingestion"));
        assert!(stage_error.contains("ocr"));
    }

    /// Walks the failure reasons grouped by category so a missing category is
    /// easy to spot when reading the test.
    #[test]
    fn test_constraint_validation_comprehensive() {
        // Test that our enum values comprehensively cover expected failure scenarios

        // OCR-related failures
        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("ocr_timeout").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("ocr_memory_limit").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("pdf_parsing_error").is_ok());

        // File-related failures
        assert!(ConstraintValidator::validate_failure_reason("file_too_large").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("file_corrupted").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("unsupported_format").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("access_denied").is_ok());

        // Duplicate detection
        assert!(ConstraintValidator::validate_failure_reason("duplicate_content").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("duplicate_filename").is_ok());

        // System-related failures
        assert!(ConstraintValidator::validate_failure_reason("storage_quota_exceeded").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("network_error").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("permission_denied").is_ok());

        // Security-related failures
        assert!(ConstraintValidator::validate_failure_reason("virus_detected").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("policy_violation").is_ok());
        assert!(ConstraintValidator::validate_failure_reason("invalid_structure").is_ok());

        // Fallback
        assert!(ConstraintValidator::validate_failure_reason("other").is_ok());
    }

    /// Walks the failure stages grouped by pipeline phase.
    #[test]
    fn test_failure_stages_comprehensive() {
        // Test that our stage enum covers the document processing pipeline

        // Initial processing stages
        assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("validation").is_ok());

        // Core processing stages
        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("processing").is_ok());

        // Storage and sync stages
        assert!(ConstraintValidator::validate_failure_stage("storage").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("sync").is_ok());
    }

    /// Every legacy OCR reason, known or not, must map to a valid reason, and
    /// reasons outside the known list must land on "other".
    #[test]
    fn test_legacy_mapping_completeness() {
        // Legacy reasons this test treats as explicitly handled by the mapping.
        // Defined once, outside the loop, since it does not change per iteration.
        let known_legacy_reasons = [
            "low_ocr_confidence", "timeout", "memory_limit", "pdf_parsing_error",
            "corrupted", "file_corrupted", "unsupported_format", "access_denied", "unknown",
        ];

        // Ensure we handle all possible legacy OCR failure reasons that could exist
        let legacy_ocr_reasons = [
            "low_ocr_confidence",
            "timeout",
            "memory_limit",
            "pdf_parsing_error",
            "corrupted",
            "file_corrupted",
            "unsupported_format",
            "access_denied",
            "unknown",
            "some_new_unmapped_reason",
        ];

        for legacy_reason in legacy_ocr_reasons {
            let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason));

            // All mapped values should be valid
            assert!(
                ConstraintValidator::validate_failure_reason(mapped).is_ok(),
                "Legacy reason '{}' maps to '{}' which should be valid",
                legacy_reason, mapped
            );

            // Unmapped values should fall back to "other"
            if !known_legacy_reasons.contains(&legacy_reason) {
                assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'");
            }
        }
    }

    /// Validation must be case-sensitive for both reasons and stages.
    #[test]
    fn test_case_sensitivity() {
        // Our validation should be case-sensitive
        assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err());
        assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err());

        // Upper-case stage names must be checked with the stage validator.
        // Checking them with the reason validator would pass regardless of
        // casing, because a stage name is never a failure reason.
        assert!(ConstraintValidator::validate_failure_stage("OCR").is_err());
        assert!(ConstraintValidator::validate_failure_stage("INGESTION").is_err());

        // Only exact lowercase matches should work
        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
    }

    /// Leading or trailing whitespace must not be accepted by either validator.
    #[test]
    fn test_whitespace_handling() {
        // Validation should not accept values with extra whitespace
        assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence").is_err());
        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence ").is_err());
        assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence ").is_err());
        assert!(ConstraintValidator::validate_failure_stage(" ocr").is_err());
        assert!(ConstraintValidator::validate_failure_stage("ocr ").is_err());

        // Only exact matches should work
        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
    }
}