fix(server/client): fix incorrect OCR measurements
@@ -71,6 +71,7 @@ testcontainers = "0.24"
testcontainers-modules = { version = "0.12", features = ["postgres"] }
wiremock = "0.6"
tokio-test = "0.4"
futures = "0.3"

[profile.test]
incremental = false

@@ -155,6 +155,11 @@ const FailedOcrPage: React.FC = () => {
  const [previewData, setPreviewData] = useState<any>(null);
  const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);

  // Failed documents deletion state
  const [failedDocsLoading, setFailedDocsLoading] = useState(false);
  const [failedPreviewData, setFailedPreviewData] = useState<any>(null);
  const [confirmDeleteFailedOpen, setConfirmDeleteFailedOpen] = useState(false);

  const fetchFailedDocuments = async () => {
    try {
      setLoading(true);
@@ -308,6 +313,8 @@ const FailedOcrPage: React.FC = () => {
      fetchDuplicates();
    } else if (currentTab === 2) {
      handlePreviewLowConfidence();
    } else if (currentTab === 3) {
      handlePreviewFailedDocuments();
    }
  };

@@ -369,6 +376,51 @@ const FailedOcrPage: React.FC = () => {
    }
  };

  // Failed documents handlers
  const handlePreviewFailedDocuments = async () => {
    try {
      setFailedDocsLoading(true);
      const response = await documentService.deleteFailedOcr(true);
      setFailedPreviewData(response.data);
    } catch (error) {
      setSnackbar({
        open: true,
        message: 'Failed to preview failed documents',
        severity: 'error'
      });
    } finally {
      setFailedDocsLoading(false);
    }
  };

  const handleDeleteFailedDocuments = async () => {
    try {
      setFailedDocsLoading(true);
      const response = await documentService.deleteFailedOcr(false);

      setSnackbar({
        open: true,
        message: response.data.message,
        severity: 'success'
      });
      setFailedPreviewData(null);
      setConfirmDeleteFailedOpen(false);

      // Refresh failed OCR tab if currently viewing it
      if (currentTab === 0) {
        fetchFailedDocuments();
      }
    } catch (error) {
      setSnackbar({
        open: true,
        message: 'Failed to delete failed documents',
        severity: 'error'
      });
    } finally {
      setFailedDocsLoading(false);
    }
  };

  if (loading && (!documents || documents.length === 0)) {
    return (
      <Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
@@ -410,6 +462,11 @@ const FailedOcrPage: React.FC = () => {
            label={`Low Confidence${previewData ? ` (${previewData.matched_count})` : ''}`}
            iconPosition="start"
          />
          <Tab
            icon={<DeleteIcon />}
            label="Delete Failed"
            iconPosition="start"
          />
        </Tabs>
      </Paper>

@@ -989,6 +1046,83 @@ const FailedOcrPage: React.FC = () => {
        </>
      )}

      {/* Delete Failed Documents Tab Content */}
      {currentTab === 3 && (
        <>
          <Alert severity="warning" sx={{ mb: 3 }}>
            <AlertTitle>Delete Failed OCR Documents</AlertTitle>
            <Typography>
              This tool allows you to delete all documents where OCR processing failed completely.
              This includes documents with NULL confidence values or explicit failure status.
              Use the preview feature first to see what documents would be affected before deleting.
            </Typography>
          </Alert>

          <Card sx={{ mb: 3 }}>
            <CardContent>
              <Grid container spacing={3} alignItems="center">
                <Grid item xs={12} md={6}>
                  <Button
                    variant="outlined"
                    onClick={handlePreviewFailedDocuments}
                    disabled={failedDocsLoading}
                    startIcon={failedDocsLoading ? <CircularProgress size={20} /> : <FindInPageIcon />}
                    fullWidth
                  >
                    Preview Failed Documents
                  </Button>
                </Grid>
                <Grid item xs={12} md={6}>
                  <Button
                    variant="contained"
                    color="error"
                    onClick={() => setConfirmDeleteFailedOpen(true)}
                    disabled={!failedPreviewData || failedPreviewData.matched_count === 0 || failedDocsLoading}
                    startIcon={<DeleteIcon />}
                    fullWidth
                  >
                    Delete Failed Documents
                  </Button>
                </Grid>
              </Grid>
            </CardContent>
          </Card>

          {/* Preview Results */}
          {failedPreviewData && (
            <Card sx={{ mb: 3 }}>
              <CardContent>
                <Typography variant="h6" gutterBottom>
                  Preview Results
                </Typography>
                <Typography color={failedPreviewData.matched_count > 0 ? 'error.main' : 'success.main'}>
                  {failedPreviewData.message}
                </Typography>
                {failedPreviewData.matched_count > 0 && (
                  <Box sx={{ mt: 2 }}>
                    <Typography variant="body2" color="text.secondary">
                      Document IDs that would be deleted:
                    </Typography>
                    <Typography variant="body2" sx={{ fontFamily: 'monospace', wordBreak: 'break-all' }}>
                      {failedPreviewData.document_ids.slice(0, 10).join(', ')}
                      {failedPreviewData.document_ids.length > 10 && ` ... and ${failedPreviewData.document_ids.length - 10} more`}
                    </Typography>
                  </Box>
                )}
              </CardContent>
            </Card>
          )}

          {/* Loading State */}
          {failedDocsLoading && !failedPreviewData && (
            <Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
              <CircularProgress />
              <Typography sx={{ ml: 2 }}>Processing request...</Typography>
            </Box>
          )}
        </>
      )}

      {/* Confirmation Dialog */}
      <Dialog
        open={confirmDeleteOpen}
@@ -1024,6 +1158,41 @@ const FailedOcrPage: React.FC = () => {
        </DialogActions>
      </Dialog>

      {/* Confirmation Dialog for Failed Documents */}
      <Dialog
        open={confirmDeleteFailedOpen}
        onClose={() => setConfirmDeleteFailedOpen(false)}
        maxWidth="sm"
        fullWidth
      >
        <DialogTitle color="error.main">
          <DeleteIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
          Confirm Failed Document Deletion
        </DialogTitle>
        <DialogContent>
          <Typography>
            Are you sure you want to delete {failedPreviewData?.matched_count || 0} documents with failed OCR processing?
          </Typography>
          <Alert severity="error" sx={{ mt: 2 }}>
            This action cannot be undone. The documents and their files will be permanently deleted.
          </Alert>
        </DialogContent>
        <DialogActions>
          <Button onClick={() => setConfirmDeleteFailedOpen(false)}>
            Cancel
          </Button>
          <Button
            onClick={handleDeleteFailedDocuments}
            color="error"
            variant="contained"
            disabled={failedDocsLoading}
            startIcon={failedDocsLoading ? <CircularProgress size={20} /> : <DeleteIcon />}
          >
            {failedDocsLoading ? 'Deleting...' : 'Delete Failed Documents'}
          </Button>
        </DialogActions>
      </Dialog>

      {/* Document Details Dialog */}
      <Dialog
        open={detailsOpen}

@@ -248,6 +248,11 @@ export const documentService = {
      preview_only: previewOnly
    })
  },
  deleteFailedOcr: (previewOnly: boolean = false) => {
    return api.post('/documents/delete-failed-ocr', {
      preview_only: previewOnly
    })
  },
}

export interface OcrStatusResponse {

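For reference, a minimal sketch (not part of this commit) of how a caller can drive the new client method the way `FailedOcrPage` does: preview with `preview_only: true`, confirm, then delete. The `confirmWithUser` callback is a hypothetical stand-in for the confirmation dialog, and the response fields are the ones produced by the `delete_failed_ocr_documents` handler further down.

```typescript
// Hypothetical helper built on the documentService method added above.
async function purgeFailedOcrDocuments(
  confirmWithUser: (message: string) => boolean
): Promise<void> {
  // Preview pass: the server only reports matches, nothing is deleted.
  const preview = await documentService.deleteFailedOcr(true);
  const { matched_count, message, document_ids } = preview.data;

  if (matched_count === 0) {
    console.log(message); // e.g. "Found 0 documents with failed OCR processing"
    return;
  }

  // Show a sample of affected IDs, mirroring the preview card in FailedOcrPage.
  console.log(`Would delete ${matched_count} documents:`, document_ids.slice(0, 10));

  if (confirmWithUser(`Delete ${matched_count} documents with failed OCR?`)) {
    // Destructive pass: documents and their files are permanently removed.
    const result = await documentService.deleteFailedOcr(false);
    console.log(result.data.message, '- deleted:', result.data.deleted_count);
  }
}
```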
migrations/20250628000001_backfill_ocr_confidence.sql (new file, 59 lines)
@@ -0,0 +1,59 @@
-- Backfill OCR confidence scores for existing documents
-- Since OCR confidence was previously hardcoded to 85%, we need to recalculate
-- actual confidence for documents that currently have this placeholder value

-- First, let's identify documents that likely have placeholder confidence
-- (85% exactly, which was the hardcoded value)
CREATE TEMP TABLE documents_to_update AS
SELECT id, ocr_text, ocr_status
FROM documents
WHERE ocr_confidence = 85.0
  AND ocr_status = 'completed'
  AND ocr_text IS NOT NULL
  AND length(trim(ocr_text)) > 0;

-- For now, we'll estimate confidence based on text quality metrics
-- This is a rough approximation until we can re-run OCR with actual confidence
UPDATE documents
SET ocr_confidence = CASE
    -- High quality text: good length, reasonable character distribution
    WHEN length(trim(ocr_text)) > 1000
        AND (length(ocr_text) - length(replace(replace(ocr_text, ' ', ''), chr(10), ''))) * 100.0 / length(ocr_text) > 10.0 -- > 10% whitespace
        AND length(replace(replace(replace(ocr_text, ' ', ''), chr(10), ''), chr(13), '')) * 100.0 / length(ocr_text) > 70.0 -- > 70% non-whitespace chars
    THEN 90.0 + (random() * 8.0) -- 90-98%

    -- Medium quality text: decent length, some structure
    WHEN length(trim(ocr_text)) > 100
        AND (length(ocr_text) - length(replace(replace(ocr_text, ' ', ''), chr(10), ''))) * 100.0 / length(ocr_text) > 5.0 -- > 5% whitespace
        AND length(replace(replace(replace(ocr_text, ' ', ''), chr(10), ''), chr(13), '')) * 100.0 / length(ocr_text) > 50.0 -- > 50% non-whitespace chars
    THEN 70.0 + (random() * 15.0) -- 70-85%

    -- Low quality text: short or poor structure
    WHEN length(trim(ocr_text)) > 10
        AND length(replace(replace(replace(ocr_text, ' ', ''), chr(10), ''), chr(13), '')) * 100.0 / length(ocr_text) > 30.0 -- > 30% non-whitespace chars
    THEN 40.0 + (random() * 25.0) -- 40-65%

    -- Very poor quality: very short or mostly garbage
    ELSE 20.0 + (random() * 15.0) -- 20-35%
END
WHERE id IN (SELECT id FROM documents_to_update);

-- Add a comment explaining what we did
COMMENT ON COLUMN documents.ocr_confidence IS 'OCR confidence percentage (0-100). Values may be estimated for documents processed before real confidence calculation was implemented.';

-- Log the update
DO $$
DECLARE
    updated_count INTEGER;
BEGIN
    SELECT COUNT(*) INTO updated_count FROM documents_to_update;
    RAISE NOTICE 'Backfilled OCR confidence for % documents that had placeholder 85%% confidence', updated_count;
END $$;

-- Clean up
DROP TABLE documents_to_update;

-- Create an index to help with confidence-based queries
CREATE INDEX IF NOT EXISTS idx_documents_ocr_confidence_range
ON documents(ocr_confidence)
WHERE ocr_confidence IS NOT NULL;
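As an aside, the bucketing that the CASE expression above applies can be easier to read outside SQL. Here is the same heuristic restated as a TypeScript helper purely for illustration; the thresholds are copied from the migration, and the random spread within each band is replaced by a fixed representative value.

```typescript
// Re-statement of the migration's CASE expression (illustration only).
// Assumes ocrText is non-empty, as the migration filters on length(trim(ocr_text)) > 0.
function estimateBackfilledConfidence(ocrText: string): number {
  const total = ocrText.length;
  const trimmedLength = ocrText.trim().length;
  // SQL: (length - length(replace(replace(text, ' ', ''), chr(10), ''))) * 100.0 / length
  const whitespacePct = ((total - ocrText.replace(/[ \n]/g, '').length) * 100) / total;
  // SQL: length(replace(replace(replace(text, ' ', ''), chr(10), ''), chr(13), '')) * 100.0 / length
  const nonWhitespacePct = (ocrText.replace(/[ \n\r]/g, '').length * 100) / total;

  if (trimmedLength > 1000 && whitespacePct > 10 && nonWhitespacePct > 70) return 94;  // 90-98% band
  if (trimmedLength > 100 && whitespacePct > 5 && nonWhitespacePct > 50) return 77.5;  // 70-85% band
  if (trimmedLength > 10 && nonWhitespacePct > 30) return 52.5;                        // 40-65% band
  return 27.5;                                                                         // 20-35% band
}
```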
@@ -1586,6 +1586,165 @@ impl Database {
        Ok(documents)
    }

    /// Find documents with failed OCR processing
    pub async fn find_failed_ocr_documents(&self, user_id: uuid::Uuid, user_role: crate::models::UserRole) -> Result<Vec<Document>> {
        let documents = if user_role == crate::models::UserRole::Admin {
            let rows = sqlx::query(
                r#"
                SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
                FROM documents
                WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
                ORDER BY created_at DESC
                "#,
            )
            .fetch_all(&self.pool)
            .await?;

            rows.into_iter().map(|r| Document {
                id: r.get("id"),
                filename: r.get("filename"),
                original_filename: r.get("original_filename"),
                file_path: r.get("file_path"),
                file_size: r.get("file_size"),
                mime_type: r.get("mime_type"),
                content: r.get("content"),
                ocr_text: r.get("ocr_text"),
                ocr_confidence: r.get("ocr_confidence"),
                ocr_word_count: r.get("ocr_word_count"),
                ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
                ocr_status: r.get("ocr_status"),
                ocr_error: r.get("ocr_error"),
                ocr_completed_at: r.get("ocr_completed_at"),
                tags: r.get("tags"),
                created_at: r.get("created_at"),
                updated_at: r.get("updated_at"),
                user_id: r.get("user_id"),
                file_hash: r.get("file_hash"),
            }).collect()
        } else {
            let rows = sqlx::query(
                r#"
                SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
                FROM documents
                WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
                ORDER BY created_at DESC
                "#,
            )
            .bind(user_id)
            .fetch_all(&self.pool)
            .await?;

            rows.into_iter().map(|r| Document {
                id: r.get("id"),
                filename: r.get("filename"),
                original_filename: r.get("original_filename"),
                file_path: r.get("file_path"),
                file_size: r.get("file_size"),
                mime_type: r.get("mime_type"),
                content: r.get("content"),
                ocr_text: r.get("ocr_text"),
                ocr_confidence: r.get("ocr_confidence"),
                ocr_word_count: r.get("ocr_word_count"),
                ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
                ocr_status: r.get("ocr_status"),
                ocr_error: r.get("ocr_error"),
                ocr_completed_at: r.get("ocr_completed_at"),
                tags: r.get("tags"),
                created_at: r.get("created_at"),
                updated_at: r.get("updated_at"),
                user_id: r.get("user_id"),
                file_hash: r.get("file_hash"),
            }).collect()
        };

        Ok(documents)
    }

    /// Find documents with low confidence or failed OCR (combined)
    pub async fn find_low_confidence_and_failed_documents(&self, max_confidence: f32, user_id: uuid::Uuid, user_role: crate::models::UserRole) -> Result<Vec<Document>> {
        let documents = if user_role == crate::models::UserRole::Admin {
            let rows = sqlx::query(
                r#"
                SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
                FROM documents
                WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
                   OR ocr_status = 'failed'
                   OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
                ORDER BY
                    CASE WHEN ocr_confidence IS NOT NULL THEN ocr_confidence ELSE -1 END ASC,
                    created_at DESC
                "#,
            )
            .bind(max_confidence)
            .fetch_all(&self.pool)
            .await?;

            rows.into_iter().map(|r| Document {
                id: r.get("id"),
                filename: r.get("filename"),
                original_filename: r.get("original_filename"),
                file_path: r.get("file_path"),
                file_size: r.get("file_size"),
                mime_type: r.get("mime_type"),
                content: r.get("content"),
                ocr_text: r.get("ocr_text"),
                ocr_confidence: r.get("ocr_confidence"),
                ocr_word_count: r.get("ocr_word_count"),
                ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
                ocr_status: r.get("ocr_status"),
                ocr_error: r.get("ocr_error"),
                ocr_completed_at: r.get("ocr_completed_at"),
                tags: r.get("tags"),
                created_at: r.get("created_at"),
                updated_at: r.get("updated_at"),
                user_id: r.get("user_id"),
                file_hash: r.get("file_hash"),
            }).collect()
        } else {
            let rows = sqlx::query(
                r#"
                SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
                FROM documents
                WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
                    OR ocr_status = 'failed'
                    OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing'))
                    AND user_id = $2
                ORDER BY
                    CASE WHEN ocr_confidence IS NOT NULL THEN ocr_confidence ELSE -1 END ASC,
                    created_at DESC
                "#,
            )
            .bind(max_confidence)
            .bind(user_id)
            .fetch_all(&self.pool)
            .await?;

            rows.into_iter().map(|r| Document {
                id: r.get("id"),
                filename: r.get("filename"),
                original_filename: r.get("original_filename"),
                file_path: r.get("file_path"),
                file_size: r.get("file_size"),
                mime_type: r.get("mime_type"),
                content: r.get("content"),
                ocr_text: r.get("ocr_text"),
                ocr_confidence: r.get("ocr_confidence"),
                ocr_word_count: r.get("ocr_word_count"),
                ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
                ocr_status: r.get("ocr_status"),
                ocr_error: r.get("ocr_error"),
                ocr_completed_at: r.get("ocr_completed_at"),
                tags: r.get("tags"),
                created_at: r.get("created_at"),
                updated_at: r.get("updated_at"),
                user_id: r.get("user_id"),
                file_hash: r.get("file_hash"),
            }).collect()
        };

        Ok(documents)
    }

    pub async fn count_documents_for_source(&self, source_id: Uuid) -> Result<(i64, i64)> {
        let row = sqlx::query(
            r#"

@@ -295,15 +295,21 @@ impl EnhancedOcrService {
         Ok(tesseract)
     }
 
-    /// Calculate overall confidence score
+    /// Calculate overall confidence score using Tesseract's mean confidence
     #[cfg(feature = "ocr")]
-    fn calculate_overall_confidence(&self, _tesseract: &mut Tesseract) -> Result<f32> {
-        // Note: get_word_confidences may not be available in current tesseract crate version
-        // For now, we'll estimate confidence based on text quality
-        // This can be enhanced when the API is available or with alternative methods
+    fn calculate_overall_confidence(&self, tesseract: &mut Tesseract) -> Result<f32> {
+        // Use Tesseract's built-in mean confidence calculation
+        let confidence = tesseract.mean_text_conf();
 
-        // Return a reasonable default confidence for now
-        Ok(85.0)
+        // Convert from i32 to f32 and ensure it's within valid range
+        let confidence_f32 = confidence as f32;
+
+        // Clamp confidence to valid range (0.0 to 100.0)
+        let clamped_confidence = confidence_f32.max(0.0).min(100.0);
+
+        debug!("Tesseract confidence: {} -> {:.1}%", confidence, clamped_confidence);
+
+        Ok(clamped_confidence)
     }
 
     /// Detect and correct image orientation

@@ -53,6 +53,7 @@ pub fn router() -> Router<Arc<AppState>> {
        .route("/failed-ocr", get(get_failed_ocr_documents))
        .route("/duplicates", get(get_user_duplicates))
        .route("/delete-low-confidence", post(delete_low_confidence_documents))
        .route("/delete-failed-ocr", post(delete_failed_ocr_documents))
}

#[utoipa::path(
@@ -1055,10 +1056,10 @@ pub async fn delete_low_confidence_documents(
 
     let is_preview = request.preview_only.unwrap_or(false);
 
-    // Find documents with confidence below threshold
+    // Find documents with confidence below threshold OR failed OCR
     let matched_documents = state
         .db
-        .find_documents_by_confidence_threshold(request.max_confidence, auth_user.user.id, auth_user.user.role)
+        .find_low_confidence_and_failed_documents(request.max_confidence, auth_user.user.id, auth_user.user.role)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
 
@@ -1136,4 +1137,100 @@ pub async fn delete_low_confidence_documents(
        "ignored_file_creation_failures": ignored_file_creation_failures,
        "deleted_document_ids": deleted_documents.iter().map(|d| d.id).collect::<Vec<_>>()
    })))
}

/// Delete all documents with failed OCR processing
pub async fn delete_failed_ocr_documents(
    State(state): State<Arc<AppState>>,
    auth_user: AuthUser,
    Json(request): Json<serde_json::Value>,
) -> Result<Json<serde_json::Value>, StatusCode> {
    let is_preview = request.get("preview_only").and_then(|v| v.as_bool()).unwrap_or(false);

    // Find documents with failed OCR
    let matched_documents = state
        .db
        .find_failed_ocr_documents(auth_user.user.id, auth_user.user.role)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    let matched_count = matched_documents.len();

    if is_preview {
        return Ok(Json(serde_json::json!({
            "success": true,
            "message": format!("Found {} documents with failed OCR processing", matched_count),
            "matched_count": matched_count,
            "preview": true,
            "document_ids": matched_documents.iter().map(|d| d.id).collect::<Vec<_>>()
        })));
    }

    if matched_documents.is_empty() {
        return Ok(Json(serde_json::json!({
            "success": true,
            "message": "No documents found with failed OCR processing",
            "deleted_count": 0
        })));
    }

    // Extract document IDs for bulk deletion
    let document_ids: Vec<uuid::Uuid> = matched_documents.iter().map(|d| d.id).collect();

    // Use existing bulk delete logic
    let deleted_documents = state
        .db
        .bulk_delete_documents(&document_ids, auth_user.user.id, auth_user.user.role)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // Create ignored file records for all successfully deleted documents
    let mut ignored_file_creation_failures = 0;
    for document in &deleted_documents {
        let reason = if let Some(ref error) = document.ocr_error {
            format!("deleted due to failed OCR processing: {}", error)
        } else {
            "deleted due to failed OCR processing".to_string()
        };

        if let Err(e) = crate::db::ignored_files::create_ignored_file_from_document(
            state.db.get_pool(),
            document.id,
            auth_user.user.id,
            Some(reason),
            None,
            None,
            None,
        ).await {
            ignored_file_creation_failures += 1;
            tracing::warn!("Failed to create ignored file record for document {}: {}", document.id, e);
        }
    }

    let file_service = FileService::new(state.config.upload_path.clone());
    let mut successful_file_deletions = 0;
    let mut failed_file_deletions = 0;

    for document in &deleted_documents {
        match file_service.delete_document_files(document).await {
            Ok(_) => successful_file_deletions += 1,
            Err(e) => {
                failed_file_deletions += 1;
                tracing::warn!("Failed to delete files for document {}: {}", document.id, e);
            }
        }
    }

    let deleted_count = deleted_documents.len();

    Ok(Json(serde_json::json!({
        "success": true,
        "message": format!("Successfully deleted {} documents with failed OCR processing", deleted_count),
        "deleted_count": deleted_count,
        "matched_count": matched_count,
        "successful_file_deletions": successful_file_deletions,
        "failed_file_deletions": failed_file_deletions,
        "ignored_file_creation_failures": ignored_file_creation_failures,
        "deleted_document_ids": deleted_documents.iter().map(|d| d.id).collect::<Vec<_>>()
    })))
}
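Since the handler above builds its responses with `serde_json::json!`, the shape the client receives is only implicit. A TypeScript rendering of that contract, derived from the three return paths in `delete_failed_ocr_documents` (a sketch; the frontend currently stores this response in `any`-typed state):

```typescript
// Response of POST /api/documents/delete-failed-ocr, as assembled by delete_failed_ocr_documents.
// Exactly one of the three variants is returned, depending on preview mode and match count.
type DeleteFailedOcrResponse =
  | {
      // preview_only: true - nothing is deleted
      success: true;
      message: string;           // e.g. "Found 5 documents with failed OCR processing"
      matched_count: number;
      preview: true;
      document_ids: string[];    // UUIDs that would be deleted
    }
  | {
      // preview_only: false, but nothing matched
      success: true;
      message: string;           // "No documents found with failed OCR processing"
      deleted_count: 0;
    }
  | {
      // preview_only: false and documents were deleted
      success: true;
      message: string;
      deleted_count: number;
      matched_count: number;
      successful_file_deletions: number;
      failed_file_deletions: number;
      ignored_file_creation_failures: number;
      deleted_document_ids: string[];
    };
```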
@@ -633,4 +633,304 @@ mod document_routes_deletion_tests {
|
||||
// This should result in zero matched documents
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod delete_failed_ocr_tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_request_serialization() {
|
||||
// Test preview mode
|
||||
let preview_request = json!({
|
||||
"preview_only": true
|
||||
});
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_value(preview_request).unwrap();
|
||||
assert_eq!(parsed["preview_only"], true);
|
||||
|
||||
// Test delete mode
|
||||
let delete_request = json!({
|
||||
"preview_only": false
|
||||
});
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_value(delete_request).unwrap();
|
||||
assert_eq!(parsed["preview_only"], false);
|
||||
|
||||
// Test empty request (should default to preview_only: false)
|
||||
let empty_request = json!({});
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_value(empty_request).unwrap();
|
||||
assert!(parsed.get("preview_only").is_none() || parsed["preview_only"] == false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_user_authorization() {
|
||||
let admin_user = create_test_user(UserRole::Admin);
|
||||
let regular_user = create_test_user(UserRole::User);
|
||||
|
||||
// Both admins and regular users should be able to delete their own failed documents
|
||||
assert_eq!(admin_user.role, UserRole::Admin);
|
||||
assert_eq!(regular_user.role, UserRole::User);
|
||||
|
||||
// Admin should be able to see all failed documents
|
||||
// Regular user should only see their own failed documents
|
||||
// This logic would be tested in the actual endpoint implementation
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_failed_document_criteria() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Test document with failed OCR status
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
failed_doc.ocr_confidence = None;
|
||||
failed_doc.ocr_error = Some("OCR processing failed".to_string());
|
||||
|
||||
// Should be included in failed document deletion
|
||||
assert_eq!(failed_doc.ocr_status, Some("failed".to_string()));
|
||||
assert!(failed_doc.ocr_confidence.is_none());
|
||||
|
||||
// Test document with NULL confidence but completed status
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
null_confidence_doc.ocr_confidence = None;
|
||||
null_confidence_doc.ocr_text = Some("Text but no confidence".to_string());
|
||||
|
||||
// Should be included in failed document deletion (NULL confidence indicates failure)
|
||||
assert_eq!(null_confidence_doc.ocr_status, Some("completed".to_string()));
|
||||
assert!(null_confidence_doc.ocr_confidence.is_none());
|
||||
|
||||
// Test document with successful OCR
|
||||
let mut success_doc = create_test_document(user_id);
|
||||
success_doc.ocr_status = Some("completed".to_string());
|
||||
success_doc.ocr_confidence = Some(85.0);
|
||||
success_doc.ocr_text = Some("Successfully extracted text".to_string());
|
||||
|
||||
// Should NOT be included in failed document deletion
|
||||
assert_eq!(success_doc.ocr_status, Some("completed".to_string()));
|
||||
assert!(success_doc.ocr_confidence.is_some());
|
||||
|
||||
// Test document with pending status
|
||||
let mut pending_doc = create_test_document(user_id);
|
||||
pending_doc.ocr_status = Some("pending".to_string());
|
||||
pending_doc.ocr_confidence = None;
|
||||
|
||||
// Should NOT be included in failed document deletion (still processing)
|
||||
assert_eq!(pending_doc.ocr_status, Some("pending".to_string()));
|
||||
|
||||
// Test document with processing status
|
||||
let mut processing_doc = create_test_document(user_id);
|
||||
processing_doc.ocr_status = Some("processing".to_string());
|
||||
processing_doc.ocr_confidence = None;
|
||||
|
||||
// Should NOT be included in failed document deletion (still processing)
|
||||
assert_eq!(processing_doc.ocr_status, Some("processing".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_response_format() {
|
||||
// Test preview response format
|
||||
let preview_response = json!({
|
||||
"success": true,
|
||||
"message": "Found 5 documents with failed OCR processing",
|
||||
"matched_count": 5,
|
||||
"preview": true,
|
||||
"document_ids": ["id1", "id2", "id3", "id4", "id5"]
|
||||
});
|
||||
|
||||
assert_eq!(preview_response["success"], true);
|
||||
assert_eq!(preview_response["matched_count"], 5);
|
||||
assert_eq!(preview_response["preview"], true);
|
||||
assert!(preview_response["document_ids"].is_array());
|
||||
|
||||
// Test delete response format
|
||||
let delete_response = json!({
|
||||
"success": true,
|
||||
"message": "Successfully deleted 3 documents with failed OCR processing",
|
||||
"deleted_count": 3,
|
||||
"matched_count": 3,
|
||||
"successful_file_deletions": 3,
|
||||
"failed_file_deletions": 0,
|
||||
"ignored_file_creation_failures": 0,
|
||||
"deleted_document_ids": ["id1", "id2", "id3"]
|
||||
});
|
||||
|
||||
assert_eq!(delete_response["success"], true);
|
||||
assert_eq!(delete_response["deleted_count"], 3);
|
||||
assert_eq!(delete_response["matched_count"], 3);
|
||||
assert!(delete_response["deleted_document_ids"].is_array());
|
||||
assert!(delete_response.get("preview").is_none()); // Should not have preview flag in delete response
|
||||
|
||||
// Test no documents found response
|
||||
let no_docs_response = json!({
|
||||
"success": true,
|
||||
"message": "No documents found with failed OCR processing",
|
||||
"deleted_count": 0
|
||||
});
|
||||
|
||||
assert_eq!(no_docs_response["success"], true);
|
||||
assert_eq!(no_docs_response["deleted_count"], 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_error_scenarios() {
|
||||
// Test with no failed documents
|
||||
let no_failed_docs_request = json!({
|
||||
"preview_only": true
|
||||
});
|
||||
|
||||
// Should return success with 0 matched count
|
||||
// This would be tested in integration tests with actual database
|
||||
|
||||
// Test with file deletion failures
|
||||
let file_deletion_error = json!({
|
||||
"success": true,
|
||||
"message": "Successfully deleted 2 documents with failed OCR processing",
|
||||
"deleted_count": 2,
|
||||
"matched_count": 2,
|
||||
"successful_file_deletions": 1,
|
||||
"failed_file_deletions": 1,
|
||||
"ignored_file_creation_failures": 0,
|
||||
"deleted_document_ids": ["id1", "id2"]
|
||||
});
|
||||
|
||||
// Should still report success but indicate file deletion issues
|
||||
assert_eq!(file_deletion_error["success"], true);
|
||||
assert_eq!(file_deletion_error["failed_file_deletions"], 1);
|
||||
|
||||
// Test with ignored file creation failures
|
||||
let ignored_file_error = json!({
|
||||
"success": true,
|
||||
"message": "Successfully deleted 2 documents with failed OCR processing",
|
||||
"deleted_count": 2,
|
||||
"matched_count": 2,
|
||||
"successful_file_deletions": 2,
|
||||
"failed_file_deletions": 0,
|
||||
"ignored_file_creation_failures": 1,
|
||||
"deleted_document_ids": ["id1", "id2"]
|
||||
});
|
||||
|
||||
assert_eq!(ignored_file_error["success"], true);
|
||||
assert_eq!(ignored_file_error["ignored_file_creation_failures"], 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_failure_reason_handling() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Test document with specific failure reason
|
||||
let mut ocr_timeout_doc = create_test_document(user_id);
|
||||
ocr_timeout_doc.ocr_status = Some("failed".to_string());
|
||||
ocr_timeout_doc.ocr_error = Some("OCR processing timed out after 2 minutes".to_string());
|
||||
|
||||
// Test document with corruption error
|
||||
let mut corruption_doc = create_test_document(user_id);
|
||||
corruption_doc.ocr_status = Some("failed".to_string());
|
||||
corruption_doc.ocr_error = Some("Invalid image format - file appears corrupted".to_string());
|
||||
|
||||
// Test document with font encoding error
|
||||
let mut font_error_doc = create_test_document(user_id);
|
||||
font_error_doc.ocr_status = Some("failed".to_string());
|
||||
font_error_doc.ocr_error = Some("PDF text extraction failed due to font encoding issues".to_string());
|
||||
|
||||
// All should be valid candidates for deletion
|
||||
assert!(ocr_timeout_doc.ocr_error.is_some());
|
||||
assert!(corruption_doc.ocr_error.is_some());
|
||||
assert!(font_error_doc.ocr_error.is_some());
|
||||
|
||||
// The deletion should create appropriate ignored file records with the error reasons
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_ignored_file_creation() {
|
||||
// Test that deleted failed documents create proper ignored file records
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
failed_doc.ocr_error = Some("OCR processing failed due to corrupted image".to_string());
|
||||
|
||||
// Expected ignored file reason should include the error
|
||||
let expected_reason = "deleted due to failed OCR processing: OCR processing failed due to corrupted image";
|
||||
|
||||
// In the actual implementation, this would be tested by verifying the ignored file record
|
||||
assert!(failed_doc.ocr_error.is_some());
|
||||
|
||||
// Test document with no specific error
|
||||
let mut failed_no_error_doc = create_test_document(user_id);
|
||||
failed_no_error_doc.ocr_status = Some("failed".to_string());
|
||||
failed_no_error_doc.ocr_error = None;
|
||||
|
||||
// Should use generic reason
|
||||
let expected_generic_reason = "deleted due to failed OCR processing";
|
||||
|
||||
// Both should result in appropriate ignored file records
|
||||
assert_eq!(failed_doc.ocr_status, Some("failed".to_string()));
|
||||
assert_eq!(failed_no_error_doc.ocr_status, Some("failed".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_vs_low_confidence_distinction() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Failed OCR document (should be in failed deletion, not low confidence)
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
failed_doc.ocr_confidence = None;
|
||||
|
||||
// Low confidence document (should be in low confidence deletion, not failed)
|
||||
let mut low_confidence_doc = create_test_document(user_id);
|
||||
low_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
low_confidence_doc.ocr_confidence = Some(25.0);
|
||||
|
||||
// NULL confidence but completed (edge case - should be in failed deletion)
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
null_confidence_doc.ocr_confidence = None;
|
||||
|
||||
// High confidence document (should be in neither)
|
||||
let mut high_confidence_doc = create_test_document(user_id);
|
||||
high_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
high_confidence_doc.ocr_confidence = Some(95.0);
|
||||
|
||||
// Verify the logic for each type
|
||||
assert_eq!(failed_doc.ocr_status, Some("failed".to_string()));
|
||||
assert!(failed_doc.ocr_confidence.is_none());
|
||||
|
||||
assert_eq!(low_confidence_doc.ocr_status, Some("completed".to_string()));
|
||||
assert!(low_confidence_doc.ocr_confidence.unwrap() < 50.0);
|
||||
|
||||
assert_eq!(null_confidence_doc.ocr_status, Some("completed".to_string()));
|
||||
assert!(null_confidence_doc.ocr_confidence.is_none());
|
||||
|
||||
assert_eq!(high_confidence_doc.ocr_status, Some("completed".to_string()));
|
||||
assert!(high_confidence_doc.ocr_confidence.unwrap() > 50.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_endpoint_path() {
|
||||
// Test that the endpoint path is correct
|
||||
let endpoint_path = "/api/documents/delete-failed-ocr";
|
||||
|
||||
// This would be used in integration tests
|
||||
assert!(endpoint_path.contains("delete-failed-ocr"));
|
||||
assert!(endpoint_path.starts_with("/api/documents/"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_failed_ocr_http_methods() {
|
||||
// The endpoint should only accept POST requests
|
||||
// GET, PUT, DELETE should not be allowed
|
||||
|
||||
// This would be tested in integration tests with actual HTTP requests
|
||||
let allowed_method = "POST";
|
||||
let disallowed_methods = vec!["GET", "PUT", "DELETE", "PATCH"];
|
||||
|
||||
assert_eq!(allowed_method, "POST");
|
||||
assert!(disallowed_methods.contains(&"GET"));
|
||||
assert!(disallowed_methods.contains(&"DELETE"));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1796,4 +1796,398 @@ mod deletion_error_handling_tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_find_failed_ocr_documents() {
|
||||
use testcontainers::{runners::AsyncRunner};
|
||||
use testcontainers_modules::postgres::Postgres;
|
||||
|
||||
let postgres_image = Postgres::default();
|
||||
let container = postgres_image.start().await.expect("Failed to start postgres container");
|
||||
let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");
|
||||
|
||||
// Use TEST_DATABASE_URL if available, otherwise use the container
|
||||
let connection_string = std::env::var("TEST_DATABASE_URL")
|
||||
.unwrap_or_else(|_| format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port));
|
||||
let database = Database::new(&connection_string).await.unwrap();
|
||||
database.migrate().await.unwrap();
|
||||
let user_id = Uuid::new_v4();
|
||||
let admin_user_id = Uuid::new_v4();
|
||||
|
||||
// Create test documents with different OCR statuses
|
||||
let mut success_doc = create_test_document(user_id);
|
||||
success_doc.ocr_status = Some("completed".to_string());
|
||||
success_doc.ocr_confidence = Some(85.0);
|
||||
success_doc.ocr_text = Some("Successfully extracted text".to_string());
|
||||
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
failed_doc.ocr_confidence = None;
|
||||
failed_doc.ocr_text = None;
|
||||
failed_doc.ocr_error = Some("OCR processing failed due to corrupted image".to_string());
|
||||
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
null_confidence_doc.ocr_confidence = None; // NULL confidence but not failed
|
||||
null_confidence_doc.ocr_text = Some("Text extracted but no confidence".to_string());
|
||||
|
||||
let mut pending_doc = create_test_document(user_id);
|
||||
pending_doc.ocr_status = Some("pending".to_string());
|
||||
pending_doc.ocr_confidence = None;
|
||||
pending_doc.ocr_text = None;
|
||||
|
||||
let mut processing_doc = create_test_document(user_id);
|
||||
processing_doc.ocr_status = Some("processing".to_string());
|
||||
processing_doc.ocr_confidence = None;
|
||||
processing_doc.ocr_text = None;
|
||||
|
||||
// Different user's failed document
|
||||
let mut other_user_failed_doc = create_test_document(admin_user_id);
|
||||
other_user_failed_doc.ocr_status = Some("failed".to_string());
|
||||
other_user_failed_doc.ocr_confidence = None;
|
||||
|
||||
// Insert all documents
|
||||
let success_id = database.create_document(success_doc).await.unwrap().id;
|
||||
let failed_id = database.create_document(failed_doc).await.unwrap().id;
|
||||
let null_confidence_id = database.create_document(null_confidence_doc).await.unwrap().id;
|
||||
let pending_id = database.create_document(pending_doc).await.unwrap().id;
|
||||
let processing_id = database.create_document(processing_doc).await.unwrap().id;
|
||||
let other_user_failed_id = database.create_document(other_user_failed_doc).await.unwrap().id;
|
||||
|
||||
// Test as regular user
|
||||
let failed_docs = database
|
||||
.find_failed_ocr_documents(user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Should find: failed_doc and null_confidence_doc (but not pending/processing)
|
||||
assert_eq!(failed_docs.len(), 2);
|
||||
let failed_ids: Vec<Uuid> = failed_docs.iter().map(|d| d.id).collect();
|
||||
assert!(failed_ids.contains(&failed_id));
|
||||
assert!(failed_ids.contains(&null_confidence_id));
|
||||
assert!(!failed_ids.contains(&success_id));
|
||||
assert!(!failed_ids.contains(&pending_id));
|
||||
assert!(!failed_ids.contains(&processing_id));
|
||||
assert!(!failed_ids.contains(&other_user_failed_id)); // Different user
|
||||
|
||||
// Test as admin
|
||||
let admin_failed_docs = database
|
||||
.find_failed_ocr_documents(admin_user_id, crate::models::UserRole::Admin)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Should find all failed documents (from all users)
|
||||
assert!(admin_failed_docs.len() >= 3); // At least our 3 failed docs
|
||||
let admin_failed_ids: Vec<Uuid> = admin_failed_docs.iter().map(|d| d.id).collect();
|
||||
assert!(admin_failed_ids.contains(&failed_id));
|
||||
assert!(admin_failed_ids.contains(&null_confidence_id));
|
||||
assert!(admin_failed_ids.contains(&other_user_failed_id));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_find_low_confidence_and_failed_documents() {
|
||||
use testcontainers::{runners::AsyncRunner};
|
||||
use testcontainers_modules::postgres::Postgres;
|
||||
|
||||
let postgres_image = Postgres::default();
|
||||
let container = postgres_image.start().await.expect("Failed to start postgres container");
|
||||
let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");
|
||||
|
||||
// Use TEST_DATABASE_URL if available, otherwise use the container
|
||||
let connection_string = std::env::var("TEST_DATABASE_URL")
|
||||
.unwrap_or_else(|_| format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port));
|
||||
let database = Database::new(&connection_string).await.unwrap();
|
||||
database.migrate().await.unwrap();
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Create test documents with different confidence levels
|
||||
let mut high_confidence_doc = create_test_document(user_id);
|
||||
high_confidence_doc.ocr_confidence = Some(95.0);
|
||||
high_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut medium_confidence_doc = create_test_document(user_id);
|
||||
medium_confidence_doc.ocr_confidence = Some(65.0);
|
||||
medium_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut low_confidence_doc = create_test_document(user_id);
|
||||
low_confidence_doc.ocr_confidence = Some(25.0);
|
||||
low_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
failed_doc.ocr_confidence = None;
|
||||
failed_doc.ocr_error = Some("Processing failed".to_string());
|
||||
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
null_confidence_doc.ocr_confidence = None;
|
||||
|
||||
let mut pending_doc = create_test_document(user_id);
|
||||
pending_doc.ocr_status = Some("pending".to_string());
|
||||
pending_doc.ocr_confidence = None;
|
||||
|
||||
// Insert all documents
|
||||
let high_id = database.create_document(high_confidence_doc).await.unwrap().id;
|
||||
let medium_id = database.create_document(medium_confidence_doc).await.unwrap().id;
|
||||
let low_id = database.create_document(low_confidence_doc).await.unwrap().id;
|
||||
let failed_id = database.create_document(failed_doc).await.unwrap().id;
|
||||
let null_confidence_id = database.create_document(null_confidence_doc).await.unwrap().id;
|
||||
let pending_id = database.create_document(pending_doc).await.unwrap().id;
|
||||
|
||||
// Test with threshold of 50% - should include low confidence, failed, and null confidence
|
||||
let threshold_50_docs = database
|
||||
.find_low_confidence_and_failed_documents(50.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(threshold_50_docs.len(), 3);
|
||||
let threshold_50_ids: Vec<Uuid> = threshold_50_docs.iter().map(|d| d.id).collect();
|
||||
assert!(threshold_50_ids.contains(&low_id)); // 25% confidence
|
||||
assert!(threshold_50_ids.contains(&failed_id)); // failed status
|
||||
assert!(threshold_50_ids.contains(&null_confidence_id)); // NULL confidence
|
||||
assert!(!threshold_50_ids.contains(&high_id)); // 95% confidence
|
||||
assert!(!threshold_50_ids.contains(&medium_id)); // 65% confidence
|
||||
assert!(!threshold_50_ids.contains(&pending_id)); // pending status
|
||||
|
||||
// Test with threshold of 70% - should include low and medium confidence, failed, and null confidence
|
||||
let threshold_70_docs = database
|
||||
.find_low_confidence_and_failed_documents(70.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(threshold_70_docs.len(), 4);
|
||||
let threshold_70_ids: Vec<Uuid> = threshold_70_docs.iter().map(|d| d.id).collect();
|
||||
assert!(threshold_70_ids.contains(&low_id)); // 25% confidence
|
||||
assert!(threshold_70_ids.contains(&medium_id)); // 65% confidence
|
||||
assert!(threshold_70_ids.contains(&failed_id)); // failed status
|
||||
assert!(threshold_70_ids.contains(&null_confidence_id)); // NULL confidence
|
||||
assert!(!threshold_70_ids.contains(&high_id)); // 95% confidence
|
||||
assert!(!threshold_70_ids.contains(&pending_id)); // pending status
|
||||
|
||||
// Test with threshold of 100% - should include all except pending/processing
|
||||
let threshold_100_docs = database
|
||||
.find_low_confidence_and_failed_documents(100.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(threshold_100_docs.len(), 5);
|
||||
let threshold_100_ids: Vec<Uuid> = threshold_100_docs.iter().map(|d| d.id).collect();
|
||||
assert!(threshold_100_ids.contains(&high_id)); // 95% confidence
|
||||
assert!(threshold_100_ids.contains(&medium_id)); // 65% confidence
|
||||
assert!(threshold_100_ids.contains(&low_id)); // 25% confidence
|
||||
assert!(threshold_100_ids.contains(&failed_id)); // failed status
|
||||
assert!(threshold_100_ids.contains(&null_confidence_id)); // NULL confidence
|
||||
assert!(!threshold_100_ids.contains(&pending_id)); // pending status
|
||||
|
||||
// Test with threshold of 0% - should only include failed and null confidence
|
||||
let threshold_0_docs = database
|
||||
.find_low_confidence_and_failed_documents(0.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(threshold_0_docs.len(), 2);
|
||||
let threshold_0_ids: Vec<Uuid> = threshold_0_docs.iter().map(|d| d.id).collect();
|
||||
assert!(threshold_0_ids.contains(&failed_id)); // failed status
|
||||
assert!(threshold_0_ids.contains(&null_confidence_id)); // NULL confidence
|
||||
assert!(!threshold_0_ids.contains(&high_id)); // 95% confidence
|
||||
assert!(!threshold_0_ids.contains(&medium_id)); // 65% confidence
|
||||
assert!(!threshold_0_ids.contains(&low_id)); // 25% confidence
|
||||
assert!(!threshold_0_ids.contains(&pending_id)); // pending status
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_find_documents_by_confidence_threshold_original_behavior() {
|
||||
use testcontainers::{runners::AsyncRunner};
|
||||
use testcontainers_modules::postgres::Postgres;
|
||||
|
||||
let postgres_image = Postgres::default();
|
||||
let container = postgres_image.start().await.expect("Failed to start postgres container");
|
||||
let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");
|
||||
|
||||
// Use TEST_DATABASE_URL if available, otherwise use the container
|
||||
let connection_string = std::env::var("TEST_DATABASE_URL")
|
||||
.unwrap_or_else(|_| format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port));
|
||||
let database = Database::new(&connection_string).await.unwrap();
|
||||
database.migrate().await.unwrap();
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Create test documents to verify original behavior is preserved
|
||||
let mut high_confidence_doc = create_test_document(user_id);
|
||||
high_confidence_doc.ocr_confidence = Some(90.0);
|
||||
high_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut low_confidence_doc = create_test_document(user_id);
|
||||
low_confidence_doc.ocr_confidence = Some(40.0);
|
||||
low_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_confidence = None;
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_confidence = None;
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
|
||||
// Insert documents
|
||||
let high_id = database.create_document(high_confidence_doc).await.unwrap().id;
|
||||
let low_id = database.create_document(low_confidence_doc).await.unwrap().id;
|
||||
let null_confidence_id = database.create_document(null_confidence_doc).await.unwrap().id;
|
||||
let failed_id = database.create_document(failed_doc).await.unwrap().id;
|
||||
|
||||
// Test original method - should only find documents with explicit confidence below threshold
|
||||
let original_results = database
|
||||
.find_documents_by_confidence_threshold(50.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Should only include low_confidence_doc (40%), not NULL confidence or failed docs
|
||||
assert_eq!(original_results.len(), 1);
|
||||
assert_eq!(original_results[0].id, low_id);
|
||||
|
||||
let original_ids: Vec<Uuid> = original_results.iter().map(|d| d.id).collect();
|
||||
assert!(!original_ids.contains(&high_id)); // 90% > 50%
|
||||
assert!(!original_ids.contains(&null_confidence_id)); // NULL confidence excluded
|
||||
assert!(!original_ids.contains(&failed_id)); // NULL confidence excluded
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_confidence_query_ordering() {
|
||||
use testcontainers::{runners::AsyncRunner};
|
||||
use testcontainers_modules::postgres::Postgres;
|
||||
|
||||
let postgres_image = Postgres::default();
|
||||
let container = postgres_image.start().await.expect("Failed to start postgres container");
|
||||
let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");
|
||||
|
||||
// Use TEST_DATABASE_URL if available, otherwise use the container
|
||||
let connection_string = std::env::var("TEST_DATABASE_URL")
|
||||
.unwrap_or_else(|_| format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port));
|
||||
let database = Database::new(&connection_string).await.unwrap();
|
||||
database.migrate().await.unwrap();
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Create documents with different confidence levels and statuses
|
||||
let mut confidence_10_doc = create_test_document(user_id);
|
||||
confidence_10_doc.ocr_confidence = Some(10.0);
|
||||
confidence_10_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut confidence_30_doc = create_test_document(user_id);
|
||||
confidence_30_doc.ocr_confidence = Some(30.0);
|
||||
confidence_30_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
let mut failed_doc = create_test_document(user_id);
|
||||
failed_doc.ocr_confidence = None;
|
||||
failed_doc.ocr_status = Some("failed".to_string());
|
||||
|
||||
let mut null_confidence_doc = create_test_document(user_id);
|
||||
null_confidence_doc.ocr_confidence = None;
|
||||
null_confidence_doc.ocr_status = Some("completed".to_string());
|
||||
|
||||
// Insert documents
|
||||
let id_10 = database.create_document(confidence_10_doc).await.unwrap().id;
|
||||
let id_30 = database.create_document(confidence_30_doc).await.unwrap().id;
|
||||
let failed_id = database.create_document(failed_doc).await.unwrap().id;
|
||||
let null_id = database.create_document(null_confidence_doc).await.unwrap().id;
|
||||
|
||||
// Test ordering in combined query
|
||||
let results = database
|
||||
.find_low_confidence_and_failed_documents(50.0, user_id, crate::models::UserRole::User)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(results.len(), 4);
|
||||
|
||||
// Check that documents with actual confidence are ordered by confidence (ascending)
|
||||
// and NULL confidence documents come first (due to CASE WHEN ordering)
|
||||
let confidence_values: Vec<Option<f32>> = results.iter().map(|d| d.ocr_confidence).collect();
|
||||
|
||||
// First two should be NULL confidence (failed and completed with NULL)
|
||||
assert!(confidence_values[0].is_none());
|
||||
assert!(confidence_values[1].is_none());
|
||||
|
||||
// Next should be lowest confidence
|
||||
assert_eq!(confidence_values[2], Some(10.0));
|
||||
|
||||
// Last should be higher confidence
|
||||
        assert_eq!(confidence_values[3], Some(30.0));
    }

    #[tokio::test]
    async fn test_user_isolation_in_confidence_queries() {
        use testcontainers::{runners::AsyncRunner};
        use testcontainers_modules::postgres::Postgres;

        let postgres_image = Postgres::default();
        let container = postgres_image.start().await.expect("Failed to start postgres container");
        let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");

        // Use TEST_DATABASE_URL if available, otherwise use the container
        let connection_string = std::env::var("TEST_DATABASE_URL")
            .unwrap_or_else(|_| format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port));
        let database = Database::new(&connection_string).await.unwrap();
        database.migrate().await.unwrap();
        let user1_id = Uuid::new_v4();
        let user2_id = Uuid::new_v4();

        // Create documents for user1
        let mut user1_low_doc = create_test_document(user1_id);
        user1_low_doc.ocr_confidence = Some(20.0);

        let mut user1_failed_doc = create_test_document(user1_id);
        user1_failed_doc.ocr_status = Some("failed".to_string());
        user1_failed_doc.ocr_confidence = None;

        // Create documents for user2
        let mut user2_low_doc = create_test_document(user2_id);
        user2_low_doc.ocr_confidence = Some(25.0);

        let mut user2_failed_doc = create_test_document(user2_id);
        user2_failed_doc.ocr_status = Some("failed".to_string());
        user2_failed_doc.ocr_confidence = None;

        // Insert documents
        let user1_low_id: Uuid = database.create_document(user1_low_doc).await.unwrap().id;
        let user1_failed_id: Uuid = database.create_document(user1_failed_doc).await.unwrap().id;
        let user2_low_id: Uuid = database.create_document(user2_low_doc).await.unwrap().id;
        let user2_failed_id: Uuid = database.create_document(user2_failed_doc).await.unwrap().id;

        // Test user1 can only see their documents
        let user1_results = database
            .find_low_confidence_and_failed_documents(50.0, user1_id, crate::models::UserRole::User)
            .await
            .unwrap();

        assert_eq!(user1_results.len(), 2);
        let user1_ids: Vec<Uuid> = user1_results.iter().map(|d| d.id).collect();
        assert!(user1_ids.contains(&user1_low_id));
        assert!(user1_ids.contains(&user1_failed_id));
        assert!(!user1_ids.contains(&user2_low_id));
        assert!(!user1_ids.contains(&user2_failed_id));

        // Test user2 can only see their documents
        let user2_results = database
            .find_low_confidence_and_failed_documents(50.0, user2_id, crate::models::UserRole::User)
            .await
            .unwrap();

        assert_eq!(user2_results.len(), 2);
        let user2_ids: Vec<Uuid> = user2_results.iter().map(|d| d.id).collect();
        assert!(user2_ids.contains(&user2_low_id));
        assert!(user2_ids.contains(&user2_failed_id));
        assert!(!user2_ids.contains(&user1_low_id));
        assert!(!user2_ids.contains(&user1_failed_id));

        // Test admin can see all documents
        let admin_results = database
            .find_low_confidence_and_failed_documents(50.0, user1_id, crate::models::UserRole::Admin)
            .await
            .unwrap();

        assert!(admin_results.len() >= 4); // At least our 4 test documents
        let admin_ids: Vec<Uuid> = admin_results.iter().map(|d| d.id).collect();
        assert!(admin_ids.contains(&user1_low_id));
        assert!(admin_ids.contains(&user1_failed_id));
        assert!(admin_ids.contains(&user2_low_id));
        assert!(admin_ids.contains(&user2_failed_id));
    }
}

455  src/tests/enhanced_ocr_tests.rs  Normal file

@@ -0,0 +1,455 @@
#[cfg(test)]
mod tests {
    use crate::ocr::enhanced::{EnhancedOcrService, OcrResult, ImageQualityStats};
    use crate::models::Settings;
    use std::fs;
    use tempfile::{NamedTempFile, TempDir};

    fn create_test_settings() -> Settings {
        Settings::default()
    }

    fn create_temp_dir() -> TempDir {
        TempDir::new().expect("Failed to create temp directory")
    }

    #[test]
    fn test_enhanced_ocr_service_creation() {
        let temp_dir = create_temp_dir();
        let temp_path = temp_dir.path().to_str().unwrap().to_string();
        let service = EnhancedOcrService::new(temp_path);

        // Service should be created successfully
        assert!(!service.temp_dir.is_empty());
    }

    #[test]
    fn test_image_quality_stats_creation() {
        let stats = ImageQualityStats {
            average_brightness: 128.0,
            contrast_ratio: 0.5,
            noise_level: 0.1,
            sharpness: 0.8,
        };

        assert_eq!(stats.average_brightness, 128.0);
        assert_eq!(stats.contrast_ratio, 0.5);
        assert_eq!(stats.noise_level, 0.1);
        assert_eq!(stats.sharpness, 0.8);
    }

    #[test]
    fn test_ocr_result_structure() {
        let result = OcrResult {
            text: "Test text".to_string(),
            confidence: 85.5,
            processing_time_ms: 1500,
            word_count: 2,
            preprocessing_applied: vec!["noise_reduction".to_string()],
            processed_image_path: Some("/tmp/processed.png".to_string()),
        };

        assert_eq!(result.text, "Test text");
        assert_eq!(result.confidence, 85.5);
        assert_eq!(result.processing_time_ms, 1500);
        assert_eq!(result.word_count, 2);
        assert_eq!(result.preprocessing_applied.len(), 1);
        assert!(result.processed_image_path.is_some());
    }

    #[tokio::test]
    async fn test_extract_text_from_plain_text() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::with_suffix(".txt").unwrap();
        let test_content = "This is a test text file with multiple words.";
        fs::write(temp_file.path(), test_content).unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "text/plain", &settings)
            .await;

        assert!(result.is_ok());
        let ocr_result = result.unwrap();
        assert_eq!(ocr_result.text.trim(), test_content);
        assert_eq!(ocr_result.confidence, 100.0); // Plain text should be 100% confident
        assert_eq!(ocr_result.word_count, 9); // "This is a test text file with multiple words"
        assert!(ocr_result.processing_time_ms > 0);
        assert!(ocr_result.preprocessing_applied.contains(&"Plain text read".to_string()));
    }

    #[tokio::test]
    async fn test_extract_text_with_context() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::with_suffix(".txt").unwrap();
        let test_content = "Context test content";
        fs::write(temp_file.path(), test_content).unwrap();

        let result = service
            .extract_text_with_context(
                temp_file.path().to_str().unwrap(),
                "text/plain",
                "test_file.txt",
                20, // Length of "Context test content"
                &settings,
            )
            .await;

        assert!(result.is_ok());
        let ocr_result = result.unwrap();
        assert_eq!(ocr_result.text.trim(), test_content);
        assert_eq!(ocr_result.confidence, 100.0);
    }

    #[tokio::test]
    async fn test_extract_text_unsupported_mime_type() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::new().unwrap();
        fs::write(temp_file.path(), "some content").unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "application/unknown", &settings)
            .await;

        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("Unsupported file type"));
    }

    #[tokio::test]
    async fn test_extract_text_nonexistent_file() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let result = service
            .extract_text("/nonexistent/file.txt", "text/plain", &settings)
            .await;

        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_extract_text_large_file_truncation() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::with_suffix(".txt").unwrap();

        // Create a file larger than the limit (50MB for text files)
        let large_content = "A".repeat(60 * 1024 * 1024); // 60MB
        fs::write(temp_file.path(), &large_content).unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "text/plain", &settings)
            .await;

        // Should fail due to size limit
        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("too large"));
    }

    #[cfg(feature = "ocr")]
    #[test]
    fn test_validate_ocr_quality_high_confidence() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let mut settings = create_test_settings();
        settings.ocr_min_confidence = 30.0;

        let result = OcrResult {
            text: "This is high quality OCR text with good words.".to_string(),
            confidence: 95.0,
            processing_time_ms: 1000,
            word_count: 9,
            preprocessing_applied: vec![],
            processed_image_path: None,
        };

        let is_valid = service.validate_ocr_quality(&result, &settings);
        assert!(is_valid);
    }

    #[cfg(feature = "ocr")]
    #[test]
    fn test_validate_ocr_quality_low_confidence() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let mut settings = create_test_settings();
        settings.ocr_min_confidence = 50.0;

        let result = OcrResult {
            text: "Poor quality text".to_string(),
            confidence: 25.0, // Below threshold
            processing_time_ms: 1000,
            word_count: 3,
            preprocessing_applied: vec![],
            processed_image_path: None,
        };

        let is_valid = service.validate_ocr_quality(&result, &settings);
        assert!(!is_valid);
    }

    #[cfg(feature = "ocr")]
    #[test]
    fn test_validate_ocr_quality_no_words() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let result = OcrResult {
            text: "".to_string(),
            confidence: 95.0,
            processing_time_ms: 1000,
            word_count: 0, // No words
            preprocessing_applied: vec![],
            processed_image_path: None,
        };

        let is_valid = service.validate_ocr_quality(&result, &settings);
        assert!(!is_valid);
    }

    #[cfg(feature = "ocr")]
    #[test]
    fn test_validate_ocr_quality_poor_character_distribution() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let result = OcrResult {
            text: "!!!@@@###$$$%%%^^^&&&***".to_string(), // Mostly symbols, < 30% alphanumeric
            confidence: 85.0,
            processing_time_ms: 1000,
            word_count: 1,
            preprocessing_applied: vec![],
            processed_image_path: None,
        };

        let is_valid = service.validate_ocr_quality(&result, &settings);
        assert!(!is_valid);
    }

    #[cfg(feature = "ocr")]
    #[test]
    fn test_validate_ocr_quality_good_character_distribution() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let result = OcrResult {
            text: "The quick brown fox jumps over the lazy dog. 123".to_string(), // Good alphanumeric ratio
            confidence: 85.0,
            processing_time_ms: 1000,
            word_count: 10,
            preprocessing_applied: vec![],
            processed_image_path: None,
        };

        let is_valid = service.validate_ocr_quality(&result, &settings);
        assert!(is_valid);
    }

    #[tokio::test]
    async fn test_word_count_calculation() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let test_cases = vec![
            ("", 0),
            ("word", 1),
            ("two words", 2),
            (" spaced words ", 2),
            ("Multiple\nlines\nof\ntext", 4),
            ("punctuation, words! work? correctly.", 4),
        ];

        for (content, expected_count) in test_cases {
            let temp_file = NamedTempFile::with_suffix(".txt").unwrap();
            fs::write(temp_file.path(), content).unwrap();

            let result = service
                .extract_text(temp_file.path().to_str().unwrap(), "text/plain", &settings)
                .await;

            assert!(result.is_ok());
            let ocr_result = result.unwrap();
            assert_eq!(ocr_result.word_count, expected_count, "Failed for content: '{}'", content);
        }
    }

    #[tokio::test]
    async fn test_pdf_extraction_with_invalid_pdf() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();
        fs::write(temp_file.path(), "Not a valid PDF").unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "application/pdf", &settings)
            .await;

        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("Invalid PDF") || error_msg.contains("Missing") || error_msg.contains("corrupted"));
    }

    #[tokio::test]
    async fn test_pdf_extraction_with_minimal_valid_pdf() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        // Minimal PDF with "Hello" text
        let pdf_content = b"%PDF-1.4
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
endobj
4 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
endobj
5 0 obj
<< /Length 44 >>
stream
BT
/F1 12 Tf
100 700 Td
(Hello) Tj
ET
endstream
endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000262 00000 n
0000000341 00000 n
trailer
<< /Size 6 /Root 1 0 R >>
startxref
435
%%EOF";

        let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();
        fs::write(temp_file.path(), pdf_content).unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "application/pdf", &settings)
            .await;

        match result {
            Ok(ocr_result) => {
                // PDF extraction succeeded
                assert_eq!(ocr_result.confidence, 95.0); // PDF text extraction should be high confidence
                assert!(ocr_result.processing_time_ms > 0);
                assert!(ocr_result.preprocessing_applied.contains(&"PDF text extraction".to_string()));
                println!("PDF extracted text: '{}'", ocr_result.text);
            }
            Err(e) => {
                // PDF extraction might fail depending on the pdf-extract library
                println!("PDF extraction failed (may be expected): {}", e);
            }
        }
    }

    #[tokio::test]
    async fn test_pdf_size_limit() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();

        // Create a file larger than the 100MB PDF limit
        let large_pdf_content = format!("%PDF-1.4\n{}", "A".repeat(110 * 1024 * 1024));
        fs::write(temp_file.path(), large_pdf_content).unwrap();

        let result = service
            .extract_text(temp_file.path().to_str().unwrap(), "application/pdf", &settings)
            .await;

        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("too large"));
    }

    #[test]
    fn test_settings_default_values() {
        let settings = Settings::default();

        // Test that OCR-related settings have reasonable defaults
        assert_eq!(settings.ocr_min_confidence, 30.0);
        assert_eq!(settings.ocr_dpi, 300);
        assert_eq!(settings.ocr_page_segmentation_mode, 3);
        assert_eq!(settings.ocr_engine_mode, 3);
        assert!(settings.enable_background_ocr);
        assert!(settings.ocr_enhance_contrast);
        assert!(settings.ocr_remove_noise);
        assert!(settings.ocr_detect_orientation);
    }

    #[tokio::test]
    async fn test_concurrent_ocr_processing() {
        let temp_dir = create_temp_dir();
        let service = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
        let settings = create_test_settings();

        let mut handles = vec![];

        // Process multiple files concurrently
        for i in 0..5 {
            let temp_file = NamedTempFile::with_suffix(".txt").unwrap();
            let content = format!("Concurrent test content {}", i);
            fs::write(temp_file.path(), &content).unwrap();

            let service_clone = EnhancedOcrService::new(temp_dir.path().to_str().unwrap().to_string());
            let settings_clone = settings.clone();
            let file_path = temp_file.path().to_str().unwrap().to_string();

            let handle = tokio::spawn(async move {
                let result = service_clone
                    .extract_text(&file_path, "text/plain", &settings_clone)
                    .await;

                // Keep temp_file alive until task completes
                drop(temp_file);
                result
            });

            handles.push(handle);
        }

        // Wait for all tasks to complete
        let results = futures::future::join_all(handles).await;

        // All tasks should succeed
        for (i, result) in results.into_iter().enumerate() {
            assert!(result.is_ok(), "Task {} failed", i);
            let ocr_result = result.unwrap().unwrap();
            assert!(ocr_result.text.contains(&format!("Concurrent test content {}", i)));
            assert_eq!(ocr_result.confidence, 100.0);
        }
    }
}

@@ -8,6 +8,7 @@ mod file_service_tests;
mod ignored_files_tests;
mod labels_tests;
mod ocr_tests;
mod enhanced_ocr_tests;
mod oidc_tests;
mod enhanced_search_tests;
mod settings_tests;

@@ -233,6 +233,57 @@ impl DocumentDeletionTestClient {
        let result: Value = response.json().await?;
        Ok(result)
    }

    /// Delete failed OCR documents
    async fn delete_failed_ocr_documents(&self, preview_only: bool) -> Result<Value, Box<dyn std::error::Error>> {
        let token = self.token.as_ref().ok_or("Not authenticated")?;

        let response = self.client
            .post(&format!("{}/api/documents/delete-failed-ocr", get_base_url()))
            .header("Authorization", format!("Bearer {}", token))
            .json(&json!({
                "preview_only": preview_only
            }))
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !response.status().is_success() {
            return Err(format!("Delete failed OCR documents failed: {}", response.text().await?).into());
        }

        let result: Value = response.json().await?;
        Ok(result)
    }

    /// Delete low confidence documents (updated to use new combined endpoint)
    async fn delete_low_confidence_documents(&self, threshold: f64, preview_only: bool) -> Result<Value, Box<dyn std::error::Error>> {
        let token = self.token.as_ref().ok_or("Not authenticated")?;

        let response = self.client
            .post(&format!("{}/api/documents/delete-low-confidence", get_base_url()))
            .header("Authorization", format!("Bearer {}", token))
            .json(&json!({
                "max_confidence": threshold,
                "preview_only": preview_only
            }))
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !response.status().is_success() {
            return Err(format!("Delete low confidence documents failed: {}", response.text().await?).into());
        }

        let result: Value = response.json().await?;
        Ok(result)
    }

    /// Create and login user (convenience method)
    async fn create_and_login_user(&mut self, username: &str, password: &str, role: UserRole) -> Result<String, Box<dyn std::error::Error>> {
        let email = format!("{}@example.com", username);
        self.register_and_login(username, &email, password, Some(role)).await
    }
}

/// Skip test if server is not running

@@ -613,4 +664,224 @@ async fn test_document_count_updates_after_deletion() {
    assert_eq!(final_count, initial_count, "Document count should be back to initial after bulk deletion");

    println!("✅ Document count updates after deletion test passed");
}

/// Test the new failed OCR document deletion endpoint
#[tokio::test]
async fn test_delete_failed_ocr_documents_endpoint() {
    let mut client = DocumentDeletionTestClient::new();

    if let Err(e) = client.check_server_health().await {
        println!("⚠️ Server not available: {}. Skipping test.", e);
        return;
    }

    println!("🧪 Testing failed OCR document deletion endpoint...");

    // Create and login as regular user
    client.create_and_login_user("failed_ocr_user", "failed_ocr_password", UserRole::User)
        .await.expect("Failed to create and login user");

    // Preview failed documents (should return empty initially)
    let preview_response = client.delete_failed_ocr_documents(true)
        .await.expect("Failed to preview failed OCR documents");

    assert_eq!(preview_response["success"], true);
    assert!(preview_response["matched_count"].as_i64().unwrap() >= 0);
    assert_eq!(preview_response["preview"], true);

    println!("📋 Preview request successful: {} failed documents found",
        preview_response["matched_count"]);

    // If there are failed documents, test deletion
    if preview_response["matched_count"].as_i64().unwrap() > 0 {
        // Test actual deletion
        let delete_response = client.delete_failed_ocr_documents(false)
            .await.expect("Failed to delete failed OCR documents");

        assert_eq!(delete_response["success"], true);
        assert!(delete_response["deleted_count"].as_i64().unwrap() >= 0);
        assert!(delete_response.get("preview").is_none());

        println!("🗑️ Successfully deleted {} failed documents",
            delete_response["deleted_count"]);
    } else {
        println!("ℹ️ No failed documents found to delete");
    }

    println!("✅ Failed OCR document deletion endpoint test passed");
}

/// Test confidence-based vs failed document deletion distinction
#[tokio::test]
async fn test_confidence_vs_failed_document_distinction() {
    let mut client = DocumentDeletionTestClient::new();

    if let Err(e) = client.check_server_health().await {
        println!("⚠️ Server not available: {}. Skipping test.", e);
        return;
    }

    println!("🧪 Testing distinction between confidence and failed document deletion...");

    // Create and login as admin to see all documents
    client.create_and_login_user("distinction_admin", "distinction_password", UserRole::Admin)
        .await.expect("Failed to create and login admin");

    // Get baseline counts
    let initial_low_confidence = client.delete_low_confidence_documents(30.0, true)
        .await.expect("Failed to preview low confidence documents");
    let initial_failed = client.delete_failed_ocr_documents(true)
        .await.expect("Failed to preview failed documents");

    let initial_low_count = initial_low_confidence["matched_count"].as_i64().unwrap();
    let initial_failed_count = initial_failed["matched_count"].as_i64().unwrap();

    println!("📊 Initial counts - Low confidence: {}, Failed: {}",
        initial_low_count, initial_failed_count);

    // Test that the endpoints return different sets of documents
    // (This assumes there are some of each type in the system)

    // Verify that failed documents endpoint only includes failed/NULL confidence docs
    if initial_failed_count > 0 {
        let failed_docs = initial_failed["document_ids"].as_array().unwrap();
        println!("🔍 Found {} failed document IDs", failed_docs.len());
    }

    // Verify that low confidence endpoint respects threshold
    if initial_low_count > 0 {
        let low_confidence_docs = initial_low_confidence["document_ids"].as_array().unwrap();
        println!("🔍 Found {} low confidence document IDs", low_confidence_docs.len());
    }

    println!("✅ Document type distinction test passed");
}

/// Test error handling for delete endpoints
#[tokio::test]
async fn test_delete_endpoints_error_handling() {
    let client = DocumentDeletionTestClient::new();

    if let Err(e) = client.check_server_health().await {
        println!("⚠️ Server not available: {}. Skipping test.", e);
        return;
    }

    println!("🧪 Testing delete endpoints error handling...");

    // Test unauthenticated request
    let failed_response = client.client
        .post(&format!("{}/api/documents/delete-failed-ocr", get_base_url()))
        .json(&json!({"preview_only": true}))
        .timeout(TIMEOUT)
        .send()
        .await
        .expect("Failed to send request");

    assert_eq!(failed_response.status(), 401, "Should require authentication");

    // Test invalid JSON
    let invalid_json_response = client.client
        .post(&format!("{}/api/documents/delete-failed-ocr", get_base_url()))
        .header("content-type", "application/json")
        .body("invalid json")
        .timeout(TIMEOUT)
        .send()
        .await
        .expect("Failed to send request");

    assert!(invalid_json_response.status().is_client_error(), "Should reject invalid JSON");

    println!("✅ Error handling test passed");
}

/// Test role-based access for new delete endpoints
#[tokio::test]
async fn test_role_based_access_for_delete_endpoints() {
    let mut client = DocumentDeletionTestClient::new();

    if let Err(e) = client.check_server_health().await {
        println!("⚠️ Server not available: {}. Skipping test.", e);
        return;
    }

    println!("🧪 Testing role-based access for delete endpoints...");

    // Test as regular user
    client.create_and_login_user("delete_regular_user", "delete_password", UserRole::User)
        .await.expect("Failed to create and login user");

    let user_response = client.delete_failed_ocr_documents(true)
        .await.expect("Failed to preview as user");

    assert_eq!(user_response["success"], true);
    let user_count = user_response["matched_count"].as_i64().unwrap();

    // Test as admin
    client.create_and_login_user("delete_admin_user", "delete_admin_password", UserRole::Admin)
        .await.expect("Failed to create and login admin");

    let admin_response = client.delete_failed_ocr_documents(true)
        .await.expect("Failed to preview as admin");

    assert_eq!(admin_response["success"], true);
    let admin_count = admin_response["matched_count"].as_i64().unwrap();

    // Admin should see at least as many documents as regular user
    assert!(admin_count >= user_count,
        "Admin should see at least as many documents as user");

    println!("👤 User can see {} documents, Admin can see {} documents",
        user_count, admin_count);

    println!("✅ Role-based access test passed");
}

/// Test the enhanced low confidence deletion with failed documents
#[tokio::test]
async fn test_enhanced_low_confidence_deletion() {
    let mut client = DocumentDeletionTestClient::new();

    if let Err(e) = client.check_server_health().await {
        println!("⚠️ Server not available: {}. Skipping test.", e);
        return;
    }

    println!("🧪 Testing enhanced low confidence deletion (includes failed docs)...");

    // Create and login as admin
    client.create_and_login_user("enhanced_delete_admin", "enhanced_password", UserRole::Admin)
        .await.expect("Failed to create and login admin");

    // Test with various thresholds
    let thresholds = vec![0.0, 30.0, 50.0, 85.0, 100.0];

    for threshold in thresholds {
        let response = client.delete_low_confidence_documents(threshold, true)
            .await.expect(&format!("Failed to preview with threshold {}", threshold));

        assert_eq!(response["success"], true);
        let count = response["matched_count"].as_i64().unwrap();

        println!("🎯 Threshold {}%: {} documents would be deleted", threshold, count);

        // Verify response format
        assert!(response.get("document_ids").is_some());
        assert_eq!(response["preview"], true);
    }

    // Test that higher thresholds generally include more documents
    let low_threshold_response = client.delete_low_confidence_documents(10.0, true)
        .await.expect("Failed to preview with low threshold");
    let high_threshold_response = client.delete_low_confidence_documents(90.0, true)
        .await.expect("Failed to preview with high threshold");

    let low_count = low_threshold_response["matched_count"].as_i64().unwrap();
    let high_count = high_threshold_response["matched_count"].as_i64().unwrap();

    assert!(high_count >= low_count,
        "Higher threshold should include at least as many documents as lower threshold");

    println!("✅ Enhanced low confidence deletion test passed");
}