mirror of
https://github.com/readur/readur.git
synced 2026-02-18 04:51:16 -06:00
Merge pull request #87 from readur/feat/retry-functionality
feat(server/client): implement retry functionality for both successfu…
This commit is contained in:
427
frontend/src/components/BulkRetryModal.tsx
Normal file
427
frontend/src/components/BulkRetryModal.tsx
Normal file
@@ -0,0 +1,427 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Button,
|
||||
FormControl,
|
||||
FormLabel,
|
||||
RadioGroup,
|
||||
FormControlLabel,
|
||||
Radio,
|
||||
TextField,
|
||||
Chip,
|
||||
Box,
|
||||
Typography,
|
||||
Alert,
|
||||
LinearProgress,
|
||||
Accordion,
|
||||
AccordionSummary,
|
||||
AccordionDetails,
|
||||
Checkbox,
|
||||
Slider,
|
||||
Stack,
|
||||
Card,
|
||||
CardContent,
|
||||
Divider,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
ExpandMore as ExpandMoreIcon,
|
||||
Schedule as ScheduleIcon,
|
||||
Assessment as AssessmentIcon,
|
||||
Refresh as RefreshIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService, BulkOcrRetryRequest, OcrRetryFilter, BulkOcrRetryResponse } from '../services/api';
|
||||
|
||||
/** Props accepted by {@link BulkRetryModal}. */
interface BulkRetryModalProps {
  /** Whether the dialog is currently shown. */
  open: boolean;
  /** Invoked when the dialog asks to close (Cancel, dismissal, or after a successful retry). */
  onClose: () => void;
  /** Invoked with the server response once a non-preview retry request has succeeded. */
  onSuccess: (result: BulkOcrRetryResponse) => void;
  /** Documents sent as `document_ids` when the "selected documents" mode is active. */
  selectedDocumentIds?: Array<string>;
}
|
||||
|
||||
/**
 * File types offered as quick-filter chips.
 * Keys are the MIME types stored in the filter; values are the chip labels.
 */
const COMMON_MIME_TYPES = Object.entries({
  'application/pdf': 'PDF',
  'image/png': 'PNG',
  'image/jpeg': 'JPEG',
  'image/tiff': 'TIFF',
  'text/plain': 'Text',
}).map(([value, label]) => ({ value, label }));
|
||||
|
||||
/**
 * OCR failure reasons offered as quick-filter chips.
 * Keys are the reason codes stored in the filter; values are the chip labels.
 */
const COMMON_FAILURE_REASONS = Object.entries({
  pdf_font_encoding: 'Font Encoding Issues',
  ocr_timeout: 'Processing Timeout',
  pdf_corruption: 'File Corruption',
  low_ocr_confidence: 'Low Confidence',
  no_extractable_text: 'No Text Found',
  ocr_memory_limit: 'Memory Limit',
}).map(([value, label]) => ({ value, label }));
|
||||
|
||||
/** Preset "maximum file size" chips; `value` is the limit in bytes (1 MB = 1024 * 1024 bytes). */
const FILE_SIZE_PRESETS = [1, 5, 10, 50].map((megabytes) => ({
  label: `< ${megabytes}MB`,
  value: megabytes * 1024 * 1024,
}));
|
||||
|
||||
export const BulkRetryModal: React.FC<BulkRetryModalProps> = ({
|
||||
open,
|
||||
onClose,
|
||||
onSuccess,
|
||||
selectedDocumentIds = [],
|
||||
}) => {
|
||||
// Which documents the retry targets: every failed document, the caller's
// selection, or whatever matches `filter`.
const [mode, setMode] = useState<'all' | 'specific' | 'filter'>('all');
// Criteria sent with the request when `mode` is 'filter'.
const [filter, setFilter] = useState<OcrRetryFilter>({});
// Priority sent as `priority_override`, but only while `usePriorityOverride` is on.
const [priorityOverride, setPriorityOverride] = useState<number>(10);
const [usePriorityOverride, setUsePriorityOverride] = useState(false);
// True while a preview or execute request is in flight.
const [loading, setLoading] = useState(false);
// Result of the most recent preview; null until one has run or after the
// selection changed. The execute button stays disabled while this is null.
const [previewResult, setPreviewResult] = useState<BulkOcrRetryResponse | null>(null);
// Message shown in the error alert, or null when there is nothing to report.
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Keep the retry mode in step with the caller's selection.
//
// The effect is keyed on the selection *size* rather than the array itself:
// the default `[]` (and any inline array a parent passes) is a new object on
// every render, which would re-run the effect constantly and keep overriding
// the mode the user picked.
useEffect(() => {
  setMode((current) => {
    if (selectedDocumentIds.length > 0) return 'specific';
    // The "selected documents" option is disabled without a selection, so do
    // not leave the form stuck on it with an empty id list.
    return current === 'specific' ? 'all' : current;
  });
  // A preview computed for a different selection no longer applies.
  setPreviewResult(null);
}, [selectedDocumentIds.length]);
|
||||
|
||||
/**
 * Radio-group handler: switches the retry mode and drops the preview and
 * error that belonged to the previous mode.
 */
const handleModeChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  const nextMode = event.target.value as 'all' | 'specific' | 'filter';
  setError(null);
  setPreviewResult(null);
  setMode(nextMode);
};
|
||||
|
||||
/**
 * Sets a single filter criterion and invalidates the current preview.
 *
 * The value type is tied to the key, so a criterion cannot be stored with a
 * value of the wrong shape. Passing `undefined` clears an optional criterion.
 *
 * @param key   Filter field to update.
 * @param value New value for that field.
 */
const handleFilterChange = <K extends keyof OcrRetryFilter>(key: K, value: OcrRetryFilter[K]) => {
  setFilter(prev => ({
    ...prev,
    [key]: value,
  }));
  // The preview was computed for the old criteria.
  setPreviewResult(null);
};
|
||||
|
||||
/** Adds `mimeType` to the filter's MIME types, or removes it when already selected. */
const handleMimeTypeToggle = (mimeType: string) => {
  const selected = filter.mime_types ?? [];
  const next = selected.includes(mimeType)
    ? selected.filter(entry => entry !== mimeType)
    : [...selected, mimeType];
  handleFilterChange('mime_types', next);
};
|
||||
|
||||
/** Adds `reason` to the filter's failure reasons, or removes it when already selected. */
const handleFailureReasonToggle = (reason: string) => {
  const selected = filter.failure_reasons ?? [];
  const next = selected.includes(reason)
    ? selected.filter(entry => entry !== reason)
    : [...selected, reason];
  handleFilterChange('failure_reasons', next);
};
|
||||
|
||||
/**
 * Assembles the bulk-retry payload from the current form state.
 *
 * `document_ids` is included only in 'specific' mode, `filter` only in
 * 'filter' mode, and `priority_override` only when the override is enabled.
 *
 * @param preview Sent as `preview_only`.
 */
const buildRequest = (preview: boolean): BulkOcrRetryRequest => ({
  mode,
  preview_only: preview,
  ...(mode === 'specific' ? { document_ids: selectedDocumentIds } : {}),
  ...(mode === 'filter' ? { filter } : {}),
  ...(usePriorityOverride ? { priority_override: priorityOverride } : {}),
});
|
||||
|
||||
/**
 * Sends the current form as a preview request (`preview_only: true`) and
 * stores the response for display.
 *
 * On failure the previous preview is discarded and an error message is shown.
 */
const handlePreview = async () => {
  setLoading(true);
  setError(null);
  try {
    const response = await documentService.bulkRetryOcr(buildRequest(true));
    setPreviewResult(response.data);
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an object shaped like
    // `{ response: { data: { message } } }`; read it defensively and only
    // surface a non-empty string, since `error` is rendered as text.
    const apiMessage = (err as { response?: { data?: { message?: unknown } } } | null | undefined)
      ?.response?.data?.message;
    setError(
      typeof apiMessage === 'string' && apiMessage
        ? apiMessage
        : 'Failed to preview retry operation',
    );
    setPreviewResult(null);
  } finally {
    setLoading(false);
  }
};
|
||||
|
||||
/**
 * Sends the current form as a real retry request (`preview_only: false`).
 *
 * On success the response is handed to `onSuccess` and the dialog is asked to
 * close; on failure the dialog stays open and shows an error message.
 */
const handleExecute = async () => {
  setLoading(true);
  setError(null);
  try {
    const response = await documentService.bulkRetryOcr(buildRequest(false));
    onSuccess(response.data);
    onClose();
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an object shaped like
    // `{ response: { data: { message } } }`; read it defensively and only
    // surface a non-empty string, since `error` is rendered as text.
    const apiMessage = (err as { response?: { data?: { message?: unknown } } } | null | undefined)
      ?.response?.data?.message;
    setError(
      typeof apiMessage === 'string' && apiMessage
        ? apiMessage
        : 'Failed to execute retry operation',
    );
  } finally {
    setLoading(false);
  }
};
|
||||
|
||||
/**
 * Formats a byte count for display using binary units (1 KB = 1024 B).
 *
 * Values below 1024 are shown as whole bytes; larger values are shown with one
 * decimal in KB, MB or GB (GB is the largest unit used).
 *
 * @param bytes Size in bytes.
 * @returns A label such as `"512 B"`, `"1.5 KB"` or `"2.0 GB"`.
 */
const formatFileSize = (bytes: number) => {
  if (bytes < 1024) return `${bytes} B`;

  const units = ['KB', 'MB', 'GB'];
  let scaled = bytes / 1024;
  let unitIndex = 0;
  // Step up based on the *rounded* value so a size just under a boundary is
  // shown as "1.0 MB" rather than "1024.0 KB".
  while (unitIndex < units.length - 1 && Number(scaled.toFixed(1)) >= 1024) {
    scaled /= 1024;
    unitIndex += 1;
  }
  return `${scaled.toFixed(1)} ${units[unitIndex]}`;
};
|
||||
|
||||
/**
 * Formats a duration given in (possibly fractional) minutes for display.
 *
 * - under a minute: whole seconds (`"30 seconds"`)
 * - under an hour: whole minutes (`"45 minutes"`)
 * - otherwise: hours plus any remaining minutes (`"1 hour 30 minutes"`)
 *
 * Units are singular when the count is 1, and values that round up to the
 * next unit are shown in that unit (59.7 minutes is `"1 hour"`). Negative or
 * non-finite input is treated as zero.
 *
 * @param minutes Duration in minutes.
 */
const formatDuration = (minutes: number) => {
  const withUnit = (count: number, unit: string) => `${count} ${unit}${count === 1 ? '' : 's'}`;

  const safeMinutes = Number.isFinite(minutes) ? Math.max(0, minutes) : 0;

  const totalSeconds = Math.round(safeMinutes * 60);
  if (totalSeconds < 60) return withUnit(totalSeconds, 'second');

  const totalMinutes = Math.round(safeMinutes);
  if (totalMinutes < 60) return withUnit(totalMinutes, 'minute');

  const hours = Math.floor(totalMinutes / 60);
  const leftoverMinutes = totalMinutes % 60;
  return leftoverMinutes === 0
    ? withUnit(hours, 'hour')
    : `${withUnit(hours, 'hour')} ${withUnit(leftoverMinutes, 'minute')}`;
};
|
||||
|
||||
return (
|
||||
<Dialog open={open} onClose={onClose} maxWidth="md" fullWidth>
|
||||
<DialogTitle>
|
||||
<Box display="flex" alignItems="center" gap={1}>
|
||||
<RefreshIcon />
|
||||
Bulk OCR Retry
|
||||
</Box>
|
||||
</DialogTitle>
|
||||
|
||||
<DialogContent>
|
||||
<Stack spacing={3}>
|
||||
{error && (
|
||||
<Alert severity="error">{error}</Alert>
|
||||
)}
|
||||
|
||||
{/* Selection Mode */}
|
||||
<FormControl component="fieldset">
|
||||
<FormLabel component="legend">Retry Mode</FormLabel>
|
||||
<RadioGroup value={mode} onChange={handleModeChange}>
|
||||
<FormControlLabel
|
||||
value="all"
|
||||
control={<Radio />}
|
||||
label="Retry all failed OCR documents"
|
||||
/>
|
||||
<FormControlLabel
|
||||
value="specific"
|
||||
control={<Radio />}
|
||||
label={`Retry selected documents (${selectedDocumentIds.length} selected)`}
|
||||
disabled={selectedDocumentIds.length === 0}
|
||||
/>
|
||||
<FormControlLabel
|
||||
value="filter"
|
||||
control={<Radio />}
|
||||
label="Retry documents matching criteria"
|
||||
/>
|
||||
</RadioGroup>
|
||||
</FormControl>
|
||||
|
||||
{/* Filter Options */}
|
||||
{mode === 'filter' && (
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Typography variant="h6">Filter Criteria</Typography>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
<Stack spacing={3}>
|
||||
{/* MIME Types */}
|
||||
<Box>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
File Types
|
||||
</Typography>
|
||||
<Box display="flex" flexWrap="wrap" gap={1}>
|
||||
{COMMON_MIME_TYPES.map(({ value, label }) => (
|
||||
<Chip
|
||||
key={value}
|
||||
label={label}
|
||||
variant={filter.mime_types?.includes(value) ? 'filled' : 'outlined'}
|
||||
onClick={() => handleMimeTypeToggle(value)}
|
||||
clickable
|
||||
/>
|
||||
))}
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{/* Failure Reasons */}
|
||||
<Box>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
Failure Reasons
|
||||
</Typography>
|
||||
<Box display="flex" flexWrap="wrap" gap={1}>
|
||||
{COMMON_FAILURE_REASONS.map(({ value, label }) => (
|
||||
<Chip
|
||||
key={value}
|
||||
label={label}
|
||||
variant={filter.failure_reasons?.includes(value) ? 'filled' : 'outlined'}
|
||||
onClick={() => handleFailureReasonToggle(value)}
|
||||
clickable
|
||||
color="secondary"
|
||||
/>
|
||||
))}
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{/* File Size */}
|
||||
<Box>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
Maximum File Size
|
||||
</Typography>
|
||||
<Box display="flex" flexWrap="wrap" gap={1} mb={2}>
|
||||
{FILE_SIZE_PRESETS.map(({ label, value }) => (
|
||||
<Chip
|
||||
key={value}
|
||||
label={label}
|
||||
variant={filter.max_file_size === value ? 'filled' : 'outlined'}
|
||||
onClick={() => handleFilterChange('max_file_size',
|
||||
filter.max_file_size === value ? undefined : value)}
|
||||
clickable
|
||||
color="primary"
|
||||
/>
|
||||
))}
|
||||
</Box>
|
||||
{filter.max_file_size && (
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Max file size: {formatFileSize(filter.max_file_size)}
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
{/* Limit */}
|
||||
<TextField
  label="Maximum Documents to Retry"
  type="number"
  value={filter.limit ?? ''}
  onChange={(e) => {
    // Parse as base 10, treat anything unparsable (including an empty
    // field) as "no limit", and keep the value inside the 1-1000 range
    // the input advertises.
    const parsed = Number.parseInt(e.target.value, 10);
    handleFilterChange(
      'limit',
      Number.isNaN(parsed) ? undefined : Math.min(1000, Math.max(1, parsed)),
    );
  }}
  InputProps={{
    inputProps: { min: 1, max: 1000 }
  }}
  helperText="Leave empty for no limit"
/>
|
||||
</Stack>
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
)}
|
||||
|
||||
{/* Priority Override */}
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Typography variant="h6">Advanced Options</Typography>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
<Stack spacing={2}>
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={usePriorityOverride}
|
||||
onChange={(e) => setUsePriorityOverride(e.target.checked)}
|
||||
/>
|
||||
}
|
||||
label="Override processing priority"
|
||||
/>
|
||||
{usePriorityOverride && (
|
||||
<Box>
|
||||
<Typography gutterBottom>
|
||||
Priority: {priorityOverride} (Higher = More Urgent)
|
||||
</Typography>
|
||||
<Slider
|
||||
value={priorityOverride}
|
||||
onChange={(_, value) => setPriorityOverride(value as number)}
|
||||
min={1}
|
||||
max={20}
|
||||
marks={[
|
||||
{ value: 1, label: 'Low' },
|
||||
{ value: 10, label: 'Normal' },
|
||||
{ value: 20, label: 'High' },
|
||||
]}
|
||||
valueLabelDisplay="auto"
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
</Stack>
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
|
||||
{/* Preview Results */}
|
||||
{previewResult && (
|
||||
<Card>
|
||||
<CardContent>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
<AssessmentIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
||||
Preview Results
|
||||
</Typography>
|
||||
<Stack spacing={2}>
|
||||
<Box display="flex" justifyContent="space-between">
|
||||
<Typography>Documents matched:</Typography>
|
||||
<Typography fontWeight="bold">{previewResult.matched_count}</Typography>
|
||||
</Box>
|
||||
<Box display="flex" justifyContent="space-between">
|
||||
<Typography>Estimated processing time:</Typography>
|
||||
<Typography fontWeight="bold">
|
||||
<ScheduleIcon sx={{ mr: 0.5, verticalAlign: 'middle', fontSize: 'small' }} />
|
||||
{formatDuration(previewResult.estimated_total_time_minutes)}
|
||||
</Typography>
|
||||
</Box>
|
||||
{previewResult.documents && previewResult.documents.length > 0 && (
|
||||
<Box>
|
||||
<Typography variant="subtitle2" gutterBottom>
|
||||
Sample Documents:
|
||||
</Typography>
|
||||
<Box maxHeight={200} overflow="auto">
|
||||
{(previewResult.documents || []).slice(0, 10).map((doc) => (
|
||||
<Box key={doc.id} py={0.5}>
|
||||
<Typography variant="body2">
|
||||
{doc.filename} ({formatFileSize(doc.file_size)})
|
||||
{doc.ocr_failure_reason && (
|
||||
<Chip
|
||||
size="small"
|
||||
label={doc.ocr_failure_reason}
|
||||
sx={{ ml: 1, fontSize: '0.7rem' }}
|
||||
/>
|
||||
)}
|
||||
</Typography>
|
||||
</Box>
|
||||
))}
|
||||
{previewResult.documents && previewResult.documents.length > 10 && (
|
||||
<Typography variant="body2" color="text.secondary" mt={1}>
|
||||
... and {previewResult.documents.length - 10} more documents
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</Stack>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{loading && <LinearProgress />}
|
||||
</Stack>
|
||||
</DialogContent>
|
||||
|
||||
<DialogActions>
|
||||
<Button onClick={onClose} disabled={loading}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
onClick={handlePreview}
|
||||
disabled={loading}
|
||||
variant="outlined"
|
||||
>
|
||||
Preview
|
||||
</Button>
|
||||
<Button
|
||||
onClick={handleExecute}
|
||||
disabled={loading || !previewResult || previewResult.matched_count === 0}
|
||||
variant="contained"
|
||||
color="primary"
|
||||
>
|
||||
{loading ? 'Processing...' : `Retry ${previewResult?.matched_count || 0} Documents`}
|
||||
</Button>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
296
frontend/src/components/RetryHistoryModal.tsx
Normal file
296
frontend/src/components/RetryHistoryModal.tsx
Normal file
@@ -0,0 +1,296 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Button,
|
||||
Typography,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Paper,
|
||||
Alert,
|
||||
LinearProgress,
|
||||
Box,
|
||||
Chip,
|
||||
Tooltip,
|
||||
IconButton,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
History as HistoryIcon,
|
||||
Close as CloseIcon,
|
||||
Refresh as RefreshIcon,
|
||||
Schedule as ScheduleIcon,
|
||||
PriorityHigh as PriorityIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService, DocumentRetryHistoryItem } from '../services/api';
|
||||
import { format, formatDistanceToNow } from 'date-fns';
|
||||
|
||||
/** Props accepted by {@link RetryHistoryModal}. */
interface RetryHistoryModalProps {
  /** Whether the dialog is currently shown; history is loaded while it is open. */
  open: boolean;
  /** Invoked when the dialog asks to close. */
  onClose: () => void;
  /** Document whose retry history is fetched. */
  documentId: string;
  /** Optional name shown under the dialog title. */
  documentName?: string;
}
|
||||
|
||||
/** Display labels for known retry-reason codes. */
const RETRY_REASON_LABELS: Record<string, string> = Object.fromEntries<string>([
  ['manual_retry', 'Manual Retry'],
  ['bulk_retry_all', 'Bulk Retry (All)'],
  ['bulk_retry_specific', 'Bulk Retry (Selected)'],
  ['bulk_retry_filtered', 'Bulk Retry (Filtered)'],
  ['scheduled_retry', 'Scheduled Retry'],
  ['auto_retry', 'Automatic Retry'],
]);
|
||||
|
||||
/** Chip colour used for each previous-status value shown in the history table. */
const STATUS_COLORS: Record<
  string,
  'default' | 'primary' | 'secondary' | 'error' | 'info' | 'success' | 'warning'
> = {
  'pending': 'info',
  'processing': 'warning',
  'completed': 'success',
  'failed': 'error',
  'cancelled': 'default',
};
|
||||
|
||||
export const RetryHistoryModal: React.FC<RetryHistoryModalProps> = ({
|
||||
open,
|
||||
onClose,
|
||||
documentId,
|
||||
documentName,
|
||||
}) => {
|
||||
const [history, setHistory] = useState<DocumentRetryHistoryItem[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [totalRetries, setTotalRetries] = useState(0);
|
||||
|
||||
// Sequence number of the most recently started history request. A response is
// applied only if it belongs to that request, so a slow reply for a previous
// document (or an earlier refresh) cannot overwrite newer data.
const latestHistoryRequest = React.useRef(0);

/**
 * Fetches the retry history for `documentId` and stores it in state.
 *
 * Does nothing when no document id is set. On failure the list and the total
 * are cleared and an error message is shown.
 */
const loadRetryHistory = async () => {
  if (!documentId) return;

  latestHistoryRequest.current += 1;
  const requestId = latestHistoryRequest.current;

  setLoading(true);
  setError(null);
  try {
    const response = await documentService.getDocumentRetryHistory(documentId);
    if (requestId !== latestHistoryRequest.current) return; // superseded
    setHistory(response.data?.retry_history ?? []);
    setTotalRetries(response.data?.total_retries ?? 0);
  } catch (err: unknown) {
    if (requestId !== latestHistoryRequest.current) return; // superseded
    // A rejection is not guaranteed to be an object shaped like
    // `{ response: { data: { message } } }`; read it defensively and only
    // surface a non-empty string, since `error` is rendered as text.
    const apiMessage = (err as { response?: { data?: { message?: unknown } } } | null | undefined)
      ?.response?.data?.message;
    setError(
      typeof apiMessage === 'string' && apiMessage
        ? apiMessage
        : 'Failed to load retry history',
    );
    setHistory([]);
    setTotalRetries(0);
  } finally {
    // Only the newest request owns the loading indicator.
    if (requestId === latestHistoryRequest.current) {
      setLoading(false);
    }
  }
};
|
||||
|
||||
// Load the history whenever the dialog is open for a document, including when
// it is opened or the document changes.
useEffect(() => {
  if (!open || !documentId) return;
  void loadRetryHistory();
}, [open, documentId]);
|
||||
|
||||
/**
 * Returns the display label for a retry-reason code, falling back to the code
 * itself with underscores replaced by spaces.
 */
const formatRetryReason = (reason: string) => {
  const knownLabel = RETRY_REASON_LABELS[reason];
  if (knownLabel) return knownLabel;
  return reason.split('_').join(' ');
};
|
||||
|
||||
/**
 * Maps a numeric priority to its tier name: 15+ Very High, 12+ High,
 * 8+ Medium, 5+ Low, anything else Very Low.
 */
const getPriorityLabel = (priority: number) => {
  // Ordered from highest threshold to lowest; the first match wins.
  const tiers: ReadonlyArray<readonly [number, string]> = [
    [15, 'Very High'],
    [12, 'High'],
    [8, 'Medium'],
    [5, 'Low'],
  ];
  const tier = tiers.find(([minimum]) => priority >= minimum);
  return tier ? tier[1] : 'Very Low';
};
|
||||
|
||||
/**
 * Maps a numeric priority to a chip colour using the same thresholds as the
 * priority tiers: 15+ error, 12+ warning, 8+ primary, 5+ info, else default.
 */
const getPriorityColor = (priority: number): 'default' | 'primary' | 'secondary' | 'error' | 'info' | 'success' | 'warning' => {
  // Ordered from highest threshold to lowest; the first match wins.
  const tiers: ReadonlyArray<readonly [number, 'error' | 'warning' | 'primary' | 'info']> = [
    [15, 'error'],
    [12, 'warning'],
    [8, 'primary'],
    [5, 'info'],
  ];
  const tier = tiers.find(([minimum]) => priority >= minimum);
  return tier ? tier[1] : 'default';
};
|
||||
|
||||
return (
|
||||
<Dialog open={open} onClose={onClose} maxWidth="lg" fullWidth>
|
||||
<DialogTitle>
|
||||
<Box display="flex" alignItems="center" justifyContent="space-between">
|
||||
<Box display="flex" alignItems="center" gap={1}>
|
||||
<HistoryIcon />
|
||||
<Box>
|
||||
<Typography variant="h6">OCR Retry History</Typography>
|
||||
{documentName && (
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{documentName}
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
<IconButton onClick={onClose} size="small">
|
||||
<CloseIcon />
|
||||
</IconButton>
|
||||
</Box>
|
||||
</DialogTitle>
|
||||
|
||||
<DialogContent>
|
||||
{error && (
|
||||
<Alert severity="error" sx={{ mb: 2 }}>
|
||||
{error}
|
||||
</Alert>
|
||||
)}
|
||||
|
||||
{loading ? (
|
||||
<Box>
|
||||
<LinearProgress />
|
||||
<Typography variant="body2" color="text.secondary" mt={1} textAlign="center">
|
||||
Loading retry history...
|
||||
</Typography>
|
||||
</Box>
|
||||
) : (!history || history.length === 0) ? (
|
||||
<Alert severity="info">
|
||||
<Typography variant="body1">
|
||||
No retry attempts found for this document.
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary" mt={1}>
|
||||
This document hasn't been retried yet, or retry history is not available.
|
||||
</Typography>
|
||||
</Alert>
|
||||
) : (
|
||||
<Box>
|
||||
{/* Summary */}
|
||||
<Alert severity="info" sx={{ mb: 3 }}>
|
||||
<Typography variant="body1">
|
||||
<strong>{totalRetries}</strong> retry attempts found for this document.
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Most recent attempt: {history && history.length > 0 ? formatDistanceToNow(new Date(history[0].created_at)) + ' ago' : 'No attempts yet'}
|
||||
</Typography>
|
||||
</Alert>
|
||||
|
||||
{/* History Table */}
|
||||
<TableContainer component={Paper}>
|
||||
<Table>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell>Date & Time</TableCell>
|
||||
<TableCell>Retry Reason</TableCell>
|
||||
<TableCell>Previous Status</TableCell>
|
||||
<TableCell>Priority</TableCell>
|
||||
<TableCell>Queue Status</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
{(history || []).map((item, index) => (
|
||||
<TableRow key={item.id} hover>
|
||||
<TableCell>
|
||||
<Box>
|
||||
<Typography variant="body2">
|
||||
{format(new Date(item.created_at), 'MMM dd, yyyy')}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{format(new Date(item.created_at), 'h:mm a')}
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
({formatDistanceToNow(new Date(item.created_at))} ago)
|
||||
</Typography>
|
||||
</Box>
|
||||
</TableCell>
|
||||
|
||||
<TableCell>
|
||||
<Chip
|
||||
label={formatRetryReason(item.retry_reason)}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
/>
|
||||
</TableCell>
|
||||
|
||||
<TableCell>
|
||||
<Box>
|
||||
{item.previous_status && (
|
||||
<Chip
|
||||
label={item.previous_status}
|
||||
size="small"
|
||||
color={STATUS_COLORS[item.previous_status] || 'default'}
|
||||
sx={{ mb: 0.5 }}
|
||||
/>
|
||||
)}
|
||||
{item.previous_failure_reason && (
|
||||
<Typography variant="caption" display="block" color="text.secondary">
|
||||
{item.previous_failure_reason.replace(/_/g, ' ')}
|
||||
</Typography>
|
||||
)}
|
||||
{item.previous_error && (
|
||||
<Tooltip title={item.previous_error}>
|
||||
<Typography variant="caption" display="block" color="error.main" sx={{
|
||||
maxWidth: 200,
|
||||
overflow: 'hidden',
|
||||
textOverflow: 'ellipsis',
|
||||
whiteSpace: 'nowrap',
|
||||
cursor: 'help'
|
||||
}}>
|
||||
{item.previous_error}
|
||||
</Typography>
|
||||
</Tooltip>
|
||||
)}
|
||||
</Box>
|
||||
</TableCell>
|
||||
|
||||
<TableCell>
|
||||
<Tooltip title={`Priority: ${item.priority}/20`}>
|
||||
<Chip
|
||||
icon={<PriorityIcon fontSize="small" />}
|
||||
label={`${getPriorityLabel(item.priority)} (${item.priority})`}
|
||||
size="small"
|
||||
color={getPriorityColor(item.priority)}
|
||||
/>
|
||||
</Tooltip>
|
||||
</TableCell>
|
||||
|
||||
<TableCell>
|
||||
{item.queue_id ? (
|
||||
<Box>
|
||||
<Typography variant="body2" color="success.main">
|
||||
✓ Queued
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
ID: {item.queue_id.slice(0, 8)}...
|
||||
</Typography>
|
||||
</Box>
|
||||
) : (
|
||||
<Typography variant="body2" color="warning.main">
|
||||
⚠ Not queued
|
||||
</Typography>
|
||||
)}
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
|
||||
{/* Legend */}
|
||||
<Box mt={2} p={2} bgcolor="grey.50" borderRadius={1}>
|
||||
<Typography variant="caption" color="text.secondary" paragraph>
|
||||
<strong>Priority Levels:</strong> Very High (15-20), High (12-14), Medium (8-11), Low (5-7), Very Low (1-4)
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
<strong>Retry Reasons:</strong> Manual (user-initiated), Bulk (batch operations), Scheduled (automatic), Auto (system-triggered)
|
||||
</Typography>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</DialogContent>
|
||||
|
||||
<DialogActions>
|
||||
<Button
|
||||
startIcon={<RefreshIcon />}
|
||||
onClick={loadRetryHistory}
|
||||
disabled={loading}
|
||||
>
|
||||
Refresh
|
||||
</Button>
|
||||
<Button onClick={onClose} variant="contained">
|
||||
Close
|
||||
</Button>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
245
frontend/src/components/RetryRecommendations.tsx
Normal file
245
frontend/src/components/RetryRecommendations.tsx
Normal file
@@ -0,0 +1,245 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
Typography,
|
||||
Button,
|
||||
Box,
|
||||
Alert,
|
||||
LinearProgress,
|
||||
Chip,
|
||||
Stack,
|
||||
Divider,
|
||||
Tooltip,
|
||||
IconButton,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
Lightbulb as LightbulbIcon,
|
||||
Refresh as RefreshIcon,
|
||||
TrendingUp as TrendingUpIcon,
|
||||
Info as InfoIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService, OcrRetryRecommendation, BulkOcrRetryResponse } from '../services/api';
|
||||
|
||||
/** Props accepted by {@link RetryRecommendations}. */
interface RetryRecommendationsProps {
  /** Invoked with the server response after the built-in retry for a recommendation succeeds. */
  onRetrySuccess?: (result: BulkOcrRetryResponse) => void;
  /**
   * When provided, clicking a recommendation's retry button calls this
   * instead of running the built-in retry.
   */
  onRetryClick?: (recommendation: OcrRetryRecommendation) => void;
}
|
||||
|
||||
export const RetryRecommendations: React.FC<RetryRecommendationsProps> = ({
|
||||
onRetrySuccess,
|
||||
onRetryClick,
|
||||
}) => {
|
||||
const [recommendations, setRecommendations] = useState<OcrRetryRecommendation[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [retryingRecommendation, setRetryingRecommendation] = useState<string | null>(null);
|
||||
|
||||
/**
 * Fetches the retry recommendations and stores them in state.
 *
 * On failure the previously loaded list is kept and an error message is shown.
 */
const loadRecommendations = async () => {
  setLoading(true);
  setError(null);
  try {
    const response = await documentService.getRetryRecommendations();
    // Keep the state an array even if the payload omits the list.
    setRecommendations(response.data?.recommendations ?? []);
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an object shaped like
    // `{ response: { data: { message } } }`; read it defensively and only
    // surface a non-empty string, since `error` is rendered as text.
    const apiMessage = (err as { response?: { data?: { message?: unknown } } } | null | undefined)
      ?.response?.data?.message;
    setError(
      typeof apiMessage === 'string' && apiMessage
        ? apiMessage
        : 'Failed to load retry recommendations',
    );
  } finally {
    setLoading(false);
  }
};
|
||||
|
||||
// Fetch the recommendations once, when the component mounts.
useEffect(() => {
  void loadRecommendations();
}, []);
|
||||
|
||||
/**
 * Handles a click on a recommendation's retry button.
 *
 * When `onRetryClick` is provided the recommendation is handed to it and
 * nothing else happens. Otherwise a non-preview bulk retry is sent using the
 * recommendation's filter; on success `onRetrySuccess` (if any) receives the
 * response and the recommendation list is reloaded.
 */
const handleRetryRecommendation = async (recommendation: OcrRetryRecommendation) => {
  if (onRetryClick) {
    onRetryClick(recommendation);
    return;
  }

  setRetryingRecommendation(recommendation.reason);
  // Clear any message left over from an earlier attempt.
  setError(null);
  try {
    const response = await documentService.bulkRetryOcr({
      mode: 'filter',
      filter: recommendation.filter,
      preview_only: false,
    });

    onRetrySuccess?.(response.data);

    // Refresh in the background; loadRecommendations reports its own errors.
    void loadRecommendations();
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an object shaped like
    // `{ response: { data: { message } } }`; read it defensively and only
    // surface a non-empty string, since `error` is rendered as text.
    const apiMessage = (err as { response?: { data?: { message?: unknown } } } | null | undefined)
      ?.response?.data?.message;
    setError(
      typeof apiMessage === 'string' && apiMessage
        ? apiMessage
        : 'Failed to execute retry',
    );
  } finally {
    setRetryingRecommendation(null);
  }
};
|
||||
|
||||
/**
 * Picks the chip colour for an estimated success rate.
 *
 * The rate is rounded to a whole percentage before it is compared, so the
 * colour always matches the percentage shown to the user: 70% and above is
 * `success`, 40% and above is `warning`, anything lower is `error`.
 *
 * @param rate Success rate as a fraction (0.7 means 70%).
 */
const getSuccessRateColor = (rate: number): 'success' | 'warning' | 'error' => {
  const percentage = Math.round(rate * 100);
  if (percentage >= 70) return 'success';
  if (percentage >= 40) return 'warning';
  return 'error';
};
|
||||
|
||||
/**
 * Formats an estimated success rate (a fraction) as a whole percentage
 * followed by its tier: High from 70%, Medium from 40%, otherwise Low.
 */
const getSuccessRateLabel = (rate: number) => {
  const percentage = Math.round(rate * 100);
  const tier = percentage >= 70 ? 'High' : percentage >= 40 ? 'Medium' : 'Low';
  return `${percentage}% (${tier})`;
};
|
||||
|
||||
if (loading && (!recommendations || recommendations.length === 0)) {
|
||||
return (
|
||||
<Card>
|
||||
<CardContent>
|
||||
<Box display="flex" alignItems="center" gap={1} mb={2}>
|
||||
<LightbulbIcon color="primary" />
|
||||
<Typography variant="h6">Retry Recommendations</Typography>
|
||||
</Box>
|
||||
<LinearProgress />
|
||||
<Typography variant="body2" color="text.secondary" mt={1}>
|
||||
Analyzing failure patterns...
|
||||
</Typography>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<Card>
|
||||
<CardContent>
|
||||
<Box display="flex" alignItems="center" justifyContent="space-between" mb={2}>
|
||||
<Box display="flex" alignItems="center" gap={1}>
|
||||
<LightbulbIcon color="primary" />
|
||||
<Typography variant="h6">Retry Recommendations</Typography>
|
||||
<Tooltip title="AI-powered suggestions based on failure patterns and recent improvements">
|
||||
<IconButton size="small">
|
||||
<InfoIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Box>
|
||||
<Button
|
||||
startIcon={<RefreshIcon />}
|
||||
onClick={loadRecommendations}
|
||||
disabled={loading}
|
||||
size="small"
|
||||
>
|
||||
Refresh
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
{error && (
|
||||
<Alert severity="error" sx={{ mb: 2 }}>
|
||||
{error}
|
||||
</Alert>
|
||||
)}
|
||||
|
||||
{(!recommendations || recommendations.length === 0) && !loading ? (
|
||||
<Alert severity="info">
|
||||
<Typography variant="body2">
|
||||
No retry recommendations available. This usually means:
|
||||
</Typography>
|
||||
<ul style={{ margin: '8px 0', paddingLeft: '20px' }}>
|
||||
<li>All failed documents have already been retried multiple times</li>
|
||||
<li>No clear patterns in failure reasons that suggest likely success</li>
|
||||
<li>No documents with failure types that commonly succeed on retry</li>
|
||||
</ul>
|
||||
</Alert>
|
||||
) : (
|
||||
<Stack spacing={2}>
|
||||
{(recommendations || []).map((recommendation, index) => (
|
||||
<Card key={recommendation.reason} variant="outlined">
|
||||
<CardContent>
|
||||
<Box display="flex" justifyContent="space-between" alignItems="flex-start" mb={1}>
|
||||
<Typography variant="h6" component="div">
|
||||
{recommendation.title}
|
||||
</Typography>
|
||||
<Chip
|
||||
icon={<TrendingUpIcon />}
|
||||
label={getSuccessRateLabel(recommendation.estimated_success_rate)}
|
||||
color={getSuccessRateColor(recommendation.estimated_success_rate) as any}
|
||||
size="small"
|
||||
/>
|
||||
</Box>
|
||||
|
||||
<Typography variant="body2" color="text.secondary" paragraph>
|
||||
{recommendation.description}
|
||||
</Typography>
|
||||
|
||||
<Box display="flex" alignItems="center" gap={2} mb={2}>
|
||||
<Typography variant="body2">
|
||||
<strong>{recommendation.document_count}</strong> documents
|
||||
</Typography>
|
||||
<Divider orientation="vertical" flexItem />
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Pattern: {recommendation.reason.replace(/_/g, ' ')}
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
{/* Filter Summary */}
|
||||
<Box mb={2}>
|
||||
<Typography variant="body2" color="text.secondary" gutterBottom>
|
||||
Criteria:
|
||||
</Typography>
|
||||
<Box display="flex" flexWrap="wrap" gap={0.5}>
|
||||
{recommendation.filter.failure_reasons?.map((reason) => (
|
||||
<Chip
|
||||
key={reason}
|
||||
label={reason.replace(/_/g, ' ')}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
/>
|
||||
))}
|
||||
{/* Label each chip with the MIME subtype ("PDF" for "application/pdf");
    fall back to the whole value when there is no subtype, so a malformed
    entry cannot throw while rendering. */}
{recommendation.filter.mime_types?.map((type) => (
  <Chip
    key={type}
    label={(type.split('/')[1] || type).toUpperCase()}
    size="small"
    variant="outlined"
    color="secondary"
  />
))}
|
||||
{recommendation.filter.max_file_size && (
|
||||
<Chip
|
||||
label={`< ${Math.round(recommendation.filter.max_file_size / (1024 * 1024))}MB`}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="primary"
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Button
|
||||
variant="contained"
|
||||
color="primary"
|
||||
onClick={() => handleRetryRecommendation(recommendation)}
|
||||
disabled={retryingRecommendation !== null}
|
||||
startIcon={retryingRecommendation === recommendation.reason ?
|
||||
<LinearProgress sx={{ width: 20, height: 20 }} /> :
|
||||
<RefreshIcon />
|
||||
}
|
||||
fullWidth
|
||||
>
|
||||
{retryingRecommendation === recommendation.reason
|
||||
? 'Retrying...'
|
||||
: `Retry ${recommendation.document_count} Documents`
|
||||
}
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
))}
|
||||
</Stack>
|
||||
)}
|
||||
|
||||
{loading && recommendations && recommendations.length > 0 && (
|
||||
<LinearProgress sx={{ mt: 2 }} />
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
};
|
||||
91
frontend/src/components/__tests__/BulkRetryModal.test.tsx
Normal file
91
frontend/src/components/__tests__/BulkRetryModal.test.tsx
Normal file
@@ -0,0 +1,91 @@
|
||||
import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { render, screen } from '@testing-library/react';
|
||||
import userEvent from '@testing-library/user-event';
|
||||
import { BulkRetryModal } from '../BulkRetryModal';
|
||||
|
||||
// `vi.mock` is hoisted above the imports, so its factory can run before an
// ordinary top-level `const` in this file has been initialised. Creating the
// mock with `vi.hoisted` makes it available to the factory as well as to the
// tests below.
const mockBulkRetryOcr = vi.hoisted(() => vi.fn());

// Replace the API module so no test issues a real request.
vi.mock('../../services/api', () => ({
  documentService: {
    bulkRetryOcr: mockBulkRetryOcr,
  },
}));
|
||||
|
||||
describe('BulkRetryModal', () => {
  // Props shared by every test; individual tests override `open` where needed.
  const mockProps = {
    open: true,
    onClose: vi.fn(),
    onSuccess: vi.fn(),
  };

  // Builds the response the mocked bulk-retry call resolves with by default.
  const buildDefaultResponse = () => ({
    data: {
      success: true,
      queued_count: 5,
      matched_count: 5,
      documents: [],
      estimated_total_time_minutes: 2.5,
      message: 'Operation completed successfully',
    },
  });

  // Clears recorded calls and resets the implementations of all mocks.
  const wipeAllMocks = (): void => {
    vi.clearAllMocks();
    vi.resetAllMocks();
  };

  beforeEach(() => {
    wipeAllMocks();

    // Prop callbacks are cleared explicitly as well.
    mockProps.onClose.mockClear();
    mockProps.onSuccess.mockClear();

    // Re-install the default response after the reset above removed it.
    mockBulkRetryOcr.mockResolvedValue(buildDefaultResponse());
  });

  afterEach(() => {
    wipeAllMocks();
  });

  test('renders modal with title and form elements', async () => {
    render(<BulkRetryModal {...mockProps} />);

    const expectedTexts = [
      'Bulk OCR Retry',
      'Retry Mode',
      'Retry all failed OCR documents',
      'Retry documents matching criteria',
    ];
    for (const text of expectedTexts) {
      expect(screen.getByText(text)).toBeInTheDocument();
    }
  });

  test('closes modal when close button is clicked', async () => {
    const user = userEvent.setup();

    render(<BulkRetryModal {...mockProps} />);

    // The modal is dismissed through its "Cancel" button.
    await user.click(screen.getByText('Cancel'));

    expect(mockProps.onClose).toHaveBeenCalled();
  });

  test('shows preview by default', async () => {
    render(<BulkRetryModal {...mockProps} />);

    expect(screen.getByText('Preview')).toBeInTheDocument();
  });

  test('does not render when modal is closed', async () => {
    render(<BulkRetryModal {...mockProps} open={false} />);

    expect(screen.queryByText('Bulk OCR Retry')).not.toBeInTheDocument();
  });

  test('resets form when modal is closed and reopened', async () => {
    // Start closed, then re-render the same instance as open.
    const view = render(<BulkRetryModal {...mockProps} open={false} />);
    view.rerender(<BulkRetryModal {...mockProps} open={true} />);

    // The default retry mode must be selected again.
    expect(screen.getByLabelText('Retry all failed OCR documents')).toBeChecked();
  });
});
|
||||
66
frontend/src/components/__tests__/RetryHistoryModal.test.tsx
Normal file
66
frontend/src/components/__tests__/RetryHistoryModal.test.tsx
Normal file
@@ -0,0 +1,66 @@
|
||||
import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { render, screen } from '@testing-library/react';
|
||||
import userEvent from '@testing-library/user-event';
|
||||
import { RetryHistoryModal } from '../RetryHistoryModal';
|
||||
|
||||
// Create unique mock functions for this test file.
// `vi.mock` is hoisted above the imports and every other top-level statement, so
// its factory may only reference values that are hoisted as well. A plain
// top-level `const` would still be uninitialised when the factory runs, hence
// the mock is created through `vi.hoisted`.
const mockGetDocumentRetryHistory = vi.hoisted(() => vi.fn());

// Mock the API module so the modal reads retry history from the mock.
vi.mock('../../services/api', () => ({
  documentService: {
    getDocumentRetryHistory: mockGetDocumentRetryHistory,
  },
}));
|
||||
|
||||
describe('RetryHistoryModal', () => {
  // Props shared by every test; individual tests override single fields.
  const mockProps = {
    open: true,
    onClose: vi.fn(),
    documentId: 'test-doc-123',
    documentName: 'test-document.pdf',
  };

  // Builds the empty-history response the mocked endpoint resolves with by default.
  const buildEmptyHistoryResponse = () => ({
    data: {
      document_id: 'test-doc-123',
      retry_history: [],
      total_retries: 0,
    },
  });

  // Clears recorded calls and resets the implementations of all mocks.
  const wipeAllMocks = (): void => {
    vi.clearAllMocks();
    vi.resetAllMocks();
  };

  beforeEach(() => {
    wipeAllMocks();

    // The prop callback is cleared explicitly as well.
    mockProps.onClose.mockClear();

    // Re-install the default response after the reset above removed it.
    mockGetDocumentRetryHistory.mockResolvedValue(buildEmptyHistoryResponse());
  });

  afterEach(() => {
    wipeAllMocks();
  });

  test('does not render when modal is closed', async () => {
    render(<RetryHistoryModal {...mockProps} open={false} />);

    expect(screen.queryByText('OCR Retry History')).not.toBeInTheDocument();
  });

  test('renders modal with correct structure when open', async () => {
    render(<RetryHistoryModal {...mockProps} />);

    // Both the dialog title and the document name must be shown.
    for (const text of ['OCR Retry History', 'test-document.pdf']) {
      expect(screen.getByText(text)).toBeInTheDocument();
    }
  });

  test('handles missing documentName gracefully', async () => {
    render(<RetryHistoryModal {...mockProps} documentName={undefined} />);

    // Without a document name only the dialog title can be asserted.
    expect(screen.getByText('OCR Retry History')).toBeInTheDocument();
  });
});
|
||||
100
frontend/src/components/__tests__/RetryRecommendations.test.tsx
Normal file
100
frontend/src/components/__tests__/RetryRecommendations.test.tsx
Normal file
@@ -0,0 +1,100 @@
|
||||
import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { render, screen, waitFor } from '@testing-library/react';
|
||||
import userEvent from '@testing-library/user-event';
|
||||
import { RetryRecommendations } from '../RetryRecommendations';
|
||||
|
||||
// Create unique mock functions for this test file.
// `vi.mock` is hoisted above the imports and every other top-level statement, so
// its factory may only reference values that are hoisted as well. Plain
// top-level `const`s would still be uninitialised when the factory runs, hence
// both mocks are created through `vi.hoisted`.
const { mockGetRetryRecommendations, mockBulkRetryOcr } = vi.hoisted(() => ({
  mockGetRetryRecommendations: vi.fn(),
  mockBulkRetryOcr: vi.fn(),
}));

// Mock the API module so the component uses the two mocks above.
vi.mock('../../services/api', () => ({
  documentService: {
    getRetryRecommendations: mockGetRetryRecommendations,
    bulkRetryOcr: mockBulkRetryOcr,
  },
}));
|
||||
|
||||
describe('RetryRecommendations', () => {
  // Callback props handed to the component in every test.
  const mockProps = {
    onRetrySuccess: vi.fn(),
    onRetryClick: vi.fn(),
  };

  // A single recommendation used as the default server answer.
  const sampleRecommendations = [
    {
      reason: 'low_confidence',
      title: 'Low Confidence Results',
      description: 'Documents with OCR confidence below 70%',
      estimated_success_rate: 0.8,
      document_count: 15,
      filter: {
        failure_reasons: ['low_confidence'],
        min_confidence: 0,
        max_confidence: 70,
      },
    },
  ];

  // Makes the mocked recommendations endpoint resolve with the given payload.
  const respondWithRecommendations = (recommendations: unknown, total: number): void => {
    mockGetRetryRecommendations.mockResolvedValue({
      data: {
        recommendations,
        total_recommendations: total,
      },
    });
  };

  // Clears recorded calls and resets the implementations of all mocks.
  const wipeAllMocks = (): void => {
    vi.clearAllMocks();
    vi.resetAllMocks();
  };

  beforeEach(() => {
    wipeAllMocks();

    // Prop callbacks are cleared explicitly as well.
    mockProps.onRetrySuccess.mockClear();
    mockProps.onRetryClick.mockClear();

    // Default answers; tests override the recommendations where needed.
    respondWithRecommendations(sampleRecommendations, 1);
    mockBulkRetryOcr.mockResolvedValue({
      data: {
        success: true,
        queued_count: 10,
        matched_count: 15,
        documents: [],
        estimated_total_time_minutes: 5.2,
        message: 'Retry operation completed successfully',
      },
    });
  });

  afterEach(() => {
    wipeAllMocks();
  });

  test('shows empty state when no recommendations are available', async () => {
    respondWithRecommendations([], 0);

    render(<RetryRecommendations {...mockProps} />);

    await waitFor(() => {
      expect(screen.getByText(/No retry recommendations/)).toBeInTheDocument();
    });
  });

  test('handles null/undefined recommendations safely', async () => {
    respondWithRecommendations(null, 0);

    render(<RetryRecommendations {...mockProps} />);

    await waitFor(() => {
      // Must not crash; the empty state is expected instead.
      expect(screen.getByText(/No retry recommendations/)).toBeInTheDocument();
    });
  });
});
|
||||
@@ -39,12 +39,15 @@ import {
|
||||
AccessTime as AccessTimeIcon,
|
||||
Create as CreateIcon,
|
||||
Info as InfoIcon,
|
||||
Refresh as RefreshIcon,
|
||||
History as HistoryIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService, OcrResponse } from '../services/api';
|
||||
import DocumentViewer from '../components/DocumentViewer';
|
||||
import LabelSelector from '../components/Labels/LabelSelector';
|
||||
import { type LabelData } from '../components/Labels/Label';
|
||||
import MetadataDisplay from '../components/MetadataDisplay';
|
||||
import { RetryHistoryModal } from '../components/RetryHistoryModal';
|
||||
import api from '../services/api';
|
||||
|
||||
interface Document {
|
||||
@@ -80,6 +83,37 @@ const DocumentDetailsPage: React.FC = () => {
|
||||
const [availableLabels, setAvailableLabels] = useState<LabelData[]>([]);
|
||||
const [showLabelDialog, setShowLabelDialog] = useState<boolean>(false);
|
||||
const [labelsLoading, setLabelsLoading] = useState<boolean>(false);
|
||||
|
||||
// Retry functionality state
|
||||
const [retryingOcr, setRetryingOcr] = useState<boolean>(false);
|
||||
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState<boolean>(false);
|
||||
|
||||
// Retry handlers
|
||||
// Queues an OCR retry (priority override 15) for the document currently shown.
// The `retryingOcr` flag is raised for the duration of the request; failures are
// only logged to the console.
const handleRetryOcr = async () => {
  if (!document) {
    return;
  }

  const refreshDelayMs = 1000;

  setRetryingOcr(true);
  try {
    await documentService.bulkRetryOcr({
      mode: 'specific',
      document_ids: [document.id],
      priority_override: 15,
    });

    // Reload the document details one second after the request succeeded.
    setTimeout(() => fetchDocumentDetails(), refreshDelayMs);
  } catch (err) {
    console.error('Failed to retry OCR:', err);
  } finally {
    setRetryingOcr(false);
  }
};
|
||||
|
||||
// Opens the retry-history modal for the current document.
const handleShowRetryHistory = (): void => setRetryHistoryModalOpen(true);
|
||||
|
||||
useEffect(() => {
|
||||
if (id) {
|
||||
@@ -429,6 +463,23 @@ const DocumentDetailsPage: React.FC = () => {
|
||||
{processedImageLoading ? 'Loading...' : 'Processed Image'}
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
variant="outlined"
|
||||
startIcon={retryingOcr ? <CircularProgress size={16} /> : <RefreshIcon />}
|
||||
onClick={handleRetryOcr}
|
||||
disabled={retryingOcr}
|
||||
sx={{ borderRadius: 2 }}
|
||||
>
|
||||
{retryingOcr ? 'Retrying...' : 'Retry OCR'}
|
||||
</Button>
|
||||
<Button
|
||||
variant="outlined"
|
||||
startIcon={<HistoryIcon />}
|
||||
onClick={handleShowRetryHistory}
|
||||
sx={{ borderRadius: 2 }}
|
||||
>
|
||||
Retry History
|
||||
</Button>
|
||||
</Stack>
|
||||
|
||||
{document.has_ocr_text && (
|
||||
@@ -980,6 +1031,16 @@ const DocumentDetailsPage: React.FC = () => {
|
||||
</Button>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
|
||||
{/* Retry History Modal */}
|
||||
{document && (
|
||||
<RetryHistoryModal
|
||||
open={retryHistoryModalOpen}
|
||||
onClose={() => setRetryHistoryModalOpen(false)}
|
||||
documentId={document.id}
|
||||
documentName={document.original_filename}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -52,12 +52,16 @@ import {
|
||||
OpenInNew as OpenInNewIcon,
|
||||
Warning as WarningIcon,
|
||||
Block as BlockIcon,
|
||||
History as HistoryIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { format } from 'date-fns';
|
||||
import { api, documentService, queueService } from '../services/api';
|
||||
import { api, documentService, queueService, BulkOcrRetryResponse } from '../services/api';
|
||||
import DocumentViewer from '../components/DocumentViewer';
|
||||
import FailedDocumentViewer from '../components/FailedDocumentViewer';
|
||||
import MetadataDisplay from '../components/MetadataDisplay';
|
||||
import { BulkRetryModal } from '../components/BulkRetryModal';
|
||||
import { RetryRecommendations } from '../components/RetryRecommendations';
|
||||
import { RetryHistoryModal } from '../components/RetryHistoryModal';
|
||||
|
||||
interface FailedDocument {
|
||||
id: string;
|
||||
@@ -224,6 +228,12 @@ const DocumentManagementPage: React.FC = () => {
|
||||
const [bulkDeleteIgnoredDialog, setBulkDeleteIgnoredDialog] = useState(false);
|
||||
const [deletingIgnoredFiles, setDeletingIgnoredFiles] = useState(false);
|
||||
|
||||
// Advanced retry functionality state
|
||||
const [bulkRetryModalOpen, setBulkRetryModalOpen] = useState(false);
|
||||
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false);
|
||||
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
|
||||
const [selectedDocumentIds, setSelectedDocumentIds] = useState<string[]>([]);
|
||||
|
||||
const fetchFailedDocuments = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
@@ -381,6 +391,21 @@ const DocumentManagementPage: React.FC = () => {
|
||||
}
|
||||
};
|
||||
|
||||
// Advanced retry functionality handlers
|
||||
// Reports the outcome of a bulk retry in the snackbar and reloads the list of
// failed documents.
const handleBulkRetrySuccess = (result: BulkOcrRetryResponse) => {
  const { queued_count, matched_count, estimated_total_time_minutes } = result;
  const estimatedMinutes = Math.round(estimated_total_time_minutes);

  setSnackbar({
    open: true,
    message: `Successfully queued ${queued_count} of ${matched_count} documents for retry. Estimated processing time: ${estimatedMinutes} minutes.`,
    severity: 'success',
  });

  // Refresh the list so re-queued documents are reflected.
  fetchFailedDocuments();
};
|
||||
|
||||
// Remembers which document was chosen and opens the retry-history modal for it.
const handleShowRetryHistory = (documentId: string): void => {
  setSelectedDocumentForHistory(documentId);
  setRetryHistoryModalOpen(true);
};
|
||||
|
||||
const formatFileSize = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 B';
|
||||
const k = 1024;
|
||||
@@ -833,6 +858,33 @@ const DocumentManagementPage: React.FC = () => {
|
||||
</Grid>
|
||||
)}
|
||||
|
||||
{/* Advanced Retry Components */}
|
||||
<Grid container spacing={3} mb={3}>
|
||||
<Grid item xs={12} md={6}>
|
||||
<Card>
|
||||
<CardContent>
|
||||
<Box display="flex" justifyContent="space-between" alignItems="center" mb={2}>
|
||||
<Typography variant="h6">Advanced Retry Options</Typography>
|
||||
<Button
|
||||
variant="outlined"
|
||||
onClick={() => setBulkRetryModalOpen(true)}
|
||||
disabled={!statistics || statistics.total_failed === 0}
|
||||
startIcon={<RefreshIcon />}
|
||||
>
|
||||
Advanced Retry
|
||||
</Button>
|
||||
</Box>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Use advanced filtering and selection options to retry specific subsets of failed documents based on file type, failure reason, size, and more.
|
||||
</Typography>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={6}>
|
||||
<RetryRecommendations onRetrySuccess={handleBulkRetrySuccess} />
|
||||
</Grid>
|
||||
</Grid>
|
||||
|
||||
{/* Filter Controls */}
|
||||
<Card sx={{ mb: 3 }}>
|
||||
<CardContent>
|
||||
@@ -975,6 +1027,14 @@ const DocumentManagementPage: React.FC = () => {
|
||||
<VisibilityIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
<Tooltip title="Retry History">
|
||||
<IconButton
|
||||
size="small"
|
||||
onClick={() => handleShowRetryHistory(document.id)}
|
||||
>
|
||||
<HistoryIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
<Tooltip title="Download Document">
|
||||
<IconButton
|
||||
size="small"
|
||||
@@ -2159,6 +2219,23 @@ const DocumentManagementPage: React.FC = () => {
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
|
||||
{/* Advanced Retry Modal */}
|
||||
<BulkRetryModal
|
||||
open={bulkRetryModalOpen}
|
||||
onClose={() => setBulkRetryModalOpen(false)}
|
||||
onSuccess={handleBulkRetrySuccess}
|
||||
selectedDocumentIds={selectedDocumentIds}
|
||||
/>
|
||||
|
||||
{/* Retry History Modal */}
|
||||
<RetryHistoryModal
|
||||
open={retryHistoryModalOpen}
|
||||
onClose={() => setRetryHistoryModalOpen(false)}
|
||||
documentId={selectedDocumentForHistory || ''}
|
||||
documentName={selectedDocumentForHistory ?
|
||||
documents.find(d => d.id === selectedDocumentForHistory)?.filename : undefined}
|
||||
/>
|
||||
|
||||
{/* Success/Error Snackbar */}
|
||||
<Snackbar
|
||||
open={snackbar.open}
|
||||
|
||||
@@ -57,12 +57,15 @@ import {
|
||||
CheckBox as CheckBoxIcon,
|
||||
SelectAll as SelectAllIcon,
|
||||
Close as CloseIcon,
|
||||
Refresh as RefreshIcon,
|
||||
History as HistoryIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService } from '../services/api';
|
||||
import DocumentThumbnail from '../components/DocumentThumbnail';
|
||||
import Label, { type LabelData } from '../components/Labels/Label';
|
||||
import LabelSelector from '../components/Labels/LabelSelector';
|
||||
import { useApi } from '../hooks/useApi';
|
||||
import { RetryHistoryModal } from '../components/RetryHistoryModal';
|
||||
|
||||
interface Document {
|
||||
id: string;
|
||||
@@ -130,6 +133,11 @@ const DocumentsPage: React.FC = () => {
|
||||
const [bulkDeleteDialogOpen, setBulkDeleteDialogOpen] = useState<boolean>(false);
|
||||
const [bulkDeleteLoading, setBulkDeleteLoading] = useState<boolean>(false);
|
||||
|
||||
// Retry functionality state
|
||||
const [retryingDocument, setRetryingDocument] = useState<string | null>(null);
|
||||
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState<boolean>(false);
|
||||
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
fetchDocuments();
|
||||
fetchLabels();
|
||||
@@ -331,6 +339,35 @@ const DocumentsPage: React.FC = () => {
|
||||
setDocumentToDelete(null);
|
||||
};
|
||||
|
||||
// Retry functionality handlers
|
||||
// Queues an OCR retry (priority override 15) for a single document, reloads the
// document list on success and records an error message on failure. The
// per-document busy marker is cleared and the row menu closed in every case.
const handleRetryOcr = async (doc: Document): Promise<void> => {
  try {
    setRetryingDocument(doc.id);

    await documentService.bulkRetryOcr({
      mode: 'specific',
      document_ids: [doc.id],
      priority_override: 15,
    });

    // Pick up the updated OCR status.
    await fetchDocuments();

    setError(null);
  } catch (err) {
    console.error('Failed to retry OCR:', err);
    setError('Failed to retry OCR processing');
  } finally {
    setRetryingDocument(null);
    handleDocMenuClose();
  }
};
|
||||
|
||||
// Remembers which document was chosen, opens the retry-history modal for it and
// closes the row menu the action was triggered from.
const handleShowRetryHistory = (docId: string): void => {
  setSelectedDocumentForHistory(docId);
  setRetryHistoryModalOpen(true);
  handleDocMenuClose();
};
|
||||
|
||||
const handlePageChange = (event: React.ChangeEvent<unknown>, page: number): void => {
|
||||
const newOffset = (page - 1) * pagination.limit;
|
||||
setPagination(prev => ({ ...prev, offset: newOffset }));
|
||||
@@ -632,6 +669,27 @@ const DocumentsPage: React.FC = () => {
|
||||
<ListItemText>Edit Labels</ListItemText>
|
||||
</MenuItem>
|
||||
<Divider />
|
||||
<MenuItem onClick={() => {
|
||||
if (selectedDoc) handleRetryOcr(selectedDoc);
|
||||
}} disabled={retryingDocument === selectedDoc?.id}>
|
||||
<ListItemIcon>
|
||||
{retryingDocument === selectedDoc?.id ? (
|
||||
<CircularProgress size={16} />
|
||||
) : (
|
||||
<RefreshIcon fontSize="small" />
|
||||
)}
|
||||
</ListItemIcon>
|
||||
<ListItemText>
|
||||
{retryingDocument === selectedDoc?.id ? 'Retrying OCR...' : 'Retry OCR'}
|
||||
</ListItemText>
|
||||
</MenuItem>
|
||||
<MenuItem onClick={() => {
|
||||
if (selectedDoc) handleShowRetryHistory(selectedDoc.id);
|
||||
}}>
|
||||
<ListItemIcon><HistoryIcon fontSize="small" /></ListItemIcon>
|
||||
<ListItemText>Retry History</ListItemText>
|
||||
</MenuItem>
|
||||
<Divider />
|
||||
<MenuItem onClick={() => {
|
||||
if (selectedDoc) handleDeleteClick(selectedDoc);
|
||||
}}>
|
||||
@@ -989,6 +1047,15 @@ const DocumentsPage: React.FC = () => {
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
{/* Retry History Modal */}
|
||||
<RetryHistoryModal
|
||||
open={retryHistoryModalOpen}
|
||||
onClose={() => setRetryHistoryModalOpen(false)}
|
||||
documentId={selectedDocumentForHistory || ''}
|
||||
documentName={selectedDocumentForHistory ?
|
||||
documents.find(d => d.id === selectedDocumentForHistory)?.original_filename : undefined}
|
||||
/>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -14,6 +14,9 @@ const mockDocumentService = {
|
||||
deleteLowConfidence: vi.fn(),
|
||||
deleteFailedOcr: vi.fn(),
|
||||
downloadFile: vi.fn(),
|
||||
getRetryRecommendations: vi.fn(),
|
||||
getRetryStats: vi.fn(),
|
||||
getDocumentRetryHistory: vi.fn(),
|
||||
};
|
||||
|
||||
const mockQueueService = {
|
||||
@@ -23,6 +26,7 @@ const mockQueueService = {
|
||||
const mockApi = {
|
||||
get: vi.fn(),
|
||||
delete: vi.fn(),
|
||||
bulkRetryOcr: vi.fn(),
|
||||
};
|
||||
|
||||
// Mock API with comprehensive responses
|
||||
@@ -51,6 +55,20 @@ describe('DocumentManagementPage - Runtime Error Prevention', () => {
|
||||
mockDocumentService.getFailedOcrDocuments.mockClear();
|
||||
mockDocumentService.getDuplicates.mockClear();
|
||||
mockQueueService.requeueFailed.mockClear();
|
||||
|
||||
// Setup default mock returns for retry functionality
|
||||
mockDocumentService.getRetryRecommendations.mockResolvedValue({
|
||||
data: { recommendations: [], total_recommendations: 0 }
|
||||
});
|
||||
mockDocumentService.getRetryStats.mockResolvedValue({
|
||||
data: { failure_reasons: [], file_types: [], total_failed: 0 }
|
||||
});
|
||||
mockDocumentService.getDocumentRetryHistory.mockResolvedValue({
|
||||
data: { document_id: 'test', retry_history: [], total_retries: 0 }
|
||||
});
|
||||
mockApi.bulkRetryOcr.mockResolvedValue({
|
||||
data: { success: true, queued_count: 0, matched_count: 0, documents: [] }
|
||||
});
|
||||
});
|
||||
|
||||
describe('OCR Confidence Display - Null Safety', () => {
|
||||
|
||||
@@ -12,17 +12,24 @@ export const api = {
|
||||
// Mock document service
|
||||
export const documentService = {
  // Listing, lookup and content access
  list: vi.fn(),
  get: vi.fn(),
  getById: vi.fn(),
  getOcrText: vi.fn(),

  // Mutations and search
  upload: vi.fn(),
  delete: vi.fn(),
  search: vi.fn(),
  enhancedSearch: vi.fn(),

  // Binary assets and tags
  download: vi.fn(),
  getThumbnail: vi.fn(),
  getProcessedImage: vi.fn(),
  updateTags: vi.fn(),

  // Failure handling
  getFailedOcrDocuments: vi.fn(),
  getDuplicates: vi.fn(),
  retryOcr: vi.fn(),
  deleteLowConfidence: vi.fn(),

  // Advanced OCR retry endpoints
  getDocumentRetryHistory: vi.fn(),
  getRetryRecommendations: vi.fn(),
  getRetryStats: vi.fn(),
  bulkRetryOcr: vi.fn(),
}
|
||||
|
||||
// Re-export types that components might need
|
||||
|
||||
@@ -86,6 +86,93 @@ export interface SearchFacetsResponse {
|
||||
tags: FacetItem[]
|
||||
}
|
||||
|
||||
// OCR Retry Types
|
||||
/**
 * Criteria that narrow down which documents an OCR retry applies to.
 * Every field is optional.
 */
export interface OcrRetryFilter {
  /** MIME types to match, e.g. `application/pdf`. */
  mime_types?: string[]
  /** File extensions to match. */
  file_extensions?: string[]
  /** OCR failure reason codes to match. */
  failure_reasons?: string[]
  /** Lower file-size bound (unit not visible here; presumably bytes, confirm against the server). */
  min_file_size?: number
  /** Upper file-size bound (same unit as `min_file_size`). */
  max_file_size?: number
  /** Creation-time lower bound, sent as a string (format defined by the server). */
  created_after?: string
  /** Creation-time upper bound, sent as a string (format defined by the server). */
  created_before?: string
  /** Tags to match. */
  tags?: string[]
  /** Maximum number of documents to select. */
  limit?: number
}
|
||||
|
||||
/** Request body of the bulk OCR retry endpoint. */
export interface BulkOcrRetryRequest {
  /** Selection strategy: every candidate, an explicit id list, or a filter. */
  mode: 'all' | 'specific' | 'filter'
  /** Document ids to retry; supplied together with `mode: 'specific'`. */
  document_ids?: string[]
  /** Selection criteria; presumably read when `mode` is `'filter'` (confirm against the server). */
  filter?: OcrRetryFilter
  /** Queue priority to use instead of the server-chosen one. */
  priority_override?: number
  /** Presumably returns the matching documents without queueing them (confirm against the server). */
  preview_only?: boolean
}
|
||||
|
||||
/** One document entry within a bulk OCR retry response. */
export interface OcrRetryDocumentInfo {
  /** Document identifier. */
  id: string
  /** File name of the document. */
  filename: string
  /** File size as reported by the server. */
  file_size: number
  /** MIME type of the document. */
  mime_type: string
  /** Failure reason of the previous OCR run, if there was one. */
  ocr_failure_reason?: string
  /** Queue priority of the retry. */
  priority: number
  /** Identifier of the queue entry, when one exists. */
  queue_id?: string
}
|
||||
|
||||
/** Response body of the bulk OCR retry endpoint. */
export interface BulkOcrRetryResponse {
  /** Whether the operation succeeded. */
  success: boolean
  /** Human-readable summary of the operation. */
  message: string
  /** Number of documents that were queued for retry. */
  queued_count: number
  /** Number of documents that matched the request. */
  matched_count: number
  /** Per-document details. */
  documents: OcrRetryDocumentInfo[]
  /** Estimated total processing time in minutes. */
  estimated_total_time_minutes: number
}
|
||||
|
||||
/** Response body of the OCR retry statistics endpoint. */
export interface OcrRetryStatsResponse {
  /** Failure counts grouped by failure reason. */
  failure_reasons: {
    reason: string
    count: number
    avg_file_size_mb: number
    first_occurrence: string
    last_occurrence: string
  }[]
  /** Failure counts grouped by MIME type. */
  file_types: {
    mime_type: string
    count: number
    avg_file_size_mb: number
  }[]
  /** Total number of failed documents. */
  total_failed: number
}
|
||||
|
||||
/** A server-suggested group of documents that are worth retrying. */
export interface OcrRetryRecommendation {
  /** Reason code identifying the recommendation. */
  reason: string
  /** Short headline for display. */
  title: string
  /** Longer explanation for display. */
  description: string
  /** Estimated success rate (scale defined by the server). */
  estimated_success_rate: number
  /** Number of documents covered by the recommendation. */
  document_count: number
  /** Filter that selects the recommended documents. */
  filter: OcrRetryFilter
}
|
||||
|
||||
/** Response body of the OCR retry recommendations endpoint. */
export interface OcrRetryRecommendationsResponse {
  /** The recommendations themselves. */
  recommendations: OcrRetryRecommendation[]
  /** Number of recommendations reported by the server. */
  total_recommendations: number
}
|
||||
|
||||
/** One recorded OCR retry attempt of a document. */
export interface DocumentRetryHistoryItem {
  /** Identifier of the history entry. */
  id: string
  /** Why the retry was triggered. */
  retry_reason: string
  /** OCR status before the retry, if known. */
  previous_status?: string
  /** Failure reason before the retry, if any. */
  previous_failure_reason?: string
  /** Error text before the retry, if any. */
  previous_error?: string
  /** Queue priority assigned to the retry. */
  priority: number
  /** Identifier of the queue entry, when one exists. */
  queue_id?: string
  /** Creation time of the entry, as a string. */
  created_at: string
}
|
||||
|
||||
/** Response body of the per-document OCR retry history endpoint. */
export interface DocumentRetryHistoryResponse {
  /** Identifier of the document the history belongs to. */
  document_id: string
  /** The recorded retry attempts. */
  retry_history: DocumentRetryHistoryItem[]
  /** Number of retries reported by the server. */
  total_retries: number
}
|
||||
|
||||
export interface PaginatedResponse<T> {
|
||||
documents: T[]
|
||||
pagination: {
|
||||
@@ -203,6 +290,23 @@ export const documentService = {
|
||||
return api.post(`/documents/${id}/retry-ocr`)
|
||||
},
|
||||
|
||||
// Advanced OCR retry functionality
|
||||
// POSTs a bulk retry request to `/documents/ocr/bulk-retry`.
bulkRetryOcr: (request: BulkOcrRetryRequest) =>
  api.post<BulkOcrRetryResponse>('/documents/ocr/bulk-retry', request),
|
||||
|
||||
// GETs failure statistics from `/documents/ocr/retry-stats`.
getRetryStats: () =>
  api.get<OcrRetryStatsResponse>('/documents/ocr/retry-stats'),
|
||||
|
||||
// GETs retry recommendations from `/documents/ocr/retry-recommendations`.
getRetryRecommendations: () =>
  api.get<OcrRetryRecommendationsResponse>('/documents/ocr/retry-recommendations'),
|
||||
|
||||
// GETs the retry history of one document from `/documents/{id}/ocr/retry-history`.
getDocumentRetryHistory: (id: string) =>
  api.get<DocumentRetryHistoryResponse>(`/documents/${id}/ocr/retry-history`),
|
||||
|
||||
getFailedOcrDocuments: (limit = 50, offset = 0) => {
|
||||
return api.get(`/documents/failed`, {
|
||||
params: { stage: 'ocr', limit, offset },
|
||||
|
||||
48
migrations/20250701000001_add_ocr_retry_history.sql
Normal file
48
migrations/20250701000001_add_ocr_retry_history.sql
Normal file
@@ -0,0 +1,48 @@
|
||||
-- Create table to track OCR retry history for audit and analytics
CREATE TABLE IF NOT EXISTS ocr_retry_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    retry_reason TEXT,
    previous_status TEXT,
    previous_failure_reason TEXT,
    previous_error TEXT,
    priority INT NOT NULL,
    queue_id UUID,
    -- NOT NULL because the application reads this column as a non-optional timestamp
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Create indexes for efficient querying.
-- IF NOT EXISTS keeps these statements as re-runnable as the table and column
-- statements in this migration.
CREATE INDEX IF NOT EXISTS idx_ocr_retry_history_document_id ON ocr_retry_history(document_id);
CREATE INDEX IF NOT EXISTS idx_ocr_retry_history_user_id ON ocr_retry_history(user_id);
CREATE INDEX IF NOT EXISTS idx_ocr_retry_history_created_at ON ocr_retry_history(created_at);

-- Add retry count to documents table if not exists
ALTER TABLE documents
ADD COLUMN IF NOT EXISTS ocr_retry_count INT DEFAULT 0;

-- Add comments
COMMENT ON TABLE ocr_retry_history IS 'Tracks history of OCR retry attempts for auditing and analytics';
COMMENT ON COLUMN ocr_retry_history.retry_reason IS 'Reason for retry: manual, bulk_retry, scheduled, etc.';
COMMENT ON COLUMN ocr_retry_history.previous_status IS 'OCR status before retry';
COMMENT ON COLUMN ocr_retry_history.previous_failure_reason IS 'Previous failure reason if any';
COMMENT ON COLUMN ocr_retry_history.priority IS 'Priority assigned to the retry in queue';

-- Create view for retry analytics: one row per document that has at least one
-- recorded retry (the HAVING clause removes documents without history rows)
CREATE OR REPLACE VIEW ocr_retry_analytics AS
SELECT
    d.id as document_id,
    d.filename,
    d.mime_type,
    d.file_size,
    d.ocr_retry_count,
    d.ocr_status,
    d.ocr_failure_reason,
    COUNT(h.id) as total_retries,
    MAX(h.created_at) as last_retry_at,
    MIN(h.created_at) as first_retry_at
FROM documents d
LEFT JOIN ocr_retry_history h ON d.id = h.document_id
GROUP BY d.id, d.filename, d.mime_type, d.file_size, d.ocr_retry_count, d.ocr_status, d.ocr_failure_reason
HAVING COUNT(h.id) > 0
ORDER BY total_retries DESC;
|
||||
@@ -12,6 +12,7 @@ pub mod sources;
|
||||
pub mod images;
|
||||
pub mod ignored_files;
|
||||
pub mod constraint_validation;
|
||||
pub mod ocr_retry;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Database {
|
||||
|
||||
254
src/db/ocr_retry.rs
Normal file
254
src/db/ocr_retry.rs
Normal file
@@ -0,0 +1,254 @@
|
||||
use anyhow::Result;
|
||||
use sqlx::{PgPool, Row};
|
||||
use uuid::Uuid;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct OcrRetryHistory {
|
||||
pub id: Uuid,
|
||||
pub document_id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub retry_reason: Option<String>,
|
||||
pub previous_status: Option<String>,
|
||||
pub previous_failure_reason: Option<String>,
|
||||
pub previous_error: Option<String>,
|
||||
pub priority: i32,
|
||||
pub queue_id: Option<Uuid>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Record an OCR retry attempt
|
||||
pub async fn record_ocr_retry(
|
||||
pool: &PgPool,
|
||||
document_id: Uuid,
|
||||
user_id: Uuid,
|
||||
retry_reason: &str,
|
||||
priority: i32,
|
||||
queue_id: Option<Uuid>,
|
||||
) -> Result<Uuid> {
|
||||
// First get the current OCR status
|
||||
let current_status = sqlx::query(
|
||||
r#"
|
||||
SELECT ocr_status, ocr_failure_reason, ocr_error
|
||||
FROM documents
|
||||
WHERE id = $1
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status {
|
||||
(
|
||||
row.get::<Option<String>, _>("ocr_status"),
|
||||
row.get::<Option<String>, _>("ocr_failure_reason"),
|
||||
row.get::<Option<String>, _>("ocr_error"),
|
||||
)
|
||||
} else {
|
||||
(None, None, None)
|
||||
};
|
||||
|
||||
// Insert retry history record
|
||||
let retry_id: Uuid = sqlx::query_scalar(
|
||||
r#"
|
||||
INSERT INTO ocr_retry_history (
|
||||
document_id, user_id, retry_reason, previous_status,
|
||||
previous_failure_reason, previous_error, priority, queue_id
|
||||
)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
RETURNING id
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.bind(user_id)
|
||||
.bind(retry_reason)
|
||||
.bind(previous_status)
|
||||
.bind(previous_failure_reason)
|
||||
.bind(previous_error)
|
||||
.bind(priority)
|
||||
.bind(queue_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
|
||||
// Increment retry count
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
SET ocr_retry_count = COALESCE(ocr_retry_count, 0) + 1,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
Ok(retry_id)
|
||||
}
|
||||
|
||||
/// Get retry history for a document
|
||||
pub async fn get_document_retry_history(
|
||||
pool: &PgPool,
|
||||
document_id: Uuid,
|
||||
) -> Result<Vec<OcrRetryHistory>> {
|
||||
let history = sqlx::query_as::<_, OcrRetryHistory>(
|
||||
r#"
|
||||
SELECT id, document_id, user_id, retry_reason, previous_status,
|
||||
previous_failure_reason, previous_error, priority, queue_id, created_at
|
||||
FROM ocr_retry_history
|
||||
WHERE document_id = $1
|
||||
ORDER BY created_at DESC
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
Ok(history)
|
||||
}
|
||||
|
||||
/// Get documents eligible for OCR retry based on criteria
|
||||
pub async fn get_eligible_documents_for_retry(
|
||||
pool: &PgPool,
|
||||
user_id: Option<Uuid>,
|
||||
mime_types: Option<&[String]>,
|
||||
failure_reasons: Option<&[String]>,
|
||||
max_retry_count: Option<i32>,
|
||||
limit: Option<i64>,
|
||||
) -> Result<Vec<EligibleDocument>> {
|
||||
let mut query = sqlx::QueryBuilder::new(
|
||||
r#"
|
||||
SELECT d.id, d.filename, d.file_size, d.mime_type,
|
||||
d.ocr_failure_reason, d.ocr_retry_count,
|
||||
d.created_at, d.updated_at
|
||||
FROM documents d
|
||||
WHERE d.ocr_status = 'failed'
|
||||
"#
|
||||
);
|
||||
|
||||
// Add user filter
|
||||
if let Some(uid) = user_id {
|
||||
query.push(" AND d.user_id = ");
|
||||
query.push_bind(uid);
|
||||
}
|
||||
|
||||
// Add MIME type filter
|
||||
if let Some(types) = mime_types {
|
||||
if !types.is_empty() {
|
||||
query.push(" AND d.mime_type = ANY(");
|
||||
query.push_bind(types);
|
||||
query.push(")");
|
||||
}
|
||||
}
|
||||
|
||||
// Add failure reason filter
|
||||
if let Some(reasons) = failure_reasons {
|
||||
if !reasons.is_empty() {
|
||||
query.push(" AND d.ocr_failure_reason = ANY(");
|
||||
query.push_bind(reasons);
|
||||
query.push(")");
|
||||
}
|
||||
}
|
||||
|
||||
// Add retry count filter
|
||||
if let Some(max_retries) = max_retry_count {
|
||||
query.push(" AND COALESCE(d.ocr_retry_count, 0) < ");
|
||||
query.push_bind(max_retries);
|
||||
}
|
||||
|
||||
query.push(" ORDER BY d.created_at DESC");
|
||||
|
||||
if let Some(lim) = limit {
|
||||
query.push(" LIMIT ");
|
||||
query.push_bind(lim);
|
||||
}
|
||||
|
||||
let documents = query.build_query_as::<EligibleDocument>()
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
/// Get OCR retry statistics
|
||||
pub async fn get_ocr_retry_statistics(
|
||||
pool: &PgPool,
|
||||
user_id: Option<Uuid>,
|
||||
) -> Result<OcrRetryStats> {
|
||||
let user_filter = if let Some(uid) = user_id {
|
||||
format!("AND user_id = '{}'", uid)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let stats = sqlx::query(&format!(
|
||||
r#"
|
||||
SELECT
|
||||
COUNT(DISTINCT document_id) as documents_with_retries,
|
||||
COUNT(*) as total_retry_attempts,
|
||||
AVG(priority) as avg_priority,
|
||||
MAX(created_at) as last_retry_at
|
||||
FROM ocr_retry_history
|
||||
WHERE 1=1 {}
|
||||
"#,
|
||||
user_filter
|
||||
))
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
|
||||
let retry_counts = sqlx::query(&format!(
|
||||
r#"
|
||||
SELECT
|
||||
COALESCE(ocr_retry_count, 0) as retry_count,
|
||||
COUNT(*) as document_count
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
{}
|
||||
GROUP BY ocr_retry_count
|
||||
ORDER BY retry_count
|
||||
"#,
|
||||
if user_id.is_some() { "AND user_id = $1" } else { "" }
|
||||
))
|
||||
.bind(user_id)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let retry_distribution: Vec<(i32, i64)> = retry_counts.into_iter()
|
||||
.map(|row| {
|
||||
(
|
||||
row.get::<i32, _>("retry_count"),
|
||||
row.get::<i64, _>("document_count"),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(OcrRetryStats {
|
||||
documents_with_retries: stats.get::<i64, _>("documents_with_retries"),
|
||||
total_retry_attempts: stats.get::<i64, _>("total_retry_attempts"),
|
||||
avg_priority: stats.get::<Option<f64>, _>("avg_priority").unwrap_or(0.0),
|
||||
last_retry_at: stats.get::<Option<DateTime<Utc>>, _>("last_retry_at"),
|
||||
retry_distribution,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct EligibleDocument {
|
||||
pub id: Uuid,
|
||||
pub filename: String,
|
||||
pub file_size: i64,
|
||||
pub mime_type: String,
|
||||
pub ocr_failure_reason: Option<String>,
|
||||
pub ocr_retry_count: Option<i32>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct OcrRetryStats {
|
||||
pub documents_with_retries: i64,
|
||||
pub total_retry_attempts: i64,
|
||||
pub avg_priority: f64,
|
||||
pub last_retry_at: Option<DateTime<Utc>>,
|
||||
pub retry_distribution: Vec<(i32, i64)>, // (retry_count, document_count)
|
||||
}
|
||||
@@ -64,6 +64,10 @@ pub fn router() -> Router<Arc<AppState>> {
|
||||
.route("/failed/{id}/view", get(view_failed_document))
|
||||
.route("/delete-low-confidence", post(delete_low_confidence_documents))
|
||||
.route("/delete-failed-ocr", post(delete_failed_ocr_documents))
|
||||
.route("/ocr/bulk-retry", post(crate::routes::documents_ocr_retry::bulk_retry_ocr))
|
||||
.route("/ocr/retry-stats", get(crate::routes::documents_ocr_retry::get_ocr_retry_stats))
|
||||
.route("/ocr/retry-recommendations", get(crate::routes::documents_ocr_retry::get_retry_recommendations))
|
||||
.route("/{id}/ocr/retry-history", get(crate::routes::documents_ocr_retry::get_document_retry_history))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
@@ -625,6 +629,18 @@ async fn retry_ocr(
|
||||
// Add to OCR queue with detailed logging
|
||||
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
// Record retry history
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
document_id,
|
||||
auth_user.user.id,
|
||||
"manual_retry",
|
||||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
tracing::warn!("Failed to record retry history for document {}: {}", document_id, e);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
|
||||
document_id, document.filename, queue_id, priority, document.file_size
|
||||
|
||||
624
src/routes/documents_ocr_retry.rs
Normal file
624
src/routes/documents_ocr_retry.rs
Normal file
@@ -0,0 +1,624 @@
|
||||
use std::sync::Arc;
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::Row;
|
||||
use uuid::Uuid;
|
||||
use tracing::{info, error, warn};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{
|
||||
auth::AuthUser,
|
||||
AppState,
|
||||
models::UserRole,
|
||||
};
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, ToSchema)]
|
||||
pub struct BulkOcrRetryRequest {
|
||||
/// Selection mode: "all", "specific", "filter"
|
||||
pub mode: SelectionMode,
|
||||
/// Specific document IDs (when mode = "specific")
|
||||
pub document_ids: Option<Vec<Uuid>>,
|
||||
/// Filter criteria (when mode = "filter")
|
||||
pub filter: Option<OcrRetryFilter>,
|
||||
/// Priority override (1-20, higher = more urgent)
|
||||
pub priority_override: Option<i32>,
|
||||
/// Preview mode - just return what would be processed
|
||||
pub preview_only: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SelectionMode {
|
||||
All, // All failed OCR documents
|
||||
Specific, // Specific document IDs
|
||||
Filter, // Filter by criteria
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone, ToSchema)]
|
||||
pub struct OcrRetryFilter {
|
||||
/// Filter by MIME types
|
||||
pub mime_types: Option<Vec<String>>,
|
||||
/// Filter by file extensions
|
||||
pub file_extensions: Option<Vec<String>>,
|
||||
/// Filter by OCR failure reasons
|
||||
pub failure_reasons: Option<Vec<String>>,
|
||||
/// Filter by minimum file size (bytes)
|
||||
pub min_file_size: Option<i64>,
|
||||
/// Filter by maximum file size (bytes)
|
||||
pub max_file_size: Option<i64>,
|
||||
/// Filter by date range - documents created after this date
|
||||
pub created_after: Option<chrono::DateTime<chrono::Utc>>,
|
||||
/// Filter by date range - documents created before this date
|
||||
pub created_before: Option<chrono::DateTime<chrono::Utc>>,
|
||||
/// Filter by tags
|
||||
pub tags: Option<Vec<String>>,
|
||||
/// Maximum number of documents to retry
|
||||
pub limit: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
pub struct BulkOcrRetryResponse {
|
||||
pub success: bool,
|
||||
pub message: String,
|
||||
pub queued_count: usize,
|
||||
pub matched_count: usize,
|
||||
pub documents: Vec<OcrRetryDocumentInfo>,
|
||||
pub estimated_total_time_minutes: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
pub struct OcrRetryDocumentInfo {
|
||||
pub id: Uuid,
|
||||
pub filename: String,
|
||||
pub file_size: i64,
|
||||
pub mime_type: String,
|
||||
pub ocr_failure_reason: Option<String>,
|
||||
pub priority: i32,
|
||||
pub queue_id: Option<Uuid>,
|
||||
}
|
||||
|
||||
/// Bulk retry OCR for multiple documents based on selection criteria
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/api/documents/ocr/bulk-retry",
|
||||
tag = "documents",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
request_body = BulkOcrRetryRequest,
|
||||
responses(
|
||||
(status = 200, description = "Bulk OCR retry result", body = BulkOcrRetryResponse),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 400, description = "Invalid request")
|
||||
)
|
||||
)]
|
||||
pub async fn bulk_retry_ocr(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
Json(request): Json<BulkOcrRetryRequest>,
|
||||
) -> Result<Json<BulkOcrRetryResponse>, StatusCode> {
|
||||
info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode);
|
||||
|
||||
let preview_only = request.preview_only.unwrap_or(false);
|
||||
|
||||
// Build query based on selection mode
|
||||
let documents = match request.mode {
|
||||
SelectionMode::All => {
|
||||
get_all_failed_ocr_documents(&state, &auth_user).await?
|
||||
}
|
||||
SelectionMode::Specific => {
|
||||
if let Some(ids) = request.document_ids {
|
||||
get_specific_documents(&state, &auth_user, ids).await?
|
||||
} else {
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
SelectionMode::Filter => {
|
||||
if let Some(filter) = request.filter {
|
||||
get_filtered_documents(&state, &auth_user, filter).await?
|
||||
} else {
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let matched_count = documents.len();
|
||||
let mut retry_documents = Vec::new();
|
||||
let mut queued_count = 0;
|
||||
let mut total_estimated_time = 0.0;
|
||||
|
||||
for doc in documents {
|
||||
let priority = calculate_priority(doc.file_size, request.priority_override);
|
||||
|
||||
let mut doc_info = OcrRetryDocumentInfo {
|
||||
id: doc.id,
|
||||
filename: doc.filename.clone(),
|
||||
file_size: doc.file_size,
|
||||
mime_type: doc.mime_type,
|
||||
ocr_failure_reason: doc.ocr_failure_reason,
|
||||
priority,
|
||||
queue_id: None,
|
||||
};
|
||||
|
||||
if !preview_only {
|
||||
// Reset OCR fields
|
||||
if let Err(e) = reset_document_ocr_status(&state, doc.id).await {
|
||||
warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Queue for OCR
|
||||
match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
doc_info.queue_id = Some(queue_id);
|
||||
queued_count += 1;
|
||||
|
||||
// Record retry history
|
||||
let retry_reason = match &request.mode {
|
||||
SelectionMode::All => "bulk_retry_all",
|
||||
SelectionMode::Specific => "bulk_retry_specific",
|
||||
SelectionMode::Filter => "bulk_retry_filtered",
|
||||
};
|
||||
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
doc.id,
|
||||
auth_user.user.id,
|
||||
retry_reason,
|
||||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
warn!("Failed to record retry history for document {}: {}", doc.id, e);
|
||||
}
|
||||
|
||||
info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Estimate processing time (2 seconds per MB as rough estimate)
|
||||
total_estimated_time += (doc.file_size as f64 / 1_048_576.0) * 2.0;
|
||||
retry_documents.push(doc_info);
|
||||
}
|
||||
|
||||
let response = BulkOcrRetryResponse {
|
||||
success: true,
|
||||
message: if preview_only {
|
||||
format!("Preview: {} documents would be queued for OCR retry", matched_count)
|
||||
} else {
|
||||
format!("Successfully queued {} out of {} documents for OCR retry", queued_count, matched_count)
|
||||
},
|
||||
queued_count,
|
||||
matched_count,
|
||||
documents: retry_documents,
|
||||
estimated_total_time_minutes: total_estimated_time / 60.0,
|
||||
};
|
||||
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
||||
/// Get retry history for a specific document
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/documents/{id}/ocr/retry-history",
|
||||
tag = "documents",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
params(
|
||||
("id" = Uuid, Path, description = "Document ID")
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "OCR retry history", body = String),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 404, description = "Document not found")
|
||||
)
|
||||
)]
|
||||
pub async fn get_document_retry_history(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
Path(document_id): Path<Uuid>,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
// Check if document exists and belongs to user
|
||||
let doc_exists = sqlx::query(
|
||||
r#"
|
||||
SELECT 1 FROM documents
|
||||
WHERE id = $1
|
||||
AND ($2::uuid IS NULL OR user_id = $2)
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.bind(if auth_user.user.role == UserRole::Admin { None } else { Some(auth_user.user.id) })
|
||||
.fetch_optional(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
if doc_exists.is_none() {
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
let history = crate::db::ocr_retry::get_document_retry_history(state.db.get_pool(), document_id)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let history_items: Vec<serde_json::Value> = history.into_iter()
|
||||
.map(|h| {
|
||||
serde_json::json!({
|
||||
"id": h.id,
|
||||
"retry_reason": h.retry_reason,
|
||||
"previous_status": h.previous_status,
|
||||
"previous_failure_reason": h.previous_failure_reason,
|
||||
"previous_error": h.previous_error,
|
||||
"priority": h.priority,
|
||||
"queue_id": h.queue_id,
|
||||
"created_at": h.created_at,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
"document_id": document_id,
|
||||
"retry_history": history_items,
|
||||
"total_retries": history_items.len(),
|
||||
})))
|
||||
}
|
||||
|
||||
/// Get OCR retry statistics
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/documents/ocr/retry-stats",
|
||||
tag = "documents",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "OCR retry statistics", body = String),
|
||||
(status = 401, description = "Unauthorized")
|
||||
)
|
||||
)]
|
||||
pub async fn get_ocr_retry_stats(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
let user_filter = if auth_user.user.role == UserRole::Admin {
|
||||
None
|
||||
} else {
|
||||
Some(auth_user.user.id)
|
||||
};
|
||||
|
||||
// Get statistics by failure reason
|
||||
let failure_stats = sqlx::query(
|
||||
r#"
|
||||
SELECT
|
||||
ocr_failure_reason,
|
||||
COUNT(*) as count,
|
||||
AVG(file_size) as avg_file_size,
|
||||
MIN(created_at) as first_occurrence,
|
||||
MAX(updated_at) as last_occurrence
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
GROUP BY ocr_failure_reason
|
||||
ORDER BY count DESC
|
||||
"#
|
||||
)
|
||||
.bind(user_filter)
|
||||
.fetch_all(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// Get statistics by file type
|
||||
let type_stats = sqlx::query(
|
||||
r#"
|
||||
SELECT
|
||||
mime_type,
|
||||
COUNT(*) as count,
|
||||
AVG(file_size) as avg_file_size
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
GROUP BY mime_type
|
||||
ORDER BY count DESC
|
||||
"#
|
||||
)
|
||||
.bind(user_filter)
|
||||
.fetch_all(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let failure_reasons: Vec<serde_json::Value> = failure_stats.into_iter()
|
||||
.map(|row| {
|
||||
// Handle NUMERIC type from database by trying different types
|
||||
let avg_file_size_mb = if let Ok(val) = row.try_get::<f64, _>("avg_file_size") {
|
||||
val / 1_048_576.0
|
||||
} else if let Ok(val) = row.try_get::<i64, _>("avg_file_size") {
|
||||
val as f64 / 1_048_576.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"reason": row.get::<Option<String>, _>("ocr_failure_reason").unwrap_or_else(|| "unknown".to_string()),
|
||||
"count": row.get::<i64, _>("count"),
|
||||
"avg_file_size_mb": avg_file_size_mb,
|
||||
"first_occurrence": row.get::<chrono::DateTime<chrono::Utc>, _>("first_occurrence"),
|
||||
"last_occurrence": row.get::<chrono::DateTime<chrono::Utc>, _>("last_occurrence"),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let file_types: Vec<serde_json::Value> = type_stats.into_iter()
|
||||
.map(|row| {
|
||||
// Handle NUMERIC type from database by trying different types
|
||||
let avg_file_size_mb = if let Ok(val) = row.try_get::<f64, _>("avg_file_size") {
|
||||
val / 1_048_576.0
|
||||
} else if let Ok(val) = row.try_get::<i64, _>("avg_file_size") {
|
||||
val as f64 / 1_048_576.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"mime_type": row.get::<String, _>("mime_type"),
|
||||
"count": row.get::<i64, _>("count"),
|
||||
"avg_file_size_mb": avg_file_size_mb,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
"failure_reasons": failure_reasons,
|
||||
"file_types": file_types,
|
||||
"total_failed": failure_reasons.iter().map(|r| r["count"].as_i64().unwrap_or(0)).sum::<i64>(),
|
||||
})))
|
||||
}
|
||||
|
||||
/// Get intelligent retry recommendations based on failure patterns
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/documents/ocr/retry-recommendations",
|
||||
tag = "documents",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "OCR retry recommendations", body = String),
|
||||
(status = 401, description = "Unauthorized")
|
||||
)
|
||||
)]
|
||||
pub async fn get_retry_recommendations(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
let retry_service = crate::services::ocr_retry_service::OcrRetryService::new(state);
|
||||
|
||||
let recommendations = retry_service.get_retry_recommendations(auth_user.user.id)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("Failed to get retry recommendations: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
let recommendations_json: Vec<serde_json::Value> = recommendations.into_iter()
|
||||
.map(|rec| {
|
||||
serde_json::json!({
|
||||
"reason": rec.reason,
|
||||
"title": rec.title,
|
||||
"description": rec.description,
|
||||
"estimated_success_rate": rec.estimated_success_rate,
|
||||
"document_count": rec.document_count,
|
||||
"filter": rec.filter,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
"recommendations": recommendations_json,
|
||||
"total_recommendations": recommendations_json.len(),
|
||||
})))
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
async fn get_all_failed_ocr_documents(
|
||||
state: &Arc<AppState>,
|
||||
auth_user: &AuthUser
|
||||
) -> Result<Vec<DocumentInfo>, StatusCode> {
|
||||
let user_filter = if auth_user.user.role == UserRole::Admin {
|
||||
None
|
||||
} else {
|
||||
Some(auth_user.user.id)
|
||||
};
|
||||
|
||||
let documents = sqlx::query_as::<_, DocumentInfo>(
|
||||
r#"
|
||||
SELECT id, filename, file_size, mime_type, ocr_failure_reason
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
ORDER BY created_at DESC
|
||||
"#
|
||||
)
|
||||
.bind(user_filter)
|
||||
.fetch_all(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
async fn get_specific_documents(
|
||||
state: &Arc<AppState>,
|
||||
auth_user: &AuthUser,
|
||||
document_ids: Vec<Uuid>
|
||||
) -> Result<Vec<DocumentInfo>, StatusCode> {
|
||||
let user_filter = if auth_user.user.role == UserRole::Admin {
|
||||
None
|
||||
} else {
|
||||
Some(auth_user.user.id)
|
||||
};
|
||||
|
||||
let documents = sqlx::query_as::<_, DocumentInfo>(
|
||||
r#"
|
||||
SELECT id, filename, file_size, mime_type, ocr_failure_reason
|
||||
FROM documents
|
||||
WHERE id = ANY($1)
|
||||
AND ocr_status = 'failed'
|
||||
AND ($2::uuid IS NULL OR user_id = $2)
|
||||
"#
|
||||
)
|
||||
.bind(&document_ids)
|
||||
.bind(user_filter)
|
||||
.fetch_all(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
async fn get_filtered_documents(
|
||||
state: &Arc<AppState>,
|
||||
auth_user: &AuthUser,
|
||||
filter: OcrRetryFilter
|
||||
) -> Result<Vec<DocumentInfo>, StatusCode> {
|
||||
let mut query = sqlx::QueryBuilder::new(
|
||||
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE ocr_status = 'failed'"
|
||||
);
|
||||
|
||||
// User filter
|
||||
if auth_user.user.role != UserRole::Admin {
|
||||
query.push(" AND user_id = ");
|
||||
query.push_bind(auth_user.user.id);
|
||||
}
|
||||
|
||||
// MIME type filter
|
||||
if let Some(mime_types) = &filter.mime_types {
|
||||
if !mime_types.is_empty() {
|
||||
query.push(" AND mime_type = ANY(");
|
||||
query.push_bind(mime_types);
|
||||
query.push(")");
|
||||
}
|
||||
}
|
||||
|
||||
// File extension filter
|
||||
if let Some(extensions) = &filter.file_extensions {
|
||||
if !extensions.is_empty() {
|
||||
query.push(" AND (");
|
||||
for (i, ext) in extensions.iter().enumerate() {
|
||||
if i > 0 {
|
||||
query.push(" OR ");
|
||||
}
|
||||
query.push("filename ILIKE ");
|
||||
query.push_bind(format!("%.{}", ext));
|
||||
}
|
||||
query.push(")");
|
||||
}
|
||||
}
|
||||
|
||||
// Failure reason filter
|
||||
if let Some(reasons) = &filter.failure_reasons {
|
||||
if !reasons.is_empty() {
|
||||
query.push(" AND ocr_failure_reason = ANY(");
|
||||
query.push_bind(reasons);
|
||||
query.push(")");
|
||||
}
|
||||
}
|
||||
|
||||
// File size filters
|
||||
if let Some(min_size) = filter.min_file_size {
|
||||
query.push(" AND file_size >= ");
|
||||
query.push_bind(min_size);
|
||||
}
|
||||
|
||||
if let Some(max_size) = filter.max_file_size {
|
||||
query.push(" AND file_size <= ");
|
||||
query.push_bind(max_size);
|
||||
}
|
||||
|
||||
// Date filters
|
||||
if let Some(created_after) = filter.created_after {
|
||||
query.push(" AND created_at >= ");
|
||||
query.push_bind(created_after);
|
||||
}
|
||||
|
||||
if let Some(created_before) = filter.created_before {
|
||||
query.push(" AND created_at <= ");
|
||||
query.push_bind(created_before);
|
||||
}
|
||||
|
||||
// Tag filter
|
||||
if let Some(tags) = &filter.tags {
|
||||
if !tags.is_empty() {
|
||||
query.push(" AND tags && ");
|
||||
query.push_bind(tags);
|
||||
}
|
||||
}
|
||||
|
||||
// Order and limit
|
||||
query.push(" ORDER BY created_at DESC");
|
||||
|
||||
if let Some(limit) = filter.limit {
|
||||
query.push(" LIMIT ");
|
||||
query.push_bind(limit);
|
||||
}
|
||||
|
||||
let documents = query.build_query_as::<DocumentInfo>()
|
||||
.fetch_all(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
async fn reset_document_ocr_status(state: &Arc<AppState>, document_id: Uuid) -> Result<(), anyhow::Error> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
SET ocr_status = 'pending',
|
||||
ocr_text = NULL,
|
||||
ocr_error = NULL,
|
||||
ocr_failure_reason = NULL,
|
||||
ocr_confidence = NULL,
|
||||
ocr_word_count = NULL,
|
||||
ocr_processing_time_ms = NULL,
|
||||
ocr_completed_at = NULL,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.execute(state.db.get_pool())
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pick the OCR queue priority for a retry.
///
/// An explicit `override_priority` wins and is clamped to `1..=20`.
/// Otherwise the priority is derived from the file size, with smaller files
/// ranked higher:
///
/// | size                | priority |
/// |---------------------|----------|
/// | 0 ..= 1 MiB         | 15       |
/// | up to 5 MiB         | 12       |
/// | up to 10 MiB        | 10       |
/// | up to 50 MiB        | 8        |
/// | larger              | 6        |
///
/// Negative sizes fall into the "up to 5 MiB" bucket.
fn calculate_priority(file_size: i64, override_priority: Option<i32>) -> i32 {
    const MIB: i64 = 1024 * 1024;

    if let Some(requested) = override_priority {
        return requested.clamp(1, 20);
    }

    if (0..=MIB).contains(&file_size) {
        15
    } else if file_size <= 5 * MIB {
        12
    } else if file_size <= 10 * MIB {
        10
    } else if file_size <= 50 * MIB {
        8
    } else {
        6
    }
}
|
||||
|
||||
#[derive(Debug, sqlx::FromRow)]
|
||||
struct DocumentInfo {
|
||||
id: Uuid,
|
||||
filename: String,
|
||||
file_size: i64,
|
||||
mime_type: String,
|
||||
ocr_failure_reason: Option<String>,
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod auth;
|
||||
pub mod documents;
|
||||
pub mod documents_ocr_retry;
|
||||
pub mod ignored_files;
|
||||
pub mod labels;
|
||||
pub mod metrics;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod file_service;
|
||||
pub mod local_folder_service;
|
||||
pub mod ocr_retry_service;
|
||||
pub mod s3_service;
|
||||
pub mod s3_service_stub;
|
||||
pub mod webdav_service;
|
||||
365
src/services/ocr_retry_service.rs
Normal file
365
src/services/ocr_retry_service.rs
Normal file
@@ -0,0 +1,365 @@
|
||||
use anyhow::Result;
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
use tracing::{info, warn, error};
|
||||
|
||||
use crate::{
|
||||
AppState,
|
||||
routes::documents_ocr_retry::OcrRetryFilter,
|
||||
};
|
||||
use sqlx::Row;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OcrRetryService {
|
||||
state: Arc<AppState>,
|
||||
}
|
||||
|
||||
impl OcrRetryService {
|
||||
pub fn new(state: Arc<AppState>) -> Self {
|
||||
Self { state }
|
||||
}
|
||||
|
||||
/// Retry OCR for all failed documents for a user
|
||||
pub async fn retry_all_failed(&self, user_id: Uuid, priority_override: Option<i32>) -> Result<RetryResult> {
|
||||
info!("Starting bulk retry for all failed OCR documents for user {}", user_id);
|
||||
|
||||
let documents = self.get_all_failed_documents(user_id).await?;
|
||||
let retry_result = self.process_documents_for_retry(
|
||||
documents,
|
||||
user_id,
|
||||
"bulk_retry_all",
|
||||
priority_override
|
||||
).await?;
|
||||
|
||||
info!("Bulk retry completed: {} out of {} documents queued",
|
||||
retry_result.queued_count, retry_result.matched_count);
|
||||
|
||||
Ok(retry_result)
|
||||
}
|
||||
|
||||
/// Retry OCR for documents matching specific criteria
|
||||
pub async fn retry_by_criteria(&self, user_id: Uuid, filter: OcrRetryFilter, priority_override: Option<i32>) -> Result<RetryResult> {
|
||||
info!("Starting filtered retry for user {} with criteria: mime_types={:?}, failure_reasons={:?}",
|
||||
user_id, filter.mime_types, filter.failure_reasons);
|
||||
|
||||
let documents = self.get_filtered_documents(user_id, filter).await?;
|
||||
let retry_result = self.process_documents_for_retry(
|
||||
documents,
|
||||
user_id,
|
||||
"bulk_retry_filtered",
|
||||
priority_override
|
||||
).await?;
|
||||
|
||||
info!("Filtered retry completed: {} out of {} documents queued",
|
||||
retry_result.queued_count, retry_result.matched_count);
|
||||
|
||||
Ok(retry_result)
|
||||
}
|
||||
|
||||
/// Retry OCR for specific document IDs
|
||||
pub async fn retry_specific_documents(&self, user_id: Uuid, document_ids: Vec<Uuid>, priority_override: Option<i32>) -> Result<RetryResult> {
|
||||
info!("Starting specific document retry for user {} with {} documents", user_id, document_ids.len());
|
||||
|
||||
let documents = self.get_specific_documents(user_id, document_ids).await?;
|
||||
let retry_result = self.process_documents_for_retry(
|
||||
documents,
|
||||
user_id,
|
||||
"bulk_retry_specific",
|
||||
priority_override
|
||||
).await?;
|
||||
|
||||
info!("Specific document retry completed: {} out of {} documents queued",
|
||||
retry_result.queued_count, retry_result.matched_count);
|
||||
|
||||
Ok(retry_result)
|
||||
}
|
||||
|
||||
/// Get retry recommendations based on failure patterns
|
||||
pub async fn get_retry_recommendations(&self, user_id: Uuid) -> Result<Vec<RetryRecommendation>> {
|
||||
let mut recommendations = Vec::new();
|
||||
|
||||
// Get failure statistics
|
||||
let failure_stats = self.get_failure_statistics(user_id).await?;
|
||||
|
||||
// Recommend retrying recent font encoding errors (often transient)
|
||||
if let Some(font_errors) = failure_stats.iter().find(|s| s.reason.contains("font_encoding")) {
|
||||
if font_errors.count > 0 && font_errors.recent_failures > 0 {
|
||||
recommendations.push(RetryRecommendation {
|
||||
reason: "pdf_font_encoding".to_string(),
|
||||
title: "Font Encoding Errors".to_string(),
|
||||
description: "These PDF files failed due to font encoding issues. Recent OCR improvements may resolve these.".to_string(),
|
||||
estimated_success_rate: 0.7,
|
||||
document_count: font_errors.count,
|
||||
filter: OcrRetryFilter {
|
||||
failure_reasons: Some(vec!["pdf_font_encoding".to_string()]),
|
||||
..Default::default()
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Recommend retrying corrupted files with smaller size (might be fixed)
|
||||
if let Some(corruption_errors) = failure_stats.iter().find(|s| s.reason.contains("corruption")) {
|
||||
if corruption_errors.count > 0 && corruption_errors.avg_file_size_mb < 10.0 {
|
||||
recommendations.push(RetryRecommendation {
|
||||
reason: "pdf_corruption".to_string(),
|
||||
title: "Small Corrupted Files".to_string(),
|
||||
description: "These smaller PDF files failed due to corruption. They may succeed with updated parsing logic.".to_string(),
|
||||
estimated_success_rate: 0.5,
|
||||
document_count: corruption_errors.count,
|
||||
filter: OcrRetryFilter {
|
||||
failure_reasons: Some(vec!["pdf_corruption".to_string()]),
|
||||
max_file_size: Some(10 * 1024 * 1024), // 10MB
|
||||
..Default::default()
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Recommend retrying timeout errors with higher priority
|
||||
if let Some(timeout_errors) = failure_stats.iter().find(|s| s.reason.contains("timeout")) {
|
||||
if timeout_errors.count > 0 {
|
||||
recommendations.push(RetryRecommendation {
|
||||
reason: "ocr_timeout".to_string(),
|
||||
title: "Timeout Errors".to_string(),
|
||||
description: "These files timed out during processing. Retrying with higher priority may help.".to_string(),
|
||||
estimated_success_rate: 0.8,
|
||||
document_count: timeout_errors.count,
|
||||
filter: OcrRetryFilter {
|
||||
failure_reasons: Some(vec!["ocr_timeout".to_string()]),
|
||||
..Default::default()
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(recommendations)
|
||||
}
|
||||
|
||||
// Helper methods
|
||||
|
||||
/// Load all failed-OCR documents that have been retried fewer than 5 times.
///
/// Admin users see every user's documents; others only their own.
async fn get_all_failed_documents(&self, user_id: Uuid) -> Result<Vec<crate::db::ocr_retry::EligibleDocument>> {
    let is_admin = self.is_admin(user_id).await?;
    let owner_scope = if is_admin { None } else { Some(user_id) };

    let pool = self.state.db.get_pool();
    crate::db::ocr_retry::get_eligible_documents_for_retry(
        pool,
        owner_scope,
        None,    // any MIME type
        None,    // any failure reason
        Some(5), // fewer than 5 previous retries
        None,    // unlimited
    )
    .await
}
|
||||
|
||||
/// Load failed-OCR documents matching `filter` that have been retried fewer
/// than 5 times.
///
/// Only the `mime_types`, `failure_reasons` and `limit` fields of the filter
/// are passed on to the query here. Admin users see every user's documents;
/// others only their own.
async fn get_filtered_documents(&self, user_id: Uuid, filter: OcrRetryFilter) -> Result<Vec<crate::db::ocr_retry::EligibleDocument>> {
    let is_admin = self.is_admin(user_id).await?;
    let owner_scope = if is_admin { None } else { Some(user_id) };

    let pool = self.state.db.get_pool();
    crate::db::ocr_retry::get_eligible_documents_for_retry(
        pool,
        owner_scope,
        filter.mime_types.as_deref(),
        filter.failure_reasons.as_deref(),
        Some(5), // fewer than 5 previous retries
        filter.limit,
    )
    .await
}
|
||||
|
||||
async fn get_specific_documents(&self, user_id: Uuid, document_ids: Vec<Uuid>) -> Result<Vec<crate::db::ocr_retry::EligibleDocument>> {
|
||||
let user_filter = if self.is_admin(user_id).await? { None } else { Some(user_id) };
|
||||
|
||||
let documents = sqlx::query_as::<_, crate::db::ocr_retry::EligibleDocument>(
|
||||
r#"
|
||||
SELECT id, filename, file_size, mime_type, ocr_failure_reason, ocr_retry_count, created_at, updated_at
|
||||
FROM documents
|
||||
WHERE id = ANY($1)
|
||||
AND ocr_status = 'failed'
|
||||
AND ($2::uuid IS NULL OR user_id = $2)
|
||||
"#
|
||||
)
|
||||
.bind(&document_ids)
|
||||
.bind(user_filter)
|
||||
.fetch_all(self.state.db.get_pool())
|
||||
.await?;
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
async fn process_documents_for_retry(
|
||||
&self,
|
||||
documents: Vec<crate::db::ocr_retry::EligibleDocument>,
|
||||
user_id: Uuid,
|
||||
retry_reason: &str,
|
||||
priority_override: Option<i32>
|
||||
) -> Result<RetryResult> {
|
||||
let mut queued_count = 0;
|
||||
let matched_count = documents.len();
|
||||
|
||||
for doc in documents {
|
||||
let priority = self.calculate_priority(doc.file_size, priority_override);
|
||||
|
||||
// Reset OCR status
|
||||
if let Err(e) = self.reset_document_ocr_status(doc.id).await {
|
||||
warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Queue for OCR
|
||||
match self.state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
// Record retry history
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
self.state.db.get_pool(),
|
||||
doc.id,
|
||||
user_id,
|
||||
retry_reason,
|
||||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
warn!("Failed to record retry history for document {}: {}", doc.id, e);
|
||||
}
|
||||
|
||||
queued_count += 1;
|
||||
info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(RetryResult {
|
||||
queued_count,
|
||||
matched_count,
|
||||
})
|
||||
}
|
||||
|
||||
async fn reset_document_ocr_status(&self, document_id: Uuid) -> Result<()> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
SET ocr_status = 'pending',
|
||||
ocr_text = NULL,
|
||||
ocr_error = NULL,
|
||||
ocr_failure_reason = NULL,
|
||||
ocr_confidence = NULL,
|
||||
ocr_word_count = NULL,
|
||||
ocr_processing_time_ms = NULL,
|
||||
ocr_completed_at = NULL,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
"#
|
||||
)
|
||||
.bind(document_id)
|
||||
.execute(self.state.db.get_pool())
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn calculate_priority(&self, file_size: i64, override_priority: Option<i32>) -> i32 {
|
||||
if let Some(priority) = override_priority {
|
||||
return priority.clamp(1, 20);
|
||||
}
|
||||
|
||||
match file_size {
|
||||
0..=1048576 => 15, // <= 1MB: highest priority
|
||||
..=5242880 => 12, // 1-5MB: high priority
|
||||
..=10485760 => 10, // 5-10MB: medium priority
|
||||
..=52428800 => 8, // 10-50MB: low priority
|
||||
_ => 6, // > 50MB: lowest priority
|
||||
}
|
||||
}
|
||||
|
||||
async fn is_admin(&self, user_id: Uuid) -> Result<bool> {
|
||||
let role: Option<String> = sqlx::query_scalar(
|
||||
"SELECT role FROM users WHERE id = $1"
|
||||
)
|
||||
.bind(user_id)
|
||||
.fetch_optional(self.state.db.get_pool())
|
||||
.await?;
|
||||
|
||||
Ok(role.as_deref() == Some("admin"))
|
||||
}
|
||||
|
||||
async fn get_failure_statistics(&self, user_id: Uuid) -> Result<Vec<FailureStatistic>> {
|
||||
let user_filter = if self.is_admin(user_id).await? { None } else { Some(user_id) };
|
||||
|
||||
let stats = sqlx::query(
|
||||
r#"
|
||||
SELECT
|
||||
COALESCE(ocr_failure_reason, 'unknown') as reason,
|
||||
COUNT(*) as count,
|
||||
AVG(file_size) as avg_file_size,
|
||||
COUNT(*) FILTER (WHERE updated_at > NOW() - INTERVAL '7 days') as recent_failures
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
GROUP BY ocr_failure_reason
|
||||
ORDER BY count DESC
|
||||
"#
|
||||
)
|
||||
.bind(user_filter)
|
||||
.fetch_all(self.state.db.get_pool())
|
||||
.await?;
|
||||
|
||||
let statistics: Vec<FailureStatistic> = stats.into_iter()
|
||||
.map(|row| FailureStatistic {
|
||||
reason: row.get::<String, _>("reason"),
|
||||
count: row.get::<i64, _>("count"),
|
||||
avg_file_size_mb: {
|
||||
// Handle NUMERIC type from database by trying different types
|
||||
if let Ok(val) = row.try_get::<f64, _>("avg_file_size") {
|
||||
val / 1_048_576.0
|
||||
} else if let Ok(val) = row.try_get::<i64, _>("avg_file_size") {
|
||||
val as f64 / 1_048_576.0
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
},
|
||||
recent_failures: row.get::<i64, _>("recent_failures"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(statistics)
|
||||
}
|
||||
}
|
||||
|
||||
/// Outcome of one retry batch.
#[derive(Debug)]
pub struct RetryResult {
    /// Number of documents that were successfully placed on the OCR queue.
    pub queued_count: usize,
    /// Number of documents that were handed to the batch in total.
    pub matched_count: usize,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RetryRecommendation {
|
||||
pub reason: String,
|
||||
pub title: String,
|
||||
pub description: String,
|
||||
pub estimated_success_rate: f64,
|
||||
pub document_count: i64,
|
||||
pub filter: OcrRetryFilter,
|
||||
}
|
||||
|
||||
/// Aggregated numbers for one failure reason.
#[derive(Debug)]
struct FailureStatistic {
    /// Failure-reason key.
    reason: String,
    /// Number of failed documents with this reason.
    count: i64,
    /// Mean file size of those documents, in MiB.
    avg_file_size_mb: f64,
    /// How many of those failures are recent.
    recent_failures: i64,
}
|
||||
|
||||
impl Default for OcrRetryFilter {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
mime_types: None,
|
||||
file_extensions: None,
|
||||
failure_reasons: None,
|
||||
min_file_size: None,
|
||||
max_file_size: None,
|
||||
created_after: None,
|
||||
created_before: None,
|
||||
tags: None,
|
||||
limit: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,4 +20,5 @@ mod generic_migration_tests;
|
||||
mod migration_constraint_tests;
|
||||
mod migration_integration_tests;
|
||||
mod failed_documents_unit_tests;
|
||||
mod document_response_serialization_tests;
|
||||
mod document_response_serialization_tests;
|
||||
mod unit_ocr_retry_db_tests_simple;
|
||||
|
||||
65
src/tests/unit_ocr_retry_db_tests_simple.rs
Normal file
65
src/tests/unit_ocr_retry_db_tests_simple.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
#[cfg(test)]
mod tests {
    use crate::db::ocr_retry::*;
    use sqlx::{PgPool, Row};
    use testcontainers::{runners::AsyncRunner, ContainerAsync};
    use testcontainers_modules::postgres::Postgres;
    use uuid::Uuid;

    /// Starts a throwaway Postgres container, connects to it and applies the
    /// project migrations. The container handle is returned so the caller can
    /// keep it alive for as long as the pool is in use.
    async fn setup_test_db() -> (ContainerAsync<Postgres>, PgPool) {
        let container = Postgres::default()
            .start()
            .await
            .expect("Failed to start postgres container");
        let port = container
            .get_host_port_ipv4(5432)
            .await
            .expect("Failed to get postgres port");

        let connection_string = format!("postgres://postgres:postgres@127.0.0.1:{}/postgres", port);
        let pool = PgPool::connect(&connection_string)
            .await
            .expect("Failed to connect to test database");

        sqlx::migrate!("./migrations")
            .run(&pool)
            .await
            .expect("Failed to run migrations");

        (container, pool)
    }

    /// `record_ocr_retry` must insert exactly one history row and return its id.
    #[tokio::test]
    async fn test_simple_retry_record() {
        let (_container, pool) = setup_test_db().await;

        // Fixtures: one user owning one document.
        let user_id = Uuid::new_v4();
        let doc_id = Uuid::new_v4();

        let insert_user = "INSERT INTO users (id, username, email, password_hash) VALUES ($1, 'test', 'test@test.com', 'test')";
        sqlx::query(insert_user)
            .bind(user_id)
            .execute(&pool)
            .await
            .expect("Failed to create test user");

        let insert_document = "INSERT INTO documents (id, filename, original_filename, user_id, mime_type, file_size, created_at, updated_at) VALUES ($1, 'test.pdf', 'test.pdf', $2, 'application/pdf', 1024, NOW(), NOW())";
        sqlx::query(insert_document)
            .bind(doc_id)
            .bind(user_id)
            .execute(&pool)
            .await
            .expect("Failed to create test document");

        // Exercise the function under test.
        let retry_id = record_ocr_retry(&pool, doc_id, user_id, "manual_retry", 10, None)
            .await
            .expect("Failed to record retry");

        // The returned id must identify exactly one stored history row.
        let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM ocr_retry_history WHERE id = $1")
            .bind(retry_id)
            .fetch_one(&pool)
            .await
            .expect("Failed to count retries");

        assert_eq!(count, 1);
    }
}
|
||||
486
tests/integration_ocr_retry_tests.rs
Normal file
486
tests/integration_ocr_retry_tests.rs
Normal file
@@ -0,0 +1,486 @@
|
||||
use reqwest::Client;
|
||||
use serde_json::{json, Value};
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
|
||||
use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole};
|
||||
|
||||
/// Base URL of the server under test: the `API_URL` environment variable when
/// it can be read, otherwise `http://localhost:8000`.
fn get_base_url() -> String {
    match std::env::var("API_URL") {
        Ok(url) => url,
        Err(_) => String::from("http://localhost:8000"),
    }
}

/// Per-request timeout applied to the API calls in this file.
const TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
struct OcrRetryTestHelper {
|
||||
client: Client,
|
||||
token: String,
|
||||
}
|
||||
|
||||
impl OcrRetryTestHelper {
|
||||
async fn new() -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let client = Client::new();
|
||||
|
||||
// First check if server is running with better error handling
|
||||
let health_check = client
|
||||
.get(&format!("{}/api/health", get_base_url()))
|
||||
.timeout(Duration::from_secs(10))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match health_check {
|
||||
Ok(response) => {
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_else(|_| "Unable to read response".to_string());
|
||||
return Err(format!("Health check failed with status {}: {}. Is the server running at {}?", status, text, get_base_url()).into());
|
||||
}
|
||||
println!("✅ Server health check passed at {}", get_base_url());
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("❌ Cannot connect to server at {}: {}", get_base_url(), e);
|
||||
eprintln!("💡 To run integration tests, start the server first:");
|
||||
eprintln!(" cargo run");
|
||||
eprintln!(" Then run tests in another terminal:");
|
||||
eprintln!(" cargo test --test integration_ocr_retry_tests");
|
||||
return Err(format!("Server not reachable: {}", e).into());
|
||||
}
|
||||
}
|
||||
|
||||
// Create a test admin user
|
||||
let test_id = Uuid::new_v4().simple().to_string();
|
||||
let nanos = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_nanos();
|
||||
let username = format!("ocr_retry_admin_{}_{}", test_id, nanos);
|
||||
let email = format!("ocr_retry_admin_{}@{}.example.com", test_id, nanos);
|
||||
let password = "testpassword123";
|
||||
|
||||
// Register admin user
|
||||
let user_data = CreateUser {
|
||||
username: username.clone(),
|
||||
email: email.clone(),
|
||||
password: password.to_string(),
|
||||
role: Some(UserRole::Admin),
|
||||
};
|
||||
|
||||
let register_response = client
|
||||
.post(&format!("{}/api/auth/register", get_base_url()))
|
||||
.json(&user_data)
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !register_response.status().is_success() {
|
||||
return Err(format!("Registration failed: {}", register_response.text().await?).into());
|
||||
}
|
||||
|
||||
// Login with the new user
|
||||
let login_data = LoginRequest {
|
||||
username: username.clone(),
|
||||
password: password.to_string(),
|
||||
};
|
||||
|
||||
let login_response = client
|
||||
.post(&format!("{}/api/auth/login", get_base_url()))
|
||||
.json(&login_data)
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !login_response.status().is_success() {
|
||||
return Err(format!("Login failed: {}", login_response.text().await?).into());
|
||||
}
|
||||
|
||||
let login_result: LoginResponse = login_response.json().await?;
|
||||
let token = login_result.token;
|
||||
|
||||
Ok(Self { client, token })
|
||||
}
|
||||
|
||||
fn get_auth_header(&self) -> String {
|
||||
format!("Bearer {}", self.token)
|
||||
}
|
||||
|
||||
async fn get_retry_stats(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let response = self.client
|
||||
.get(&format!("{}/api/documents/ocr/retry-stats", get_base_url()))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response.text().await?;
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(format!("Failed to get retry stats (status {}): {}", status, response_text).into());
|
||||
}
|
||||
|
||||
// Try to parse the JSON and provide better error messages
|
||||
match serde_json::from_str::<Value>(&response_text) {
|
||||
Ok(result) => Ok(result),
|
||||
Err(e) => {
|
||||
eprintln!("JSON parsing failed for retry stats response:");
|
||||
eprintln!("Status: {}", status);
|
||||
eprintln!("Response text: {}", response_text);
|
||||
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_retry_recommendations(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let response = self.client
|
||||
.get(&format!("{}/api/documents/ocr/retry-recommendations", get_base_url()))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response.text().await?;
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(format!("Failed to get retry recommendations (status {}): {}", status, response_text).into());
|
||||
}
|
||||
|
||||
// Try to parse the JSON and provide better error messages
|
||||
match serde_json::from_str::<Value>(&response_text) {
|
||||
Ok(result) => Ok(result),
|
||||
Err(e) => {
|
||||
eprintln!("JSON parsing failed for retry recommendations response:");
|
||||
eprintln!("Status: {}", status);
|
||||
eprintln!("Response text: {}", response_text);
|
||||
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn bulk_retry_ocr(&self, mode: &str, document_ids: Option<Vec<String>>, preview_only: bool) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let mut request_body = json!({
|
||||
"mode": mode,
|
||||
"preview_only": preview_only
|
||||
});
|
||||
|
||||
if let Some(ids) = document_ids {
|
||||
request_body["document_ids"] = json!(ids);
|
||||
}
|
||||
|
||||
let response = self.client
|
||||
.post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.json(&request_body)
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response.text().await?;
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(format!("Failed to bulk retry OCR (status {}): {}", status, response_text).into());
|
||||
}
|
||||
|
||||
// Try to parse the JSON and provide better error messages
|
||||
match serde_json::from_str::<Value>(&response_text) {
|
||||
Ok(result) => Ok(result),
|
||||
Err(e) => {
|
||||
eprintln!("JSON parsing failed for bulk retry response:");
|
||||
eprintln!("Status: {}", status);
|
||||
eprintln!("Response text: {}", response_text);
|
||||
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_document_retry_history(&self, document_id: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let response = self.client
|
||||
.get(&format!("{}/api/documents/{}/ocr/retry-history", get_base_url(), document_id))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("Failed to get retry history: {}", response.text().await?).into());
|
||||
}
|
||||
|
||||
let result: Value = response.json().await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn get_failed_documents(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let response = self.client
|
||||
.get(&format!("{}/api/documents/failed", get_base_url()))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("Failed to get failed documents: {}", response.text().await?).into());
|
||||
}
|
||||
|
||||
let result: Value = response.json().await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn create_failed_test_document(&self) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Upload a simple text file first
|
||||
let test_content = "This is a test document for OCR retry testing.";
|
||||
let file_part = reqwest::multipart::Part::bytes(test_content.as_bytes())
|
||||
.file_name("test_retry_document.txt")
|
||||
.mime_str("text/plain")?;
|
||||
let form = reqwest::multipart::Form::new()
|
||||
.part("file", file_part);
|
||||
|
||||
let response = self.client
|
||||
.post(&format!("{}/api/documents", get_base_url()))
|
||||
.header("Authorization", self.get_auth_header())
|
||||
.multipart(form)
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("Failed to upload test document: {}", response.text().await?).into());
|
||||
}
|
||||
|
||||
let upload_result: Value = response.json().await?;
|
||||
let doc_id = upload_result["id"].as_str()
|
||||
.ok_or("No document ID in upload response")?
|
||||
.to_string();
|
||||
|
||||
// Wait a moment for processing
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
|
||||
// Manually mark the document as failed via direct database manipulation isn't available,
|
||||
// so we'll just return the document ID and use it for testing the endpoint structure
|
||||
Ok(doc_id)
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ocr_retry_stats_endpoint() {
|
||||
let helper = match OcrRetryTestHelper::new().await {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
println!("⚠️ Skipping OCR retry stats test (setup failed): {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Test getting retry statistics
|
||||
match helper.get_retry_stats().await {
|
||||
Ok(stats) => {
|
||||
println!("✅ OCR retry stats endpoint working");
|
||||
|
||||
// Verify response structure
|
||||
assert!(stats["failure_reasons"].is_array(), "Should have failure_reasons array");
|
||||
assert!(stats["file_types"].is_array(), "Should have file_types array");
|
||||
assert!(stats["total_failed"].is_number(), "Should have total_failed count");
|
||||
|
||||
println!("📊 Total failed documents: {}", stats["total_failed"]);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ OCR retry stats test failed: {}", e);
|
||||
println!("💡 This might indicate a server issue or missing endpoint implementation");
|
||||
panic!("OCR retry stats endpoint failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ocr_retry_recommendations_endpoint() {
|
||||
let helper = match OcrRetryTestHelper::new().await {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
println!("⚠️ Skipping OCR retry recommendations test (setup failed): {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Test getting retry recommendations
|
||||
match helper.get_retry_recommendations().await {
|
||||
Ok(recommendations) => {
|
||||
println!("✅ OCR retry recommendations endpoint working");
|
||||
|
||||
// Verify response structure
|
||||
assert!(recommendations["recommendations"].is_array(), "Should have recommendations array");
|
||||
assert!(recommendations["total_recommendations"].is_number(), "Should have total count");
|
||||
|
||||
let recs = recommendations["recommendations"].as_array().unwrap();
|
||||
println!("💡 Got {} retry recommendations", recs.len());
|
||||
|
||||
for rec in recs {
|
||||
println!(" - {}: {} documents ({}% success rate)",
|
||||
rec["title"].as_str().unwrap_or("Unknown"),
|
||||
rec["document_count"].as_i64().unwrap_or(0),
|
||||
(rec["estimated_success_rate"].as_f64().unwrap_or(0.0) * 100.0) as i32
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ OCR retry recommendations test failed: {}", e);
|
||||
println!("💡 This might indicate a server issue or missing endpoint implementation");
|
||||
panic!("OCR retry recommendations endpoint failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bulk_retry_preview_mode() {
|
||||
let helper = match OcrRetryTestHelper::new().await {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
println!("⚠️ Skipping bulk retry preview test (setup failed): {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Test preview mode - should not actually queue anything
|
||||
match helper.bulk_retry_ocr("all", None, true).await {
|
||||
Ok(result) => {
|
||||
println!("✅ Bulk retry preview mode working");
|
||||
|
||||
// Verify response structure
|
||||
assert!(result["success"].as_bool().unwrap_or(false), "Should be successful");
|
||||
assert!(result["matched_count"].is_number(), "Should have matched_count");
|
||||
assert!(result["queued_count"].is_number(), "Should have queued_count");
|
||||
assert!(result["documents"].is_array(), "Should have documents array");
|
||||
assert!(result["message"].as_str().unwrap_or("").contains("Preview"), "Should indicate preview mode");
|
||||
|
||||
// In preview mode, queued_count should be 0
|
||||
assert_eq!(result["queued_count"].as_u64().unwrap_or(1), 0, "Preview mode should not queue any documents");
|
||||
|
||||
println!("📋 Preview found {} documents that would be retried", result["matched_count"]);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ Bulk retry preview test failed: {}", e);
|
||||
println!("💡 This might indicate a server issue or missing endpoint implementation");
|
||||
panic!("Bulk retry preview failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_document_retry_history() {
|
||||
let helper = match OcrRetryTestHelper::new().await {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
println!("⚠️ Skipping retry history test (setup failed): {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Create a failed document by uploading a file and manually marking it as failed
|
||||
println!("🔄 Creating a test failed document...");
|
||||
|
||||
// First try to create a failed document for testing
|
||||
let doc_id = match helper.create_failed_test_document().await {
|
||||
Ok(id) => {
|
||||
println!("✅ Created test failed document with ID: {}", id);
|
||||
id
|
||||
}
|
||||
Err(e) => {
|
||||
println!("⚠️ Could not create test failed document: {}", e);
|
||||
// Just test the endpoint with a random UUID to verify it doesn't crash
|
||||
let test_uuid = "00000000-0000-0000-0000-000000000000";
|
||||
match helper.get_document_retry_history(test_uuid).await {
|
||||
Ok(_) => {
|
||||
println!("✅ Document retry history endpoint working (with test UUID)");
|
||||
return;
|
||||
}
|
||||
Err(retry_err) => {
|
||||
// A 404 is expected for non-existent document - that's fine
|
||||
if retry_err.to_string().contains("404") {
|
||||
println!("✅ Document retry history endpoint working (404 for non-existent document is expected)");
|
||||
return;
|
||||
} else {
|
||||
println!("❌ Document retry history test failed even with test UUID: {}", retry_err);
|
||||
panic!("Document retry history failed: {}", retry_err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Test getting retry history for this document
|
||||
match helper.get_document_retry_history(&doc_id).await {
|
||||
Ok(history) => {
|
||||
println!("✅ Document retry history endpoint working");
|
||||
|
||||
// Verify response structure
|
||||
assert!(history["document_id"].is_string(), "Should have document_id");
|
||||
assert!(history["retry_history"].is_array(), "Should have retry_history array");
|
||||
assert!(history["total_retries"].is_number(), "Should have total_retries count");
|
||||
|
||||
println!("📜 Document {} has {} retry attempts",
|
||||
doc_id,
|
||||
history["total_retries"].as_i64().unwrap_or(0)
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ Document retry history test failed: {}", e);
|
||||
println!("💡 This might indicate a server issue or missing endpoint implementation");
|
||||
panic!("Document retry history failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_filtered_bulk_retry_preview() {
|
||||
let helper = match OcrRetryTestHelper::new().await {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
println!("⚠️ Skipping filtered bulk retry test (setup failed): {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Test filtered retry with specific criteria
|
||||
let request_body = json!({
|
||||
"mode": "filter",
|
||||
"preview_only": true,
|
||||
"filter": {
|
||||
"mime_types": ["application/pdf"],
|
||||
"max_file_size": 5242880, // 5MB
|
||||
"limit": 10
|
||||
}
|
||||
});
|
||||
|
||||
let response = helper.client
|
||||
.post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
|
||||
.header("Authorization", helper.get_auth_header())
|
||||
.json(&request_body)
|
||||
.timeout(TIMEOUT)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match response {
|
||||
Ok(res) if res.status().is_success() => {
|
||||
let result: Value = res.json().await.unwrap();
|
||||
println!("✅ Filtered bulk retry preview working");
|
||||
|
||||
// Verify filtering worked
|
||||
let documents = result["documents"].as_array().unwrap();
|
||||
for doc in documents {
|
||||
let mime_type = doc["mime_type"].as_str().unwrap_or("");
|
||||
assert_eq!(mime_type, "application/pdf", "Should only return PDF documents");
|
||||
|
||||
let file_size = doc["file_size"].as_i64().unwrap_or(0);
|
||||
assert!(file_size <= 5242880, "Should only return files <= 5MB");
|
||||
}
|
||||
|
||||
println!("🔍 Filtered preview found {} matching documents", documents.len());
|
||||
}
|
||||
Ok(res) => {
|
||||
let status = res.status();
|
||||
let error_text = res.text().await.unwrap_or_else(|_| "Unknown error".to_string());
|
||||
println!("❌ Filtered bulk retry failed with status {}: {}", status, error_text);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ Filtered bulk retry request failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user