Mirror of https://github.com/readur/readur.git (synced 2026-02-07 07:29:34 -06:00)

feat(server/client): add lots of OCR tweaks

Cargo.lock (generated, 40 lines changed)
@@ -2621,7 +2621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
 "cfg-if",
- "windows-targets 0.53.1",
+ "windows-targets 0.53.2",
]

[[package]]

@@ -2638,7 +2638,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [
 "bitflags 2.9.1",
 "libc",
- "redox_syscall 0.5.12",
+ "redox_syscall 0.5.13",
]

[[package]]

@@ -3208,7 +3208,7 @@ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
 "cfg-if",
 "libc",
- "redox_syscall 0.5.12",
+ "redox_syscall 0.5.13",
 "smallvec",
 "windows-targets 0.52.6",
]

@@ -3690,9 +3690,9 @@ dependencies = [

[[package]]
name = "redox_syscall"
-version = "0.5.12"
+version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af"
+checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
dependencies = [
 "bitflags 2.9.1",
]

@@ -4362,12 +4362,9 @@ dependencies = [

[[package]]
name = "slab"
-version = "0.4.9"
+version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
-dependencies = [
- "autocfg",
-]
+checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d"

[[package]]
name = "smallvec"

@@ -4800,9 +4797,9 @@ dependencies = [

[[package]]
name = "tesseract-sys"
-version = "0.6.2"
+version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8dd2bedded009c8252301743af66d77e3912357bacc0e44a51dba54a8679ea5"
+checksum = "1e1297ece7aa841bd33a4f80046a6682c4e58fca0f8600e868d822359eef7bde"
dependencies = [
 "bindgen 0.64.0",
 "leptonica-sys",

@@ -4890,12 +4887,11 @@ dependencies = [

[[package]]
name = "thread_local"
-version = "1.1.8"
+version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
- "cfg-if",
 "once_cell",
]

[[package]]

@@ -5566,7 +5562,7 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7"
dependencies = [
- "redox_syscall 0.5.12",
+ "redox_syscall 0.5.13",
 "wasite",
]

@@ -5613,9 +5609,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

[[package]]
name = "windows"
-version = "0.61.1"
+version = "0.61.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419"
+checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
dependencies = [
 "windows-collections",
 "windows-core",

@@ -5681,9 +5677,9 @@ dependencies = [

[[package]]
name = "windows-link"
-version = "0.1.1"
+version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
+checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"

[[package]]
name = "windows-numerics"

@@ -5784,9 +5780,9 @@ dependencies = [

[[package]]
name = "windows-targets"
-version = "0.53.1"
+version = "0.53.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30357ec391cde730f8fbfcdc29adc47518b06504528df977ab5af02ef23fdee9"
+checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
dependencies = [
 "windows_aarch64_gnullvm 0.53.0",
 "windows_aarch64_msvc 0.53.0",
@@ -8,7 +8,7 @@ COPY frontend ./
RUN npm run build

# --- Backend build stage ---
-FROM rust:1.83-bookworm as backend-builder
+FROM rust:1.85-bookworm as backend-builder

# Install system dependencies for OCR
RUN apt-get update && apt-get install -y \

@@ -56,4 +56,4 @@ COPY --from=frontend-builder /frontend/dist /app/frontend

EXPOSE 8000

-CMD ["./readur"]
+CMD ["./readur"]
@@ -33,6 +33,7 @@ import {
 Visibility as ViewIcon,
 Search as SearchIcon,
 Edit as EditIcon,
+ PhotoFilter as ProcessedImageIcon,
} from '@mui/icons-material';
import { documentService, OcrResponse } from '../services/api';
import DocumentViewer from '../components/DocumentViewer';

@@ -59,6 +60,9 @@ const DocumentDetailsPage: React.FC = () => {
 const [showOcrDialog, setShowOcrDialog] = useState<boolean>(false);
 const [ocrLoading, setOcrLoading] = useState<boolean>(false);
 const [showViewDialog, setShowViewDialog] = useState<boolean>(false);
+ const [showProcessedImageDialog, setShowProcessedImageDialog] = useState<boolean>(false);
+ const [processedImageUrl, setProcessedImageUrl] = useState<string | null>(null);
+ const [processedImageLoading, setProcessedImageLoading] = useState<boolean>(false);
 const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(null);

 useEffect(() => {

@@ -142,6 +146,23 @@ const DocumentDetailsPage: React.FC = () => {
 }
 };

+ const handleViewProcessedImage = async (): Promise<void> => {
+   if (!document) return;
+
+   setProcessedImageLoading(true);
+   try {
+     const response = await documentService.getProcessedImage(document.id);
+     const url = window.URL.createObjectURL(new Blob([response.data], { type: 'image/png' }));
+     setProcessedImageUrl(url);
+     setShowProcessedImageDialog(true);
+   } catch (err: any) {
+     console.log('Processed image not available:', err);
+     alert('No processed image available for this document. This feature requires "Save Processed Images" to be enabled in OCR settings.');
+   } finally {
+     setProcessedImageLoading(false);
+   }
+ };
+
 const loadThumbnail = async (): Promise<void> => {
 if (!document) return;
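A note on the handler added above: every click creates a fresh object URL via window.URL.createObjectURL, and nothing ever releases it, so repeatedly viewing processed images leaks blob references for the lifetime of the page. A minimal cleanup sketch (not part of this commit; it reuses the processedImageUrl state shown above) would revoke the previous URL whenever it changes or the component unmounts:

import { useEffect } from 'react';

// Revoke the previous blob URL when a new one replaces it or on unmount.
useEffect(() => {
  return () => {
    if (processedImageUrl) {
      window.URL.revokeObjectURL(processedImageUrl);
    }
  };
}, [processedImageUrl]);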
@@ -302,6 +323,17 @@ const DocumentDetailsPage: React.FC = () => {
 OCR Text
 </Button>
 )}
+ {document.mime_type?.includes('image') && (
+   <Button
+     variant="outlined"
+     startIcon={<ProcessedImageIcon />}
+     onClick={handleViewProcessedImage}
+     disabled={processedImageLoading}
+     sx={{ borderRadius: 2 }}
+   >
+     {processedImageLoading ? 'Loading...' : 'Processed Image'}
+   </Button>
+ )}
 </Stack>

 {document.has_ocr_text && (

@@ -680,6 +712,48 @@ const DocumentDetailsPage: React.FC = () => {
 </Button>
 </DialogActions>
 </Dialog>
+
+ {/* Processed Image Dialog */}
+ <Dialog
+   open={showProcessedImageDialog}
+   onClose={() => setShowProcessedImageDialog(false)}
+   maxWidth="lg"
+   fullWidth
+ >
+   <DialogTitle>
+     Processed Image - OCR Enhancement Applied
+   </DialogTitle>
+   <DialogContent>
+     {processedImageUrl ? (
+       <Box sx={{ textAlign: 'center', py: 2 }}>
+         <img
+           src={processedImageUrl}
+           alt="Processed image that was fed to OCR"
+           style={{
+             maxWidth: '100%',
+             maxHeight: '70vh',
+             objectFit: 'contain',
+             border: '1px solid #ddd',
+             borderRadius: '4px'
+           }}
+         />
+         <Typography variant="body2" sx={{ mt: 2, color: 'text.secondary' }}>
+           This is the enhanced image that was actually processed by the OCR engine.
+           You can adjust OCR enhancement settings in the Settings page.
+         </Typography>
+       </Box>
+     ) : (
+       <Box sx={{ textAlign: 'center', py: 4 }}>
+         <Typography>No processed image available</Typography>
+       </Box>
+     )}
+   </DialogContent>
+   <DialogActions>
+     <Button onClick={() => setShowProcessedImageDialog(false)}>
+       Close
+     </Button>
+   </DialogActions>
+ </Dialog>
 </Box>
 );
};
@@ -66,6 +66,31 @@ interface Settings {
 memoryLimitMb: number;
 cpuPriority: string;
 enableBackgroundOcr: boolean;
+ ocrPageSegmentationMode: number;
+ ocrEngineMode: number;
+ ocrMinConfidence: number;
+ ocrDpi: number;
+ ocrEnhanceContrast: boolean;
+ ocrRemoveNoise: boolean;
+ ocrDetectOrientation: boolean;
+ ocrWhitelistChars: string;
+ ocrBlacklistChars: string;
+ ocrBrightnessBoost: number;
+ ocrContrastMultiplier: number;
+ ocrNoiseReductionLevel: number;
+ ocrSharpeningStrength: number;
+ ocrMorphologicalOperations: boolean;
+ ocrAdaptiveThresholdWindowSize: number;
+ ocrHistogramEqualization: boolean;
+ ocrUpscaleFactor: number;
+ ocrMaxImageWidth: number;
+ ocrMaxImageHeight: number;
+ saveProcessedImages: boolean;
+ ocrQualityThresholdBrightness: number;
+ ocrQualityThresholdContrast: number;
+ ocrQualityThresholdNoise: number;
+ ocrQualityThresholdSharpness: number;
+ ocrSkipEnhancement: boolean;
 webdavEnabled: boolean;
 webdavServerUrl: string;
 webdavUsername: string;

@@ -830,6 +855,31 @@ const SettingsPage: React.FC = () => {
 memoryLimitMb: 512,
 cpuPriority: 'normal',
 enableBackgroundOcr: true,
+ ocrPageSegmentationMode: 3,
+ ocrEngineMode: 3,
+ ocrMinConfidence: 30.0,
+ ocrDpi: 300,
+ ocrEnhanceContrast: true,
+ ocrRemoveNoise: true,
+ ocrDetectOrientation: true,
+ ocrWhitelistChars: '',
+ ocrBlacklistChars: '',
+ ocrBrightnessBoost: 0.0,
+ ocrContrastMultiplier: 1.0,
+ ocrNoiseReductionLevel: 1,
+ ocrSharpeningStrength: 0.0,
+ ocrMorphologicalOperations: true,
+ ocrAdaptiveThresholdWindowSize: 15,
+ ocrHistogramEqualization: false,
+ ocrUpscaleFactor: 1.0,
+ ocrMaxImageWidth: 10000,
+ ocrMaxImageHeight: 10000,
+ saveProcessedImages: false,
+ ocrQualityThresholdBrightness: 40.0,
+ ocrQualityThresholdContrast: 0.15,
+ ocrQualityThresholdNoise: 0.3,
+ ocrQualityThresholdSharpness: 0.15,
+ ocrSkipEnhancement: false,
 webdavEnabled: false,
 webdavServerUrl: '',
 webdavUsername: '',

@@ -900,6 +950,31 @@ const SettingsPage: React.FC = () => {
 memoryLimitMb: response.data.memory_limit_mb || 512,
 cpuPriority: response.data.cpu_priority || 'normal',
 enableBackgroundOcr: response.data.enable_background_ocr !== undefined ? response.data.enable_background_ocr : true,
+ ocrPageSegmentationMode: response.data.ocr_page_segmentation_mode || 3,
+ ocrEngineMode: response.data.ocr_engine_mode || 3,
+ ocrMinConfidence: response.data.ocr_min_confidence || 30.0,
+ ocrDpi: response.data.ocr_dpi || 300,
+ ocrEnhanceContrast: response.data.ocr_enhance_contrast !== undefined ? response.data.ocr_enhance_contrast : true,
+ ocrRemoveNoise: response.data.ocr_remove_noise !== undefined ? response.data.ocr_remove_noise : true,
+ ocrDetectOrientation: response.data.ocr_detect_orientation !== undefined ? response.data.ocr_detect_orientation : true,
+ ocrWhitelistChars: response.data.ocr_whitelist_chars || '',
+ ocrBlacklistChars: response.data.ocr_blacklist_chars || '',
+ ocrBrightnessBoost: response.data.ocr_brightness_boost || 0.0,
+ ocrContrastMultiplier: response.data.ocr_contrast_multiplier || 1.0,
+ ocrNoiseReductionLevel: response.data.ocr_noise_reduction_level || 1,
+ ocrSharpeningStrength: response.data.ocr_sharpening_strength || 0.0,
+ ocrMorphologicalOperations: response.data.ocr_morphological_operations !== undefined ? response.data.ocr_morphological_operations : true,
+ ocrAdaptiveThresholdWindowSize: response.data.ocr_adaptive_threshold_window_size || 15,
+ ocrHistogramEqualization: response.data.ocr_histogram_equalization || false,
+ ocrUpscaleFactor: response.data.ocr_upscale_factor || 1.0,
+ ocrMaxImageWidth: response.data.ocr_max_image_width || 10000,
+ ocrMaxImageHeight: response.data.ocr_max_image_height || 10000,
+ saveProcessedImages: response.data.save_processed_images || false,
+ ocrQualityThresholdBrightness: response.data.ocr_quality_threshold_brightness || 40.0,
+ ocrQualityThresholdContrast: response.data.ocr_quality_threshold_contrast || 0.15,
+ ocrQualityThresholdNoise: response.data.ocr_quality_threshold_noise || 0.3,
+ ocrQualityThresholdSharpness: response.data.ocr_quality_threshold_sharpness || 0.15,
+ ocrSkipEnhancement: response.data.ocr_skip_enhancement || false,
 webdavEnabled: response.data.webdav_enabled || false,
 webdavServerUrl: response.data.webdav_server_url || '',
 webdavUsername: response.data.webdav_username || '',
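One caveat about the hydration block above: `||` falls back on any falsy value, so a stored 0 (for example ocr_brightness_boost or ocr_sharpening_strength deliberately set to 0) would be silently replaced by the client-side default. Some boolean fields already work around this with explicit `!== undefined` checks; the numeric fields could use nullish coalescing instead. A sketch of the safer pattern (same field names as above, not part of this commit):

// `??` only falls back on null/undefined, preserving legitimate 0 values.
const ocrBrightnessBoost = response.data.ocr_brightness_boost ?? 0.0;
const ocrSharpeningStrength = response.data.ocr_sharpening_strength ?? 0.0;
const ocrMinConfidence = response.data.ocr_min_confidence ?? 30.0;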
@@ -1042,6 +1117,7 @@ const SettingsPage: React.FC = () => {
 <Paper sx={{ width: '100%' }}>
 <Tabs value={tabValue} onChange={handleTabChange} aria-label="settings tabs">
 <Tab label="General" />
+ <Tab label="OCR Settings" />
 <Tab label="WebDAV Integration" />
 <Tab label="User Management" />
 </Tabs>

@@ -1317,6 +1393,242 @@ const SettingsPage: React.FC = () => {
 )}

 {tabValue === 1 && (
+ <Box>
+   <Typography variant="h6" sx={{ mb: 3 }}>
+     OCR Image Processing Settings
+   </Typography>
+
+   <Card sx={{ mb: 3 }}>
+     <CardContent>
+       <Typography variant="subtitle1" sx={{ mb: 2 }}>
+         Enhancement Controls
+       </Typography>
+       <Divider sx={{ mb: 2 }} />
+
+       <FormControlLabel
+         control={
+           <Switch
+             checked={settings.ocrSkipEnhancement}
+             onChange={(e) => handleSettingsChange('ocrSkipEnhancement', e.target.checked)}
+           />
+         }
+         label="Skip All Image Enhancement (Use Original Images Only)"
+         sx={{ mb: 2 }}
+       />
+
+       <Grid container spacing={2}>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Brightness Boost"
+             type="number"
+             value={settings.ocrBrightnessBoost}
+             onChange={(e) => handleSettingsChange('ocrBrightnessBoost', parseFloat(e.target.value) || 0)}
+             helperText="Manual brightness adjustment (0 = auto, >0 = boost amount)"
+             inputProps={{ step: 0.1, min: 0, max: 100 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Contrast Multiplier"
+             type="number"
+             value={settings.ocrContrastMultiplier}
+             onChange={(e) => handleSettingsChange('ocrContrastMultiplier', parseFloat(e.target.value) || 1)}
+             helperText="Manual contrast adjustment (1.0 = auto, >1.0 = increase)"
+             inputProps={{ step: 0.1, min: 0.1, max: 5 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <FormControl fullWidth>
+             <InputLabel>Noise Reduction Level</InputLabel>
+             <Select
+               value={settings.ocrNoiseReductionLevel}
+               label="Noise Reduction Level"
+               onChange={(e) => handleSettingsChange('ocrNoiseReductionLevel', e.target.value as number)}
+             >
+               <MenuItem value={0}>None</MenuItem>
+               <MenuItem value={1}>Light</MenuItem>
+               <MenuItem value={2}>Moderate</MenuItem>
+               <MenuItem value={3}>Heavy</MenuItem>
+             </Select>
+           </FormControl>
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Sharpening Strength"
+             type="number"
+             value={settings.ocrSharpeningStrength}
+             onChange={(e) => handleSettingsChange('ocrSharpeningStrength', parseFloat(e.target.value) || 0)}
+             helperText="Image sharpening amount (0 = auto, >0 = manual)"
+             inputProps={{ step: 0.1, min: 0, max: 2 }}
+           />
+         </Grid>
+       </Grid>
+     </CardContent>
+   </Card>
+
+   <Card sx={{ mb: 3 }}>
+     <CardContent>
+       <Typography variant="subtitle1" sx={{ mb: 2 }}>
+         Quality Thresholds (when to apply enhancements)
+       </Typography>
+       <Divider sx={{ mb: 2 }} />
+
+       <Grid container spacing={2}>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Brightness Threshold"
+             type="number"
+             value={settings.ocrQualityThresholdBrightness}
+             onChange={(e) => handleSettingsChange('ocrQualityThresholdBrightness', parseFloat(e.target.value) || 40)}
+             helperText="Enhance if brightness below this value (0-255)"
+             inputProps={{ step: 1, min: 0, max: 255 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Contrast Threshold"
+             type="number"
+             value={settings.ocrQualityThresholdContrast}
+             onChange={(e) => handleSettingsChange('ocrQualityThresholdContrast', parseFloat(e.target.value) || 0.15)}
+             helperText="Enhance if contrast below this value (0-1)"
+             inputProps={{ step: 0.01, min: 0, max: 1 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Noise Threshold"
+             type="number"
+             value={settings.ocrQualityThresholdNoise}
+             onChange={(e) => handleSettingsChange('ocrQualityThresholdNoise', parseFloat(e.target.value) || 0.3)}
+             helperText="Enhance if noise above this value (0-1)"
+             inputProps={{ step: 0.01, min: 0, max: 1 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Sharpness Threshold"
+             type="number"
+             value={settings.ocrQualityThresholdSharpness}
+             onChange={(e) => handleSettingsChange('ocrQualityThresholdSharpness', parseFloat(e.target.value) || 0.15)}
+             helperText="Enhance if sharpness below this value (0-1)"
+             inputProps={{ step: 0.01, min: 0, max: 1 }}
+           />
+         </Grid>
+       </Grid>
+     </CardContent>
+   </Card>
+
+   <Card sx={{ mb: 3 }}>
+     <CardContent>
+       <Typography variant="subtitle1" sx={{ mb: 2 }}>
+         Advanced Processing Options
+       </Typography>
+       <Divider sx={{ mb: 2 }} />
+
+       <Grid container spacing={2}>
+         <Grid item xs={12} md={6}>
+           <FormControlLabel
+             control={
+               <Switch
+                 checked={settings.ocrMorphologicalOperations}
+                 onChange={(e) => handleSettingsChange('ocrMorphologicalOperations', e.target.checked)}
+               />
+             }
+             label="Morphological Operations (text cleanup)"
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <FormControlLabel
+             control={
+               <Switch
+                 checked={settings.ocrHistogramEqualization}
+                 onChange={(e) => handleSettingsChange('ocrHistogramEqualization', e.target.checked)}
+               />
+             }
+             label="Histogram Equalization"
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <FormControlLabel
+             control={
+               <Switch
+                 checked={settings.saveProcessedImages}
+                 onChange={(e) => handleSettingsChange('saveProcessedImages', e.target.checked)}
+               />
+             }
+             label="Save Processed Images for Review"
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Adaptive Threshold Window Size"
+             type="number"
+             value={settings.ocrAdaptiveThresholdWindowSize}
+             onChange={(e) => handleSettingsChange('ocrAdaptiveThresholdWindowSize', parseInt(e.target.value) || 15)}
+             helperText="Window size for contrast enhancement (odd number)"
+             inputProps={{ step: 2, min: 3, max: 101 }}
+           />
+         </Grid>
+       </Grid>
+     </CardContent>
+   </Card>
+
+   <Card sx={{ mb: 3 }}>
+     <CardContent>
+       <Typography variant="subtitle1" sx={{ mb: 2 }}>
+         Image Size and Scaling
+       </Typography>
+       <Divider sx={{ mb: 2 }} />
+
+       <Grid container spacing={2}>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Max Image Width"
+             type="number"
+             value={settings.ocrMaxImageWidth}
+             onChange={(e) => handleSettingsChange('ocrMaxImageWidth', parseInt(e.target.value) || 10000)}
+             helperText="Maximum image width in pixels"
+             inputProps={{ step: 100, min: 100, max: 50000 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Max Image Height"
+             type="number"
+             value={settings.ocrMaxImageHeight}
+             onChange={(e) => handleSettingsChange('ocrMaxImageHeight', parseInt(e.target.value) || 10000)}
+             helperText="Maximum image height in pixels"
+             inputProps={{ step: 100, min: 100, max: 50000 }}
+           />
+         </Grid>
+         <Grid item xs={12} md={6}>
+           <TextField
+             fullWidth
+             label="Upscale Factor"
+             type="number"
+             value={settings.ocrUpscaleFactor}
+             onChange={(e) => handleSettingsChange('ocrUpscaleFactor', parseFloat(e.target.value) || 1.0)}
+             helperText="Image scaling factor (1.0 = no scaling)"
+             inputProps={{ step: 0.1, min: 0.1, max: 5 }}
+           />
+         </Grid>
+       </Grid>
+     </CardContent>
+   </Card>
+ </Box>
+ )}
+
+ {tabValue === 2 && (
 <WebDAVTabContent
 settings={settings}
 loading={loading}
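The "Adaptive Threshold Window Size" field above asks for an odd number, but only hints at it through helper text and a step of 2; typing an even value is still possible. A small hedged sketch of a change handler that snaps input to the nearest valid odd value in range (hypothetical helper, not part of this commit):

// Clamp to [3, 101] and round up to the next odd number.
const toOddWindowSize = (raw: string): number => {
  const n = parseInt(raw, 10) || 15;
  const clamped = Math.min(101, Math.max(3, n));
  return clamped % 2 === 0 ? clamped + 1 : clamped;
};

// Usage: onChange={(e) => handleSettingsChange('ocrAdaptiveThresholdWindowSize', toOddWindowSize(e.target.value))}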
@@ -1325,7 +1637,7 @@ const SettingsPage: React.FC = () => {
 />
 )}

- {tabValue === 2 && (
+ {tabValue === 3 && (
 <Box>
 <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mb: 3 }}>
 <Typography variant="h6">
@@ -48,6 +48,7 @@ import {
 Edit as EditIcon,
 Delete as DeleteIcon,
 PlayArrow as PlayArrowIcon,
+ Stop as StopIcon,
 Storage as StorageIcon,
 Cloud as CloudIcon,
 Speed as SpeedIcon,

@@ -138,6 +139,7 @@ const SourcesPage: React.FC = () => {

 const [testingConnection, setTestingConnection] = useState(false);
 const [syncingSource, setSyncingSource] = useState<string | null>(null);
+ const [stoppingSync, setStoppingSync] = useState<string | null>(null);

 useEffect(() => {
 loadSources();

@@ -393,6 +395,24 @@ const SourcesPage: React.FC = () => {
 }
 };

+ const handleStopSync = async (sourceId: string) => {
+   setStoppingSync(sourceId);
+   try {
+     await api.post(`/sources/${sourceId}/sync/stop`);
+     showSnackbar('Sync stopped successfully', 'success');
+     setTimeout(loadSources, 1000);
+   } catch (error: any) {
+     console.error('Failed to stop sync:', error);
+     if (error.response?.status === 409) {
+       showSnackbar('Source is not currently syncing', 'warning');
+     } else {
+       showSnackbar('Failed to stop sync', 'error');
+     }
+   } finally {
+     setStoppingSync(null);
+   }
+ };
+
 // Utility functions for folder management
 const addFolder = () => {
 if (newFolder && !formData.watch_folders.includes(newFolder)) {

@@ -645,24 +665,46 @@ const SourcesPage: React.FC = () => {
 transition: 'all 0.3s cubic-bezier(0.4, 0, 0.2, 1)',
 }}
 >
- <Tooltip title="Trigger Sync">
-   <span>
-     <IconButton
-       onClick={() => handleTriggerSync(source.id)}
-       disabled={source.status === 'syncing' || !source.enabled}
-       sx={{
-         bgcolor: alpha(theme.palette.primary.main, 0.1),
-         '&:hover': { bgcolor: alpha(theme.palette.primary.main, 0.2) },
-       }}
-     >
-       {syncingSource === source.id ? (
-         <CircularProgress size={20} />
-       ) : (
-         <PlayArrowIcon />
-       )}
-     </IconButton>
-   </span>
- </Tooltip>
+ {source.status === 'syncing' ? (
+   <Tooltip title="Stop Sync">
+     <span>
+       <IconButton
+         onClick={() => handleStopSync(source.id)}
+         disabled={stoppingSync === source.id}
+         sx={{
+           bgcolor: alpha(theme.palette.warning.main, 0.1),
+           '&:hover': { bgcolor: alpha(theme.palette.warning.main, 0.2) },
+           color: theme.palette.warning.main,
+         }}
+       >
+         {stoppingSync === source.id ? (
+           <CircularProgress size={20} />
+         ) : (
+           <StopIcon />
+         )}
+       </IconButton>
+     </span>
+   </Tooltip>
+ ) : (
+   <Tooltip title="Trigger Sync">
+     <span>
+       <IconButton
+         onClick={() => handleTriggerSync(source.id)}
+         disabled={syncingSource === source.id || !source.enabled}
+         sx={{
+           bgcolor: alpha(theme.palette.primary.main, 0.1),
+           '&:hover': { bgcolor: alpha(theme.palette.primary.main, 0.2) },
+         }}
+       >
+         {syncingSource === source.id ? (
+           <CircularProgress size={20} />
+         ) : (
+           <PlayArrowIcon />
+         )}
+       </IconButton>
+     </span>
+   </Tooltip>
+ )}
 <Tooltip title="Edit Source">
 <IconButton
 onClick={() => handleEditSource(source)}
@@ -150,6 +150,12 @@ export const documentService = {
 })
 },

+ getProcessedImage: (id: string) => {
+   return api.get(`/documents/${id}/processed-image`, {
+     responseType: 'blob',
+   })
+ },
+
 search: (searchRequest: SearchRequest) => {
 return api.get<SearchResponse>('/search', {
 params: searchRequest,
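A short usage sketch for the new service method (a hedged example, not part of this commit; it assumes the blob response shape above and treats any request error as "no processed image stored", which is how the DocumentDetailsPage handler interprets failures):

// Fetch the processed image as a blob and return a displayable object URL.
// The caller is responsible for revoking the URL when done.
async function fetchProcessedImageUrl(documentId: string): Promise<string | null> {
  try {
    const response = await documentService.getProcessedImage(documentId);
    const blob = new Blob([response.data], { type: 'image/png' });
    return window.URL.createObjectURL(blob);
  } catch {
    // No processed image available (e.g. save_processed_images was off at OCR time).
    return null;
  }
}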
migrations/20240101000012_add_enhanced_ocr_settings.sql (new file, 57 lines)

@@ -0,0 +1,57 @@
-- Add enhanced OCR processing settings with conservative defaults
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_brightness_boost REAL DEFAULT 1.0;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_contrast_multiplier REAL DEFAULT 1.2;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_noise_reduction_level INTEGER DEFAULT 1;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_sharpening_strength REAL DEFAULT 0.5;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_morphological_operations BOOLEAN DEFAULT false;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_adaptive_threshold_window_size INTEGER DEFAULT 15;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_histogram_equalization BOOLEAN DEFAULT false;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_upscale_factor REAL DEFAULT 1.0;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_max_image_width INTEGER DEFAULT 3000;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_max_image_height INTEGER DEFAULT 3000;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS save_processed_images BOOLEAN DEFAULT false;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_quality_threshold_brightness REAL DEFAULT 0.3;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_quality_threshold_contrast REAL DEFAULT 0.2;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_quality_threshold_noise REAL DEFAULT 0.7;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_quality_threshold_sharpness REAL DEFAULT 0.3;
ALTER TABLE settings ADD COLUMN IF NOT EXISTS ocr_skip_enhancement BOOLEAN DEFAULT false;

-- Create processed_images table for storing preprocessed images
CREATE TABLE IF NOT EXISTS processed_images (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    original_image_path TEXT NOT NULL,
    processed_image_path TEXT NOT NULL,
    processing_parameters JSONB NOT NULL DEFAULT '{}',
    processing_steps TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
    image_width INTEGER NOT NULL,
    image_height INTEGER NOT NULL,
    file_size BIGINT NOT NULL,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Create indexes for the processed_images table
CREATE INDEX IF NOT EXISTS idx_processed_images_document_id ON processed_images(document_id);
CREATE INDEX IF NOT EXISTS idx_processed_images_user_id ON processed_images(user_id);
CREATE INDEX IF NOT EXISTS idx_processed_images_created_at ON processed_images(created_at);

-- Update existing settings with conservative default values for new OCR settings
UPDATE settings SET
    ocr_brightness_boost = 1.0,
    ocr_contrast_multiplier = 1.2,
    ocr_noise_reduction_level = 1,
    ocr_sharpening_strength = 0.5,
    ocr_morphological_operations = false,
    ocr_adaptive_threshold_window_size = 15,
    ocr_histogram_equalization = false,
    ocr_upscale_factor = 1.0,
    ocr_max_image_width = 3000,
    ocr_max_image_height = 3000,
    save_processed_images = false,
    ocr_quality_threshold_brightness = 0.3,
    ocr_quality_threshold_contrast = 0.2,
    ocr_quality_threshold_noise = 0.7,
    ocr_quality_threshold_sharpness = 0.3,
    ocr_skip_enhancement = false
WHERE ocr_brightness_boost IS NULL;
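For reference, a client-side type mirroring one processed_images row might look like the following (a hypothetical sketch; the frontend in this commit only consumes the raw image bytes over the processed-image endpoint, not these rows directly):

// Hypothetical shape of a processed_images row if exposed over JSON.
interface ProcessedImage {
  id: string;                                     // UUID
  documentId: string;                             // UUID, FK to documents (cascade delete)
  userId: string;                                 // UUID, FK to users (cascade delete)
  originalImagePath: string;
  processedImagePath: string;
  processingParameters: Record<string, unknown>;  // JSONB
  processingSteps: string[];                      // TEXT[]
  imageWidth: number;
  imageHeight: number;
  fileSize: number;                               // BIGINT
  createdAt: string;                              // TIMESTAMPTZ
}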
src/db.rs (198 lines changed)
@@ -1272,6 +1272,11 @@ impl Database {
 cpu_priority, enable_background_ocr, ocr_page_segmentation_mode, ocr_engine_mode,
 ocr_min_confidence, ocr_dpi, ocr_enhance_contrast, ocr_remove_noise,
 ocr_detect_orientation, ocr_whitelist_chars, ocr_blacklist_chars,
+ ocr_brightness_boost, ocr_contrast_multiplier, ocr_noise_reduction_level, ocr_sharpening_strength,
+ ocr_morphological_operations, ocr_adaptive_threshold_window_size, ocr_histogram_equalization,
+ ocr_upscale_factor, ocr_max_image_width, ocr_max_image_height, save_processed_images,
+ ocr_quality_threshold_brightness, ocr_quality_threshold_contrast, ocr_quality_threshold_noise,
+ ocr_quality_threshold_sharpness, ocr_skip_enhancement,
 webdav_enabled, webdav_server_url, webdav_username, webdav_password,
 webdav_watch_folders, webdav_file_extensions, webdav_auto_sync, webdav_sync_interval_minutes,
 created_at, updated_at

@@ -1311,6 +1316,22 @@ impl Database {
 ocr_detect_orientation: row.get("ocr_detect_orientation"),
 ocr_whitelist_chars: row.get("ocr_whitelist_chars"),
 ocr_blacklist_chars: row.get("ocr_blacklist_chars"),
+ ocr_brightness_boost: row.get("ocr_brightness_boost"),
+ ocr_contrast_multiplier: row.get("ocr_contrast_multiplier"),
+ ocr_noise_reduction_level: row.get("ocr_noise_reduction_level"),
+ ocr_sharpening_strength: row.get("ocr_sharpening_strength"),
+ ocr_morphological_operations: row.get("ocr_morphological_operations"),
+ ocr_adaptive_threshold_window_size: row.get("ocr_adaptive_threshold_window_size"),
+ ocr_histogram_equalization: row.get("ocr_histogram_equalization"),
+ ocr_upscale_factor: row.get("ocr_upscale_factor"),
+ ocr_max_image_width: row.get("ocr_max_image_width"),
+ ocr_max_image_height: row.get("ocr_max_image_height"),
+ save_processed_images: row.get("save_processed_images"),
+ ocr_quality_threshold_brightness: row.get("ocr_quality_threshold_brightness"),
+ ocr_quality_threshold_contrast: row.get("ocr_quality_threshold_contrast"),
+ ocr_quality_threshold_noise: row.get("ocr_quality_threshold_noise"),
+ ocr_quality_threshold_sharpness: row.get("ocr_quality_threshold_sharpness"),
+ ocr_skip_enhancement: row.get("ocr_skip_enhancement"),
 webdav_enabled: row.get("webdav_enabled"),
 webdav_server_url: row.get("webdav_server_url"),
 webdav_username: row.get("webdav_username"),

@@ -1336,6 +1357,11 @@ impl Database {
 cpu_priority, enable_background_ocr, ocr_page_segmentation_mode, ocr_engine_mode,
 ocr_min_confidence, ocr_dpi, ocr_enhance_contrast, ocr_remove_noise,
 ocr_detect_orientation, ocr_whitelist_chars, ocr_blacklist_chars,
+ ocr_brightness_boost, ocr_contrast_multiplier, ocr_noise_reduction_level, ocr_sharpening_strength,
+ ocr_morphological_operations, ocr_adaptive_threshold_window_size, ocr_histogram_equalization,
+ ocr_upscale_factor, ocr_max_image_width, ocr_max_image_height, save_processed_images,
+ ocr_quality_threshold_brightness, ocr_quality_threshold_contrast, ocr_quality_threshold_noise,
+ ocr_quality_threshold_sharpness, ocr_skip_enhancement,
 webdav_enabled, webdav_server_url, webdav_username, webdav_password,
 webdav_watch_folders, webdav_file_extensions, webdav_auto_sync, webdav_sync_interval_minutes,
 created_at, updated_at

@@ -1375,6 +1401,22 @@ impl Database {
 ocr_detect_orientation: row.get("ocr_detect_orientation"),
 ocr_whitelist_chars: row.get("ocr_whitelist_chars"),
 ocr_blacklist_chars: row.get("ocr_blacklist_chars"),
+ ocr_brightness_boost: row.get("ocr_brightness_boost"),
+ ocr_contrast_multiplier: row.get("ocr_contrast_multiplier"),
+ ocr_noise_reduction_level: row.get("ocr_noise_reduction_level"),
+ ocr_sharpening_strength: row.get("ocr_sharpening_strength"),
+ ocr_morphological_operations: row.get("ocr_morphological_operations"),
+ ocr_adaptive_threshold_window_size: row.get("ocr_adaptive_threshold_window_size"),
+ ocr_histogram_equalization: row.get("ocr_histogram_equalization"),
+ ocr_upscale_factor: row.get("ocr_upscale_factor"),
+ ocr_max_image_width: row.get("ocr_max_image_width"),
+ ocr_max_image_height: row.get("ocr_max_image_height"),
+ save_processed_images: row.get("save_processed_images"),
+ ocr_quality_threshold_brightness: row.get("ocr_quality_threshold_brightness"),
+ ocr_quality_threshold_contrast: row.get("ocr_quality_threshold_contrast"),
+ ocr_quality_threshold_noise: row.get("ocr_quality_threshold_noise"),
+ ocr_quality_threshold_sharpness: row.get("ocr_quality_threshold_sharpness"),
+ ocr_skip_enhancement: row.get("ocr_skip_enhancement"),
 webdav_enabled: row.get("webdav_enabled"),
 webdav_server_url: row.get("webdav_server_url"),
 webdav_username: row.get("webdav_username"),

@@ -1413,10 +1455,15 @@ impl Database {
 cpu_priority, enable_background_ocr, ocr_page_segmentation_mode, ocr_engine_mode,
 ocr_min_confidence, ocr_dpi, ocr_enhance_contrast, ocr_remove_noise,
 ocr_detect_orientation, ocr_whitelist_chars, ocr_blacklist_chars,
+ ocr_brightness_boost, ocr_contrast_multiplier, ocr_noise_reduction_level, ocr_sharpening_strength,
+ ocr_morphological_operations, ocr_adaptive_threshold_window_size, ocr_histogram_equalization,
+ ocr_upscale_factor, ocr_max_image_width, ocr_max_image_height, save_processed_images,
+ ocr_quality_threshold_brightness, ocr_quality_threshold_contrast, ocr_quality_threshold_noise,
+ ocr_quality_threshold_sharpness, ocr_skip_enhancement,
 webdav_enabled, webdav_server_url, webdav_username, webdav_password,
 webdav_watch_folders, webdav_file_extensions, webdav_auto_sync, webdav_sync_interval_minutes
 )
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43, $44, $45, $46, $47, $48, $49, $50)
 ON CONFLICT (user_id) DO UPDATE SET
 ocr_language = $2,
 concurrent_ocr_jobs = $3,

@@ -1443,14 +1490,30 @@ impl Database {
 ocr_detect_orientation = $24,
 ocr_whitelist_chars = $25,
 ocr_blacklist_chars = $26,
- webdav_enabled = $27,
- webdav_server_url = $28,
- webdav_username = $29,
- webdav_password = $30,
- webdav_watch_folders = $31,
- webdav_file_extensions = $32,
- webdav_auto_sync = $33,
- webdav_sync_interval_minutes = $34,
+ ocr_brightness_boost = $27,
+ ocr_contrast_multiplier = $28,
+ ocr_noise_reduction_level = $29,
+ ocr_sharpening_strength = $30,
+ ocr_morphological_operations = $31,
+ ocr_adaptive_threshold_window_size = $32,
+ ocr_histogram_equalization = $33,
+ ocr_upscale_factor = $34,
+ ocr_max_image_width = $35,
+ ocr_max_image_height = $36,
+ save_processed_images = $37,
+ ocr_quality_threshold_brightness = $38,
+ ocr_quality_threshold_contrast = $39,
+ ocr_quality_threshold_noise = $40,
+ ocr_quality_threshold_sharpness = $41,
+ ocr_skip_enhancement = $42,
+ webdav_enabled = $43,
+ webdav_server_url = $44,
+ webdav_username = $45,
+ webdav_password = $46,
+ webdav_watch_folders = $47,
+ webdav_file_extensions = $48,
+ webdav_auto_sync = $49,
+ webdav_sync_interval_minutes = $50,
 updated_at = NOW()
 RETURNING id, user_id, ocr_language, concurrent_ocr_jobs, ocr_timeout_seconds,
 max_file_size_mb, allowed_file_types, auto_rotate_images, enable_image_preprocessing,

@@ -1459,6 +1522,11 @@ impl Database {
 cpu_priority, enable_background_ocr, ocr_page_segmentation_mode, ocr_engine_mode,
 ocr_min_confidence, ocr_dpi, ocr_enhance_contrast, ocr_remove_noise,
 ocr_detect_orientation, ocr_whitelist_chars, ocr_blacklist_chars,
+ ocr_brightness_boost, ocr_contrast_multiplier, ocr_noise_reduction_level, ocr_sharpening_strength,
+ ocr_morphological_operations, ocr_adaptive_threshold_window_size, ocr_histogram_equalization,
+ ocr_upscale_factor, ocr_max_image_width, ocr_max_image_height, save_processed_images,
+ ocr_quality_threshold_brightness, ocr_quality_threshold_contrast, ocr_quality_threshold_noise,
+ ocr_quality_threshold_sharpness, ocr_skip_enhancement,
 webdav_enabled, webdav_server_url, webdav_username, webdav_password,
 webdav_watch_folders, webdav_file_extensions, webdav_auto_sync, webdav_sync_interval_minutes,
 created_at, updated_at

@@ -1490,6 +1558,22 @@ impl Database {
 .bind(settings.ocr_detect_orientation.unwrap_or(current.ocr_detect_orientation))
 .bind(settings.ocr_whitelist_chars.as_ref().unwrap_or(&current.ocr_whitelist_chars))
 .bind(settings.ocr_blacklist_chars.as_ref().unwrap_or(&current.ocr_blacklist_chars))
+ .bind(settings.ocr_brightness_boost.unwrap_or(current.ocr_brightness_boost))
+ .bind(settings.ocr_contrast_multiplier.unwrap_or(current.ocr_contrast_multiplier))
+ .bind(settings.ocr_noise_reduction_level.unwrap_or(current.ocr_noise_reduction_level))
+ .bind(settings.ocr_sharpening_strength.unwrap_or(current.ocr_sharpening_strength))
+ .bind(settings.ocr_morphological_operations.unwrap_or(current.ocr_morphological_operations))
+ .bind(settings.ocr_adaptive_threshold_window_size.unwrap_or(current.ocr_adaptive_threshold_window_size))
+ .bind(settings.ocr_histogram_equalization.unwrap_or(current.ocr_histogram_equalization))
+ .bind(settings.ocr_upscale_factor.unwrap_or(current.ocr_upscale_factor))
+ .bind(settings.ocr_max_image_width.unwrap_or(current.ocr_max_image_width))
+ .bind(settings.ocr_max_image_height.unwrap_or(current.ocr_max_image_height))
+ .bind(settings.save_processed_images.unwrap_or(current.save_processed_images))
+ .bind(settings.ocr_quality_threshold_brightness.unwrap_or(current.ocr_quality_threshold_brightness))
+ .bind(settings.ocr_quality_threshold_contrast.unwrap_or(current.ocr_quality_threshold_contrast))
+ .bind(settings.ocr_quality_threshold_noise.unwrap_or(current.ocr_quality_threshold_noise))
+ .bind(settings.ocr_quality_threshold_sharpness.unwrap_or(current.ocr_quality_threshold_sharpness))
+ .bind(settings.ocr_skip_enhancement.unwrap_or(current.ocr_skip_enhancement))
 .bind(settings.webdav_enabled.unwrap_or(current.webdav_enabled))
 .bind(settings.webdav_server_url.as_ref().unwrap_or(&current.webdav_server_url))
 .bind(settings.webdav_username.as_ref().unwrap_or(&current.webdav_username))

@@ -1529,6 +1613,22 @@ impl Database {
 ocr_detect_orientation: row.get("ocr_detect_orientation"),
 ocr_whitelist_chars: row.get("ocr_whitelist_chars"),
 ocr_blacklist_chars: row.get("ocr_blacklist_chars"),
+ ocr_brightness_boost: row.get("ocr_brightness_boost"),
+ ocr_contrast_multiplier: row.get("ocr_contrast_multiplier"),
+ ocr_noise_reduction_level: row.get("ocr_noise_reduction_level"),
+ ocr_sharpening_strength: row.get("ocr_sharpening_strength"),
+ ocr_morphological_operations: row.get("ocr_morphological_operations"),
+ ocr_adaptive_threshold_window_size: row.get("ocr_adaptive_threshold_window_size"),
+ ocr_histogram_equalization: row.get("ocr_histogram_equalization"),
+ ocr_upscale_factor: row.get("ocr_upscale_factor"),
+ ocr_max_image_width: row.get("ocr_max_image_width"),
+ ocr_max_image_height: row.get("ocr_max_image_height"),
+ save_processed_images: row.get("save_processed_images"),
+ ocr_quality_threshold_brightness: row.get("ocr_quality_threshold_brightness"),
+ ocr_quality_threshold_contrast: row.get("ocr_quality_threshold_contrast"),
+ ocr_quality_threshold_noise: row.get("ocr_quality_threshold_noise"),
+ ocr_quality_threshold_sharpness: row.get("ocr_quality_threshold_sharpness"),
+ ocr_skip_enhancement: row.get("ocr_skip_enhancement"),
 webdav_enabled: row.get("webdav_enabled"),
 webdav_server_url: row.get("webdav_server_url"),
 webdav_username: row.get("webdav_username"),

@@ -2215,4 +2315,84 @@ impl Database {
 Ok(None)
 }
 }

+ // Processed images operations
+ pub async fn create_processed_image(&self, processed_image: &crate::models::CreateProcessedImage) -> Result<crate::models::ProcessedImage> {
+   let row = sqlx::query(
+     r#"INSERT INTO processed_images
+        (document_id, user_id, original_image_path, processed_image_path,
+         processing_parameters, processing_steps, image_width, image_height, file_size)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+        RETURNING id, document_id, user_id, original_image_path, processed_image_path,
+                  processing_parameters, processing_steps, image_width, image_height,
+                  file_size, created_at"#
+   )
+   .bind(processed_image.document_id)
+   .bind(processed_image.user_id)
+   .bind(&processed_image.original_image_path)
+   .bind(&processed_image.processed_image_path)
+   .bind(&processed_image.processing_parameters)
+   .bind(&processed_image.processing_steps)
+   .bind(processed_image.image_width)
+   .bind(processed_image.image_height)
+   .bind(processed_image.file_size)
+   .fetch_one(&self.pool)
+   .await?;
+
+   Ok(crate::models::ProcessedImage {
+     id: row.get("id"),
+     document_id: row.get("document_id"),
+     user_id: row.get("user_id"),
+     original_image_path: row.get("original_image_path"),
+     processed_image_path: row.get("processed_image_path"),
+     processing_parameters: row.get("processing_parameters"),
+     processing_steps: row.get("processing_steps"),
+     image_width: row.get("image_width"),
+     image_height: row.get("image_height"),
+     file_size: row.get("file_size"),
+     created_at: row.get("created_at"),
+   })
+ }
+
+ pub async fn get_processed_image_by_document_id(&self, document_id: Uuid, user_id: Uuid) -> Result<Option<crate::models::ProcessedImage>> {
+   let row = sqlx::query(
+     r#"SELECT id, document_id, user_id, original_image_path, processed_image_path,
+               processing_parameters, processing_steps, image_width, image_height,
+               file_size, created_at
+        FROM processed_images
+        WHERE document_id = $1 AND user_id = $2
+        ORDER BY created_at DESC
+        LIMIT 1"#
+   )
+   .bind(document_id)
+   .bind(user_id)
+   .fetch_optional(&self.pool)
+   .await?;
+
+   if let Some(row) = row {
+     Ok(Some(crate::models::ProcessedImage {
+       id: row.get("id"),
+       document_id: row.get("document_id"),
+       user_id: row.get("user_id"),
+       original_image_path: row.get("original_image_path"),
+       processed_image_path: row.get("processed_image_path"),
+       processing_parameters: row.get("processing_parameters"),
+       processing_steps: row.get("processing_steps"),
+       image_width: row.get("image_width"),
+       image_height: row.get("image_height"),
+       file_size: row.get("file_size"),
+       created_at: row.get("created_at"),
+     }))
+   } else {
+     Ok(None)
+   }
+ }
+
+ pub async fn delete_processed_images_by_document_id(&self, document_id: Uuid) -> Result<()> {
+   sqlx::query("DELETE FROM processed_images WHERE document_id = $1")
+     .bind(document_id)
+     .execute(&self.pool)
+     .await?;
+   Ok(())
+ }
}
@@ -1,5 +1,6 @@
use anyhow::{anyhow, Result};
use tracing::{debug, info, warn};
+use std::panic::{catch_unwind, AssertUnwindSafe};

#[cfg(feature = "ocr")]
use image::{DynamicImage, ImageBuffer, Luma, GenericImageView};

@@ -50,13 +51,14 @@ impl EnhancedOcrService {
 let mut preprocessing_applied = Vec::new();

 // Load and preprocess the image
- let processed_image_path = if settings.enable_image_preprocessing {
-   let processed_path = self.preprocess_image(file_path, settings).await?;
-   preprocessing_applied.push("Image preprocessing enabled".to_string());
-   processed_path
+ let (processed_image_path, mut preprocess_steps) = if settings.enable_image_preprocessing {
+   let (processed_path, steps) = self.preprocess_image(file_path, settings).await?;
+   (processed_path, steps)
 } else {
-   file_path.to_string()
+   (file_path.to_string(), Vec::new())
 };

+ preprocessing_applied.extend(preprocess_steps);
+
 // Move CPU-intensive OCR operations to blocking thread pool
 let processed_image_path_clone = processed_image_path.clone();

@@ -101,9 +103,10 @@ impl EnhancedOcrService {

 /// Preprocess image for optimal OCR quality, especially for challenging conditions
 #[cfg(feature = "ocr")]
- async fn preprocess_image(&self, input_path: &str, settings: &Settings) -> Result<String> {
+ async fn preprocess_image(&self, input_path: &str, settings: &Settings) -> Result<(String, Vec<String>)> {
 let img = image::open(input_path)?;
 let mut processed_img = img;
+ let mut preprocessing_applied = Vec::new();

 info!("Original image dimensions: {}x{}", processed_img.width(), processed_img.height());

@@ -121,32 +124,63 @@ impl EnhancedOcrService {

 // Analyze image quality and apply appropriate enhancements
 let quality_stats = self.analyze_image_quality(&processed_gray);
- info!("Image quality analysis: brightness={:.1}, contrast={:.1}, noise_level={:.1}",
-   quality_stats.average_brightness, quality_stats.contrast_ratio, quality_stats.noise_level);
+ info!("Image quality analysis: brightness={:.1}, contrast={:.1}, noise_level={:.1}, sharpness={:.1}",
+   quality_stats.average_brightness, quality_stats.contrast_ratio, quality_stats.noise_level, quality_stats.sharpness);

- // Apply adaptive brightness correction for dim images
- if quality_stats.average_brightness < 80.0 || quality_stats.contrast_ratio < 0.3 {
-   processed_gray = self.enhance_brightness_and_contrast(processed_gray, &quality_stats)?;
+ // Determine if image needs enhancement based on quality thresholds
+ let needs_enhancement = self.needs_enhancement(&quality_stats, settings);
+
+ if !needs_enhancement {
+   info!("Image quality is good, skipping enhancement steps");
+ } else {
+   info!("Image quality needs improvement, applying selective enhancements");
+
+   // Apply brightness correction only for very dim images
+   if quality_stats.average_brightness < 50.0 || settings.ocr_brightness_boost > 0.0 {
+     processed_gray = self.enhance_brightness_and_contrast(processed_gray, &quality_stats, settings)?;
+     preprocessing_applied.push("Brightness/contrast correction".to_string());
+   }
+
+   // Apply noise removal only for very noisy images
+   if quality_stats.noise_level > 0.25 || (settings.ocr_remove_noise && settings.ocr_noise_reduction_level > 1) {
+     processed_gray = self.adaptive_noise_removal(processed_gray, &quality_stats, settings)?;
+     preprocessing_applied.push("Noise reduction".to_string());
+   }
+
+   // Apply contrast enhancement only for very low contrast images
+   if quality_stats.contrast_ratio < 0.2 || (settings.ocr_enhance_contrast && settings.ocr_adaptive_threshold_window_size > 0) {
+     let original_gray = processed_gray.clone();
+     match self.adaptive_contrast_enhancement(processed_gray, &quality_stats, settings) {
+       Ok(enhanced) => {
+         processed_gray = enhanced;
+         preprocessing_applied.push("Contrast enhancement".to_string());
+       }
+       Err(e) => {
+         warn!("Contrast enhancement failed, using alternative method: {}", e);
+         // Fallback to basic contrast enhancement
+         processed_gray = self.apply_alternative_contrast_enhancement(original_gray.clone(), &quality_stats, settings)
+           .unwrap_or_else(|_| {
+             warn!("Alternative contrast enhancement also failed, using original image");
+             original_gray
+           });
+         preprocessing_applied.push("Basic contrast enhancement".to_string());
+       }
+     }
+   }
+
+   // Apply sharpening only for very blurry images
+   if quality_stats.sharpness < 0.2 || settings.ocr_sharpening_strength > 0.5 {
+     processed_gray = self.sharpen_image(processed_gray, settings)?;
+     preprocessing_applied.push("Image sharpening".to_string());
+   }
+
+   // Apply morphological operations only if explicitly enabled and image needs it
+   if settings.ocr_morphological_operations && quality_stats.noise_level > 0.15 {
+     processed_gray = self.apply_morphological_operations(processed_gray)?;
+     preprocessing_applied.push("Morphological operations".to_string());
+   }
 }

- // Apply noise removal (more aggressive for noisy images)
- if settings.ocr_remove_noise || quality_stats.noise_level > 0.15 {
-   processed_gray = self.adaptive_noise_removal(processed_gray, &quality_stats)?;
- }
-
- // Apply contrast enhancement (adaptive based on image quality)
- if settings.ocr_enhance_contrast {
-   processed_gray = self.adaptive_contrast_enhancement(processed_gray, &quality_stats)?;
- }
-
- // Apply sharpening for blurry images
- if quality_stats.sharpness < 0.4 {
-   processed_gray = self.sharpen_image(processed_gray)?;
- }
-
- // Apply morphological operations for text clarity
- processed_gray = self.apply_morphological_operations(processed_gray)?;
-
 // Save processed image to temporary file
 let temp_filename = format!("processed_{}_{}.png",
 std::process::id(),

@@ -158,7 +192,42 @@ impl EnhancedOcrService {
 dynamic_processed.save(&temp_path)?;

 info!("Processed image saved to: {}", temp_path);
- Ok(temp_path)
+ Ok((temp_path, preprocessing_applied))
 }

+ /// Determine if image needs enhancement based on quality thresholds
+ #[cfg(feature = "ocr")]
+ fn needs_enhancement(&self, stats: &ImageQualityStats, settings: &Settings) -> bool {
+   // If user wants to skip enhancement entirely, respect that
+   if settings.ocr_skip_enhancement {
+     info!("OCR enhancement disabled by user setting");
+     return false;
+   }
+
+   // Use user-configurable thresholds
+   let brightness_threshold = settings.ocr_quality_threshold_brightness;
+   let contrast_threshold = settings.ocr_quality_threshold_contrast;
+   let noise_threshold = settings.ocr_quality_threshold_noise;
+   let sharpness_threshold = settings.ocr_quality_threshold_sharpness;
+
+   // Check if any metric falls below acceptable quality thresholds
+   let needs_brightness_fix = stats.average_brightness < brightness_threshold;
+   let needs_contrast_fix = stats.contrast_ratio < contrast_threshold;
+   let needs_noise_fix = stats.noise_level > noise_threshold;
+   let needs_sharpening = stats.sharpness < sharpness_threshold;
+
+   // Also check if user has explicitly enabled aggressive enhancement
+   let user_wants_enhancement = settings.ocr_brightness_boost > 0.0 ||
+     settings.ocr_contrast_multiplier > 1.0 ||
+     settings.ocr_noise_reduction_level > 1 ||
+     settings.ocr_sharpening_strength > 0.0;
+
+   let needs_enhancement = needs_brightness_fix || needs_contrast_fix || needs_noise_fix || needs_sharpening || user_wants_enhancement;
+
+   info!("Enhancement decision: brightness_ok={}, contrast_ok={}, noise_ok={}, sharpness_ok={}, user_enhancement={}, needs_enhancement={}",
+     !needs_brightness_fix, !needs_contrast_fix, !needs_noise_fix, !needs_sharpening, user_wants_enhancement, needs_enhancement);
+
+   needs_enhancement
+ }
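Since all of these thresholds are now user-facing settings, the same decision can be previewed client-side. A hedged TypeScript mirror of needs_enhancement follows (a hypothetical helper for the settings UI, not part of this commit; field names follow the Settings interface added earlier in this diff):

// Mirrors the server-side enhancement decision for a given quality report.
interface QualityStats {
  averageBrightness: number;
  contrastRatio: number;
  noiseLevel: number;
  sharpness: number;
}

function wouldEnhance(stats: QualityStats, s: Settings): boolean {
  if (s.ocrSkipEnhancement) return false;
  const belowQuality =
    stats.averageBrightness < s.ocrQualityThresholdBrightness ||
    stats.contrastRatio < s.ocrQualityThresholdContrast ||
    stats.noiseLevel > s.ocrQualityThresholdNoise ||
    stats.sharpness < s.ocrQualityThresholdSharpness;
  const userForcesEnhancement =
    s.ocrBrightnessBoost > 0 ||
    s.ocrContrastMultiplier > 1 ||
    s.ocrNoiseReductionLevel > 1 ||
    s.ocrSharpeningStrength > 0;
  return belowQuality || userForcesEnhancement;
}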
/// Configure Tesseract with optimal settings
|
||||
@@ -370,12 +439,14 @@ impl EnhancedOcrService {
|
||||
|
||||
/// Enhanced brightness and contrast correction for dim images
|
||||
#[cfg(feature = "ocr")]
|
||||
fn enhance_brightness_and_contrast(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
|
||||
fn enhance_brightness_and_contrast(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats, settings: &Settings) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
|
||||
let (width, height) = img.dimensions();
|
||||
let mut enhanced = ImageBuffer::new(width, height);
|
||||
|
||||
// Calculate enhancement parameters based on image statistics
|
||||
let brightness_boost = if stats.average_brightness < 50.0 {
|
||||
// Calculate enhancement parameters based on image statistics and user settings
|
||||
let brightness_boost = if settings.ocr_brightness_boost > 0.0 {
|
||||
settings.ocr_brightness_boost // Use user-configured value
|
||||
} else if stats.average_brightness < 50.0 {
|
||||
60.0 - stats.average_brightness // Aggressive boost for very dim images
|
||||
} else if stats.average_brightness < 80.0 {
|
||||
30.0 - (stats.average_brightness - 50.0) * 0.5 // Moderate boost
|
||||
@@ -383,7 +454,9 @@ impl EnhancedOcrService {
|
||||
0.0 // No boost needed
|
||||
};
|
||||
|
||||
let contrast_multiplier = if stats.contrast_ratio < 0.2 {
|
||||
let contrast_multiplier = if settings.ocr_contrast_multiplier > 0.0 {
|
||||
settings.ocr_contrast_multiplier // Use user-configured value
|
||||
} else if stats.contrast_ratio < 0.2 {
|
||||
2.5 // Aggressive contrast boost for flat images
|
||||
} else if stats.contrast_ratio < 0.4 {
|
||||
1.8 // Moderate contrast boost
|
||||
@@ -408,23 +481,38 @@ impl EnhancedOcrService {

    /// Adaptive noise removal based on detected noise level
    #[cfg(feature = "ocr")]
    fn adaptive_noise_removal(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
    fn adaptive_noise_removal(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats, settings: &Settings) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
        let mut processed = img;

        if stats.noise_level > 0.2 {
            // Heavy noise - apply multiple filters
            processed = median_filter(&processed, 2, 2); // Larger median filter
            processed = gaussian_blur_f32(&processed, 0.8); // More blur
            info!("Applied heavy noise reduction (noise level: {:.2})", stats.noise_level);
        // Use user-configured noise reduction level if specified
        let noise_level = if settings.ocr_noise_reduction_level > 0 {
            settings.ocr_noise_reduction_level
        } else if stats.noise_level > 0.2 {
            3 // Heavy noise
        } else if stats.noise_level > 0.1 {
            // Moderate noise
            processed = median_filter(&processed, 1, 1);
            processed = gaussian_blur_f32(&processed, 0.5);
            info!("Applied moderate noise reduction");
            2 // Moderate noise
        } else {
            // Light noise or clean image
            processed = median_filter(&processed, 1, 1);
            info!("Applied light noise reduction");
            1 // Light noise
        };

        match noise_level {
            3 => {
                // Heavy noise - apply multiple filters
                processed = median_filter(&processed, 2, 2); // Larger median filter
                processed = gaussian_blur_f32(&processed, 0.8); // More blur
                info!("Applied heavy noise reduction");
            },
            2 => {
                // Moderate noise
                processed = median_filter(&processed, 1, 1);
                processed = gaussian_blur_f32(&processed, 0.5);
                info!("Applied moderate noise reduction");
            },
            _ => {
                // Light noise or clean image (level 1 and any other value)
                processed = median_filter(&processed, 1, 1);
                info!("Applied light noise reduction");
            }
        }

        Ok(processed)
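The refactor separates "how noisy is this image" (a level of 1-3, either user-forced or derived from the measured noise) from "which filters to run". As data, the mapping looks like this; a small sketch assuming the median-filter radii and gaussian_blur_f32 sigma values used above:

    // level -> (median filter radius, optional gaussian sigma)
    fn filter_params(level: i32) -> (u32, Option<f32>) {
        match level {
            3 => (2, Some(0.8)), // heavy: larger median window plus stronger blur
            2 => (1, Some(0.5)), // moderate: small window plus light blur
            _ => (1, None),      // light or clean: median filter only
        }
    }
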
@@ -432,12 +520,22 @@ impl EnhancedOcrService {

    /// Adaptive contrast enhancement based on image quality
    #[cfg(feature = "ocr")]
    fn adaptive_contrast_enhancement(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
    fn adaptive_contrast_enhancement(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats, settings: &Settings) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
        // Choose threshold size based on image dimensions and quality
        let (width, height) = img.dimensions();
        let min_dimension = width.min(height);

        let threshold_size = if stats.contrast_ratio < 0.2 {
        // Check if image is too large for safe adaptive threshold processing
        // The integral image calculation can overflow with large images
        if width as u64 * height as u64 > 1_500_000 {
            info!("Image too large for adaptive threshold ({}x{}), using alternative contrast enhancement", width, height);
            return self.apply_alternative_contrast_enhancement(img, stats, settings);
        }

        let threshold_size = if settings.ocr_adaptive_threshold_window_size > 0 {
            // Use user-configured window size
            settings.ocr_adaptive_threshold_window_size as u32
        } else if stats.contrast_ratio < 0.2 {
            // Low contrast - use smaller windows for more aggressive local adaptation
            (min_dimension / 20).max(11).min(31)
        } else {
@@ -449,14 +547,107 @@ impl EnhancedOcrService {
        let threshold_size = if threshold_size % 2 == 0 { threshold_size + 1 } else { threshold_size };

        info!("Applying adaptive threshold with window size: {}", threshold_size);
        let enhanced = adaptive_threshold(&img, threshold_size);

        // Wrap in panic-safe block to catch overflow errors
        let enhanced = catch_unwind(AssertUnwindSafe(|| {
            adaptive_threshold(&img, threshold_size)
        }));

        match enhanced {
            Ok(result) => Ok(result),
            Err(_) => {
                warn!("Adaptive threshold panicked (likely overflow), using alternative method");
                self.apply_alternative_contrast_enhancement(img, stats, settings)
            }
        }
    }

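There are two layers of defense here: a size pre-check that routes images over roughly 1.5 megapixels to the histogram path, and catch_unwind as a last resort if adaptive_threshold still panics. A minimal sketch of the panic-isolation idiom, with risky and fallback standing in for the two code paths above:

    use std::panic::{catch_unwind, AssertUnwindSafe};

    fn with_fallback<T>(risky: impl FnOnce() -> T, fallback: impl FnOnce() -> T) -> T {
        match catch_unwind(AssertUnwindSafe(risky)) {
            Ok(value) => value,
            Err(_) => fallback(), // the risky path panicked; recover via the fallback
        }
    }
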
    /// Alternative contrast enhancement for large images to avoid overflow
    #[cfg(feature = "ocr")]
    fn apply_alternative_contrast_enhancement(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, stats: &ImageQualityStats, settings: &Settings) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
        let (width, height) = img.dimensions();
        let mut enhanced = ImageBuffer::new(width, height);

        // Use histogram equalization instead of adaptive threshold for large images
        if settings.ocr_histogram_equalization {
            info!("Applying histogram equalization for contrast enhancement (user enabled)");
        } else {
            info!("Applying histogram equalization for contrast enhancement (fallback)");
        }

        // Calculate histogram
        let mut histogram = [0u32; 256];
        for pixel in img.pixels() {
            histogram[pixel[0] as usize] += 1;
        }

        // Calculate cumulative distribution function
        let total_pixels = width as u32 * height as u32;
        let mut cdf = [0u32; 256];
        cdf[0] = histogram[0];
        for i in 1..256 {
            cdf[i] = cdf[i - 1] + histogram[i];
        }

        // Create lookup table for histogram equalization
        let mut lookup = [0u8; 256];
        for i in 0..256 {
            if cdf[i] > 0 {
                lookup[i] = ((cdf[i] as f64 / total_pixels as f64) * 255.0) as u8;
            }
        }

        // Apply histogram equalization
        for (x, y, pixel) in img.enumerate_pixels() {
            let old_value = pixel[0];
            let new_value = lookup[old_value as usize];
            enhanced.put_pixel(x, y, Luma([new_value]));
        }

        // Apply additional contrast stretching if needed
        if stats.contrast_ratio < 0.3 {
            enhanced = self.apply_contrast_stretching(enhanced)?;
        }

        Ok(enhanced)
    }

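The lookup table implements the standard equalization transfer function: for a grey level v in an image of N pixels,

    T(v) = floor(255 * cdf(v) / N)

so frequently occurring grey levels get spread across the output range. With u32 counters the CDF cannot overflow here, since the large-image guard caps the pixel count well below u32::MAX.
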
    /// Apply contrast stretching to improve dynamic range
    #[cfg(feature = "ocr")]
    fn apply_contrast_stretching(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
        let (width, height) = img.dimensions();
        let mut enhanced = ImageBuffer::new(width, height);

        // Find min and max values
        let mut min_val = 255u8;
        let mut max_val = 0u8;

        for pixel in img.pixels() {
            let val = pixel[0];
            min_val = min_val.min(val);
            max_val = max_val.max(val);
        }

        // Avoid division by zero
        if max_val == min_val {
            return Ok(img);
        }

        let range = max_val - min_val;

        // Apply contrast stretching
        for (x, y, pixel) in img.enumerate_pixels() {
            let old_value = pixel[0];
            let new_value = (((old_value - min_val) as f32 / range as f32) * 255.0) as u8;
            enhanced.put_pixel(x, y, Luma([new_value]));
        }

        Ok(enhanced)
    }

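A quick worked example of the stretch: if the darkest pixel is 40 and the brightest is 120, then range = 80, and a mid-tone value of 80 maps to ((80 - 40) / 80) * 255 ≈ 127, while 40 maps to 0 and 120 to 255. The u8 subtraction old_value - min_val cannot underflow, because min_val is by construction the smallest value in the image.
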
    /// Sharpen blurry images
    #[cfg(feature = "ocr")]
    fn sharpen_image(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
    fn sharpen_image(&self, img: ImageBuffer<Luma<u8>, Vec<u8>>, settings: &Settings) -> Result<ImageBuffer<Luma<u8>, Vec<u8>>> {
        let (width, height) = img.dimensions();
        let mut sharpened = ImageBuffer::new(width, height);

108
src/models.rs
@@ -298,6 +298,22 @@ pub struct Settings {
    pub ocr_detect_orientation: bool,
    pub ocr_whitelist_chars: Option<String>,
    pub ocr_blacklist_chars: Option<String>,
    pub ocr_brightness_boost: f32,
    pub ocr_contrast_multiplier: f32,
    pub ocr_noise_reduction_level: i32,
    pub ocr_sharpening_strength: f32,
    pub ocr_morphological_operations: bool,
    pub ocr_adaptive_threshold_window_size: i32,
    pub ocr_histogram_equalization: bool,
    pub ocr_upscale_factor: f32,
    pub ocr_max_image_width: i32,
    pub ocr_max_image_height: i32,
    pub save_processed_images: bool,
    pub ocr_quality_threshold_brightness: f32,
    pub ocr_quality_threshold_contrast: f32,
    pub ocr_quality_threshold_noise: f32,
    pub ocr_quality_threshold_sharpness: f32,
    pub ocr_skip_enhancement: bool,
    pub webdav_enabled: bool,
    pub webdav_server_url: Option<String>,
    pub webdav_username: Option<String>,
@@ -337,6 +353,22 @@ pub struct SettingsResponse {
    pub ocr_detect_orientation: bool,
    pub ocr_whitelist_chars: Option<String>,
    pub ocr_blacklist_chars: Option<String>,
    pub ocr_brightness_boost: f32,
    pub ocr_contrast_multiplier: f32,
    pub ocr_noise_reduction_level: i32,
    pub ocr_sharpening_strength: f32,
    pub ocr_morphological_operations: bool,
    pub ocr_adaptive_threshold_window_size: i32,
    pub ocr_histogram_equalization: bool,
    pub ocr_upscale_factor: f32,
    pub ocr_max_image_width: i32,
    pub ocr_max_image_height: i32,
    pub save_processed_images: bool,
    pub ocr_quality_threshold_brightness: f32,
    pub ocr_quality_threshold_contrast: f32,
    pub ocr_quality_threshold_noise: f32,
    pub ocr_quality_threshold_sharpness: f32,
    pub ocr_skip_enhancement: bool,
    pub webdav_enabled: bool,
    pub webdav_server_url: Option<String>,
    pub webdav_username: Option<String>,
@@ -374,6 +406,22 @@ pub struct UpdateSettings {
    pub ocr_detect_orientation: Option<bool>,
    pub ocr_whitelist_chars: Option<Option<String>>,
    pub ocr_blacklist_chars: Option<Option<String>>,
    pub ocr_brightness_boost: Option<f32>,
    pub ocr_contrast_multiplier: Option<f32>,
    pub ocr_noise_reduction_level: Option<i32>,
    pub ocr_sharpening_strength: Option<f32>,
    pub ocr_morphological_operations: Option<bool>,
    pub ocr_adaptive_threshold_window_size: Option<i32>,
    pub ocr_histogram_equalization: Option<bool>,
    pub ocr_upscale_factor: Option<f32>,
    pub ocr_max_image_width: Option<i32>,
    pub ocr_max_image_height: Option<i32>,
    pub save_processed_images: Option<bool>,
    pub ocr_quality_threshold_brightness: Option<f32>,
    pub ocr_quality_threshold_contrast: Option<f32>,
    pub ocr_quality_threshold_noise: Option<f32>,
    pub ocr_quality_threshold_sharpness: Option<f32>,
    pub ocr_skip_enhancement: Option<bool>,
    pub webdav_enabled: Option<bool>,
    pub webdav_server_url: Option<Option<String>>,
    pub webdav_username: Option<Option<String>>,
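Every new field on UpdateSettings is an Option (and the already-optional string fields are Option<Option<String>>), so clients can send partial updates touching only the knobs they care about. A hypothetical partial payload for the new OCR fields:

    use serde_json::json;

    fn example_update_payload() -> serde_json::Value {
        // Only the keys being changed need to be present
        json!({
            "ocr_noise_reduction_level": 2,
            "ocr_histogram_equalization": true,
            "save_processed_images": true
        })
    }
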
@@ -412,6 +460,22 @@ impl From<Settings> for SettingsResponse {
            ocr_detect_orientation: settings.ocr_detect_orientation,
            ocr_whitelist_chars: settings.ocr_whitelist_chars,
            ocr_blacklist_chars: settings.ocr_blacklist_chars,
            ocr_brightness_boost: settings.ocr_brightness_boost,
            ocr_contrast_multiplier: settings.ocr_contrast_multiplier,
            ocr_noise_reduction_level: settings.ocr_noise_reduction_level,
            ocr_sharpening_strength: settings.ocr_sharpening_strength,
            ocr_morphological_operations: settings.ocr_morphological_operations,
            ocr_adaptive_threshold_window_size: settings.ocr_adaptive_threshold_window_size,
            ocr_histogram_equalization: settings.ocr_histogram_equalization,
            ocr_upscale_factor: settings.ocr_upscale_factor,
            ocr_max_image_width: settings.ocr_max_image_width,
            ocr_max_image_height: settings.ocr_max_image_height,
            save_processed_images: settings.save_processed_images,
            ocr_quality_threshold_brightness: settings.ocr_quality_threshold_brightness,
            ocr_quality_threshold_contrast: settings.ocr_quality_threshold_contrast,
            ocr_quality_threshold_noise: settings.ocr_quality_threshold_noise,
            ocr_quality_threshold_sharpness: settings.ocr_quality_threshold_sharpness,
            ocr_skip_enhancement: settings.ocr_skip_enhancement,
            webdav_enabled: settings.webdav_enabled,
            webdav_server_url: settings.webdav_server_url,
            webdav_username: settings.webdav_username,
@@ -462,6 +526,22 @@ impl Default for Settings {
            ocr_detect_orientation: true, // Enable orientation detection
            ocr_whitelist_chars: None, // No character whitelist by default
            ocr_blacklist_chars: None, // No character blacklist by default
            ocr_brightness_boost: 1.0, // Conservative brightness boost
            ocr_contrast_multiplier: 1.2, // Conservative contrast enhancement
            ocr_noise_reduction_level: 1, // Light noise reduction
            ocr_sharpening_strength: 0.5, // Light sharpening
            ocr_morphological_operations: false, // Conservative - no morphological ops by default
            ocr_adaptive_threshold_window_size: 15, // Small window for adaptive threshold
            ocr_histogram_equalization: false, // Conservative - no histogram equalization by default
            ocr_upscale_factor: 1.0, // No upscaling by default
            ocr_max_image_width: 3000, // Reasonable max width
            ocr_max_image_height: 3000, // Reasonable max height
            save_processed_images: false, // Conservative - don't save by default
            ocr_quality_threshold_brightness: 0.3, // Conservative threshold
            ocr_quality_threshold_contrast: 0.2, // Conservative threshold
            ocr_quality_threshold_noise: 0.7, // Conservative threshold
            ocr_quality_threshold_sharpness: 0.3, // Conservative threshold
            ocr_skip_enhancement: false, // Allow enhancement by default
            webdav_enabled: false,
            webdav_server_url: None,
            webdav_username: None,
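Worth noting about these defaults: since ocr_brightness_boost defaults to 1.0 and the enhancement code treats any value above 0.0 as an explicit override, the statistics-driven brightness heuristic only runs if a user sets the boost to 0. The same appears to hold for ocr_contrast_multiplier (default 1.2) and ocr_noise_reduction_level (default 1), so the automatic paths are effectively opt-in rather than the default behavior.
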
@@ -804,4 +884,32 @@ pub struct S3SourceConfig {
    pub file_extensions: Vec<String>,
    pub auto_sync: bool,
    pub sync_interval_minutes: i32,
}

#[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)]
pub struct ProcessedImage {
    pub id: Uuid,
    pub document_id: Uuid,
    pub user_id: Uuid,
    pub original_image_path: String,
    pub processed_image_path: String,
    pub processing_parameters: serde_json::Value,
    pub processing_steps: Vec<String>,
    pub image_width: i32,
    pub image_height: i32,
    pub file_size: i64,
    pub created_at: DateTime<Utc>,
}

#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct CreateProcessedImage {
    pub document_id: Uuid,
    pub user_id: Uuid,
    pub original_image_path: String,
    pub processed_image_path: String,
    pub processing_parameters: serde_json::Value,
    pub processing_steps: Vec<String>,
    pub image_width: i32,
    pub image_height: i32,
    pub file_size: i64,
}
@@ -32,6 +32,7 @@ pub fn router() -> Router<Arc<AppState>> {
        .route("/{id}/view", get(view_document))
        .route("/{id}/thumbnail", get(get_document_thumbnail))
        .route("/{id}/ocr", get(get_document_ocr))
        .route("/{id}/processed-image", get(get_processed_image))
}

#[utoipa::path(
@@ -389,4 +390,61 @@ async fn get_document_ocr(
        "ocr_error": document.ocr_error,
        "ocr_completed_at": document.ocr_completed_at
    })))
}

#[utoipa::path(
    get,
    path = "/api/documents/{id}/processed-image",
    tag = "documents",
    security(
        ("bearer_auth" = [])
    ),
    params(
        ("id" = uuid::Uuid, Path, description = "Document ID")
    ),
    responses(
        (status = 200, description = "Processed image file", content_type = "image/png"),
        (status = 404, description = "Document or processed image not found"),
        (status = 401, description = "Unauthorized")
    )
)]
async fn get_processed_image(
    State(state): State<Arc<AppState>>,
    auth_user: AuthUser,
    Path(document_id): Path<uuid::Uuid>,
) -> Result<Response, StatusCode> {
    // Check if document exists and belongs to user
    let documents = state
        .db
        .get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    let _document = documents
        .into_iter()
        .find(|doc| doc.id == document_id)
        .ok_or(StatusCode::NOT_FOUND)?;

    // Get processed image record
    let processed_image = state
        .db
        .get_processed_image_by_document_id(document_id, auth_user.user.id)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .ok_or(StatusCode::NOT_FOUND)?;

    // Read processed image file
    let image_data = tokio::fs::read(&processed_image.processed_image_path)
        .await
        .map_err(|_| StatusCode::NOT_FOUND)?;

    // Return image as PNG
    let response = Response::builder()
        .status(StatusCode::OK)
        .header(CONTENT_TYPE, "image/png")
        .header("Cache-Control", "public, max-age=86400") // Cache for 1 day
        .body(image_data.into())
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    Ok(response)
}
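A hypothetical client call against the new endpoint, sketched with reqwest; the base URL, token, and document id are placeholders:

    use reqwest::Client;

    async fn fetch_processed_image(base: &str, token: &str, id: uuid::Uuid) -> anyhow::Result<Vec<u8>> {
        let bytes = Client::new()
            .get(format!("{base}/api/documents/{id}/processed-image"))
            .bearer_auth(token) // matches the bearer_auth security scheme above
            .send()
            .await?
            .error_for_status()? // 404 if the document or processed image is missing
            .bytes()
            .await?;
        Ok(bytes.to_vec())
    }
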
@@ -70,6 +70,22 @@ async fn get_settings(
        ocr_detect_orientation: default.ocr_detect_orientation,
        ocr_whitelist_chars: default.ocr_whitelist_chars,
        ocr_blacklist_chars: default.ocr_blacklist_chars,
        ocr_brightness_boost: default.ocr_brightness_boost,
        ocr_contrast_multiplier: default.ocr_contrast_multiplier,
        ocr_noise_reduction_level: default.ocr_noise_reduction_level,
        ocr_sharpening_strength: default.ocr_sharpening_strength,
        ocr_morphological_operations: default.ocr_morphological_operations,
        ocr_adaptive_threshold_window_size: default.ocr_adaptive_threshold_window_size,
        ocr_histogram_equalization: default.ocr_histogram_equalization,
        ocr_upscale_factor: default.ocr_upscale_factor,
        ocr_max_image_width: default.ocr_max_image_width,
        ocr_max_image_height: default.ocr_max_image_height,
        save_processed_images: default.save_processed_images,
        ocr_quality_threshold_brightness: default.ocr_quality_threshold_brightness,
        ocr_quality_threshold_contrast: default.ocr_quality_threshold_contrast,
        ocr_quality_threshold_noise: default.ocr_quality_threshold_noise,
        ocr_quality_threshold_sharpness: default.ocr_quality_threshold_sharpness,
        ocr_skip_enhancement: default.ocr_skip_enhancement,
        webdav_enabled: default.webdav_enabled,
        webdav_server_url: default.webdav_server_url,
        webdav_username: default.webdav_username,

@@ -20,6 +20,7 @@ pub fn router() -> Router<Arc<AppState>> {
        .route("/", get(list_sources).post(create_source))
        .route("/{id}", get(get_source).put(update_source).delete(delete_source))
        .route("/{id}/sync", post(trigger_sync))
        .route("/{id}/sync/stop", post(stop_sync))
        .route("/{id}/test", post(test_connection))
        .route("/{id}/estimate", post(estimate_crawl))
        .route("/estimate", post(estimate_crawl_with_config))
@@ -164,6 +165,8 @@ async fn update_source(
    State(state): State<Arc<AppState>>,
    Json(update_data): Json<UpdateSource>,
) -> Result<Json<SourceResponse>, StatusCode> {
    use tracing::info;
    info!("Updating source {} with data: {:?}", source_id, update_data);
    // Check if source exists
    let existing = state
        .db
@@ -174,7 +177,9 @@ async fn update_source(

    // Validate config if provided
    if let Some(config) = &update_data.config {
        if let Err(_) = validate_config_for_type(&existing.source_type, config) {
        if let Err(validation_error) = validate_config_for_type(&existing.source_type, config) {
            error!("Config validation failed for source {}: {}", source_id, validation_error);
            error!("Invalid config received: {:?}", config);
            return Err(StatusCode::BAD_REQUEST);
        }
    }
@@ -183,8 +188,12 @@ async fn update_source(
        .db
        .update_source(auth_user.user.id, source_id, &update_data)
        .await
        .map_err(|_| StatusCode::BAD_REQUEST)?;
        .map_err(|e| {
            error!("Failed to update source {} in database: {}", source_id, e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    info!("Successfully updated source {}: {}", source_id, source.name);
    Ok(Json(source.into()))
}

@@ -315,6 +324,62 @@ async fn trigger_sync(
    Ok(StatusCode::OK)
}

#[utoipa::path(
    post,
    path = "/api/sources/{id}/sync/stop",
    tag = "sources",
    security(
        ("bearer_auth" = [])
    ),
    params(
        ("id" = Uuid, Path, description = "Source ID")
    ),
    responses(
        (status = 200, description = "Sync stopped successfully"),
        (status = 404, description = "Source not found"),
        (status = 409, description = "Source is not currently syncing"),
        (status = 401, description = "Unauthorized")
    )
)]
async fn stop_sync(
    auth_user: AuthUser,
    Path(source_id): Path<Uuid>,
    State(state): State<Arc<AppState>>,
) -> Result<StatusCode, StatusCode> {
    let source = state
        .db
        .get_source(auth_user.user.id, source_id)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .ok_or(StatusCode::NOT_FOUND)?;

    // Check if currently syncing
    if !matches!(source.status, crate::models::SourceStatus::Syncing) {
        return Err(StatusCode::CONFLICT);
    }

    // Stop sync using the universal source scheduler
    if let Some(scheduler) = &state.source_scheduler {
        if let Err(e) = scheduler.stop_sync(source_id).await {
            error!("Failed to stop sync for source {}: {}", source_id, e);
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
    } else {
        // Update status directly if no scheduler available (fallback)
        state
            .db
            .update_source_status(
                source_id,
                crate::models::SourceStatus::Idle,
                Some("Sync cancelled by user".to_string()),
            )
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    }

    Ok(StatusCode::OK)
}

#[utoipa::path(
    post,
    path = "/api/sources/{id}/test",

@@ -1,8 +1,12 @@
use std::sync::Arc;
use std::time::Duration;
use std::collections::HashMap;
use tokio::time::interval;
use tokio::sync::{Mutex, RwLock};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
use chrono::Utc;
use uuid::Uuid;

use crate::{
    AppState,
@@ -14,6 +18,8 @@ pub struct SourceScheduler {
    state: Arc<AppState>,
    sync_service: SourceSyncService,
    check_interval: Duration,
    // Track running sync tasks and their cancellation tokens
    running_syncs: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
}

impl SourceScheduler {
@@ -24,6 +30,7 @@ impl SourceScheduler {
            state,
            sync_service,
            check_interval: Duration::from_secs(60), // Check every minute for due syncs
            running_syncs: Arc::new(RwLock::new(HashMap::new())),
        }
    }

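The registry is the core of the stop feature: one CancellationToken per in-flight sync, keyed by source id behind an async RwLock. A minimal sketch of the register/cancel pair, independent of the scheduler itself:

    use std::{collections::HashMap, sync::Arc};
    use tokio::sync::RwLock;
    use tokio_util::sync::CancellationToken;
    use uuid::Uuid;

    type RunningSyncs = Arc<RwLock<HashMap<Uuid, CancellationToken>>>;

    async fn register_sync(map: &RunningSyncs, id: Uuid) -> CancellationToken {
        let token = CancellationToken::new();
        map.write().await.insert(id, token.clone());
        token
    }

    async fn cancel_sync(map: &RunningSyncs, id: Uuid) -> bool {
        match map.write().await.remove(&id) {
            Some(token) => { token.cancel(); true }
            None => false, // no running sync for this source
        }
    }
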
@@ -146,13 +153,24 @@ impl SourceScheduler {
        let sync_service = self.sync_service.clone();
        let source_clone = source.clone();
        let state_clone = self.state.clone();
        let running_syncs_clone = self.running_syncs.clone();

        // Create cancellation token for this sync
        let cancellation_token = CancellationToken::new();

        // Register the sync task
        {
            let mut running_syncs = running_syncs_clone.write().await;
            running_syncs.insert(source.id, cancellation_token.clone());
        }

        // Start sync in background task
        tokio::spawn(async move {
        let sync_handle = tokio::spawn(async move {
            // Get user's OCR setting - simplified, you might want to store this in source config
            let enable_background_ocr = true; // Default to true, could be made configurable per source

            match sync_service.sync_source(&source_clone, enable_background_ocr).await {
            // Pass cancellation token to sync service
            match sync_service.sync_source_with_cancellation(&source_clone, enable_background_ocr, cancellation_token.clone()).await {
                Ok(files_processed) => {
                    info!("Background sync completed for source {}: {} files processed",
                        source_clone.name, files_processed);
@@ -213,6 +231,12 @@ impl SourceScheduler {
                    }
                }
            }

            // Cleanup: Remove the sync from running list
            {
                let mut running_syncs = running_syncs_clone.write().await;
                running_syncs.remove(&source_clone.id);
            }
        });
    }
}
@@ -281,11 +305,21 @@ impl SourceScheduler {
        if let Some(source) = self.state.db.get_source_by_id(source_id).await? {
            let sync_service = self.sync_service.clone();
            let state_clone = self.state.clone();
            let running_syncs_clone = self.running_syncs.clone();

            // Create cancellation token for this sync
            let cancellation_token = CancellationToken::new();

            // Register the sync task
            {
                let mut running_syncs = running_syncs_clone.write().await;
                running_syncs.insert(source_id, cancellation_token.clone());
            }

            tokio::spawn(async move {
                let enable_background_ocr = true; // Could be made configurable

                match sync_service.sync_source(&source, enable_background_ocr).await {
                match sync_service.sync_source_with_cancellation(&source, enable_background_ocr, cancellation_token).await {
                    Ok(files_processed) => {
                        info!("Manual sync completed for source {}: {} files processed",
                            source.name, files_processed);
@@ -309,6 +343,12 @@ impl SourceScheduler {
                        error!("Manual sync failed for source {}: {}", source.name, e);
                    }
                }

                // Cleanup: Remove the sync from running list
                {
                    let mut running_syncs = running_syncs_clone.write().await;
                    running_syncs.remove(&source.id);
                }
            });

            Ok(())
@@ -316,4 +356,40 @@ impl SourceScheduler {
            Err("Source not found".into())
        }
    }

    pub async fn stop_sync(&self, source_id: Uuid) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        info!("Stopping sync for source {}", source_id);

        // Get the cancellation token for this sync
        let cancellation_token = {
            let running_syncs = self.running_syncs.read().await;
            running_syncs.get(&source_id).cloned()
        };

        if let Some(token) = cancellation_token {
            // Cancel the sync operation
            token.cancel();
            info!("Cancellation signal sent for source {}", source_id);

            // Update source status to indicate cancellation
            if let Err(e) = sqlx::query(
                r#"UPDATE sources SET status = 'idle', last_error = 'Sync cancelled by user', last_error_at = NOW(), updated_at = NOW() WHERE id = $1"#
            )
            .bind(source_id)
            .execute(self.state.db.get_pool())
            .await {
                error!("Failed to update source status after cancellation: {}", e);
            }

            // Remove from running syncs list
            {
                let mut running_syncs = self.running_syncs.write().await;
                running_syncs.remove(&source_id);
            }

            Ok(())
        } else {
            Err("No running sync found for this source".into())
        }
    }
}
@@ -3,6 +3,7 @@ use std::path::Path;
use anyhow::{anyhow, Result};
use chrono::Utc;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use futures::stream::{FuturesUnordered, StreamExt};
use sha2::{Sha256, Digest};
use tracing::{error, info, warn};
@@ -29,31 +30,57 @@ impl SourceSyncService {

    /// Perform sync for any source type
    pub async fn sync_source(&self, source: &Source, enable_background_ocr: bool) -> Result<usize> {
        // Call the cancellable version with no cancellation token
        self.sync_source_with_cancellation(source, enable_background_ocr, CancellationToken::new()).await
    }

    /// Perform sync for any source type with cancellation support
    pub async fn sync_source_with_cancellation(&self, source: &Source, enable_background_ocr: bool, cancellation_token: CancellationToken) -> Result<usize> {
        info!("Starting sync for source {} ({})", source.name, source.source_type);

        // Check for cancellation before starting
        if cancellation_token.is_cancelled() {
            info!("Sync for source {} was cancelled before starting", source.name);
            return Err(anyhow!("Sync cancelled"));
        }

        // Update source status to syncing
        if let Err(e) = self.update_source_status(source.id, SourceStatus::Syncing, None).await {
            error!("Failed to update source status: {}", e);
        }

        let sync_result = match source.source_type {
            SourceType::WebDAV => self.sync_webdav_source(source, enable_background_ocr).await,
            SourceType::LocalFolder => self.sync_local_folder_source(source, enable_background_ocr).await,
            SourceType::S3 => self.sync_s3_source(source, enable_background_ocr).await,
            SourceType::WebDAV => self.sync_webdav_source_with_cancellation(source, enable_background_ocr, cancellation_token.clone()).await,
            SourceType::LocalFolder => self.sync_local_folder_source_with_cancellation(source, enable_background_ocr, cancellation_token.clone()).await,
            SourceType::S3 => self.sync_s3_source_with_cancellation(source, enable_background_ocr, cancellation_token.clone()).await,
        };

        match &sync_result {
            Ok(files_processed) => {
                info!("Sync completed for source {}: {} files processed", source.name, files_processed);
                if let Err(e) = self.update_source_status(source.id, SourceStatus::Idle, None).await {
                    error!("Failed to update source status after successful sync: {}", e);
                if cancellation_token.is_cancelled() {
                    info!("Sync for source {} was cancelled during execution", source.name);
                    if let Err(e) = self.update_source_status(source.id, SourceStatus::Idle, Some("Sync cancelled by user")).await {
                        error!("Failed to update source status after cancellation: {}", e);
                    }
                } else {
                    info!("Sync completed for source {}: {} files processed", source.name, files_processed);
                    if let Err(e) = self.update_source_status(source.id, SourceStatus::Idle, None).await {
                        error!("Failed to update source status after successful sync: {}", e);
                    }
                }
            }
            Err(e) => {
                error!("Sync failed for source {}: {}", source.name, e);
                let error_msg = format!("Sync failed: {}", e);
                if let Err(e) = self.update_source_status(source.id, SourceStatus::Error, Some(&error_msg)).await {
                    error!("Failed to update source status after error: {}", e);
                if cancellation_token.is_cancelled() {
                    info!("Sync for source {} was cancelled: {}", source.name, e);
                    if let Err(e) = self.update_source_status(source.id, SourceStatus::Idle, Some("Sync cancelled by user")).await {
                        error!("Failed to update source status after cancellation: {}", e);
                    }
                } else {
                    error!("Sync failed for source {}: {}", source.name, e);
                    let error_msg = format!("Sync failed: {}", e);
                    if let Err(e) = self.update_source_status(source.id, SourceStatus::Error, Some(&error_msg)).await {
                        error!("Failed to update source status after error: {}", e);
                    }
                }
            }
        }
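Cancellation here is cooperative: the token is polled at natural checkpoints (before starting, per folder, per file, after download) rather than aborting the task. A sketch of the checkpoint idiom, with do_step standing in for a unit of sync work; for pure await points, tokio::select! on token.cancelled() is the usual alternative:

    use tokio_util::sync::CancellationToken;

    async fn checkpointed_sync(token: &CancellationToken) -> anyhow::Result<()> {
        for step in 0..3 {
            if token.is_cancelled() {
                anyhow::bail!("Sync cancelled"); // surfaces as Err, mapped to Idle status above
            }
            do_step(step).await?;
        }
        Ok(())
    }

    async fn do_step(_step: usize) -> anyhow::Result<()> { Ok(()) }
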
@@ -62,9 +89,16 @@
    }

    async fn sync_webdav_source(&self, source: &Source, enable_background_ocr: bool) -> Result<usize> {
        self.sync_webdav_source_with_cancellation(source, enable_background_ocr, CancellationToken::new()).await
    }

    async fn sync_webdav_source_with_cancellation(&self, source: &Source, enable_background_ocr: bool, cancellation_token: CancellationToken) -> Result<usize> {
        let config: WebDAVSourceConfig = serde_json::from_value(source.config.clone())
            .map_err(|e| anyhow!("Invalid WebDAV config: {}", e))?;

        info!("WebDAV source sync config: server_url={}, username={}, watch_folders={:?}, file_extensions={:?}, server_type={:?}",
            config.server_url, config.username, config.watch_folders, config.file_extensions, config.server_type);

        let webdav_config = WebDAVConfig {
            server_url: config.server_url,
            username: config.username,
@@ -78,36 +112,60 @@
        let webdav_service = WebDAVService::new(webdav_config.clone())
            .map_err(|e| anyhow!("Failed to create WebDAV service: {}", e))?;

        self.perform_sync_internal(
        info!("WebDAV service created successfully, starting sync with {} folders", webdav_config.watch_folders.len());

        self.perform_sync_internal_with_cancellation(
            source.user_id,
            source.id,
            &webdav_config.watch_folders,
            &webdav_config.file_extensions,
            enable_background_ocr,
            cancellation_token,
            |folder_path| {
                let service = webdav_service.clone();
                async move { service.discover_files_in_folder(&folder_path).await }
                async move {
                    info!("WebDAV discover_files_in_folder called for: {}", folder_path);
                    let result = service.discover_files_in_folder(&folder_path).await;
                    match &result {
                        Ok(files) => info!("WebDAV discovered {} files in folder: {}", files.len(), folder_path),
                        Err(e) => error!("WebDAV discovery failed for folder {}: {}", folder_path, e),
                    }
                    result
                }
            },
            |file_path| {
                let service = webdav_service.clone();
                async move { service.download_file(&file_path).await }
                async move {
                    info!("WebDAV download_file called for: {}", file_path);
                    let result = service.download_file(&file_path).await;
                    match &result {
                        Ok(data) => info!("WebDAV downloaded {} bytes for file: {}", data.len(), file_path),
                        Err(e) => error!("WebDAV download failed for file {}: {}", file_path, e),
                    }
                    result
                }
            }
        ).await
    }

    async fn sync_local_folder_source(&self, source: &Source, enable_background_ocr: bool) -> Result<usize> {
        self.sync_local_folder_source_with_cancellation(source, enable_background_ocr, CancellationToken::new()).await
    }

    async fn sync_local_folder_source_with_cancellation(&self, source: &Source, enable_background_ocr: bool, cancellation_token: CancellationToken) -> Result<usize> {
        let config: LocalFolderSourceConfig = serde_json::from_value(source.config.clone())
            .map_err(|e| anyhow!("Invalid LocalFolder config: {}", e))?;

        let local_service = LocalFolderService::new(config.clone())
            .map_err(|e| anyhow!("Failed to create LocalFolder service: {}", e))?;

        self.perform_sync_internal(
        self.perform_sync_internal_with_cancellation(
            source.user_id,
            source.id,
            &config.watch_folders,
            &config.file_extensions,
            enable_background_ocr,
            cancellation_token,
            |folder_path| {
                let service = local_service.clone();
                async move { service.discover_files_in_folder(&folder_path).await }
@@ -120,18 +178,23 @@
    }

    async fn sync_s3_source(&self, source: &Source, enable_background_ocr: bool) -> Result<usize> {
        self.sync_s3_source_with_cancellation(source, enable_background_ocr, CancellationToken::new()).await
    }

    async fn sync_s3_source_with_cancellation(&self, source: &Source, enable_background_ocr: bool, cancellation_token: CancellationToken) -> Result<usize> {
        let config: S3SourceConfig = serde_json::from_value(source.config.clone())
            .map_err(|e| anyhow!("Invalid S3 config: {}", e))?;

        let s3_service = S3Service::new(config.clone()).await
            .map_err(|e| anyhow!("Failed to create S3 service: {}", e))?;

        self.perform_sync_internal(
        self.perform_sync_internal_with_cancellation(
            source.user_id,
            source.id,
            &config.watch_folders,
            &config.file_extensions,
            enable_background_ocr,
            cancellation_token,
            |folder_path| {
                let service = s3_service.clone();
                async move { service.discover_files_in_folder(&folder_path).await }
@@ -243,6 +306,132 @@
        Ok(total_files_processed)
    }

    async fn perform_sync_internal_with_cancellation<F, D, Fut1, Fut2>(
        &self,
        user_id: Uuid,
        _source_id: Uuid,
        watch_folders: &[String],
        file_extensions: &[String],
        enable_background_ocr: bool,
        cancellation_token: CancellationToken,
        discover_files: F,
        download_file: D,
    ) -> Result<usize>
    where
        F: Fn(String) -> Fut1,
        D: Fn(String) -> Fut2 + Clone,
        Fut1: std::future::Future<Output = Result<Vec<FileInfo>>>,
        Fut2: std::future::Future<Output = Result<Vec<u8>>>,
    {
        let mut total_files_processed = 0;

        for folder_path in watch_folders {
            // Check for cancellation before processing each folder
            if cancellation_token.is_cancelled() {
                info!("Sync cancelled during folder processing");
                return Err(anyhow!("Sync cancelled"));
            }

            info!("Syncing folder: {}", folder_path);

            // Discover files in the folder
            match discover_files(folder_path.clone()).await {
                Ok(files) => {
                    if cancellation_token.is_cancelled() {
                        info!("Sync cancelled after discovering files");
                        return Err(anyhow!("Sync cancelled"));
                    }

                    info!("Found {} files in folder {}", files.len(), folder_path);

                    // Filter files for processing
                    let files_to_process: Vec<_> = files.into_iter()
                        .filter(|file_info| {
                            if file_info.is_directory {
                                return false;
                            }

                            let file_extension = Path::new(&file_info.name)
                                .extension()
                                .and_then(|ext| ext.to_str())
                                .unwrap_or("")
                                .to_lowercase();

                            file_extensions.contains(&file_extension)
                        })
                        .collect();

                    info!("Processing {} files from folder {}", files_to_process.len(), folder_path);

                    // Process files concurrently with a limit
                    let concurrent_limit = 5;
                    let semaphore = Arc::new(Semaphore::new(concurrent_limit));
                    let mut folder_files_processed = 0;

                    let mut file_futures = FuturesUnordered::new();

                    for file_info in files_to_process.iter() {
                        // Check for cancellation before processing each file
                        if cancellation_token.is_cancelled() {
                            info!("Sync cancelled during file processing");
                            return Err(anyhow!("Sync cancelled"));
                        }

                        let state_clone = self.state.clone();
                        let file_info_clone = file_info.clone();
                        let semaphore_clone = semaphore.clone();
                        let download_file_clone = download_file.clone();
                        let cancellation_token_clone = cancellation_token.clone();

                        let future = async move {
                            Self::process_single_file_with_cancellation(
                                state_clone,
                                user_id,
                                _source_id,
                                &file_info_clone,
                                enable_background_ocr,
                                semaphore_clone,
                                download_file_clone,
                                cancellation_token_clone,
                            ).await
                        };

                        file_futures.push(future);
                    }

                    // Process files concurrently
                    while let Some(result) = file_futures.next().await {
                        // Check for cancellation during processing
                        if cancellation_token.is_cancelled() {
                            info!("Sync cancelled during concurrent file processing");
                            return Err(anyhow!("Sync cancelled"));
                        }

                        match result {
                            Ok(processed) => {
                                if processed {
                                    folder_files_processed += 1;
                                    info!("Successfully processed file ({} completed in this folder)", folder_files_processed);
                                }
                            }
                            Err(error) => {
                                error!("File processing error: {}", error);
                            }
                        }
                    }

                    total_files_processed += folder_files_processed;
                }
                Err(e) => {
                    error!("Failed to discover files in folder {}: {}", folder_path, e);
                }
            }
        }

        info!("Source sync completed: {} files processed", total_files_processed);
        Ok(total_files_processed)
    }

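The per-folder loop above combines a Semaphore (at most five downloads in flight) with FuturesUnordered (results consumed in completion order). A stripped-down sketch of that pattern, assuming nothing about FileInfo or the sync service:

    use futures::stream::{FuturesUnordered, StreamExt};
    use std::sync::Arc;
    use tokio::sync::Semaphore;

    async fn run_bounded<T>(tasks: Vec<impl std::future::Future<Output = T>>, limit: usize) -> Vec<T> {
        let semaphore = Arc::new(Semaphore::new(limit));
        let mut in_flight = FuturesUnordered::new();
        for task in tasks {
            let sem = semaphore.clone();
            in_flight.push(async move {
                let _permit = sem.acquire().await.expect("semaphore closed");
                task.await // permit is held for the duration of the task
            });
        }
        let mut results = Vec::new();
        while let Some(result) = in_flight.next().await {
            results.push(result);
        }
        results
    }
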
    async fn process_single_file<D, Fut>(
        state: Arc<AppState>,
        user_id: Uuid,
@@ -348,6 +537,133 @@
        Ok(true)
    }

    async fn process_single_file_with_cancellation<D, Fut>(
        state: Arc<AppState>,
        user_id: Uuid,
        _source_id: Uuid,
        file_info: &FileInfo,
        enable_background_ocr: bool,
        semaphore: Arc<Semaphore>,
        download_file: D,
        cancellation_token: CancellationToken,
    ) -> Result<bool>
    where
        D: Fn(String) -> Fut,
        Fut: std::future::Future<Output = Result<Vec<u8>>>,
    {
        // Check for cancellation before starting file processing
        if cancellation_token.is_cancelled() {
            info!("File processing cancelled before starting: {}", file_info.path);
            return Err(anyhow!("Processing cancelled"));
        }

        let _permit = semaphore.acquire().await
            .map_err(|e| anyhow!("Semaphore error: {}", e))?;

        info!("Processing file: {}", file_info.path);

        // Check for cancellation again after acquiring semaphore
        if cancellation_token.is_cancelled() {
            info!("File processing cancelled after acquiring semaphore: {}", file_info.path);
            return Err(anyhow!("Processing cancelled"));
        }

        // Download the file
        let file_data = download_file(file_info.path.clone()).await
            .map_err(|e| anyhow!("Failed to download {}: {}", file_info.path, e))?;

        // Check for cancellation after download
        if cancellation_token.is_cancelled() {
            info!("File processing cancelled after download: {}", file_info.path);
            return Err(anyhow!("Processing cancelled"));
        }

        info!("Downloaded file: {} ({} bytes)", file_info.name, file_data.len());

        // Calculate file hash for deduplication
        let file_hash = Self::calculate_file_hash(&file_data);

        // Check for duplicate content
        if let Ok(existing_docs) = state.db.get_documents_by_user_with_role(
            user_id,
            crate::models::UserRole::User,
            1000,
            0
        ).await {
            let matching_docs: Vec<_> = existing_docs.into_iter()
                .filter(|doc| doc.file_size == file_data.len() as i64)
                .collect();

            for existing_doc in matching_docs {
                if let Ok(existing_file_data) = tokio::fs::read(&existing_doc.file_path).await {
                    let existing_hash = Self::calculate_file_hash(&existing_file_data);
                    if file_hash == existing_hash {
                        info!("File content already exists, skipping: {}", file_info.path);
                        return Ok(false);
                    }
                }
            }
        }

        // Check for cancellation before saving
        if cancellation_token.is_cancelled() {
            info!("File processing cancelled before saving: {}", file_info.path);
            return Err(anyhow!("Processing cancelled"));
        }

        // Save file to disk
        let file_service = FileService::new(state.config.upload_path.clone());
        let saved_file_path = file_service.save_file(&file_info.name, &file_data).await
            .map_err(|e| anyhow!("Failed to save {}: {}", file_info.name, e))?;

        // Create document record
        let document = file_service.create_document(
            &file_info.name,
            &file_info.name,
            &saved_file_path,
            file_data.len() as i64,
            &file_info.mime_type,
            user_id,
        );

        let created_document = state.db.create_document(document).await
            .map_err(|e| anyhow!("Failed to create document {}: {}", file_info.name, e))?;

        info!("Created document record for {}: {}", file_info.name, created_document.id);

        // Queue for OCR if enabled (OCR continues even if sync is cancelled)
        if enable_background_ocr {
            info!("Background OCR enabled, queueing document {} for processing", created_document.id);

            match state.db.pool.acquire().await {
                Ok(_conn) => {
                    let queue_service = crate::ocr_queue::OcrQueueService::new(
                        state.db.clone(),
                        state.db.pool.clone(),
                        4
                    );

                    let priority = if file_info.size <= 1024 * 1024 { 10 }
                        else if file_info.size <= 5 * 1024 * 1024 { 8 }
                        else if file_info.size <= 10 * 1024 * 1024 { 6 }
                        else if file_info.size <= 50 * 1024 * 1024 { 4 }
                        else { 2 };

                    if let Err(e) = queue_service.enqueue_document(created_document.id, priority, file_info.size).await {
                        error!("Failed to enqueue document for OCR: {}", e);
                    } else {
                        info!("Enqueued document {} for OCR processing", created_document.id);
                    }
                }
                Err(e) => {
                    error!("Failed to connect to database for OCR queueing: {}", e);
                }
            }
        }

        Ok(true)
    }

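The deduplication path applies a cheap size pre-filter first, then compares full content hashes via Self::calculate_file_hash, whose implementation is outside this diff. A sketch consistent with the sha2 import added above; the helper body and hex encoding are assumptions:

    use sha2::{Digest, Sha256};

    fn calculate_file_hash(data: &[u8]) -> String {
        // One-shot SHA-256 over the file bytes, rendered as lowercase hex
        Sha256::digest(data)
            .iter()
            .map(|byte| format!("{:02x}", byte))
            .collect()
    }
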
    async fn update_source_status(&self, source_id: Uuid, status: SourceStatus, error_message: Option<&str>) -> Result<()> {
        let query = if let Some(error) = error_message {
            sqlx::query(

268
tests/source_update_tests.rs
Normal file
@@ -0,0 +1,268 @@
/*!
 * Source Update API Tests
 *
 * Tests for the PUT /api/sources/{id} endpoint
 */

use serde_json::json;
use uuid::Uuid;

use readur::{
    models::{UpdateSource, WebDAVSourceConfig, LocalFolderSourceConfig, S3SourceConfig, SourceType},
};

#[test]
fn test_update_source_payload_serialization() {
    // Test WebDAV update payload
    let webdav_update = UpdateSource {
        name: Some("Updated WebDAV Source".to_string()),
        enabled: Some(true),
        config: Some(json!({
            "server_url": "https://cloud.example.com",
            "username": "testuser",
            "password": "testpass",
            "watch_folders": ["/Documents", "/Pictures"],
            "file_extensions": [".pdf", ".txt", ".docx"],
            "auto_sync": true,
            "sync_interval_minutes": 60,
            "server_type": "nextcloud"
        })),
    };

    // Test serialization
    let serialized = serde_json::to_string(&webdav_update).unwrap();
    assert!(!serialized.is_empty());

    // Test deserialization back
    let deserialized: UpdateSource = serde_json::from_str(&serialized).unwrap();
    assert_eq!(deserialized.name, webdav_update.name);
    assert_eq!(deserialized.enabled, webdav_update.enabled);
}

#[test]
fn test_webdav_config_validation() {
    let config_json = json!({
        "server_url": "https://cloud.example.com",
        "username": "testuser",
        "password": "testpass",
        "watch_folders": ["/Documents"],
        "file_extensions": [".pdf", ".txt"],
        "auto_sync": true,
        "sync_interval_minutes": 60,
        "server_type": "nextcloud"
    });

    // This should deserialize successfully
    let config: Result<WebDAVSourceConfig, _> = serde_json::from_value(config_json);
    assert!(config.is_ok());

    let webdav_config = config.unwrap();
    assert_eq!(webdav_config.server_url, "https://cloud.example.com");
    assert_eq!(webdav_config.username, "testuser");
    assert_eq!(webdav_config.auto_sync, true);
    assert_eq!(webdav_config.sync_interval_minutes, 60);
    assert_eq!(webdav_config.server_type, Some("nextcloud".to_string()));
}

#[test]
fn test_local_folder_config_validation() {
    let config_json = json!({
        "watch_folders": ["/home/user/documents"],
        "file_extensions": [".pdf", ".txt"],
        "auto_sync": true,
        "sync_interval_minutes": 30,
        "recursive": true,
        "follow_symlinks": false
    });

    let config: Result<LocalFolderSourceConfig, _> = serde_json::from_value(config_json);
    assert!(config.is_ok());

    let local_config = config.unwrap();
    assert_eq!(local_config.watch_folders, vec!["/home/user/documents"]);
    assert_eq!(local_config.recursive, true);
    assert_eq!(local_config.follow_symlinks, false);
}

#[test]
fn test_s3_config_validation() {
    let config_json = json!({
        "bucket_name": "my-bucket",
        "region": "us-east-1",
        "access_key_id": "AKIAIOSFODNN7EXAMPLE",
        "secret_access_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        "endpoint_url": "https://s3.amazonaws.com",
        "prefix": "documents/",
        "watch_folders": ["/uploads"],
        "file_extensions": [".pdf", ".docx"],
        "auto_sync": true,
        "sync_interval_minutes": 120
    });

    let config: Result<S3SourceConfig, _> = serde_json::from_value(config_json);
    assert!(config.is_ok());

    let s3_config = config.unwrap();
    assert_eq!(s3_config.bucket_name, "my-bucket");
    assert_eq!(s3_config.region, "us-east-1");
    assert_eq!(s3_config.endpoint_url, Some("https://s3.amazonaws.com".to_string()));
    assert_eq!(s3_config.prefix, Some("documents/".to_string()));
}

#[test]
fn test_invalid_webdav_config() {
    // Missing required fields
    let invalid_config = json!({
        "server_url": "https://cloud.example.com",
        // Missing username and password
        "watch_folders": ["/Documents"],
        "file_extensions": [".pdf"],
        "auto_sync": true,
        "sync_interval_minutes": 60
    });

    let config: Result<WebDAVSourceConfig, _> = serde_json::from_value(invalid_config);
    assert!(config.is_err());
}

#[test]
fn test_config_validation_for_type() {
    // This mimics the validation function in routes/sources.rs
    fn validate_config_for_type(
        source_type: &SourceType,
        config: &serde_json::Value,
    ) -> Result<(), &'static str> {
        match source_type {
            SourceType::WebDAV => {
                let _: WebDAVSourceConfig =
                    serde_json::from_value(config.clone()).map_err(|_| "Invalid WebDAV configuration")?;
                Ok(())
            }
            SourceType::LocalFolder => {
                let _: LocalFolderSourceConfig =
                    serde_json::from_value(config.clone()).map_err(|_| "Invalid Local Folder configuration")?;
                Ok(())
            }
            SourceType::S3 => {
                let _: S3SourceConfig =
                    serde_json::from_value(config.clone()).map_err(|_| "Invalid S3 configuration")?;
                Ok(())
            }
        }
    }

    // Test valid WebDAV config
    let webdav_config = json!({
        "server_url": "https://cloud.example.com",
        "username": "testuser",
        "password": "testpass",
        "watch_folders": ["/Documents"],
        "file_extensions": [".pdf"],
        "auto_sync": true,
        "sync_interval_minutes": 60,
        "server_type": "nextcloud"
    });

    assert!(validate_config_for_type(&SourceType::WebDAV, &webdav_config).is_ok());

    // Test invalid config for WebDAV (missing password)
    let invalid_webdav_config = json!({
        "server_url": "https://cloud.example.com",
        "username": "testuser",
        // missing password
        "watch_folders": ["/Documents"],
        "file_extensions": [".pdf"],
        "auto_sync": true,
        "sync_interval_minutes": 60
    });

    assert!(validate_config_for_type(&SourceType::WebDAV, &invalid_webdav_config).is_err());
}

#[test]
fn test_update_source_partial_updates() {
    // Test updating only name
    let name_only_update = UpdateSource {
        name: Some("New Name".to_string()),
        enabled: None,
        config: None,
    };

    let serialized = serde_json::to_string(&name_only_update).unwrap();
    let deserialized: UpdateSource = serde_json::from_str(&serialized).unwrap();

    assert_eq!(deserialized.name, Some("New Name".to_string()));
    assert_eq!(deserialized.enabled, None);
    assert_eq!(deserialized.config, None);

    // Test updating only enabled status
    let enabled_only_update = UpdateSource {
        name: None,
        enabled: Some(false),
        config: None,
    };

    let serialized = serde_json::to_string(&enabled_only_update).unwrap();
    let deserialized: UpdateSource = serde_json::from_str(&serialized).unwrap();

    assert_eq!(deserialized.name, None);
    assert_eq!(deserialized.enabled, Some(false));
    assert_eq!(deserialized.config, None);
}

#[test]
fn test_frontend_payload_format() {
    // This test matches exactly what the frontend sends
    let frontend_payload = json!({
        "name": "My WebDAV Source",
        "enabled": true,
        "config": {
            "server_url": "https://cloud.example.com",
            "username": "testuser",
            "password": "testpass",
            "watch_folders": ["/Documents", "/Pictures"],
            "file_extensions": [".pdf", ".txt", ".docx"],
            "auto_sync": true,
            "sync_interval_minutes": 60,
            "server_type": "nextcloud"
        }
    });

    // Test that this can be deserialized into UpdateSource
    let update: Result<UpdateSource, _> = serde_json::from_value(frontend_payload);
    assert!(update.is_ok());

    let update_source = update.unwrap();
    assert_eq!(update_source.name, Some("My WebDAV Source".to_string()));
    assert_eq!(update_source.enabled, Some(true));
    assert!(update_source.config.is_some());

    // Test that the config can be validated as WebDAV
    if let Some(config) = &update_source.config {
        let webdav_config: Result<WebDAVSourceConfig, _> = serde_json::from_value(config.clone());
        assert!(webdav_config.is_ok());
    }
}

#[test]
fn test_empty_arrays_and_optional_fields() {
    // Test with empty arrays (should be valid)
    let config_with_empty_arrays = json!({
        "server_url": "https://cloud.example.com",
        "username": "testuser",
        "password": "testpass",
        "watch_folders": [], // Empty array
        "file_extensions": [], // Empty array
        "auto_sync": false,
        "sync_interval_minutes": 0,
        "server_type": null // Null optional field
    });

    let config: Result<WebDAVSourceConfig, _> = serde_json::from_value(config_with_empty_arrays);
    assert!(config.is_ok());

    let webdav_config = config.unwrap();
    assert!(webdav_config.watch_folders.is_empty());
    assert!(webdav_config.file_extensions.is_empty());
    assert_eq!(webdav_config.server_type, None);
}
376
tests/stop_sync_functionality_tests.rs
Normal file
@@ -0,0 +1,376 @@
/*!
 * Stop/Cancel Sync Functionality Tests
 *
 * Tests for the new stop/cancel sync functionality including:
 * - API endpoint for stopping sync
 * - Source scheduler cancellation support
 * - Cancellation token propagation
 * - Graceful sync termination
 * - OCR continuation after sync cancellation
 */

use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
use chrono::Utc;
use serde_json::json;
use tokio::time::sleep;

use readur::{
    AppState,
    config::Config,
    db::Database,
    models::{Source, SourceType, SourceStatus, WebDAVSourceConfig},
    source_scheduler::SourceScheduler,
};

/// Create a test app state
async fn create_test_app_state() -> Arc<AppState> {
    let config = Config {
        database_url: "sqlite::memory:".to_string(),
        server_address: "127.0.0.1:8080".to_string(),
        jwt_secret: "test_secret".to_string(),
        upload_path: "/tmp/test_uploads".to_string(),
        watch_folder: "/tmp/watch".to_string(),
        allowed_file_types: vec!["pdf".to_string(), "txt".to_string()],
        watch_interval_seconds: Some(10),
        file_stability_check_ms: Some(1000),
        max_file_age_hours: Some(24),
        ocr_language: "eng".to_string(),
        concurrent_ocr_jobs: 4,
        ocr_timeout_seconds: 300,
        max_file_size_mb: 100,
        memory_limit_mb: 512,
        cpu_priority: "normal".to_string(),
    };

    let db = Database::new(&config.database_url).await.unwrap();

    Arc::new(AppState {
        db,
        config,
        webdav_scheduler: None,
        source_scheduler: None,
    })
}

/// Create a test source for stop sync testing
fn create_test_source_for_stop_sync(user_id: Uuid) -> Source {
    Source {
        id: Uuid::new_v4(),
        user_id,
        name: "Test Source for Stop Sync".to_string(),
        source_type: SourceType::WebDAV,
        enabled: true,
        config: json!({
            "server_url": "https://cloud.example.com",
            "username": "testuser",
            "password": "testpass",
            "watch_folders": ["/Documents"],
            "file_extensions": [".pdf", ".txt"],
            "auto_sync": true,
            "sync_interval_minutes": 60,
            "server_type": "nextcloud"
        }),
        status: SourceStatus::Idle,
        last_sync_at: None,
        last_error: None,
        last_error_at: None,
        total_files_synced: 0,
        total_files_pending: 0,
        total_size_bytes: 0,
        created_at: Utc::now(),
        updated_at: Utc::now(),
    }
}

#[tokio::test]
async fn test_source_scheduler_creation_with_cancellation() {
    let state = create_test_app_state().await;

    // Constructing the scheduler without panicking is the assertion here;
    // the leading underscore keeps the unused-variable lint quiet.
    let _scheduler = SourceScheduler::new(state.clone());
}

#[test]
fn test_stop_sync_api_endpoint_structure() {
    // Verify that the sync endpoints follow REST conventions:
    //   POST /api/sources/{id}/sync      -> trigger a sync
    //   POST /api/sources/{id}/sync/stop -> stop a running sync
    let base_path = "/api/sources";
    let source_id = "test-id";
    let sync_action = "sync";
    let stop_action = "stop";

    let trigger_endpoint = format!("{}/{}/{}", base_path, source_id, sync_action);
    let stop_endpoint = format!("{}/{}/{}/{}", base_path, source_id, sync_action, stop_action);

    assert_eq!(trigger_endpoint, "/api/sources/test-id/sync");
    assert_eq!(stop_endpoint, "/api/sources/test-id/sync/stop");
}

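// For reference, a hypothetical axum registration matching this URL shape.
// This is a sketch only: the handler names and router layout below are
// assumptions for illustration, not readur's actual routing code.
//
//     use axum::{routing::post, Router};
//
//     async fn trigger_sync() {}
//     async fn stop_sync() {}
//
//     let app: Router = Router::new()
//         .route("/api/sources/:id/sync", post(trigger_sync))
//         .route("/api/sources/:id/sync/stop", post(stop_sync));
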
#[test]
fn test_source_status_transitions_for_cancellation() {
    // Test valid status transitions when cancelling sync

    // Initial state: the source is syncing
    let status = SourceStatus::Syncing;
    assert_eq!(status, SourceStatus::Syncing);

    // After cancellation the source should return to idle
    // (shadowing avoids an "assigned but never read" warning)
    let status = SourceStatus::Idle;
    assert_eq!(status, SourceStatus::Idle);

    // Test invalid transitions: the error status should not be used
    // for user-initiated cancellation
    let error_status = SourceStatus::Error;
    assert_ne!(error_status, SourceStatus::Idle);
}

#[test]
fn test_cancellation_reasons() {
    // Test different cancellation scenarios

    #[derive(Debug, PartialEq)]
    enum CancellationReason {
        UserRequested,
        ServerShutdown,
        NetworkError,
        Timeout,
    }

    let user_cancellation = CancellationReason::UserRequested;
    let server_cancellation = CancellationReason::ServerShutdown;

    // User-requested cancellation should be different from server shutdown
    assert_ne!(user_cancellation, server_cancellation);

    // Both should result in sync being stopped
    let should_stop = match user_cancellation {
        CancellationReason::UserRequested => true,
        CancellationReason::ServerShutdown => true,
        CancellationReason::NetworkError => false, // Might retry
        CancellationReason::Timeout => false,      // Might retry
    };

    assert!(should_stop);
}

#[test]
fn test_cancellation_token_behavior() {
    use tokio_util::sync::CancellationToken;

    // Test cancellation token creation and usage
    let token = CancellationToken::new();

    // Initially not cancelled
    assert!(!token.is_cancelled());

    // After cancellation
    token.cancel();
    assert!(token.is_cancelled());

    // Child tokens created from a cancelled parent are also cancelled
    let child_token = token.child_token();
    assert!(child_token.is_cancelled());
}

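// A minimal sketch (not part of the original suite) of the await-based
// pattern: tokio::select! races the work against token.cancelled(), so a
// sync task reacts to cancellation without polling. Names here are
// illustrative only.
#[tokio::test]
async fn test_cancellation_token_with_select_sketch() {
    use tokio_util::sync::CancellationToken;

    let token = CancellationToken::new();
    let child = token.child_token();

    let worker = tokio::spawn(async move {
        tokio::select! {
            _ = child.cancelled() => "cancelled",
            _ = sleep(Duration::from_secs(5)) => "finished",
        }
    });

    // Cancel immediately; the worker should observe it long before the sleep ends
    token.cancel();
    assert_eq!(worker.await.unwrap(), "cancelled");
}
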
#[tokio::test]
async fn test_graceful_cancellation_behavior() {
    // Test that cancellation allows current operations to complete gracefully

    use tokio_util::sync::CancellationToken;
    use std::sync::atomic::{AtomicU32, Ordering};

    let token = CancellationToken::new();
    let work_completed = Arc::new(AtomicU32::new(0));
    let work_completed_clone = work_completed.clone();

    let token_clone = token.clone();

    // Simulate work that checks for cancellation
    let work_handle = tokio::spawn(async move {
        for i in 1..=10 {
            // Check for cancellation before each unit of work
            if token_clone.is_cancelled() {
                // Complete the current work item gracefully
                work_completed_clone.store(i - 1, Ordering::Relaxed);
                break;
            }

            // Simulate work
            sleep(Duration::from_millis(10)).await;
            work_completed_clone.store(i, Ordering::Relaxed);
        }
    });

    // Let some work complete
    sleep(Duration::from_millis(30)).await;

    // Cancel the work
    token.cancel();

    // Wait for graceful shutdown
    work_handle.await.unwrap();

    let completed = work_completed.load(Ordering::Relaxed);

    // Should have completed some work but not all
    assert!(completed > 0, "Some work should have been completed");
    assert!(completed < 10, "Not all work should have been completed");
}

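// A sketch of the same loop written with tokio::select!, so cancellation
// interrupts the in-progress sleep instead of being noticed only between
// work items. Illustrative only, not the scheduler's actual sync loop.
#[tokio::test]
async fn test_graceful_cancellation_with_select_sketch() {
    use tokio_util::sync::CancellationToken;

    let token = CancellationToken::new();
    let child = token.child_token();

    let handle = tokio::spawn(async move {
        let mut completed = 0u32;
        for _ in 0..10 {
            tokio::select! {
                // Cancellation wins the race even mid-sleep
                _ = child.cancelled() => break,
                _ = sleep(Duration::from_millis(10)) => completed += 1,
            }
        }
        completed
    });

    sleep(Duration::from_millis(30)).await;
    token.cancel();

    let completed = handle.await.unwrap();
    assert!(completed < 10, "Cancellation should cut the loop short");
}
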
#[test]
fn test_error_messages_for_stop_sync() {
    // Test the error mapping for different stop sync failure scenarios

    #[derive(Debug, PartialEq)]
    enum StopSyncError {
        SourceNotFound,
        NotCurrentlySyncing,
        PermissionDenied,
        InternalError,
    }

    // Expected HTTP status code for each error variant
    let test_cases = vec![
        (404, StopSyncError::SourceNotFound),
        (409, StopSyncError::NotCurrentlySyncing),
        (403, StopSyncError::PermissionDenied),
        (500, StopSyncError::InternalError),
    ];

    for (status_code, expected_error) in test_cases {
        let actual_error = match status_code {
            404 => StopSyncError::SourceNotFound,
            409 => StopSyncError::NotCurrentlySyncing,
            403 => StopSyncError::PermissionDenied,
            _ => StopSyncError::InternalError,
        };

        assert_eq!(actual_error, expected_error);
    }
}

#[test]
fn test_ocr_continuation_after_sync_cancellation() {
    // Test that OCR continues processing even after sync is cancelled

    #[derive(Debug, PartialEq)]
    enum ProcessingStatus {
        SyncActive,
        SyncCancelled,
        OcrContinuing,
        OcrCompleted,
    }

    let mut status = ProcessingStatus::SyncActive;
    assert_eq!(status, ProcessingStatus::SyncActive);

    // Sync is cancelled
    status = ProcessingStatus::SyncCancelled;
    assert_eq!(status, ProcessingStatus::SyncCancelled);

    // OCR should continue
    status = ProcessingStatus::OcrContinuing;
    assert_eq!(status, ProcessingStatus::OcrContinuing);

    // OCR can complete independently of the cancelled sync
    status = ProcessingStatus::OcrCompleted;
    assert_eq!(status, ProcessingStatus::OcrCompleted);
}

#[test]
fn test_frontend_button_states() {
    // Test that frontend button states are correct

    #[derive(Debug, PartialEq)]
    enum ButtonState {
        ShowStart,
        ShowStop,
        ShowLoading,
        Disabled,
    }

    #[derive(Debug, PartialEq)]
    enum SourceStatus {
        Idle,
        Syncing,
        Error,
    }

    let get_button_state = |status: &SourceStatus, enabled: bool| -> ButtonState {
        if !enabled {
            return ButtonState::Disabled;
        }

        match status {
            SourceStatus::Idle => ButtonState::ShowStart,
            SourceStatus::Syncing => ButtonState::ShowStop,
            SourceStatus::Error => ButtonState::ShowStart,
        }
    };

    // Test different scenarios
    assert_eq!(get_button_state(&SourceStatus::Idle, true), ButtonState::ShowStart);
    assert_eq!(get_button_state(&SourceStatus::Syncing, true), ButtonState::ShowStop);
    assert_eq!(get_button_state(&SourceStatus::Error, true), ButtonState::ShowStart);
    assert_eq!(get_button_state(&SourceStatus::Idle, false), ButtonState::Disabled);
}

#[tokio::test]
async fn test_stop_sync_scheduler_method() {
    let state = create_test_app_state().await;
    let scheduler = SourceScheduler::new(state.clone());

    // Stopping a sync that was never started should return an error
    let non_existent_id = Uuid::new_v4();
    let result = scheduler.stop_sync(non_existent_id).await;

    assert!(result.is_err());
    assert!(result.unwrap_err().to_string().contains("No running sync found"));
}

#[test]
fn test_cancellation_cleanup() {
    // Test that cancellation properly cleans up resources

    use std::collections::HashMap;

    let mut running_syncs: HashMap<Uuid, bool> = HashMap::new();
    let source_id = Uuid::new_v4();

    // Start sync
    running_syncs.insert(source_id, true);
    assert!(running_syncs.contains_key(&source_id));

    // Cancel and clean up
    running_syncs.remove(&source_id);
    assert!(!running_syncs.contains_key(&source_id));
}

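// A sketch of the same cleanup against a shared registry. This is an
// assumption about typical scheduler internals, not readur's actual data
// structure: running syncs are tracked in a locked map keyed to a
// CancellationToken.
#[tokio::test]
async fn test_cancellation_cleanup_shared_registry_sketch() {
    use std::collections::HashMap;
    use tokio::sync::Mutex;
    use tokio_util::sync::CancellationToken;

    let running: Arc<Mutex<HashMap<Uuid, CancellationToken>>> =
        Arc::new(Mutex::new(HashMap::new()));
    let source_id = Uuid::new_v4();

    // Register a running sync
    running.lock().await.insert(source_id, CancellationToken::new());

    // Remove the registration first, then cancel, so no caller can look up
    // a token for a sync that is being torn down
    if let Some(token) = running.lock().await.remove(&source_id) {
        token.cancel();
        assert!(token.is_cancelled());
    }
    assert!(running.lock().await.is_empty());
}
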
#[test]
fn test_performance_impact_of_cancellation_checks() {
    // Test that cancellation checks don't significantly impact performance

    use std::time::Instant;
    use tokio_util::sync::CancellationToken;

    let token = CancellationToken::new();
    let start = Instant::now();

    // Simulate many cancellation checks
    for _ in 0..10_000 {
        let _is_cancelled = token.is_cancelled();
    }

    let duration = start.elapsed();

    // Should complete quickly (well under 10ms for 10k checks)
    assert!(duration.as_millis() < 10, "Cancellation checks should be fast");
}