fix(server): resolve compilation warnings and update the legacy DOC test, which expected extraction to fail, so that it expects extraction to succeed

This commit is contained in:
perf3ct
2025-09-02 22:51:17 +00:00
parent 1b7fbed90d
commit 43b679f59b
7 changed files with 90 additions and 28 deletions

2
docs/v2.6.0 Normal file
View File

@@ -0,0 +1,2 @@
> [!WARNING]
> The external dependencies `catdoc` and `antiword` have been added to support consumption of `.doc` documents.

View File

@@ -195,7 +195,7 @@ impl Database {
("ocr_text", document.ocr_text.as_deref().unwrap_or(""))
];
for (source, text) in texts {
for (_source, text) in texts {
if text.is_empty() {
continue;
}

View File

@@ -6,7 +6,7 @@ use std::collections::HashMap;
use super::Database;
use crate::models::{
CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats,
ErrorSourceType, SourceErrorType, SourceErrorSeverity, ListFailuresQuery,
ErrorSourceType, ListFailuresQuery,
};
impl Database {
@@ -59,22 +59,22 @@ impl Database {
let mut bind_index = 2;
let mut conditions = Vec::new();
if let Some(source_type) = &query.source_type {
if let Some(_source_type) = &query.source_type {
conditions.push(format!("source_type = ${}::source_error_source_type", bind_index));
bind_index += 1;
}
if let Some(source_id) = &query.source_id {
if let Some(_source_id) = &query.source_id {
conditions.push(format!("source_id = ${}", bind_index));
bind_index += 1;
}
if let Some(error_type) = &query.error_type {
if let Some(_error_type) = &query.error_type {
conditions.push(format!("error_type = ${}::source_error_type", bind_index));
bind_index += 1;
}
if let Some(severity) = &query.severity {
if let Some(_severity) = &query.severity {
conditions.push(format!("error_severity = ${}::source_error_severity", bind_index));
bind_index += 1;
}
@@ -104,12 +104,12 @@ impl Database {
sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC");
if let Some(limit) = query.limit {
if let Some(_limit) = query.limit {
sql.push_str(&format!(" LIMIT ${}", bind_index));
bind_index += 1;
}
if let Some(offset) = query.offset {
if let Some(_offset) = query.offset {
sql.push_str(&format!(" OFFSET ${}", bind_index));
}
@@ -361,7 +361,7 @@ impl Database {
WHERE user_id = $1"#
);
let mut bind_index = 2;
let bind_index = 2;
if let Some(_) = source_type {
sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index));
}

View File

@@ -1,5 +1,4 @@
use anyhow::Result;
use std::collections::HashMap;
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
use crate::models::{
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,

View File

@@ -1,5 +1,4 @@
use anyhow::Result;
use std::collections::HashMap;
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
use crate::models::{
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,

View File

@@ -1,11 +1,11 @@
use anyhow::{anyhow, Result};
use reqwest::{Client, Method, Response};
use reqwest::{Client, Method};
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use tokio::sync::Semaphore;
use tokio::time::sleep;
use futures_util::stream;
// futures_util::stream import removed as unused
use tracing::{debug, error, info, warn};
use serde::{Deserialize, Serialize};
use rand::Rng;
@@ -15,12 +15,11 @@ use crate::models::{
};
use crate::models::source::{
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection,
WebDAVFolderInfo,
};
use crate::models::source_error::{ErrorSourceType, ErrorContext};
use crate::services::source_error_tracker::SourceErrorTracker;
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult};
use crate::mime_detection::{detect_mime_from_content, MimeDetectionResult};
use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
use super::common::build_user_agent;

View File

@@ -328,12 +328,13 @@ async fn test_corrupted_docx() {
}
#[tokio::test]
async fn test_legacy_doc_error() {
async fn test_legacy_doc_extraction() {
let temp_dir = TempDir::new().unwrap();
let doc_path = temp_dir.path().join("legacy.doc");
// Create a fake DOC file
fs::write(&doc_path, b"Legacy DOC format").unwrap();
// Create a simple text file with .doc extension to test DOC processing
// catdoc will process this as text, which is expected behavior
fs::write(&doc_path, b"This is test content for DOC extraction").unwrap();
// Create OCR service
let ocr_service = EnhancedOcrService {
@@ -343,19 +344,81 @@ async fn test_legacy_doc_error() {
let settings = Settings::default();
// Try to extract text from legacy DOC
// Try to extract text from DOC file
let result = ocr_service.extract_text_from_office(
doc_path.to_str().unwrap(),
"application/msword",
&settings
).await;
// Should fail with helpful error about external tools not available
assert!(result.is_err(), "Legacy DOC should return an error");
let error_msg = result.unwrap_err().to_string();
// The error message now comes from external tool extraction failure
assert!(error_msg.contains("DOC extraction tools") || error_msg.contains("antiword") || error_msg.contains("catdoc"),
"Expected error about DOC extraction tools, got: {}", error_msg);
// DOC processing should succeed when external tools are available
assert!(result.is_ok(), "DOC extraction should succeed when tools are available");
let ocr_result = result.unwrap();
// Verify the extraction results
assert!(ocr_result.word_count > 0, "Should have extracted some words");
assert!(ocr_result.text.contains("test content"), "Should contain the test text");
assert!(ocr_result.confidence > 0.0, "Should have confidence score");
assert!(ocr_result.preprocessing_applied.len() > 0, "Should have preprocessing steps recorded");
// Verify it used an external DOC tool
let preprocessing_info = &ocr_result.preprocessing_applied[0];
assert!(
preprocessing_info.contains("catdoc") ||
preprocessing_info.contains("antiword") ||
preprocessing_info.contains("wvText"),
"Should indicate which DOC tool was used"
);
}
#[tokio::test]
async fn test_legacy_doc_error_when_tools_unavailable() {
    // Environment-dependent check of `.doc` extraction.
    //
    // The outcome depends on whether external DOC tools are usable where the
    // test runs, so both outcomes are accepted:
    //   * extraction succeeds -> at least one word must have been extracted;
    //   * extraction fails    -> the error text must mention
    //     "DOC extraction tools" and name at least one of the tools, e.g.
    //     "None of the DOC extraction tools (antiword, catdoc, wvText) are available or working."
    // The failure arm is therefore only exercised on machines without the tools.

    // Scratch directory holding a small plain-text payload with a `.doc` extension.
    let scratch = TempDir::new().unwrap();
    let scratch_root = scratch.path().to_str().unwrap().to_string();
    let doc_path = scratch.path().join("test.doc");
    fs::write(&doc_path, b"Test DOC content").unwrap();

    // OCR service rooted at the scratch directory.
    let ocr_service = EnhancedOcrService {
        temp_dir: scratch_root.clone(),
        file_service: FileService::new(scratch_root),
    };
    let settings = Settings::default();

    // Run the office extraction path with the legacy Word MIME type.
    let outcome = ocr_service
        .extract_text_from_office(doc_path.to_str().unwrap(), "application/msword", &settings)
        .await;

    // Success path: verify the extraction and finish early.
    let error_msg = match outcome {
        Ok(extracted) => {
            assert!(extracted.word_count > 0, "Should extract text when tools are available");
            println!("DOC tools are available, extraction succeeded with {} words", extracted.word_count);
            return;
        }
        Err(error) => error.to_string(),
    };

    // Failure path: the message has to point the user at the missing tools.
    let names_a_tool = ["antiword", "catdoc", "wvText"]
        .iter()
        .any(|tool| error_msg.contains(*tool));
    assert!(
        error_msg.contains("DOC extraction tools") && names_a_tool,
        "Should provide helpful error about missing DOC tools, got: {}", error_msg
    );
    println!("DOC tools not available, got expected error: {}", error_msg);
}
#[tokio::test]