mirror of
https://github.com/readur/readur.git
synced 2025-12-17 20:35:17 -06:00
fix(server): resolve compilation warnings and update the legacy DOC test, which expected a failure, to expect success
This commit is contained in:
2
docs/v2.6.0
Normal file
2
docs/v2.6.0
Normal file
@@ -0,0 +1,2 @@
|
||||
> [!WARNING]
|
||||
> The external dependencies `catdoc` and `antiword` have been added to support consumption of `.doc` documents.
|
||||
@@ -195,7 +195,7 @@ impl Database {
|
||||
("ocr_text", document.ocr_text.as_deref().unwrap_or(""))
|
||||
];
|
||||
|
||||
for (source, text) in texts {
|
||||
for (_source, text) in texts {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::collections::HashMap;
|
||||
use super::Database;
|
||||
use crate::models::{
|
||||
CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats,
|
||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, ListFailuresQuery,
|
||||
ErrorSourceType, ListFailuresQuery,
|
||||
};
|
||||
|
||||
impl Database {
|
||||
@@ -59,22 +59,22 @@ impl Database {
|
||||
let mut bind_index = 2;
|
||||
let mut conditions = Vec::new();
|
||||
|
||||
if let Some(source_type) = &query.source_type {
|
||||
if let Some(_source_type) = &query.source_type {
|
||||
conditions.push(format!("source_type = ${}::source_error_source_type", bind_index));
|
||||
bind_index += 1;
|
||||
}
|
||||
|
||||
if let Some(source_id) = &query.source_id {
|
||||
if let Some(_source_id) = &query.source_id {
|
||||
conditions.push(format!("source_id = ${}", bind_index));
|
||||
bind_index += 1;
|
||||
}
|
||||
|
||||
if let Some(error_type) = &query.error_type {
|
||||
if let Some(_error_type) = &query.error_type {
|
||||
conditions.push(format!("error_type = ${}::source_error_type", bind_index));
|
||||
bind_index += 1;
|
||||
}
|
||||
|
||||
if let Some(severity) = &query.severity {
|
||||
if let Some(_severity) = &query.severity {
|
||||
conditions.push(format!("error_severity = ${}::source_error_severity", bind_index));
|
||||
bind_index += 1;
|
||||
}
|
||||
@@ -104,12 +104,12 @@ impl Database {
|
||||
|
||||
sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC");
|
||||
|
||||
if let Some(limit) = query.limit {
|
||||
if let Some(_limit) = query.limit {
|
||||
sql.push_str(&format!(" LIMIT ${}", bind_index));
|
||||
bind_index += 1;
|
||||
}
|
||||
|
||||
if let Some(offset) = query.offset {
|
||||
if let Some(_offset) = query.offset {
|
||||
sql.push_str(&format!(" OFFSET ${}", bind_index));
|
||||
}
|
||||
|
||||
@@ -361,7 +361,7 @@ impl Database {
|
||||
WHERE user_id = $1"#
|
||||
);
|
||||
|
||||
let mut bind_index = 2;
|
||||
let bind_index = 2;
|
||||
if let Some(_) = source_type {
|
||||
sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use anyhow::Result;
|
||||
use std::collections::HashMap;
|
||||
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
|
||||
|
||||
use crate::models::{
|
||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use anyhow::Result;
|
||||
use std::collections::HashMap;
|
||||
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
|
||||
|
||||
use crate::models::{
|
||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use anyhow::{anyhow, Result};
|
||||
use reqwest::{Client, Method, Response};
|
||||
use reqwest::{Client, Method};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashSet;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::sleep;
|
||||
use futures_util::stream;
|
||||
// futures_util::stream import removed as unused
|
||||
use tracing::{debug, error, info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use rand::Rng;
|
||||
@@ -15,12 +15,11 @@ use crate::models::{
|
||||
};
|
||||
use crate::models::source::{
|
||||
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection,
|
||||
WebDAVFolderInfo,
|
||||
};
|
||||
use crate::models::source_error::{ErrorSourceType, ErrorContext};
|
||||
use crate::services::source_error_tracker::SourceErrorTracker;
|
||||
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
|
||||
use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult};
|
||||
use crate::mime_detection::{detect_mime_from_content, MimeDetectionResult};
|
||||
|
||||
use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
|
||||
use super::common::build_user_agent;
|
||||
|
||||
@@ -328,12 +328,13 @@ async fn test_corrupted_docx() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_legacy_doc_error() {
|
||||
async fn test_legacy_doc_extraction() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let doc_path = temp_dir.path().join("legacy.doc");
|
||||
|
||||
// Create a fake DOC file
|
||||
fs::write(&doc_path, b"Legacy DOC format").unwrap();
|
||||
// Create a simple text file with .doc extension to test DOC processing
|
||||
// catdoc will process this as text, which is expected behavior
|
||||
fs::write(&doc_path, b"This is test content for DOC extraction").unwrap();
|
||||
|
||||
// Create OCR service
|
||||
let ocr_service = EnhancedOcrService {
|
||||
@@ -343,19 +344,81 @@ async fn test_legacy_doc_error() {
|
||||
|
||||
let settings = Settings::default();
|
||||
|
||||
// Try to extract text from legacy DOC
|
||||
// Try to extract text from DOC file
|
||||
let result = ocr_service.extract_text_from_office(
|
||||
doc_path.to_str().unwrap(),
|
||||
"application/msword",
|
||||
&settings
|
||||
).await;
|
||||
|
||||
// Should fail with helpful error about external tools not available
|
||||
assert!(result.is_err(), "Legacy DOC should return an error");
|
||||
let error_msg = result.unwrap_err().to_string();
|
||||
// The error message now comes from external tool extraction failure
|
||||
assert!(error_msg.contains("DOC extraction tools") || error_msg.contains("antiword") || error_msg.contains("catdoc"),
|
||||
"Expected error about DOC extraction tools, got: {}", error_msg);
|
||||
// DOC processing should succeed when external tools are available
|
||||
assert!(result.is_ok(), "DOC extraction should succeed when tools are available");
|
||||
let ocr_result = result.unwrap();
|
||||
|
||||
// Verify the extraction results
|
||||
assert!(ocr_result.word_count > 0, "Should have extracted some words");
|
||||
assert!(ocr_result.text.contains("test content"), "Should contain the test text");
|
||||
assert!(ocr_result.confidence > 0.0, "Should have confidence score");
|
||||
assert!(ocr_result.preprocessing_applied.len() > 0, "Should have preprocessing steps recorded");
|
||||
|
||||
// Verify it used an external DOC tool
|
||||
let preprocessing_info = &ocr_result.preprocessing_applied[0];
|
||||
assert!(
|
||||
preprocessing_info.contains("catdoc") ||
|
||||
preprocessing_info.contains("antiword") ||
|
||||
preprocessing_info.contains("wvText"),
|
||||
"Should indicate which DOC tool was used"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_legacy_doc_error_when_tools_unavailable() {
|
||||
// This test documents the expected behavior when DOC extraction tools are not available.
|
||||
// Since antiword and catdoc are available in the current test environment, this test
|
||||
// would need to be run in an environment without these tools to actually fail.
|
||||
// For now, this serves as documentation of the expected error message format.
|
||||
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let doc_path = temp_dir.path().join("test.doc");
|
||||
|
||||
// Create a test DOC file
|
||||
fs::write(&doc_path, b"Test DOC content").unwrap();
|
||||
|
||||
// Create OCR service
|
||||
let ocr_service = EnhancedOcrService {
|
||||
temp_dir: temp_dir.path().to_str().unwrap().to_string(),
|
||||
file_service: FileService::new(temp_dir.path().to_str().unwrap().to_string()),
|
||||
};
|
||||
|
||||
let settings = Settings::default();
|
||||
|
||||
// Try to extract text from DOC file
|
||||
let result = ocr_service.extract_text_from_office(
|
||||
doc_path.to_str().unwrap(),
|
||||
"application/msword",
|
||||
&settings
|
||||
).await;
|
||||
|
||||
// Since tools are available in this environment, this should succeed
|
||||
// In an environment without DOC tools, it would fail with a helpful error message like:
|
||||
// "None of the DOC extraction tools (antiword, catdoc, wvText) are available or working."
|
||||
match result {
|
||||
Ok(ocr_result) => {
|
||||
// Tools are available - verify successful extraction
|
||||
assert!(ocr_result.word_count > 0, "Should extract text when tools are available");
|
||||
println!("DOC tools are available, extraction succeeded with {} words", ocr_result.word_count);
|
||||
}
|
||||
Err(error) => {
|
||||
// Tools are not available - verify proper error message
|
||||
let error_msg = error.to_string();
|
||||
assert!(
|
||||
error_msg.contains("DOC extraction tools") &&
|
||||
(error_msg.contains("antiword") || error_msg.contains("catdoc") || error_msg.contains("wvText")),
|
||||
"Should provide helpful error about missing DOC tools, got: {}", error_msg
|
||||
);
|
||||
println!("DOC tools not available, got expected error: {}", error_msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user