mirror of
https://github.com/readur/readur.git
synced 2025-12-21 06:10:45 -06:00
feat(webdav): create dedicated module for managing the mess that is WebDAV XML responses
This commit is contained in:
@@ -209,6 +209,9 @@ async fn extract_file_info_from_path(path: &Path) -> Result<FileIngestionInfo> {
|
||||
let (permissions, owner, group) = (None, None, None);
|
||||
|
||||
Ok(FileIngestionInfo {
|
||||
relative_path: path.to_string_lossy().to_string(),
|
||||
full_path: path.to_string_lossy().to_string(), // For filesystem, relative and full are the same
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string_lossy().to_string(),
|
||||
name: filename,
|
||||
size: file_size,
|
||||
|
||||
@@ -99,7 +99,7 @@ impl DocumentIngestionService {
|
||||
}
|
||||
|
||||
// Add source path
|
||||
metadata.insert("source_path".to_string(), serde_json::Value::String(file_info.path.clone()));
|
||||
metadata.insert("source_path".to_string(), serde_json::Value::String(file_info.relative_path.clone()));
|
||||
|
||||
// Merge any additional metadata from the source
|
||||
if let Some(ref source_meta) = file_info.metadata {
|
||||
@@ -339,7 +339,7 @@ impl DocumentIngestionService {
|
||||
source_id,
|
||||
original_created_at,
|
||||
original_modified_at,
|
||||
source_path: Some(file_info.path.clone()),
|
||||
source_path: Some(file_info.relative_path.clone()),
|
||||
file_permissions: file_info.permissions.map(|p| p as i32),
|
||||
file_owner: file_info.owner.clone(),
|
||||
file_group: file_info.group.clone(),
|
||||
|
||||
@@ -254,6 +254,12 @@ pub struct CreateIgnoredFile {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FileIngestionInfo {
|
||||
/// Relative path from WebDAV root (e.g., "/Photos/image.jpg")
|
||||
pub relative_path: String,
|
||||
/// Full WebDAV path as returned by server (e.g., "/remote.php/dav/files/user/Photos/image.jpg")
|
||||
pub full_path: String,
|
||||
/// Legacy field - deprecated, use relative_path instead
|
||||
#[deprecated(note = "Use relative_path instead for new code")]
|
||||
pub path: String,
|
||||
pub name: String,
|
||||
pub size: i64,
|
||||
|
||||
@@ -179,6 +179,9 @@ pub async fn upload_document(
|
||||
use chrono::Utc;
|
||||
|
||||
let mut file_info = FileIngestionInfo {
|
||||
relative_path: format!("upload/{}", filename), // Virtual path for web uploads
|
||||
full_path: format!("upload/{}", filename), // For web uploads, relative and full are the same
|
||||
#[allow(deprecated)]
|
||||
path: format!("upload/{}", filename), // Virtual path for web uploads
|
||||
name: filename.clone(),
|
||||
size: data.len() as i64,
|
||||
|
||||
@@ -534,11 +534,11 @@ impl SourceSyncService {
|
||||
let _permit = semaphore.acquire().await
|
||||
.map_err(|e| anyhow!("Semaphore error: {}", e))?;
|
||||
|
||||
debug!("Processing file: {}", file_info.path);
|
||||
debug!("Processing file: {}", file_info.relative_path);
|
||||
|
||||
// Download the file
|
||||
let file_data = download_file(file_info.path.clone()).await
|
||||
.map_err(|e| anyhow!("Failed to download {}: {}", file_info.path, e))?;
|
||||
let file_data = download_file(file_info.relative_path.clone()).await
|
||||
.map_err(|e| anyhow!("Failed to download {}: {}", file_info.relative_path, e))?;
|
||||
|
||||
debug!("Downloaded file: {} ({} bytes)", file_info.name, file_data.len());
|
||||
|
||||
@@ -613,28 +613,28 @@ impl SourceSyncService {
|
||||
{
|
||||
// Check for cancellation before starting file processing
|
||||
if cancellation_token.is_cancelled() {
|
||||
info!("File processing cancelled before starting: {}", file_info.path);
|
||||
info!("File processing cancelled before starting: {}", file_info.relative_path);
|
||||
return Err(anyhow!("Processing cancelled"));
|
||||
}
|
||||
|
||||
let _permit = semaphore.acquire().await
|
||||
.map_err(|e| anyhow!("Semaphore error: {}", e))?;
|
||||
|
||||
debug!("Processing file: {}", file_info.path);
|
||||
debug!("Processing file: {}", file_info.relative_path);
|
||||
|
||||
// Check for cancellation again after acquiring semaphore
|
||||
if cancellation_token.is_cancelled() {
|
||||
info!("File processing cancelled after acquiring semaphore: {}", file_info.path);
|
||||
info!("File processing cancelled after acquiring semaphore: {}", file_info.relative_path);
|
||||
return Err(anyhow!("Processing cancelled"));
|
||||
}
|
||||
|
||||
// Download the file
|
||||
let file_data = download_file(file_info.path.clone()).await
|
||||
.map_err(|e| anyhow!("Failed to download {}: {}", file_info.path, e))?;
|
||||
let file_data = download_file(file_info.relative_path.clone()).await
|
||||
.map_err(|e| anyhow!("Failed to download {}: {}", file_info.relative_path, e))?;
|
||||
|
||||
// Check for cancellation after download
|
||||
if cancellation_token.is_cancelled() {
|
||||
info!("File processing cancelled after download: {}", file_info.path);
|
||||
info!("File processing cancelled after download: {}", file_info.relative_path);
|
||||
return Err(anyhow!("Processing cancelled"));
|
||||
}
|
||||
|
||||
@@ -642,7 +642,7 @@ impl SourceSyncService {
|
||||
|
||||
// Check for cancellation before processing
|
||||
if cancellation_token.is_cancelled() {
|
||||
info!("File processing cancelled before ingestion: {}", file_info.path);
|
||||
info!("File processing cancelled before ingestion: {}", file_info.relative_path);
|
||||
return Err(anyhow!("Processing cancelled"));
|
||||
}
|
||||
|
||||
|
||||
@@ -417,6 +417,9 @@ async fn extract_file_info_from_path(path: &Path) -> Result<FileIngestionInfo> {
|
||||
let (permissions, owner, group) = (None, None, None);
|
||||
|
||||
Ok(FileIngestionInfo {
|
||||
relative_path: path.to_string_lossy().to_string(),
|
||||
full_path: path.to_string_lossy().to_string(), // For filesystem, relative and full are the same
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string_lossy().to_string(),
|
||||
name: filename,
|
||||
size: file_size,
|
||||
|
||||
@@ -138,6 +138,9 @@ impl LocalFolderService {
|
||||
additional_metadata.insert("readonly".to_string(), serde_json::Value::Bool(metadata.permissions().readonly()));
|
||||
|
||||
let file_info = FileIngestionInfo {
|
||||
relative_path: path.to_string_lossy().to_string(),
|
||||
full_path: path.to_string_lossy().to_string(), // For filesystem, relative and full are the same
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string_lossy().to_string(),
|
||||
name: file_name,
|
||||
size: metadata.len() as i64,
|
||||
|
||||
@@ -175,6 +175,9 @@ impl S3Service {
|
||||
metadata_map.insert("s3_region".to_string(), serde_json::Value::String(self.config.region.clone()));
|
||||
|
||||
let file_info = FileIngestionInfo {
|
||||
relative_path: key.clone(),
|
||||
full_path: format!("s3://{}/{}", self.config.bucket_name, key), // S3 full path includes bucket
|
||||
#[allow(deprecated)]
|
||||
path: key.clone(),
|
||||
name: file_name,
|
||||
size,
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::models::{FileIngestionInfo, WebDAVCrawlEstimate, WebDAVFolderInfo};
|
||||
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
|
||||
use super::config::{WebDAVConfig, ConcurrencyConfig};
|
||||
use super::connection::WebDAVConnection;
|
||||
use super::url_management::WebDAVUrlManager;
|
||||
|
||||
/// Results from WebDAV discovery including both files and directories
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -21,6 +22,7 @@ pub struct WebDAVDiscovery {
|
||||
connection: WebDAVConnection,
|
||||
config: WebDAVConfig,
|
||||
concurrency_config: ConcurrencyConfig,
|
||||
url_manager: WebDAVUrlManager,
|
||||
}
|
||||
|
||||
impl WebDAVDiscovery {
|
||||
@@ -29,10 +31,12 @@ impl WebDAVDiscovery {
|
||||
config: WebDAVConfig,
|
||||
concurrency_config: ConcurrencyConfig
|
||||
) -> Self {
|
||||
let url_manager = WebDAVUrlManager::new(config.clone());
|
||||
Self {
|
||||
connection,
|
||||
config,
|
||||
concurrency_config
|
||||
concurrency_config,
|
||||
url_manager
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +93,9 @@ impl WebDAVDiscovery {
|
||||
let body = response.text().await?;
|
||||
let files = parse_propfind_response(&body)?;
|
||||
|
||||
// Process file paths using the centralized URL manager
|
||||
let files = self.url_manager.process_file_infos(files);
|
||||
|
||||
// Filter files based on supported extensions
|
||||
let filtered_files: Vec<FileIngestionInfo> = files
|
||||
.into_iter()
|
||||
@@ -132,6 +139,9 @@ impl WebDAVDiscovery {
|
||||
let body = response.text().await?;
|
||||
let all_items = parse_propfind_response_with_directories(&body)?;
|
||||
|
||||
// Process file paths using the centralized URL manager
|
||||
let all_items = self.url_manager.process_file_infos(all_items);
|
||||
|
||||
// Separate files and directories
|
||||
let mut files = Vec::new();
|
||||
let mut directories = Vec::new();
|
||||
@@ -271,19 +281,17 @@ impl WebDAVDiscovery {
|
||||
let body = response.text().await?;
|
||||
let all_items = parse_propfind_response_with_directories(&body)?;
|
||||
|
||||
// Process file paths using the centralized URL manager
|
||||
let all_items = self.url_manager.process_file_infos(all_items);
|
||||
|
||||
// Separate files and directories
|
||||
let mut filtered_files = Vec::new();
|
||||
let mut subdirectory_paths = Vec::new();
|
||||
|
||||
for item in all_items {
|
||||
if item.is_directory {
|
||||
// Convert directory path to full path
|
||||
let full_path = if directory_path == "/" {
|
||||
format!("/{}", item.path.trim_start_matches('/'))
|
||||
} else {
|
||||
format!("{}/{}", directory_path.trim_end_matches('/'), item.path.trim_start_matches('/'))
|
||||
};
|
||||
subdirectory_paths.push(full_path);
|
||||
// Use the relative_path which is now properly set by url_manager
|
||||
subdirectory_paths.push(item.relative_path.clone());
|
||||
} else if self.config.is_supported_extension(&item.name) {
|
||||
filtered_files.push(item);
|
||||
}
|
||||
@@ -328,6 +336,9 @@ impl WebDAVDiscovery {
|
||||
let body = response.text().await?;
|
||||
let all_items = parse_propfind_response_with_directories(&body)?;
|
||||
|
||||
// Process file paths using the centralized URL manager
|
||||
let all_items = self.url_manager.process_file_infos(all_items);
|
||||
|
||||
// Separate files and directories
|
||||
let mut filtered_files = Vec::new();
|
||||
let mut directories = Vec::new();
|
||||
@@ -335,20 +346,9 @@ impl WebDAVDiscovery {
|
||||
|
||||
for item in all_items {
|
||||
if item.is_directory {
|
||||
// Fix the directory path to be absolute
|
||||
let full_path = if directory_path == "/" {
|
||||
format!("/{}", item.path.trim_start_matches('/'))
|
||||
} else {
|
||||
format!("{}/{}", directory_path.trim_end_matches('/'), item.path.trim_start_matches('/'))
|
||||
};
|
||||
|
||||
// Create a directory info with the corrected path
|
||||
let mut directory_info = item.clone();
|
||||
directory_info.path = full_path.clone();
|
||||
directories.push(directory_info);
|
||||
|
||||
// Add to paths for further scanning
|
||||
subdirectory_paths.push(full_path);
|
||||
// Use the relative_path which is now properly set by url_manager
|
||||
directories.push(item.clone());
|
||||
subdirectory_paths.push(item.relative_path.clone());
|
||||
} else if self.config.is_supported_extension(&item.name) {
|
||||
filtered_files.push(item);
|
||||
}
|
||||
@@ -476,11 +476,14 @@ impl WebDAVDiscovery {
|
||||
let body = response.text().await?;
|
||||
let all_items = parse_propfind_response_with_directories(&body)?;
|
||||
|
||||
// Process file paths using the centralized URL manager
|
||||
let all_items = self.url_manager.process_file_infos(all_items);
|
||||
|
||||
// Filter out only directories and extract their paths
|
||||
let directory_paths: Vec<String> = all_items
|
||||
.into_iter()
|
||||
.filter(|item| item.is_directory)
|
||||
.map(|item| item.path)
|
||||
.map(|item| item.relative_path)
|
||||
.collect();
|
||||
|
||||
Ok(directory_paths)
|
||||
@@ -522,7 +525,7 @@ impl WebDAVDiscovery {
|
||||
let is_duplicate = if !file.etag.is_empty() {
|
||||
!seen_etags.insert(file.etag.clone())
|
||||
} else {
|
||||
!seen_paths.insert(file.path.clone())
|
||||
!seen_paths.insert(file.relative_path.clone())
|
||||
};
|
||||
|
||||
if !is_duplicate {
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod discovery;
|
||||
pub mod validation;
|
||||
pub mod service;
|
||||
pub mod smart_sync;
|
||||
pub mod url_management;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use config::{WebDAVConfig, RetryConfig, ConcurrencyConfig};
|
||||
@@ -17,6 +18,7 @@ pub use validation::{
|
||||
};
|
||||
pub use service::{WebDAVService, ServerCapabilities, HealthStatus, test_webdav_connection};
|
||||
pub use smart_sync::{SmartSyncService, SmartSyncDecision, SmartSyncStrategy, SmartSyncResult};
|
||||
pub use url_management::WebDAVUrlManager;
|
||||
|
||||
// Test modules
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -193,10 +193,10 @@ impl WebDAVService {
|
||||
pub async fn download_file_info(&self, file_info: &FileIngestionInfo) -> Result<Vec<u8>> {
|
||||
let _permit = self.download_semaphore.acquire().await?;
|
||||
|
||||
debug!("⬇️ Downloading file: {}", file_info.path);
|
||||
debug!("⬇️ Downloading file: {}", file_info.relative_path);
|
||||
|
||||
// Convert full WebDAV paths to relative paths to prevent double path construction
|
||||
let relative_path = self.convert_to_relative_path(&file_info.path);
|
||||
// Use the relative path directly since it's already processed
|
||||
let relative_path = &file_info.relative_path;
|
||||
let url = self.connection.get_url_for_path(&relative_path);
|
||||
|
||||
let response = self.connection
|
||||
@@ -211,13 +211,13 @@ impl WebDAVService {
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow!(
|
||||
"Failed to download file '{}': HTTP {}",
|
||||
file_info.path,
|
||||
file_info.relative_path,
|
||||
response.status()
|
||||
));
|
||||
}
|
||||
|
||||
let content = response.bytes().await?;
|
||||
debug!("✅ Downloaded {} bytes for file: {}", content.len(), file_info.path);
|
||||
debug!("✅ Downloaded {} bytes for file: {}", content.len(), file_info.relative_path);
|
||||
|
||||
Ok(content.to_vec())
|
||||
}
|
||||
@@ -282,7 +282,7 @@ impl WebDAVService {
|
||||
let files = crate::webdav_xml_parser::parse_propfind_response(&body)?;
|
||||
|
||||
files.into_iter()
|
||||
.find(|f| f.path == file_path)
|
||||
.find(|f| f.relative_path == file_path)
|
||||
.ok_or_else(|| anyhow!("File metadata not found: {}", file_path))
|
||||
}
|
||||
|
||||
|
||||
@@ -81,17 +81,17 @@ impl SmartSyncService {
|
||||
|
||||
// Check if any immediate subdirectories have changed ETags
|
||||
for directory in &root_discovery.directories {
|
||||
match relevant_dirs.get(&directory.path) {
|
||||
match relevant_dirs.get(&directory.relative_path) {
|
||||
Some(known_etag) => {
|
||||
if known_etag != &directory.etag {
|
||||
info!("Directory changed: {} (old: {}, new: {})",
|
||||
directory.path, known_etag, directory.etag);
|
||||
changed_directories.push(directory.path.clone());
|
||||
directory.relative_path, known_etag, directory.etag);
|
||||
changed_directories.push(directory.relative_path.clone());
|
||||
}
|
||||
}
|
||||
None => {
|
||||
info!("New directory discovered: {}", directory.path);
|
||||
new_directories.push(directory.path.clone());
|
||||
info!("New directory discovered: {}", directory.relative_path);
|
||||
new_directories.push(directory.relative_path.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -183,7 +183,7 @@ impl SmartSyncService {
|
||||
for directory_info in &discovery_result.directories {
|
||||
let webdav_directory = CreateWebDAVDirectory {
|
||||
user_id,
|
||||
directory_path: directory_info.path.clone(),
|
||||
directory_path: directory_info.relative_path.clone(),
|
||||
directory_etag: directory_info.etag.clone(),
|
||||
file_count: 0, // Will be updated by stats
|
||||
total_size_bytes: 0, // Will be updated by stats
|
||||
@@ -191,11 +191,11 @@ impl SmartSyncService {
|
||||
|
||||
match self.state.db.create_or_update_webdav_directory(&webdav_directory).await {
|
||||
Ok(_) => {
|
||||
debug!("Saved directory ETag: {} -> {}", directory_info.path, directory_info.etag);
|
||||
debug!("Saved directory ETag: {} -> {}", directory_info.relative_path, directory_info.etag);
|
||||
directories_saved += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to save directory ETag for {}: {}", directory_info.path, e);
|
||||
warn!("Failed to save directory ETag for {}: {}", directory_info.relative_path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -232,16 +232,16 @@ impl SmartSyncService {
|
||||
for directory_info in &discovery_result.directories {
|
||||
let webdav_directory = CreateWebDAVDirectory {
|
||||
user_id,
|
||||
directory_path: directory_info.path.clone(),
|
||||
directory_path: directory_info.relative_path.clone(),
|
||||
directory_etag: directory_info.etag.clone(),
|
||||
file_count: 0,
|
||||
total_size_bytes: 0,
|
||||
};
|
||||
|
||||
if let Err(e) = self.state.db.create_or_update_webdav_directory(&webdav_directory).await {
|
||||
warn!("Failed to save directory ETag for {}: {}", directory_info.path, e);
|
||||
warn!("Failed to save directory ETag for {}: {}", directory_info.relative_path, e);
|
||||
} else {
|
||||
debug!("Updated directory ETag: {} -> {}", directory_info.path, directory_info.etag);
|
||||
debug!("Updated directory ETag: {} -> {}", directory_info.relative_path, directory_info.etag);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
vec![
|
||||
// Root directories at different levels
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments".to_string(),
|
||||
full_path: "/FullerDocuments".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments".to_string(),
|
||||
name: "FullerDocuments".to_string(),
|
||||
size: 0,
|
||||
@@ -40,6 +43,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
name: "JonDocuments".to_string(),
|
||||
size: 0,
|
||||
@@ -55,6 +61,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Multiple levels of nesting
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work".to_string(),
|
||||
name: "Work".to_string(),
|
||||
size: 0,
|
||||
@@ -69,6 +78,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Personal".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Personal".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Personal".to_string(),
|
||||
name: "Personal".to_string(),
|
||||
size: 0,
|
||||
@@ -83,6 +95,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Projects".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Projects".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Projects".to_string(),
|
||||
name: "Projects".to_string(),
|
||||
size: 0,
|
||||
@@ -97,6 +112,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Reports".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Reports".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Reports".to_string(),
|
||||
name: "Reports".to_string(),
|
||||
size: 0,
|
||||
@@ -111,6 +129,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp".to_string(),
|
||||
name: "WebApp".to_string(),
|
||||
size: 0,
|
||||
@@ -126,6 +147,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Files at various nesting levels - this is the key part that was failing
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/index.txt".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/index.txt".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/index.txt".to_string(),
|
||||
name: "index.txt".to_string(),
|
||||
size: 1500,
|
||||
@@ -140,6 +164,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/schedule.pdf".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/schedule.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/schedule.pdf".to_string(),
|
||||
name: "schedule.pdf".to_string(),
|
||||
size: 2048000,
|
||||
@@ -154,6 +181,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Projects/proposal.docx".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Projects/proposal.docx".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Projects/proposal.docx".to_string(),
|
||||
name: "proposal.docx".to_string(),
|
||||
size: 1024000,
|
||||
@@ -168,6 +198,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp/design.pdf".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp/design.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp/design.pdf".to_string(),
|
||||
name: "design.pdf".to_string(),
|
||||
size: 3072000,
|
||||
@@ -182,6 +215,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Work/Reports/monthly.pdf".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Work/Reports/monthly.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Work/Reports/monthly.pdf".to_string(),
|
||||
name: "monthly.pdf".to_string(),
|
||||
size: 4096000,
|
||||
@@ -196,6 +232,9 @@ fn create_complex_nested_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Personal/diary.txt".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Personal/diary.txt".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Personal/diary.txt".to_string(),
|
||||
name: "diary.txt".to_string(),
|
||||
size: 5120,
|
||||
@@ -222,10 +261,10 @@ async fn test_comprehensive_directory_extraction() {
|
||||
for file in &files {
|
||||
if file.is_directory {
|
||||
// Add the directory itself
|
||||
all_directories.insert(file.path.clone());
|
||||
all_directories.insert(file.relative_path.clone());
|
||||
} else {
|
||||
// Extract all parent directories from file paths
|
||||
let mut path_parts: Vec<&str> = file.path.split('/').collect();
|
||||
let mut path_parts: Vec<&str> = file.relative_path.split('/').collect();
|
||||
path_parts.pop(); // Remove the filename
|
||||
|
||||
// Build directory paths from root down to immediate parent
|
||||
@@ -297,7 +336,7 @@ async fn test_first_time_scan_scenario_logic() {
|
||||
|
||||
// Verify that files actually exist in subdirectories
|
||||
let files_in_subdirs: Vec<_> = files.iter()
|
||||
.filter(|f| f.path.starts_with(parent_path) && f.path != parent_path && !f.is_directory)
|
||||
.filter(|f| f.relative_path.starts_with(parent_path) && f.relative_path != parent_path && !f.is_directory)
|
||||
.collect();
|
||||
|
||||
assert!(!files_in_subdirs.is_empty(), "There should be files in subdirectories");
|
||||
@@ -305,14 +344,14 @@ async fn test_first_time_scan_scenario_logic() {
|
||||
|
||||
// Test that we can correctly identify direct children at each level
|
||||
let direct_children_root: Vec<_> = files.iter()
|
||||
.filter(|f| service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments"))
|
||||
.filter(|f| service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments"))
|
||||
.collect();
|
||||
|
||||
// Should include: index.txt, Work/, Personal/
|
||||
assert_eq!(direct_children_root.len(), 3);
|
||||
|
||||
let direct_children_work: Vec<_> = files.iter()
|
||||
.filter(|f| service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.filter(|f| service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.collect();
|
||||
|
||||
// Should include: schedule.pdf, Projects/, Reports/
|
||||
@@ -330,7 +369,7 @@ async fn test_directory_etag_mapping_accuracy() {
|
||||
let mut directory_etags = std::collections::HashMap::new();
|
||||
for file in &files {
|
||||
if file.is_directory {
|
||||
directory_etags.insert(file.path.clone(), file.etag.clone());
|
||||
directory_etags.insert(file.relative_path.clone(), file.etag.clone());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,42 +397,42 @@ async fn test_direct_file_counting_precision() {
|
||||
|
||||
// Root level: should have 1 direct file (index.txt)
|
||||
let root_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments"))
|
||||
.collect();
|
||||
assert_eq!(root_direct_files.len(), 1);
|
||||
assert_eq!(root_direct_files[0].name, "index.txt");
|
||||
|
||||
// Work level: should have 1 direct file (schedule.pdf)
|
||||
let work_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.collect();
|
||||
assert_eq!(work_direct_files.len(), 1);
|
||||
assert_eq!(work_direct_files[0].name, "schedule.pdf");
|
||||
|
||||
// Projects level: should have 1 direct file (proposal.docx)
|
||||
let projects_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Projects"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Projects"))
|
||||
.collect();
|
||||
assert_eq!(projects_direct_files.len(), 1);
|
||||
assert_eq!(projects_direct_files[0].name, "proposal.docx");
|
||||
|
||||
// WebApp level: should have 1 direct file (design.pdf)
|
||||
let webapp_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Projects/WebApp"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Projects/WebApp"))
|
||||
.collect();
|
||||
assert_eq!(webapp_direct_files.len(), 1);
|
||||
assert_eq!(webapp_direct_files[0].name, "design.pdf");
|
||||
|
||||
// Reports level: should have 1 direct file (monthly.pdf)
|
||||
let reports_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Reports"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Reports"))
|
||||
.collect();
|
||||
assert_eq!(reports_direct_files.len(), 1);
|
||||
assert_eq!(reports_direct_files[0].name, "monthly.pdf");
|
||||
|
||||
// Personal level: should have 1 direct file (diary.txt)
|
||||
let personal_direct_files: Vec<_> = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Personal"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Personal"))
|
||||
.collect();
|
||||
assert_eq!(personal_direct_files.len(), 1);
|
||||
assert_eq!(personal_direct_files[0].name, "diary.txt");
|
||||
@@ -409,37 +448,37 @@ async fn test_total_size_calculation_per_directory() {
|
||||
// Test size calculations match expected values
|
||||
|
||||
let root_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(root_size, 1500); // index.txt
|
||||
|
||||
let work_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(work_size, 2048000); // schedule.pdf
|
||||
|
||||
let projects_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Projects"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Projects"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(projects_size, 1024000); // proposal.docx
|
||||
|
||||
let webapp_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Projects/WebApp"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Projects/WebApp"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(webapp_size, 3072000); // design.pdf
|
||||
|
||||
let reports_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Work/Reports"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Work/Reports"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(reports_size, 4096000); // monthly.pdf
|
||||
|
||||
let personal_size: i64 = files.iter()
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/FullerDocuments/JonDocuments/Personal"))
|
||||
.filter(|f| !f.is_directory && service.is_direct_child(&f.relative_path, "/FullerDocuments/JonDocuments/Personal"))
|
||||
.map(|f| f.size)
|
||||
.sum();
|
||||
assert_eq!(personal_size, 5120); // diary.txt
|
||||
@@ -497,7 +536,7 @@ async fn test_bug_scenario_file_count_verification() {
|
||||
// Verify all files would be discovered in a full scan
|
||||
let parent_path = "/FullerDocuments/JonDocuments";
|
||||
let files_under_parent: Vec<_> = files.iter()
|
||||
.filter(|f| f.path.starts_with(parent_path) && !f.is_directory)
|
||||
.filter(|f| f.relative_path.starts_with(parent_path) && !f.is_directory)
|
||||
.collect();
|
||||
|
||||
// All 6 files should be under the parent (all files in our mock are under this path)
|
||||
@@ -508,7 +547,7 @@ async fn test_bug_scenario_file_count_verification() {
|
||||
|
||||
// But with the fix, a full scan would discover them all
|
||||
let discovered_files: Vec<_> = files.iter()
|
||||
.filter(|f| f.path.starts_with(parent_path))
|
||||
.filter(|f| f.relative_path.starts_with(parent_path))
|
||||
.collect();
|
||||
|
||||
// Should include both directories and files
|
||||
|
||||
@@ -398,6 +398,9 @@ async fn test_connection_get_url_for_path_normalization() {
|
||||
for (input_path, expected_url) in test_cases {
|
||||
let result_url = connection.get_url_for_path(input_path);
|
||||
|
||||
// Verify the URL matches expected
|
||||
assert_eq!(result_url, expected_url, "URL construction failed for path: {}", input_path);
|
||||
|
||||
// Ensure no double slashes in the final URL (except after protocol)
|
||||
let url_without_protocol = result_url.replace("https://", "");
|
||||
assert!(!url_without_protocol.contains("//"), "URL should not contain double slashes: {}", result_url);
|
||||
|
||||
186
src/services/webdav/url_management.rs
Normal file
186
src/services/webdav/url_management.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use anyhow::Result;
|
||||
use crate::models::FileIngestionInfo;
|
||||
use super::config::WebDAVConfig;
|
||||
|
||||
/// Centralized URL and path management for WebDAV operations
|
||||
///
|
||||
/// This module handles all the messy WebDAV URL construction, path normalization,
|
||||
/// and conversion between full WebDAV paths and relative paths. It's designed to
|
||||
/// prevent the URL doubling issues that plague WebDAV integrations.
|
||||
pub struct WebDAVUrlManager {
    /// WebDAV server configuration (server URL, credentials, server type);
    /// determines both the base URL and which href prefix gets stripped.
    config: WebDAVConfig,
}
|
||||
|
||||
impl WebDAVUrlManager {
|
||||
pub fn new(config: WebDAVConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Get the base WebDAV URL for the configured server
|
||||
/// Returns something like: "https://nas.example.com/remote.php/dav/files/username"
|
||||
pub fn base_url(&self) -> String {
|
||||
self.config.webdav_url()
|
||||
}
|
||||
|
||||
/// Convert full WebDAV href (from XML response) to relative path
|
||||
///
|
||||
/// Input: "/remote.php/dav/files/username/Photos/image.jpg"
|
||||
/// Output: "/Photos/image.jpg"
|
||||
pub fn href_to_relative_path(&self, href: &str) -> String {
|
||||
match self.config.server_type.as_deref() {
|
||||
Some("nextcloud") => {
|
||||
let prefix = format!("/remote.php/dav/files/{}", self.config.username);
|
||||
if href.starts_with(&prefix) {
|
||||
let relative = &href[prefix.len()..];
|
||||
if relative.is_empty() { "/" } else { relative }.to_string()
|
||||
} else {
|
||||
href.to_string()
|
||||
}
|
||||
}
|
||||
Some("owncloud") => {
|
||||
if href.starts_with("/remote.php/webdav") {
|
||||
let relative = &href[18..]; // Remove "/remote.php/webdav"
|
||||
if relative.is_empty() { "/" } else { relative }.to_string()
|
||||
} else {
|
||||
href.to_string()
|
||||
}
|
||||
}
|
||||
Some("generic") => {
|
||||
if href.starts_with("/webdav") {
|
||||
let relative = &href[7..]; // Remove "/webdav"
|
||||
if relative.is_empty() { "/" } else { relative }.to_string()
|
||||
} else {
|
||||
href.to_string()
|
||||
}
|
||||
}
|
||||
_ => href.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert relative path to full URL for WebDAV requests
|
||||
///
|
||||
/// Input: "/Photos/image.jpg"
|
||||
/// Output: "https://nas.example.com/remote.php/dav/files/username/Photos/image.jpg"
|
||||
pub fn relative_path_to_url(&self, relative_path: &str) -> String {
|
||||
let base_url = self.base_url();
|
||||
let clean_path = relative_path.trim_start_matches('/');
|
||||
|
||||
if clean_path.is_empty() {
|
||||
base_url
|
||||
} else {
|
||||
let normalized_base = base_url.trim_end_matches('/');
|
||||
format!("{}/{}", normalized_base, clean_path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Process FileIngestionInfo from XML parser to set correct paths
|
||||
///
|
||||
/// This takes the raw XML parser output and fixes the path fields:
|
||||
/// - Sets relative_path from href conversion
|
||||
/// - Keeps full_path as the original href
|
||||
/// - Sets legacy path field for backward compatibility
|
||||
pub fn process_file_info(&self, mut file_info: FileIngestionInfo) -> FileIngestionInfo {
|
||||
// The XML parser puts the href in full_path (which is correct)
|
||||
let href = &file_info.full_path;
|
||||
|
||||
// Convert to relative path
|
||||
file_info.relative_path = self.href_to_relative_path(href);
|
||||
|
||||
// Legacy path field should be relative for backward compatibility
|
||||
#[allow(deprecated)]
|
||||
{
|
||||
file_info.path = file_info.relative_path.clone();
|
||||
}
|
||||
|
||||
file_info
|
||||
}
|
||||
|
||||
/// Process a collection of FileIngestionInfo items
|
||||
pub fn process_file_infos(&self, file_infos: Vec<FileIngestionInfo>) -> Vec<FileIngestionInfo> {
|
||||
file_infos.into_iter()
|
||||
.map(|file_info| self.process_file_info(file_info))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the Nextcloud-flavoured config shared by every test below.
    fn create_nextcloud_config() -> WebDAVConfig {
        WebDAVConfig {
            server_url: "https://nas.example.com".to_string(),
            username: "testuser".to_string(),
            password: "password".to_string(),
            watch_folders: vec!["/Photos".to_string()],
            file_extensions: vec!["jpg".to_string(), "pdf".to_string()],
            timeout_seconds: 30,
            server_type: Some("nextcloud".to_string()),
        }
    }

    #[test]
    fn test_nextcloud_href_to_relative_path() {
        let manager = WebDAVUrlManager::new(create_nextcloud_config());

        // (raw href from server, expected relative path)
        let cases = [
            // File under the user mount
            ("/remote.php/dav/files/testuser/Photos/image.jpg", "/Photos/image.jpg"),
            // Directory (trailing slash preserved)
            ("/remote.php/dav/files/testuser/Photos/", "/Photos/"),
            // Bare mount point maps to root
            ("/remote.php/dav/files/testuser", "/"),
        ];
        for (href, expected) in cases {
            assert_eq!(manager.href_to_relative_path(href), expected);
        }
    }

    #[test]
    fn test_relative_path_to_url() {
        let manager = WebDAVUrlManager::new(create_nextcloud_config());

        // File URL construction joins base and relative path with one slash.
        assert_eq!(
            manager.relative_path_to_url("/Photos/image.jpg"),
            "https://nas.example.com/remote.php/dav/files/testuser/Photos/image.jpg"
        );

        // Root relative path yields the bare base URL.
        assert_eq!(
            manager.relative_path_to_url("/"),
            "https://nas.example.com/remote.php/dav/files/testuser"
        );
    }

    #[test]
    fn test_process_file_info() {
        let manager = WebDAVUrlManager::new(create_nextcloud_config());

        let raw = FileIngestionInfo {
            relative_path: "TEMP".to_string(), // placeholder; manager overwrites it
            full_path: "/remote.php/dav/files/testuser/Photos/image.jpg".to_string(),
            #[allow(deprecated)]
            path: "OLD".to_string(), // legacy field; manager overwrites it
            name: "image.jpg".to_string(),
            size: 1024,
            mime_type: "image/jpeg".to_string(),
            last_modified: None,
            etag: "abc123".to_string(),
            is_directory: false,
            created_at: None,
            permissions: None,
            owner: None,
            group: None,
            metadata: None,
        };

        let processed = manager.process_file_info(raw);

        // relative_path derived from href; full_path kept verbatim.
        assert_eq!(processed.relative_path, "/Photos/image.jpg");
        assert_eq!(processed.full_path, "/remote.php/dav/files/testuser/Photos/image.jpg");
        // Legacy field mirrors relative_path.
        #[allow(deprecated)]
        assert_eq!(processed.path, "/Photos/image.jpg");
    }
}
|
||||
@@ -201,7 +201,10 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileIngestionInfo>>
|
||||
let metadata = resp.metadata;
|
||||
|
||||
let file_info = FileIngestionInfo {
|
||||
path: resp.href.clone(),
|
||||
relative_path: "TEMP".to_string(), // Will be set by discovery layer
|
||||
full_path: resp.href.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: resp.href.clone(), // Legacy field - keep for compatibility
|
||||
name,
|
||||
size: resp.content_length.unwrap_or(0),
|
||||
mime_type: resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string()),
|
||||
@@ -416,7 +419,10 @@ pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result<Vec<Fi
|
||||
});
|
||||
|
||||
let file_info = FileIngestionInfo {
|
||||
path: resp.href.clone(),
|
||||
relative_path: "TEMP".to_string(), // Will be set by discovery layer
|
||||
full_path: resp.href.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: resp.href.clone(), // Legacy field - keep for compatibility
|
||||
name,
|
||||
size: resp.content_length.unwrap_or(0),
|
||||
mime_type: if resp.is_collection {
|
||||
@@ -801,7 +807,7 @@ mod tests {
|
||||
|
||||
let file = &files[0];
|
||||
assert_eq!(file.name, "report.pdf");
|
||||
assert_eq!(file.path, "/remote.php/dav/files/admin/Documents/report.pdf");
|
||||
assert_eq!(file.full_path, "/remote.php/dav/files/admin/Documents/report.pdf");
|
||||
assert_eq!(file.size, 2048000);
|
||||
assert_eq!(file.etag, "pdf123"); // ETag should be normalized (quotes removed)
|
||||
assert!(file.last_modified.is_some());
|
||||
|
||||
2
tests/fixtures/webdav/nextcloud_photos_propfind_response.xml
vendored
Normal file
2
tests/fixtures/webdav/nextcloud_photos_propfind_response.xml
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -23,6 +23,9 @@ fn calculate_file_hash(data: &[u8]) -> String {
|
||||
fn create_test_file_info(name: &str, path: &str, content: &[u8]) -> FileIngestionInfo {
|
||||
FileIngestionInfo {
|
||||
name: name.to_string(),
|
||||
relative_path: path.to_string(),
|
||||
full_path: path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string(),
|
||||
size: content.len() as i64,
|
||||
last_modified: Some(Utc::now()),
|
||||
|
||||
@@ -26,6 +26,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
vec![
|
||||
// Parent root directory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments".to_string(),
|
||||
full_path: "/FullerDocuments".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments".to_string(),
|
||||
name: "FullerDocuments".to_string(),
|
||||
size: 0,
|
||||
@@ -41,6 +44,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Root directory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments".to_string(),
|
||||
name: "JonDocuments".to_string(),
|
||||
size: 0,
|
||||
@@ -56,6 +62,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Subdirectory level 1
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects".to_string(),
|
||||
name: "Projects".to_string(),
|
||||
size: 0,
|
||||
@@ -70,6 +79,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Archive".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Archive".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Archive".to_string(),
|
||||
name: "Archive".to_string(),
|
||||
size: 0,
|
||||
@@ -85,6 +97,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Subdirectory level 2
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects/WebDev".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects/WebDev".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects/WebDev".to_string(),
|
||||
name: "WebDev".to_string(),
|
||||
size: 0,
|
||||
@@ -99,6 +114,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects/Mobile".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects/Mobile".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects/Mobile".to_string(),
|
||||
name: "Mobile".to_string(),
|
||||
size: 0,
|
||||
@@ -114,6 +132,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Files in various directories
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/readme.txt".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/readme.txt".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/readme.txt".to_string(),
|
||||
name: "readme.txt".to_string(),
|
||||
size: 1024,
|
||||
@@ -128,6 +149,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects/project-overview.pdf".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects/project-overview.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects/project-overview.pdf".to_string(),
|
||||
name: "project-overview.pdf".to_string(),
|
||||
size: 2048000,
|
||||
@@ -142,6 +166,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects/WebDev/website-specs.docx".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects/WebDev/website-specs.docx".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects/WebDev/website-specs.docx".to_string(),
|
||||
name: "website-specs.docx".to_string(),
|
||||
size: 512000,
|
||||
@@ -156,6 +183,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Projects/Mobile/app-design.pdf".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Projects/Mobile/app-design.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Projects/Mobile/app-design.pdf".to_string(),
|
||||
name: "app-design.pdf".to_string(),
|
||||
size: 1536000,
|
||||
@@ -170,6 +200,9 @@ fn mock_realistic_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/FullerDocuments/JonDocuments/Archive/old-notes.txt".to_string(),
|
||||
full_path: "/FullerDocuments/JonDocuments/Archive/old-notes.txt".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/FullerDocuments/JonDocuments/Archive/old-notes.txt".to_string(),
|
||||
name: "old-notes.txt".to_string(),
|
||||
size: 256,
|
||||
|
||||
@@ -23,6 +23,9 @@ fn calculate_file_hash(data: &[u8]) -> String {
|
||||
fn create_test_file_info(name: &str, path: &str, size: i64) -> FileIngestionInfo {
|
||||
FileIngestionInfo {
|
||||
name: name.to_string(),
|
||||
relative_path: path.to_string(),
|
||||
full_path: path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string(),
|
||||
size,
|
||||
last_modified: Some(Utc::now()),
|
||||
@@ -290,6 +293,9 @@ async fn test_webdav_sync_etag_change_detection() -> Result<()> {
|
||||
// Simulate file with new ETag (indicating change)
|
||||
let file_info = FileIngestionInfo {
|
||||
name: "updated.pdf".to_string(),
|
||||
relative_path: webdav_path.to_string(),
|
||||
full_path: webdav_path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: webdav_path.to_string(),
|
||||
size: 1024,
|
||||
last_modified: Some(Utc::now()),
|
||||
|
||||
@@ -30,6 +30,9 @@ async fn create_test_state() -> (TestContext, Arc<AppState>, Uuid) {
|
||||
/// Helper function to create directory info for testing
|
||||
fn create_directory_info(path: &str, etag: &str) -> FileIngestionInfo {
|
||||
FileIngestionInfo {
|
||||
relative_path: path.to_string(),
|
||||
full_path: path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string(),
|
||||
name: path.split('/').last().unwrap_or("").to_string(),
|
||||
size: 0,
|
||||
|
||||
@@ -42,6 +42,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
vec![
|
||||
// Root directory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -57,6 +60,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Level 1 directories
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024".to_string(),
|
||||
full_path: "/Documents/2024".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024".to_string(),
|
||||
name: "2024".to_string(),
|
||||
size: 0,
|
||||
@@ -71,6 +77,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Archive".to_string(),
|
||||
full_path: "/Documents/Archive".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Archive".to_string(),
|
||||
name: "Archive".to_string(),
|
||||
size: 0,
|
||||
@@ -86,6 +95,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Level 2 directories
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/Q1".to_string(),
|
||||
full_path: "/Documents/2024/Q1".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/Q1".to_string(),
|
||||
name: "Q1".to_string(),
|
||||
size: 0,
|
||||
@@ -100,6 +112,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/Q2".to_string(),
|
||||
full_path: "/Documents/2024/Q2".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/Q2".to_string(),
|
||||
name: "Q2".to_string(),
|
||||
size: 0,
|
||||
@@ -115,6 +130,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Level 3 directory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/Q1/Reports".to_string(),
|
||||
full_path: "/Documents/2024/Q1/Reports".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/Q1/Reports".to_string(),
|
||||
name: "Reports".to_string(),
|
||||
size: 0,
|
||||
@@ -130,6 +148,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Files at various levels
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/root-file.pdf".to_string(),
|
||||
full_path: "/Documents/root-file.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/root-file.pdf".to_string(),
|
||||
name: "root-file.pdf".to_string(),
|
||||
size: 1024000,
|
||||
@@ -144,6 +165,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/annual-report.pdf".to_string(),
|
||||
full_path: "/Documents/2024/annual-report.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/annual-report.pdf".to_string(),
|
||||
name: "annual-report.pdf".to_string(),
|
||||
size: 2048000,
|
||||
@@ -158,6 +182,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/Q1/q1-summary.pdf".to_string(),
|
||||
full_path: "/Documents/2024/Q1/q1-summary.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/Q1/q1-summary.pdf".to_string(),
|
||||
name: "q1-summary.pdf".to_string(),
|
||||
size: 512000,
|
||||
@@ -172,6 +199,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/2024/Q1/Reports/detailed-report.pdf".to_string(),
|
||||
full_path: "/Documents/2024/Q1/Reports/detailed-report.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/2024/Q1/Reports/detailed-report.pdf".to_string(),
|
||||
name: "detailed-report.pdf".to_string(),
|
||||
size: 4096000,
|
||||
@@ -186,6 +216,9 @@ fn mock_nested_directory_files() -> Vec<FileIngestionInfo> {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Archive/old-document.pdf".to_string(),
|
||||
full_path: "/Documents/Archive/old-document.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Archive/old-document.pdf".to_string(),
|
||||
name: "old-document.pdf".to_string(),
|
||||
size: 256000,
|
||||
|
||||
@@ -58,6 +58,9 @@ async fn test_directory_only_structure() {
|
||||
// Test structure with only directories, no files
|
||||
let directory_only_files = vec![
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -72,6 +75,9 @@ async fn test_directory_only_structure() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Empty1".to_string(),
|
||||
full_path: "/Documents/Empty1".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Empty1".to_string(),
|
||||
name: "Empty1".to_string(),
|
||||
size: 0,
|
||||
@@ -86,6 +92,9 @@ async fn test_directory_only_structure() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Empty2".to_string(),
|
||||
full_path: "/Documents/Empty2".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Empty2".to_string(),
|
||||
name: "Empty2".to_string(),
|
||||
size: 0,
|
||||
@@ -137,6 +146,9 @@ async fn test_very_deep_nesting() {
|
||||
let deep_files = vec![
|
||||
// All directories in the path
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -152,6 +164,9 @@ async fn test_very_deep_nesting() {
|
||||
},
|
||||
// All intermediate directories from L1 to L10
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/L1".to_string(),
|
||||
full_path: "/Documents/L1".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/L1".to_string(),
|
||||
name: "L1".to_string(),
|
||||
size: 0,
|
||||
@@ -166,6 +181,9 @@ async fn test_very_deep_nesting() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/L1/L2".to_string(),
|
||||
full_path: "/Documents/L1/L2".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/L1/L2".to_string(),
|
||||
name: "L2".to_string(),
|
||||
size: 0,
|
||||
@@ -180,6 +198,9 @@ async fn test_very_deep_nesting() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/L1/L2/L3".to_string(),
|
||||
full_path: "/Documents/L1/L2/L3".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/L1/L2/L3".to_string(),
|
||||
name: "L3".to_string(),
|
||||
size: 0,
|
||||
@@ -194,6 +215,9 @@ async fn test_very_deep_nesting() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: deep_path.to_string(),
|
||||
full_path: deep_path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: deep_path.to_string(),
|
||||
name: "L10".to_string(),
|
||||
size: 0,
|
||||
@@ -209,6 +233,9 @@ async fn test_very_deep_nesting() {
|
||||
},
|
||||
// File at the deepest level
|
||||
FileIngestionInfo {
|
||||
relative_path: file_path.clone(),
|
||||
full_path: file_path.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: file_path.clone(),
|
||||
name: "deep-file.pdf".to_string(),
|
||||
size: 1024000,
|
||||
@@ -267,6 +294,9 @@ async fn test_special_characters_in_paths() {
|
||||
// Test paths with special characters, spaces, unicode
|
||||
let special_files = vec![
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Folder with spaces".to_string(),
|
||||
full_path: "/Documents/Folder with spaces".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Folder with spaces".to_string(),
|
||||
name: "Folder with spaces".to_string(),
|
||||
size: 0,
|
||||
@@ -281,6 +311,9 @@ async fn test_special_characters_in_paths() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Folder-with-dashes".to_string(),
|
||||
full_path: "/Documents/Folder-with-dashes".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Folder-with-dashes".to_string(),
|
||||
name: "Folder-with-dashes".to_string(),
|
||||
size: 0,
|
||||
@@ -295,7 +328,10 @@ async fn test_special_characters_in_paths() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
path: "/Documents/Документы".to_string(), // Cyrillic
|
||||
relative_path: "/Documents/Документы".to_string(), // Cyrillic
|
||||
full_path: "/Documents/Документы".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Документы".to_string(),
|
||||
name: "Документы".to_string(),
|
||||
size: 0,
|
||||
mime_type: "".to_string(),
|
||||
@@ -309,6 +345,9 @@ async fn test_special_characters_in_paths() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
|
||||
full_path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
|
||||
name: "file with spaces.pdf".to_string(),
|
||||
size: 1024000,
|
||||
@@ -687,6 +726,9 @@ async fn test_large_directory_structures() {
|
||||
|
||||
// Add root directory
|
||||
large_files.push(FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -707,6 +749,9 @@ async fn test_large_directory_structures() {
|
||||
|
||||
// Add level-1 directory
|
||||
large_files.push(FileIngestionInfo {
|
||||
relative_path: level1_path.clone(),
|
||||
full_path: level1_path.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: level1_path.clone(),
|
||||
name: format!("Dir{:03}", i),
|
||||
size: 0,
|
||||
@@ -725,6 +770,9 @@ async fn test_large_directory_structures() {
|
||||
for j in 0..10 {
|
||||
let level2_path = format!("{}/SubDir{:02}", level1_path, j);
|
||||
large_files.push(FileIngestionInfo {
|
||||
relative_path: level2_path.clone(),
|
||||
full_path: level2_path.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: level2_path.clone(),
|
||||
name: format!("SubDir{:02}", j),
|
||||
size: 0,
|
||||
@@ -741,8 +789,12 @@ async fn test_large_directory_structures() {
|
||||
|
||||
// Add 5 files in each subdirectory
|
||||
for k in 0..5 {
|
||||
let file_path = format!("{}/file{:02}.pdf", level2_path, k);
|
||||
large_files.push(FileIngestionInfo {
|
||||
path: format!("{}/file{:02}.pdf", level2_path, k),
|
||||
relative_path: file_path.clone(),
|
||||
full_path: file_path.clone(),
|
||||
#[allow(deprecated)]
|
||||
path: file_path,
|
||||
name: format!("file{:02}.pdf", k),
|
||||
size: 1024 * (k + 1) as i64,
|
||||
mime_type: "application/pdf".to_string(),
|
||||
|
||||
@@ -608,6 +608,9 @@ fn test_special_characters_in_paths() {
|
||||
|
||||
for path in test_paths {
|
||||
let file_info = FileIngestionInfo {
|
||||
relative_path: path.to_string(),
|
||||
full_path: path.to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: path.to_string(),
|
||||
name: std::path::Path::new(path)
|
||||
.file_name()
|
||||
|
||||
@@ -39,6 +39,9 @@ fn create_mock_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
vec![
|
||||
// Root directory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -54,6 +57,9 @@ fn create_mock_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Subdirectory 1 - Changed
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Projects".to_string(),
|
||||
full_path: "/Documents/Projects".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Projects".to_string(),
|
||||
name: "Projects".to_string(),
|
||||
size: 0,
|
||||
@@ -69,6 +75,9 @@ fn create_mock_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// File in changed subdirectory
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Projects/report.pdf".to_string(),
|
||||
full_path: "/Documents/Projects/report.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Projects/report.pdf".to_string(),
|
||||
name: "report.pdf".to_string(),
|
||||
size: 1024000,
|
||||
@@ -84,6 +93,9 @@ fn create_mock_directory_structure() -> Vec<FileIngestionInfo> {
|
||||
},
|
||||
// Subdirectory 2 - Unchanged
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/Archive".to_string(),
|
||||
full_path: "/Documents/Archive".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/Archive".to_string(),
|
||||
name: "Archive".to_string(),
|
||||
size: 0,
|
||||
|
||||
@@ -99,6 +99,9 @@ async fn test_update_single_directory_tracking() {
|
||||
// Create mock files representing a shallow directory scan
|
||||
let files = vec![
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents".to_string(),
|
||||
full_path: "/Documents".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents".to_string(),
|
||||
name: "Documents".to_string(),
|
||||
size: 0,
|
||||
@@ -113,6 +116,9 @@ async fn test_update_single_directory_tracking() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/file1.pdf".to_string(),
|
||||
full_path: "/Documents/file1.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/file1.pdf".to_string(),
|
||||
name: "file1.pdf".to_string(),
|
||||
size: 1024000,
|
||||
@@ -127,6 +133,9 @@ async fn test_update_single_directory_tracking() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/file2.pdf".to_string(),
|
||||
full_path: "/Documents/file2.pdf".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/file2.pdf".to_string(),
|
||||
name: "file2.pdf".to_string(),
|
||||
size: 2048000,
|
||||
@@ -141,6 +150,9 @@ async fn test_update_single_directory_tracking() {
|
||||
metadata: None,
|
||||
},
|
||||
FileIngestionInfo {
|
||||
relative_path: "/Documents/SubFolder".to_string(),
|
||||
full_path: "/Documents/SubFolder".to_string(),
|
||||
#[allow(deprecated)]
|
||||
path: "/Documents/SubFolder".to_string(),
|
||||
name: "SubFolder".to_string(),
|
||||
size: 0,
|
||||
|
||||
340
tests/unit_webdav_url_management_tests.rs
Normal file
340
tests/unit_webdav_url_management_tests.rs
Normal file
@@ -0,0 +1,340 @@
|
||||
use readur::models::FileIngestionInfo;
|
||||
use readur::services::webdav::{WebDAVConfig, WebDAVUrlManager};
|
||||
|
||||
#[test]
fn test_nextcloud_directory_path_handling() {
    let config = WebDAVConfig {
        server_url: "https://nas.example.com".to_string(),
        username: "testuser".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Photos".to_string()],
        file_extensions: vec!["jpg".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let manager = WebDAVUrlManager::new(config);

    let full_href = "/remote.php/dav/files/testuser/Photos/Subfolder/";

    // A directory entry exactly as the Nextcloud PROPFIND parser emits it:
    // full_path holds the raw href, relative_path is still a placeholder.
    let raw_dir = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: full_href.to_string(),
        #[allow(deprecated)]
        path: full_href.to_string(),
        name: "Subfolder".to_string(),
        size: 0,
        mime_type: "".to_string(),
        last_modified: None,
        etag: "dir123".to_string(),
        is_directory: true,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let processed = manager.process_file_info(raw_dir);

    // relative_path must be correct for subdirectory scanning; full_path
    // stays as the server-returned href.
    assert_eq!(processed.relative_path, "/Photos/Subfolder/");
    assert_eq!(processed.full_path, full_href);

    // Legacy path field mirrors relative_path for backward compatibility.
    #[allow(deprecated)]
    assert_eq!(processed.path, "/Photos/Subfolder/");
}
|
||||
|
||||
/// Verifies that a Nextcloud file entry is normalized the same way a
/// directory is: clean relative path, untouched full path, and the legacy
/// `path` field aligned with the relative path.
#[test]
fn test_nextcloud_file_path_handling() {
    let cfg = WebDAVConfig {
        server_url: "https://nas.example.com".to_string(),
        username: "testuser".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Photos".to_string()],
        file_extensions: vec!["jpg".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // A file exactly as the Nextcloud WebDAV response reports it.
    let raw_file = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: "/remote.php/dav/files/testuser/Photos/image.jpg".to_string(),
        #[allow(deprecated)]
        path: "/remote.php/dav/files/testuser/Photos/image.jpg".to_string(),
        name: "image.jpg".to_string(),
        size: 1024,
        mime_type: "image/jpeg".to_string(),
        last_modified: None,
        etag: "file123".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let result = url_manager.process_file_info(raw_file);

    // Prefix stripped from relative_path; full_path preserved verbatim.
    assert_eq!(result.relative_path, "/Photos/image.jpg");
    assert_eq!(result.full_path, "/remote.php/dav/files/testuser/Photos/image.jpg");

    // The deprecated `path` field mirrors relative_path for backward compatibility.
    #[allow(deprecated)]
    assert_eq!(result.path, "/Photos/image.jpg");
}
|
||||
|
||||
/// Verifies the edge case of the WebDAV root itself: the user's DAV base
/// directory must map to the relative path "/".
#[test]
fn test_webdav_root_path_handling() {
    let cfg = WebDAVConfig {
        server_url: "https://nas.example.com".to_string(),
        username: "testuser".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/".to_string()],
        file_extensions: vec!["jpg".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // The root entry as returned by the server: the href is the DAV base
    // path with no trailing segment.
    let raw_root = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: "/remote.php/dav/files/testuser".to_string(),
        #[allow(deprecated)]
        path: "/remote.php/dav/files/testuser".to_string(),
        name: "testuser".to_string(),
        size: 0,
        mime_type: "".to_string(),
        last_modified: None,
        etag: "root123".to_string(),
        is_directory: true,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let result = url_manager.process_file_info(raw_root);

    // The DAV base collapses to "/" relatively; full_path stays as sent.
    assert_eq!(result.relative_path, "/");
    assert_eq!(result.full_path, "/remote.php/dav/files/testuser");
}
|
||||
|
||||
/// Verifies the inverse mapping: a clean relative path must round-trip into
/// a fully qualified Nextcloud URL, including the root special case.
#[test]
fn test_url_construction_from_relative_path() {
    let cfg = WebDAVConfig {
        server_url: "https://nas.example.com".to_string(),
        username: "testuser".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Photos".to_string()],
        file_extensions: vec!["jpg".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // Subdirectory scan target.
    assert_eq!(
        url_manager.relative_path_to_url("/Photos/Subfolder/"),
        "https://nas.example.com/remote.php/dav/files/testuser/Photos/Subfolder/"
    );

    // Individual file target.
    assert_eq!(
        url_manager.relative_path_to_url("/Photos/image.jpg"),
        "https://nas.example.com/remote.php/dav/files/testuser/Photos/image.jpg"
    );

    // Root maps back to the bare DAV base URL (no trailing slash).
    assert_eq!(
        url_manager.relative_path_to_url("/"),
        "https://nas.example.com/remote.php/dav/files/testuser"
    );
}
|
||||
|
||||
/// Verifies path normalization for an ownCloud server, whose WebDAV prefix
/// ("/remote.php/webdav") differs from Nextcloud's user-scoped one.
#[test]
fn test_owncloud_path_handling() {
    let cfg = WebDAVConfig {
        server_url: "https://cloud.example.com".to_string(),
        username: "user123".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Documents".to_string()],
        file_extensions: vec!["pdf".to_string()],
        timeout_seconds: 30,
        server_type: Some("owncloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // A file exactly as an ownCloud WebDAV response reports it.
    let raw_file = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: "/remote.php/webdav/Documents/report.pdf".to_string(),
        #[allow(deprecated)]
        path: "/remote.php/webdav/Documents/report.pdf".to_string(),
        name: "report.pdf".to_string(),
        size: 2048,
        mime_type: "application/pdf".to_string(),
        last_modified: None,
        etag: "pdf456".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let result = url_manager.process_file_info(raw_file);

    // ownCloud prefix stripped from relative_path; full_path kept verbatim.
    assert_eq!(result.relative_path, "/Documents/report.pdf");
    assert_eq!(result.full_path, "/remote.php/webdav/Documents/report.pdf");
}
|
||||
|
||||
/// Verifies path normalization for a generic (non-Nextcloud/ownCloud)
/// WebDAV server with its own "/webdav" URL prefix.
#[test]
fn test_generic_webdav_path_handling() {
    let cfg = WebDAVConfig {
        server_url: "https://webdav.example.com".to_string(),
        username: "user".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/files".to_string()],
        file_extensions: vec!["txt".to_string()],
        timeout_seconds: 30,
        server_type: Some("generic".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // A file exactly as the generic server's WebDAV response reports it.
    let raw_file = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: "/webdav/files/document.txt".to_string(),
        #[allow(deprecated)]
        path: "/webdav/files/document.txt".to_string(),
        name: "document.txt".to_string(),
        size: 512,
        mime_type: "text/plain".to_string(),
        last_modified: None,
        etag: "txt789".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let result = url_manager.process_file_info(raw_file);

    // Generic prefix stripped from relative_path; full_path kept verbatim.
    assert_eq!(result.relative_path, "/files/document.txt");
    assert_eq!(result.full_path, "/webdav/files/document.txt");
}
|
||||
|
||||
/// Test download path resolution for WebDAV service compatibility: the
/// normalized relative path must reconstruct the exact download URL, and
/// the original server-reported full path must survive processing.
#[test]
fn test_download_path_resolution() {
    let cfg = WebDAVConfig {
        server_url: "https://nas.example.com".to_string(),
        username: "testuser".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Photos".to_string()],
        file_extensions: vec!["jpg".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // A file entry as reported by the server, before normalization.
    let raw_file = FileIngestionInfo {
        relative_path: "TEMP".to_string(),
        full_path: "/remote.php/dav/files/testuser/Photos/image.jpg".to_string(),
        #[allow(deprecated)]
        path: "/remote.php/dav/files/testuser/Photos/image.jpg".to_string(),
        name: "image.jpg".to_string(),
        size: 1024,
        mime_type: "image/jpeg".to_string(),
        last_modified: None,
        etag: "file123".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    let result = url_manager.process_file_info(raw_file);

    // The clean relative path drives download operations...
    assert_eq!(result.relative_path, "/Photos/image.jpg");

    // ...and round-trips into the full download URL.
    let download_url = url_manager.relative_path_to_url(&result.relative_path);
    assert_eq!(
        download_url,
        "https://nas.example.com/remote.php/dav/files/testuser/Photos/image.jpg"
    );

    // The original server response path is preserved unchanged.
    assert_eq!(result.full_path, "/remote.php/dav/files/testuser/Photos/image.jpg");
}
|
||||
|
||||
/// Test using the actual Nextcloud XML fixture to ensure our path handling
/// works with real data: every parsed entry must leave the URL manager with
/// a clean relative path and a prefix-bearing full path.
#[test]
fn test_with_nextcloud_fixture_data() {
    use readur::webdav_xml_parser::parse_propfind_response_with_directories;

    let cfg = WebDAVConfig {
        server_url: "https://nas.jonathonfuller.com".to_string(),
        username: "perf3ct".to_string(),
        password: "password".to_string(),
        watch_folders: vec!["/Photos".to_string()],
        file_extensions: vec!["jpg".to_string(), "jpeg".to_string(), "png".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };
    let url_manager = WebDAVUrlManager::new(cfg);

    // Load and parse the captured real-world PROPFIND response.
    let xml_content =
        std::fs::read_to_string("tests/fixtures/webdav/nextcloud_photos_propfind_response.xml")
            .expect("Should be able to read the Nextcloud fixture file");
    let parsed_items = parse_propfind_response_with_directories(&xml_content)
        .expect("Should be able to parse the Nextcloud XML");

    // Run everything through the URL manager.
    let processed_items = url_manager.process_file_infos(parsed_items);
    assert!(!processed_items.is_empty(), "Should have parsed some items from the fixture");

    for item in &processed_items {
        // The parser's placeholder must have been replaced...
        assert_ne!(item.relative_path, "TEMP", "All items should have processed relative_path");
        assert!(item.relative_path.starts_with("/"), "Relative paths should start with /");

        // ...and the WebDAV prefix belongs only on the full path.
        assert!(!item.relative_path.contains("/remote.php/dav/files/"),
            "Relative path should not contain WebDAV prefix: {}", item.relative_path);
        assert!(item.full_path.contains("/remote.php/dav/files/"),
            "Full path should contain WebDAV prefix: {}", item.full_path);
    }

    // Split the results into directories and plain files in one pass.
    let (directories, files): (Vec<_>, Vec<_>) =
        processed_items.iter().partition(|item| item.is_directory);

    println!("Parsed {} files and {} directories from fixture", files.len(), directories.len());

    // The Photos folder fixture must contain at least some files...
    assert!(!files.is_empty(), "Should have found some files");

    // ...and all of them must live under the watched /Photos tree.
    for file in &files {
        assert!(file.relative_path.starts_with("/Photos/") || file.relative_path == "/Photos",
            "File relative path should be under Photos: {}", file.relative_path);
    }
}
|
||||
Reference in New Issue
Block a user