mirror of https://github.com/readur/readur.git

feat(docs): update docs for S3 backend implementation
@@ -15,7 +15,7 @@ A powerful, modern document management system built with Rust and React. Readur
| 🔍 **Advanced OCR** | Automatic text extraction using Tesseract for searchable document content | [OCR Optimization](docs/dev/OCR_OPTIMIZATION_GUIDE.md) |
| 🌍 **Multi-Language OCR** | Process documents in multiple languages simultaneously with automatic language detection | [Multi-Language OCR Guide](docs/multi-language-ocr-guide.md) |
| 🔎 **Powerful Search** | PostgreSQL full-text search with multiple modes (simple, phrase, fuzzy, boolean) | [Advanced Search Guide](docs/advanced-search.md) |
-| 🔗 **Multi-Source Sync** | WebDAV, Local Folders, and S3-compatible storage integration | [Sources Guide](docs/sources-guide.md) |
+| 🔗 **Multi-Source Sync** | WebDAV, Local Folders, and S3-compatible storage integration | [Sources Guide](docs/sources-guide.md), [S3 Storage Guide](docs/s3-storage-guide.md) |
| 🏷️ **Labels & Organization** | Comprehensive tagging system with color-coding and hierarchical structure | [Labels & Organization](docs/labels-and-organization.md) |
| 👁️ **Folder Monitoring** | Non-destructive file watching with intelligent sync scheduling | [Watch Folder Guide](docs/WATCH_FOLDER.md) |
| 📊 **Health Monitoring** | Proactive source validation and system health tracking | [Health Monitoring Guide](docs/health-monitoring-guide.md) |
@@ -51,10 +51,12 @@ open http://localhost:8000
### Getting Started

- [📦 Installation Guide](docs/installation.md) - Docker & manual installation instructions
- [🔧 Configuration](docs/configuration.md) - Environment variables and settings
+- [⚙️ Configuration Reference](docs/configuration-reference.md) - Complete configuration options reference
- [📖 User Guide](docs/user-guide.md) - How to use Readur effectively

### Core Features

- [🔗 Sources Guide](docs/sources-guide.md) - WebDAV, Local Folders, and S3 integration
+- [☁️ S3 Storage Guide](docs/s3-storage-guide.md) - Complete S3 and S3-compatible storage setup
- [👥 User Management](docs/user-management-guide.md) - Authentication, roles, and administration
- [🏷️ Labels & Organization](docs/labels-and-organization.md) - Document tagging and categorization
- [🔎 Advanced Search](docs/advanced-search.md) - Search modes, syntax, and optimization
@@ -65,6 +67,8 @@ open http://localhost:8000
- [🚀 Deployment Guide](docs/deployment.md) - Production deployment, SSL, monitoring
- [🔄 Reverse Proxy Setup](docs/REVERSE_PROXY.md) - Nginx, Traefik, and more
- [📁 Watch Folder Guide](docs/WATCH_FOLDER.md) - Automatic document ingestion
+- [🔄 Migration Guide](docs/migration-guide.md) - Migrate from local storage to S3
+- [🛠️ S3 Troubleshooting](docs/s3-troubleshooting.md) - Debug and resolve S3 storage issues

### Development

- [🏗️ Developer Documentation](docs/dev/) - Architecture, development setup, testing
docs/configuration-reference.md (new file, 383 lines)
@@ -0,0 +1,383 @@
# Configuration Reference

## Complete Configuration Options for Readur

This document provides a comprehensive reference for all configuration options available in Readur, including the new S3 storage backend and per-user watch directories introduced in version 2.5.4.

## Environment Variables

### Core Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `DATABASE_URL` | String | `postgresql://readur:readur@localhost/readur` | PostgreSQL connection string |
| `SERVER_ADDRESS` | String | `0.0.0.0:8000` | Server bind address (host:port) |
| `SERVER_HOST` | String | `0.0.0.0` | Server host (used if `SERVER_ADDRESS` is not set) |
| `SERVER_PORT` | String | `8000` | Server port (used if `SERVER_ADDRESS` is not set) |
| `JWT_SECRET` | String | `your-secret-key` | Secret key for JWT token generation (CHANGE IN PRODUCTION) |
| `UPLOAD_PATH` | String | `./uploads` | Local directory for temporary file uploads |
| `ALLOWED_FILE_TYPES` | String | `pdf,txt,doc,docx,png,jpg,jpeg` | Comma-separated list of allowed file extensions |

### S3 Storage Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `S3_ENABLED` | Boolean | `false` | Enable S3 storage backend |
| `S3_BUCKET_NAME` | String | - | S3 bucket name (required when `S3_ENABLED=true`) |
| `S3_ACCESS_KEY_ID` | String | - | AWS Access Key ID (required when `S3_ENABLED=true`) |
| `S3_SECRET_ACCESS_KEY` | String | - | AWS Secret Access Key (required when `S3_ENABLED=true`) |
| `S3_REGION` | String | `us-east-1` | AWS region for S3 bucket |
| `S3_ENDPOINT` | String | - | Custom S3 endpoint URL (for S3-compatible services) |

### Watch Directory Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `WATCH_FOLDER` | String | `./watch` | Global watch directory for file ingestion |
| `USER_WATCH_BASE_DIR` | String | `./user_watch` | Base directory for per-user watch folders |
| `ENABLE_PER_USER_WATCH` | Boolean | `false` | Enable the per-user watch directories feature |
| `WATCH_INTERVAL_SECONDS` | Integer | `60` | Interval between watch folder scans |
| `FILE_STABILITY_CHECK_MS` | Integer | `2000` | Time to wait (in milliseconds) for file size to stabilize |
| `MAX_FILE_AGE_HOURS` | Integer | `24` | Maximum age of files to process |

### OCR Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `OCR_LANGUAGE` | String | `eng` | Tesseract language code for OCR |
| `CONCURRENT_OCR_JOBS` | Integer | `4` | Number of concurrent OCR jobs |
| `OCR_TIMEOUT_SECONDS` | Integer | `300` | Timeout for OCR processing per document |
| `MAX_FILE_SIZE_MB` | Integer | `50` | Maximum file size for processing |

### Performance Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `MEMORY_LIMIT_MB` | Integer | `512` | Memory limit for processing operations |
| `CPU_PRIORITY` | String | `normal` | CPU priority (low, normal, high) |

### OIDC Authentication Configuration

| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `OIDC_ENABLED` | Boolean | `false` | Enable OpenID Connect authentication |
| `OIDC_CLIENT_ID` | String | - | OIDC client ID |
| `OIDC_CLIENT_SECRET` | String | - | OIDC client secret |
| `OIDC_ISSUER_URL` | String | - | OIDC issuer URL |
| `OIDC_REDIRECT_URI` | String | - | OIDC redirect URI |
## Configuration Examples

### Basic Local Storage Setup

```bash
# .env file for local storage
DATABASE_URL=postgresql://readur:password@localhost/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this
UPLOAD_PATH=./uploads
WATCH_FOLDER=./watch
ALLOWED_FILE_TYPES=pdf,txt,doc,docx,png,jpg,jpeg,tiff,bmp
OCR_LANGUAGE=eng
CONCURRENT_OCR_JOBS=4
```

### S3 Storage with AWS

```bash
# .env file for AWS S3
DATABASE_URL=postgresql://readur:password@localhost/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this

# S3 Configuration
S3_ENABLED=true
S3_BUCKET_NAME=readur-production
S3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
S3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
S3_REGION=us-west-2

# Still needed for temporary uploads
UPLOAD_PATH=./temp_uploads
```

### S3 with MinIO

```bash
# .env file for MinIO
DATABASE_URL=postgresql://readur:password@localhost/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this

# MinIO S3 Configuration
S3_ENABLED=true
S3_BUCKET_NAME=readur-bucket
S3_ACCESS_KEY_ID=minioadmin
S3_SECRET_ACCESS_KEY=minioadmin
S3_REGION=us-east-1
S3_ENDPOINT=http://minio:9000

UPLOAD_PATH=./temp_uploads
```

### Per-User Watch Directories

```bash
# .env file with per-user watch enabled
DATABASE_URL=postgresql://readur:password@localhost/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this

# Watch Directory Configuration
WATCH_FOLDER=./global_watch
USER_WATCH_BASE_DIR=/data/user_watches
ENABLE_PER_USER_WATCH=true
WATCH_INTERVAL_SECONDS=30
FILE_STABILITY_CHECK_MS=3000
MAX_FILE_AGE_HOURS=48
```

### High-Performance Configuration

```bash
# .env file for high-performance setup
DATABASE_URL=postgresql://readur:password@db-server/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this

# S3 for scalable storage
S3_ENABLED=true
S3_BUCKET_NAME=readur-highperf
S3_ACCESS_KEY_ID=your-key
S3_SECRET_ACCESS_KEY=your-secret
S3_REGION=us-east-1

# Performance tuning
CONCURRENT_OCR_JOBS=8
OCR_TIMEOUT_SECONDS=600
MAX_FILE_SIZE_MB=200
MEMORY_LIMIT_MB=2048
CPU_PRIORITY=high

# Faster watch scanning
WATCH_INTERVAL_SECONDS=10
FILE_STABILITY_CHECK_MS=1000
```

### OIDC with S3 Storage

```bash
# .env file for OIDC authentication with S3
DATABASE_URL=postgresql://readur:password@localhost/readur
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-secret-key-change-this

# OIDC Configuration
OIDC_ENABLED=true
OIDC_CLIENT_ID=readur-client
OIDC_CLIENT_SECRET=your-oidc-secret
OIDC_ISSUER_URL=https://auth.example.com
OIDC_REDIRECT_URI=https://readur.example.com/api/auth/oidc/callback

# S3 Storage
S3_ENABLED=true
S3_BUCKET_NAME=readur-oidc
S3_ACCESS_KEY_ID=your-key
S3_SECRET_ACCESS_KEY=your-secret
S3_REGION=eu-west-1
```
## Docker Configuration

### Docker Compose with Environment File

```yaml
version: '3.8'

services:
  readur:
    image: readur:latest
    env_file: .env
    ports:
      - "8000:8000"
    volumes:
      - ./uploads:/app/uploads
      - ./watch:/app/watch
      - ./user_watch:/app/user_watch
    depends_on:
      - postgres
      - minio

  postgres:
    image: postgres:15
    environment:
      POSTGRES_USER: readur
      POSTGRES_PASSWORD: password
      POSTGRES_DB: readur
    volumes:
      - postgres_data:/var/lib/postgresql/data

  minio:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio_data:/data

volumes:
  postgres_data:
  minio_data:
```
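With the compose file and `.env` in place, bring the stack up in the usual way:

```bash
docker compose up -d
```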
### Kubernetes ConfigMap

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: readur-config
data:
  DATABASE_URL: "postgresql://readur:password@postgres-service/readur"
  SERVER_ADDRESS: "0.0.0.0:8000"
  S3_ENABLED: "true"
  S3_BUCKET_NAME: "readur-k8s"
  S3_REGION: "us-east-1"
  ENABLE_PER_USER_WATCH: "true"
  USER_WATCH_BASE_DIR: "/data/user_watches"
  CONCURRENT_OCR_JOBS: "6"
  MAX_FILE_SIZE_MB: "100"
```
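The ConfigMap above deliberately omits credentials. One way to supply them, sketched below with illustrative names, is a Kubernetes Secret referenced from the Readur Deployment via `envFrom`:

```bash
# Keep JWT and S3 credentials out of the ConfigMap (names illustrative)
kubectl create secret generic readur-secrets \
  --from-literal=JWT_SECRET=your-secure-secret-key \
  --from-literal=S3_ACCESS_KEY_ID=your-key \
  --from-literal=S3_SECRET_ACCESS_KEY=your-secret
```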
## Configuration Validation

### Required Variables

When S3 is enabled, the following variables are required:
- `S3_BUCKET_NAME`
- `S3_ACCESS_KEY_ID`
- `S3_SECRET_ACCESS_KEY`

When OIDC is enabled, the following variables are required:
- `OIDC_CLIENT_ID`
- `OIDC_CLIENT_SECRET`
- `OIDC_ISSUER_URL`
- `OIDC_REDIRECT_URI`

### Validation Script

```bash
#!/bin/bash
# validate-config.sh

# Check required variables
check_var() {
    if [ -z "${!1}" ]; then
        echo "ERROR: $1 is not set"
        exit 1
    fi
}

# Load environment
source .env

# Always required
check_var DATABASE_URL
check_var JWT_SECRET

# Check S3 requirements
if [ "$S3_ENABLED" = "true" ]; then
    check_var S3_BUCKET_NAME
    check_var S3_ACCESS_KEY_ID
    check_var S3_SECRET_ACCESS_KEY
fi

# Check OIDC requirements
if [ "$OIDC_ENABLED" = "true" ]; then
    check_var OIDC_CLIENT_ID
    check_var OIDC_CLIENT_SECRET
    check_var OIDC_ISSUER_URL
    check_var OIDC_REDIRECT_URI
fi

echo "Configuration valid!"
```
## Migration from Previous Versions

### From 2.5.3 to 2.5.4

New configuration options in 2.5.4:

```bash
# New S3 storage options
S3_ENABLED=false
S3_BUCKET_NAME=
S3_ACCESS_KEY_ID=
S3_SECRET_ACCESS_KEY=
S3_REGION=us-east-1
S3_ENDPOINT=

# New per-user watch directories
USER_WATCH_BASE_DIR=./user_watch
ENABLE_PER_USER_WATCH=false
```

No changes are required for existing installations unless you want to enable the new features.
## Troubleshooting Configuration

### Common Issues

1. **S3 Connection Failed**
   - Verify that `S3_BUCKET_NAME` exists
   - Check that `S3_ACCESS_KEY_ID` and `S3_SECRET_ACCESS_KEY` are correct
   - Ensure `S3_REGION` matches the bucket region
   - For S3-compatible services, verify that `S3_ENDPOINT` is correct

2. **Per-User Watch Not Working**
   - Ensure `ENABLE_PER_USER_WATCH=true`
   - Verify that `USER_WATCH_BASE_DIR` exists and is writable
   - Check directory permissions

3. **JWT Authentication Failed**
   - Ensure `JWT_SECRET` is consistent across restarts
   - Use a strong, unique secret in production

### Debug Mode

Enable debug logging:

```bash
export RUST_LOG=debug
export RUST_BACKTRACE=1
```

### Configuration Testing

Test the S3 configuration:

```bash
aws s3 ls s3://$S3_BUCKET_NAME --profile readur-test
```

Test the database connection:

```bash
psql $DATABASE_URL -c "SELECT version();"
```
## Security Considerations

1. **Never commit `.env` files to version control**
2. **Use strong, unique values for JWT_SECRET** (see the command below)
3. **Rotate S3 access keys regularly**
4. **Use IAM roles when running on AWS**
5. **Enable S3 bucket encryption**
6. **Restrict S3 bucket policies to minimum required permissions**
7. **Use HTTPS for S3_ENDPOINT when possible**
8. **Implement network security groups for database access**
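For item 2, a suitably strong secret can be generated with standard tooling:

```bash
# Generate a random 48-byte, base64-encoded JWT secret
openssl rand -base64 48
```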
@@ -2,6 +2,8 @@
This guide covers all configuration options available in Readur through environment variables and runtime settings.

+> 📖 **See Also**: For a complete reference of all configuration options including S3 storage and advanced settings, see the [Configuration Reference](configuration-reference.md).

## Table of Contents

- [Environment Variables](#environment-variables)
@@ -2,6 +2,8 @@
This guide covers production deployment strategies, SSL setup, monitoring, backups, and best practices for running Readur in production.

+> 🆕 **New in 2.5.4**: S3 storage backend support! See the [Migration Guide](migration-guide.md) to migrate from local storage to S3, and the [S3 Storage Guide](s3-storage-guide.md) for complete setup instructions.

## Table of Contents

- [Production Docker Compose](#production-docker-compose)
@@ -7,6 +7,7 @@ This directory contains technical documentation for developers working on Readur
### 🏗️ Architecture & Design
- [**Architecture Overview**](architecture.md) - System design, components, and data flow
- [**Database Guardrails**](DATABASE_GUARDRAILS.md) - Concurrency safety and database best practices
+- [**Storage Architecture**](../s3-storage-guide.md) - S3 and local storage backend implementation

### 🛠️ Development
- [**Development Guide**](development.md) - Setup, contributing, code style guidelines
@@ -16,6 +17,8 @@ This directory contains technical documentation for developers working on Readur
- [**OCR Optimization**](OCR_OPTIMIZATION_GUIDE.md) - Performance tuning and best practices
- [**Queue Improvements**](QUEUE_IMPROVEMENTS.md) - Background job processing architecture
- [**Deployment Summary**](DEPLOYMENT_SUMMARY.md) - Technical deployment overview
+- [**Migration Guide**](../migration-guide.md) - Storage migration procedures
+- [**S3 Troubleshooting**](../s3-troubleshooting.md) - Debugging S3 storage issues

## 🚀 Quick Start for Developers
@@ -28,8 +31,10 @@ This directory contains technical documentation for developers working on Readur
- [Installation Guide](../installation.md) - How to install and run Readur
- [Configuration Guide](../configuration.md) - Environment variables and settings
+- [Configuration Reference](../configuration-reference.md) - Complete configuration options
- [User Guide](../user-guide.md) - How to use Readur features
- [API Reference](../api-reference.md) - REST API documentation
+- [New Features in 2.5.4](../new-features-2.5.4.md) - Latest features and improvements

## 🤝 Contributing
@@ -32,10 +32,28 @@ Readur provides an intuitive drag-and-drop file upload system that supports mult
## Processing Pipeline

1. **File Validation** - Verify file type and size limits
-2. **Storage** - Secure file storage with backup
-3. **OCR Processing** - Automatic text extraction using Tesseract
-4. **Indexing** - Full-text search indexing in PostgreSQL
-5. **Metadata Extraction** - File properties and document information
+2. **Enhanced File Type Detection** (v2.5.4+) - Magic number detection using the Rust `infer` crate
+3. **Storage** - Secure file storage with backup (local or S3)
+4. **OCR Processing** - Automatic text extraction using Tesseract
+5. **Indexing** - Full-text search indexing in PostgreSQL
+6. **Metadata Extraction** - File properties and document information

+### Enhanced File Type Detection (v2.5.4+)
+
+Readur now uses content-based file type detection rather than relying solely on file extensions:
+
+- **Magic Number Detection**: Identifies files by their content signature, not just their extension
+- **Broader Format Support**: Automatically recognizes more document and image formats
+- **Security Enhancement**: Prevents malicious files with incorrect extensions from being processed
+- **Performance**: Fast, native Rust implementation with minimal overhead
+
+**Automatically Detected Formats:**
+- Documents: PDF, DOCX, XLSX, PPTX, ODT, ODS, ODP
+- Images: PNG, JPEG, GIF, BMP, TIFF, WebP, HEIC
+- Archives: ZIP, RAR, 7Z, TAR, GZ
+- Text: TXT, MD, CSV, JSON, XML
+
+This enhancement ensures files are correctly identified even when extensions are missing or incorrect, improving both reliability and security.

## Best Practices
docs/migration-guide.md (new file, 471 lines)
@@ -0,0 +1,471 @@
# Migration Guide: Local Storage to S3

## Overview

This guide provides step-by-step instructions for migrating your Readur installation from local filesystem storage to S3 storage. The migration process is designed to be safe, resumable, and reversible.

## Pre-Migration Checklist

### 1. System Requirements

- [ ] Readur compiled with the S3 feature: `cargo build --release --features s3`
- [ ] Sufficient disk space for temporary operations (at least 2x the largest file)
- [ ] Network bandwidth for uploading all documents to S3
- [ ] AWS CLI installed and configured (for verification)

### 2. S3 Prerequisites

- [ ] S3 bucket created and accessible
- [ ] IAM user with appropriate permissions
- [ ] Access keys generated and tested
- [ ] Bucket region identified
- [ ] Encryption settings configured (if required)
- [ ] Lifecycle policies reviewed

### 3. Backup Requirements

- [ ] Database backed up
- [ ] Local files backed up (optional but recommended)
- [ ] Configuration files saved
- [ ] Document count and total size recorded
## Migration Process

### Step 1: Prepare Environment

#### 1.1 Backup Database

```bash
# Create a timestamped backup in custom format so pg_restore can inspect it
# (pg_restore cannot read plain-SQL dumps)
BACKUP_DATE=$(date +%Y%m%d_%H%M%S)
pg_dump -Fc $DATABASE_URL > readur_backup_${BACKUP_DATE}.dump

# Verify backup
pg_restore --list readur_backup_${BACKUP_DATE}.dump | head -20
```
#### 1.2 Document Current State

```sql
-- Record current statistics
SELECT
    COUNT(*) as total_documents,
    SUM(file_size) / 1024.0 / 1024.0 / 1024.0 as total_size_gb,
    COUNT(DISTINCT user_id) as unique_users
FROM documents;

-- Save document list
\copy (SELECT id, filename, file_path, file_size FROM documents) TO 'documents_pre_migration.csv' CSV HEADER;
```
#### 1.3 Calculate Migration Time

```bash
# Estimate migration duration
TOTAL_SIZE_GB=100       # From the query above
UPLOAD_SPEED_MBPS=100   # Your upload speed
ESTIMATED_HOURS=$(echo "scale=2; ($TOTAL_SIZE_GB * 1024 * 8) / ($UPLOAD_SPEED_MBPS * 3600)" | bc)
echo "Estimated migration time: $ESTIMATED_HOURS hours"
```
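As a worked example with the numbers above, 100 GB at 100 Mbps comes to (100 × 1024 × 8) / (100 × 3600), roughly 2.3 hours of pure transfer time, before any processing overhead.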
### Step 2: Configure S3

#### 2.1 Create S3 Bucket

```bash
# Create bucket (us-east-1 must omit LocationConstraint; for any other
# region add: --create-bucket-configuration LocationConstraint=<region>)
aws s3api create-bucket \
    --bucket readur-production \
    --region us-east-1

# Enable versioning
aws s3api put-bucket-versioning \
    --bucket readur-production \
    --versioning-configuration Status=Enabled

# Enable encryption
aws s3api put-bucket-encryption \
    --bucket readur-production \
    --server-side-encryption-configuration '{
        "Rules": [{
            "ApplyServerSideEncryptionByDefault": {
                "SSEAlgorithm": "AES256"
            }
        }]
    }'
```
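As an optional hardening step (not required by Readur itself), block all public access to the bucket:

```bash
# Block public ACLs and policies on the bucket
aws s3api put-public-access-block \
    --bucket readur-production \
    --public-access-block-configuration \
    BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true
```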
#### 2.2 Set Up IAM User

```bash
# Create policy file
cat > readur-s3-policy.json << 'EOF'
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:ListBucket",
                "s3:GetBucketLocation"
            ],
            "Resource": "arn:aws:s3:::readur-production"
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:DeleteObject",
                "s3:GetObjectVersion",
                "s3:PutObjectAcl"
            ],
            "Resource": "arn:aws:s3:::readur-production/*"
        }
    ]
}
EOF

# Create IAM user and attach policy
aws iam create-user --user-name readur-s3-user
aws iam put-user-policy \
    --user-name readur-s3-user \
    --policy-name ReadurS3Access \
    --policy-document file://readur-s3-policy.json

# Generate access keys
aws iam create-access-key --user-name readur-s3-user > s3-credentials.json
```
#### 2.3 Configure Readur for S3

```bash
# Add to .env file
cat >> .env << 'EOF'
# S3 Configuration
S3_ENABLED=true
S3_BUCKET_NAME=readur-production
S3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
S3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
S3_REGION=us-east-1
EOF

# Test configuration
source .env
aws s3 ls s3://$S3_BUCKET_NAME --region $S3_REGION
```
### Step 3: Run Migration

#### 3.1 Dry Run

```bash
# Preview migration without making changes
cargo run --bin migrate_to_s3 --features s3 -- --dry-run

# Expected output:
# 🔍 DRY RUN - Would migrate the following files:
#   - document1.pdf (User: 123e4567..., Size: 2.5 MB)
#   - report.docx (User: 987fcdeb..., Size: 1.2 MB)
# 💡 Run without --dry-run to perform actual migration
```

#### 3.2 Partial Migration (Testing)

```bash
# Migrate only 10 files first
cargo run --bin migrate_to_s3 --features s3 -- --limit 10

# Verify migrated files
aws s3 ls s3://$S3_BUCKET_NAME/documents/ --recursive | head -20

# Check database updates
psql $DATABASE_URL -c "SELECT id, filename, file_path FROM documents WHERE file_path LIKE 's3://%' LIMIT 10;"
```
#### 3.3 Full Migration

```bash
# Run full migration with progress tracking
cargo run --bin migrate_to_s3 --features s3 -- \
    --enable-rollback \
    2>&1 | tee migration_$(date +%Y%m%d_%H%M%S).log

# Monitor progress in another terminal
watch -n 5 'cat migration_state.json | jq "{processed: .processed_files, total: .total_files, failed: .failed_migrations | length}"'
```

#### 3.4 Migration with Local File Deletion

```bash
# Only after verifying successful migration
cargo run --bin migrate_to_s3 --features s3 -- \
    --delete-local \
    --enable-rollback
```
### Step 4: Verify Migration

#### 4.1 Database Verification

```sql
-- Check migration completeness
SELECT
    COUNT(*) FILTER (WHERE file_path LIKE 's3://%') as s3_documents,
    COUNT(*) FILTER (WHERE file_path NOT LIKE 's3://%') as local_documents,
    COUNT(*) as total_documents
FROM documents;

-- Find any failed migrations
SELECT id, filename, file_path
FROM documents
WHERE file_path NOT LIKE 's3://%'
ORDER BY created_at DESC
LIMIT 20;

-- Verify path format
SELECT
    substring(file_path from 1 for 50) as path_prefix,
    COUNT(*) as document_count
FROM documents
GROUP BY path_prefix
ORDER BY document_count DESC;
```
#### 4.2 S3 Verification

```bash
# Count objects in S3
aws s3 ls s3://$S3_BUCKET_NAME/documents/ --recursive --summarize | grep "Total Objects"

# Verify file structure
aws s3 ls s3://$S3_BUCKET_NAME/ --recursive | head -50

# Check a specific document
DOCUMENT_ID="123e4567-e89b-12d3-a456-426614174000"
aws s3 ls s3://$S3_BUCKET_NAME/documents/ --recursive | grep $DOCUMENT_ID
```
#### 4.3 Application Testing

```bash
# Restart Readur with the S3 configuration
systemctl restart readur

# Test document upload
curl -X POST https://readur.example.com/api/documents \
    -H "Authorization: Bearer $TOKEN" \
    -F "file=@test-document.pdf"

# Test document retrieval
curl -X GET https://readur.example.com/api/documents/$DOCUMENT_ID/download \
    -H "Authorization: Bearer $TOKEN" \
    -o downloaded-test.pdf

# Verify the downloaded file matches the original
md5sum test-document.pdf downloaded-test.pdf
```
### Step 5: Post-Migration Tasks

#### 5.1 Update Backup Procedures

```bash
# Create S3 backup script
cat > backup-s3.sh << 'EOF'
#!/bin/bash
# Back up S3 data to another bucket
BACKUP_BUCKET="readur-backup-$(date +%Y%m%d)"
aws s3api create-bucket --bucket $BACKUP_BUCKET --region us-east-1
aws s3 sync s3://readur-production s3://$BACKUP_BUCKET --storage-class GLACIER
EOF

chmod +x backup-s3.sh
```
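The script can then be scheduled with cron; the path and schedule below are illustrative:

```bash
# Run the S3 backup every Sunday at 02:00 (add via crontab -e)
0 2 * * 0 /opt/readur/backup-s3.sh
```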
#### 5.2 Set Up Monitoring

```bash
# Create CloudWatch dashboard
aws cloudwatch put-dashboard \
    --dashboard-name ReadurS3 \
    --dashboard-body file://cloudwatch-dashboard.json
```
#### 5.3 Clean Up Local Storage

```bash
# After confirming successful migration, review the old
# upload directories before removing them (CAREFUL!)
du -sh ./uploads ./thumbnails ./processed_images

# Archive before deletion
tar -czf pre_migration_files_$(date +%Y%m%d).tar.gz ./uploads ./thumbnails ./processed_images

# Remove directories
rm -rf ./uploads/* ./thumbnails/* ./processed_images/*
```
## Rollback Procedures

### Automatic Rollback

If migration fails with `--enable-rollback`, the rollback will automatically:

1. Restore database paths to their original values
2. Delete uploaded S3 objects
3. Save the rollback state to `rollback_errors.json`
### Manual Rollback

#### Step 1: Restore Database

```sql
-- Revert file paths to local
UPDATE documents
SET file_path = regexp_replace(file_path, '^s3://[^/]+/', './uploads/')
WHERE file_path LIKE 's3://%';

-- Or restore from the backup created in step 1.1 (run from the shell):
-- pg_restore --clean -d "$DATABASE_URL" readur_backup_${BACKUP_DATE}.dump
```
#### Step 2: Remove S3 Objects

```bash
# Delete all migrated objects
aws s3 rm s3://$S3_BUCKET_NAME/documents/ --recursive
aws s3 rm s3://$S3_BUCKET_NAME/thumbnails/ --recursive
aws s3 rm s3://$S3_BUCKET_NAME/processed_images/ --recursive
```

#### Step 3: Restore Configuration

```bash
# Disable S3 in configuration
sed -i 's/S3_ENABLED=true/S3_ENABLED=false/' .env

# Restart application
systemctl restart readur
```
## Troubleshooting Migration Issues

### Issue: Migration Hangs

```bash
# Check current progress
tail -f migration_*.log

# View migration state
cat migration_state.json | jq '.processed_files, .failed_migrations'

# Resume from the last successful document
LAST_ID=$(cat migration_state.json | jq -r '.completed_migrations[-1].document_id')
cargo run --bin migrate_to_s3 --features s3 -- --resume-from $LAST_ID
```

### Issue: Permission Errors

```bash
# Verify IAM permissions with a test object
echo "test" > /tmp/test.txt
aws s3api put-object \
    --bucket $S3_BUCKET_NAME \
    --key test.txt \
    --body /tmp/test.txt

# Check bucket policy
aws s3api get-bucket-policy --bucket $S3_BUCKET_NAME
```

### Issue: Network Timeouts

```bash
# Use screen/tmux so long migrations survive disconnects
screen -S migration
cargo run --bin migrate_to_s3 --features s3

# Detach: Ctrl+A, D
# Reattach: screen -r migration
```
## Migration Optimization

### Parallel Upload

```bash
# Split migration by user and run the jobs in parallel
for USER_ID in $(psql $DATABASE_URL -t -c "SELECT DISTINCT user_id FROM documents"); do
    cargo run --bin migrate_to_s3 --features s3 -- --user-id $USER_ID &
done
wait  # Block until all background migrations have finished
```
### Bandwidth Management

```bash
# Limit upload bandwidth if needed; trickle's rate is in KB/s
# (10240 KB/s is roughly 10 MB/s)
trickle -u 10240 cargo run --bin migrate_to_s3 --features s3
```
### Progress Monitoring

```bash
# Real-time statistics
watch -n 10 'echo "=== Migration Progress ===" && \
cat migration_state.json | jq "{
    progress_pct: ((.processed_files / .total_files) * 100),
    processed: .processed_files,
    total: .total_files,
    failed: .failed_migrations | length,
    elapsed: now - (.started_at | fromdate),
    rate_per_hour: (.processed_files / ((now - (.started_at | fromdate)) / 3600))
}"'
```
## Post-Migration Validation

### Data Integrity Check

```bash
# List S3 object checksums (ETags)
aws s3api list-objects-v2 --bucket $S3_BUCKET_NAME --prefix documents/ \
    --query 'Contents[].{Key:Key, ETag:ETag}' \
    --output json > s3_checksums.json

# Compare with the database
psql $DATABASE_URL -c "SELECT id, file_path, file_hash FROM documents" > db_checksums.txt
```
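Note that an S3 ETag is only a plain MD5 checksum for single-part uploads; objects above the multipart threshold carry composite ETags, so for large files compare sizes or the application-level `file_hash` instead.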
### Performance Testing

```bash
# Benchmark S3 retrieval
time for i in {1..100}; do
    curl -s https://readur.example.com/api/documents/random/download > /dev/null
done
```
## Success Criteria

Migration is considered successful when:

- [ ] All documents have S3 paths in the database
- [ ] No failed migrations remain in `migration_state.json`
- [ ] The application can upload new documents to S3
- [ ] The application can retrieve existing documents from S3
- [ ] Thumbnails and processed images are accessible
- [ ] Performance meets acceptable thresholds
- [ ] Backup procedures are updated and tested

## Next Steps

1. Monitor S3 costs and usage
2. Implement a CloudFront CDN if needed
3. Set up cross-region replication for disaster recovery
4. Configure S3 lifecycle policies for cost optimization
5. Update documentation and runbooks
docs/s3-storage-guide.md (new file, 496 lines)
@@ -0,0 +1,496 @@
# S3 Storage Backend Guide for Readur

## Overview

Starting with version 2.5.4, Readur supports Amazon S3 and S3-compatible storage services as an alternative to local filesystem storage. The implementation covers AWS S3, MinIO, Wasabi, Backblaze B2, and other S3-compatible services, and includes automatic multipart upload for files larger than 100MB, structured storage paths with year/month organization, and automatic retries with exponential backoff.

This guide provides comprehensive instructions for configuring, deploying, and managing Readur with S3 storage.

### Key Benefits

- **Scalability**: Unlimited storage capacity without local disk constraints
- **Durability**: 99.999999999% (11 9's) durability with AWS S3
- **Cost-Effective**: Pay only for what you use with various storage tiers
- **Global Access**: Access documents from anywhere with proper credentials
- **Backup**: Built-in versioning and cross-region replication capabilities

## Table of Contents

1. [Prerequisites](#prerequisites)
2. [Configuration](#configuration)
3. [Migration from Local Storage](#migration-from-local-storage)
4. [Storage Structure](#storage-structure)
5. [Performance Optimization](#performance-optimization)
6. [Troubleshooting](#troubleshooting)
7. [Best Practices](#best-practices)
## Prerequisites

Before configuring S3 storage, ensure you have:

1. **S3 Bucket Access**
   - An AWS S3 bucket or S3-compatible service (MinIO, Wasabi, Backblaze B2, etc.)
   - Access Key ID and Secret Access Key with appropriate permissions
   - Bucket name and region information

2. **Required S3 Permissions**
   ```json
   {
     "Version": "2012-10-17",
     "Statement": [
       {
         "Effect": "Allow",
         "Action": [
           "s3:PutObject",
           "s3:GetObject",
           "s3:DeleteObject",
           "s3:ListBucket",
           "s3:AbortMultipartUpload",
           "s3:ListBucketMultipartUploads",
           "s3:ListMultipartUploadParts"
         ],
         "Resource": [
           "arn:aws:s3:::your-bucket-name/*",
           "arn:aws:s3:::your-bucket-name"
         ]
       }
     ]
   }
   ```
   Note that the multipart API calls (CreateMultipartUpload, UploadPart, CompleteMultipartUpload) and the Head operations are authorized by `s3:PutObject`, `s3:GetObject`, and `s3:ListBucket`; they are not separate IAM actions.

3. **Readur Build Requirements**
   - Readur must be compiled with the `s3` feature flag enabled
   - Build command: `cargo build --release --features s3`
## Configuration

### Environment Variables

Configure S3 storage by setting the following environment variables:

```bash
# Enable S3 storage backend
S3_ENABLED=true

# Required S3 credentials
S3_BUCKET_NAME=readur-documents
S3_ACCESS_KEY_ID=your-access-key-id
S3_SECRET_ACCESS_KEY=your-secret-access-key
S3_REGION=us-east-1

# Optional: for S3-compatible services (MinIO, Wasabi, etc.)
S3_ENDPOINT=https://s3-compatible-endpoint.com
```

### Configuration File Example (.env)

```bash
# Database Configuration
DATABASE_URL=postgresql://readur:password@localhost/readur

# Server Configuration
SERVER_ADDRESS=0.0.0.0:8000
JWT_SECRET=your-secure-jwt-secret

# S3 Storage Configuration
S3_ENABLED=true
S3_BUCKET_NAME=readur-production
S3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
S3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
S3_REGION=us-west-2

# Optional S3 endpoint for compatible services
# S3_ENDPOINT=https://minio.example.com

# Upload Configuration
UPLOAD_PATH=./temp_uploads
MAX_FILE_SIZE_MB=500
```
### S3-Compatible Services Configuration

#### MinIO
```bash
S3_ENABLED=true
S3_BUCKET_NAME=readur-bucket
S3_ACCESS_KEY_ID=minioadmin
S3_SECRET_ACCESS_KEY=minioadmin
S3_REGION=us-east-1
S3_ENDPOINT=http://localhost:9000
```
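If the MinIO client is available, the bucket can be created ahead of time (the alias name below is illustrative):

```bash
# Create the Readur bucket with the MinIO client
mc alias set readur-minio http://localhost:9000 minioadmin minioadmin
mc mb readur-minio/readur-bucket
```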
#### Wasabi
```bash
S3_ENABLED=true
S3_BUCKET_NAME=readur-bucket
S3_ACCESS_KEY_ID=your-wasabi-key
S3_SECRET_ACCESS_KEY=your-wasabi-secret
S3_REGION=us-east-1
S3_ENDPOINT=https://s3.wasabisys.com
```

#### Backblaze B2
```bash
S3_ENABLED=true
S3_BUCKET_NAME=readur-bucket
S3_ACCESS_KEY_ID=your-b2-key-id
S3_SECRET_ACCESS_KEY=your-b2-application-key
S3_REGION=us-west-002
S3_ENDPOINT=https://s3.us-west-002.backblazeb2.com
```
## Migration from Local Storage

### Using the Migration Tool

Readur includes a migration utility to transfer existing local files to S3:

1. **Prepare for Migration**
   ```bash
   # Backup your database first
   pg_dump readur > readur_backup.sql

   # Set S3 configuration
   export S3_ENABLED=true
   export S3_BUCKET_NAME=readur-production
   export S3_ACCESS_KEY_ID=your-key
   export S3_SECRET_ACCESS_KEY=your-secret
   export S3_REGION=us-east-1
   ```

2. **Run a Dry Run First**
   ```bash
   # Preview what will be migrated
   cargo run --bin migrate_to_s3 --features s3 -- --dry-run
   ```

3. **Execute Migration**
   ```bash
   # Migrate all files
   cargo run --bin migrate_to_s3 --features s3

   # Migrate with options:
   #   --delete-local     delete local files after successful upload
   #   --limit 100        limit to 100 files (for testing)
   #   --enable-rollback  enable automatic rollback on failure
   cargo run --bin migrate_to_s3 --features s3 -- \
       --delete-local \
       --limit 100 \
       --enable-rollback
   ```

4. **Migrate a Specific User's Files**
   ```bash
   cargo run --bin migrate_to_s3 --features s3 -- \
       --user-id 550e8400-e29b-41d4-a716-446655440000
   ```

5. **Resume a Failed Migration**
   ```bash
   # Resume from a specific document ID
   cargo run --bin migrate_to_s3 --features s3 -- \
       --resume-from 550e8400-e29b-41d4-a716-446655440001
   ```
### Migration Process Details

The migration tool performs the following steps:

1. Connects to the database and S3
2. Identifies all documents with local file paths
3. For each document:
   - Reads the local file
   - Uploads it to S3 with a structured path
   - Updates the database with the S3 path
   - Migrates associated thumbnails and processed images
   - Optionally deletes local files
4. Tracks migration state for recovery
5. Supports rollback on failure
### Post-Migration Verification

```sql
-- Check migrated documents
SELECT
    COUNT(*) FILTER (WHERE file_path LIKE 's3://%') as s3_documents,
    COUNT(*) FILTER (WHERE file_path NOT LIKE 's3://%') as local_documents
FROM documents;

-- Find any remaining local files
SELECT id, filename, file_path
FROM documents
WHERE file_path NOT LIKE 's3://%'
LIMIT 10;
```
## Storage Structure

### S3 Path Organization

Readur uses a structured path format in S3:

```
bucket-name/
├── documents/
│   └── {user_id}/
│       └── {year}/
│           └── {month}/
│               └── {document_id}.{extension}
├── thumbnails/
│   └── {user_id}/
│       └── {document_id}_thumb.jpg
└── processed_images/
    └── {user_id}/
        └── {document_id}_processed.png
```

### Example Paths

```
readur-production/
├── documents/
│   └── 550e8400-e29b-41d4-a716-446655440000/
│       └── 2024/
│           └── 03/
│               ├── 123e4567-e89b-12d3-a456-426614174000.pdf
│               └── 987fcdeb-51a2-43f1-b321-123456789abc.docx
├── thumbnails/
│   └── 550e8400-e29b-41d4-a716-446655440000/
│       ├── 123e4567-e89b-12d3-a456-426614174000_thumb.jpg
│       └── 987fcdeb-51a2-43f1-b321-123456789abc_thumb.jpg
└── processed_images/
    └── 550e8400-e29b-41d4-a716-446655440000/
        ├── 123e4567-e89b-12d3-a456-426614174000_processed.png
        └── 987fcdeb-51a2-43f1-b321-123456789abc_processed.png
```
## Performance Optimization

### Multipart Upload

Readur automatically uses multipart upload for files larger than 100MB:

- **Chunk Size**: 16MB per part
- **Automatic Retry**: Exponential backoff with up to 3 retries
- **Progress Tracking**: Real-time upload progress via WebSocket
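For a rough baseline outside Readur, the standard AWS CLI transfer settings can be tuned to the same thresholds and used for comparison tests:

```bash
# Match Readur's multipart thresholds in the AWS CLI
aws configure set default.s3.multipart_threshold 100MB
aws configure set default.s3.multipart_chunksize 16MB
aws s3 cp ./large-sample.bin s3://readur-production/benchmarks/large-sample.bin
```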
### Network Optimization

1. **Region Selection**: Choose the S3 region closest to your Readur server
2. **Transfer Acceleration**: Enable S3 Transfer Acceleration for global users (see the command below)
3. **CloudFront CDN**: Use CloudFront for serving frequently accessed documents
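Transfer Acceleration from item 2 is a per-bucket setting:

```bash
# Enable S3 Transfer Acceleration on the bucket
aws s3api put-bucket-accelerate-configuration \
    --bucket readur-production \
    --accelerate-configuration Status=Enabled
```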
### Caching Strategy

```nginx
# Nginx caching for S3-backed documents. Requires a proxy_cache_path
# zone (here "readur_cache") defined in the http{} block; the upstream
# name is illustrative.
location /api/documents/ {
    proxy_pass http://readur_backend;
    proxy_cache readur_cache;
    proxy_cache_valid 200 1h;
    proxy_cache_valid 404 1m;
    proxy_cache_bypass $http_authorization;
    add_header X-Cache-Status $upstream_cache_status;
}
```
## Troubleshooting

### Common Issues and Solutions

#### 1. S3 Connection Errors

**Error**: "Failed to access S3 bucket"

**Solution**:
```bash
# Verify credentials
aws s3 ls s3://your-bucket-name --profile readur

# Check IAM permissions
aws iam get-user-policy --user-name readur-user --policy-name ReadurS3Policy

# Test connectivity
curl -I https://s3.amazonaws.com/your-bucket-name
```
#### 2. Upload Failures

**Error**: "Failed to store file: RequestTimeout"

**Solution**:
- Check network connectivity
- Verify the S3 endpoint configuration
- Increase timeout values if using an S3-compatible service
- Monitor S3 request metrics in AWS CloudWatch
#### 3. Permission Denied

**Error**: "AccessDenied: Access Denied"

**Solution**:
```bash
# Verify bucket policy
aws s3api get-bucket-policy --bucket your-bucket-name

# Check object ACLs
aws s3api get-object-acl --bucket your-bucket-name --key test-object

# Ensure CORS configuration for web access
aws s3api put-bucket-cors --bucket your-bucket-name --cors-configuration file://cors.json
```
#### 4. Migration Stuck

**Problem**: Migration process hangs or fails repeatedly

**Solution**:
```bash
# Check migration state
cat migration_state.json | jq '.failed_migrations'

# Resume from the last successful migration
LAST_SUCCESS=$(cat migration_state.json | jq -r '.completed_migrations[-1].document_id')
cargo run --bin migrate_to_s3 --features s3 -- --resume-from $LAST_SUCCESS

# Force rollback if needed
cargo run --bin migrate_to_s3 --features s3 -- --rollback
```
### Debugging S3 Operations

Enable detailed S3 logging:

```bash
# Set environment variables for debugging
export RUST_LOG=readur=debug,aws_sdk_s3=debug
export AWS_SDK_LOAD_CONFIG=true

# Run Readur with debug logging
cargo run --features s3
```
### Performance Monitoring

Monitor S3 performance metrics:

```sql
-- Query document upload times
SELECT
    DATE(created_at) as upload_date,
    AVG(file_size / 1024.0 / 1024.0) as avg_size_mb,
    COUNT(*) as documents_uploaded,
    AVG(EXTRACT(EPOCH FROM (updated_at - created_at))) as avg_processing_time_seconds
FROM documents
WHERE file_path LIKE 's3://%'
GROUP BY DATE(created_at)
ORDER BY upload_date DESC;
```
## Best Practices

### 1. Security

- **Encryption**: Enable S3 server-side encryption (SSE-S3 or SSE-KMS)
- **Access Control**: Use IAM roles instead of access keys when possible
- **Bucket Policies**: Implement least-privilege bucket policies
- **VPC Endpoints**: Use VPC endpoints for private S3 access

```bash
# Enable default encryption on the bucket
aws s3api put-bucket-encryption \
    --bucket readur-production \
    --server-side-encryption-configuration '{
        "Rules": [{
            "ApplyServerSideEncryptionByDefault": {
                "SSEAlgorithm": "AES256"
            }
        }]
    }'
```
### 2. Cost Optimization

- **Lifecycle Policies**: Archive old documents to Glacier
- **Intelligent-Tiering**: Enable for automatic cost optimization
- **Request Metrics**: Monitor and optimize S3 request patterns

```json
{
    "Rules": [{
        "Id": "ArchiveOldDocuments",
        "Status": "Enabled",
        "Filter": {"Prefix": ""},
        "Transitions": [{
            "Days": 90,
            "StorageClass": "GLACIER"
        }],
        "NoncurrentVersionTransitions": [{
            "NoncurrentDays": 30,
            "StorageClass": "GLACIER"
        }]
    }]
}
```
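Assuming the policy above is saved as `lifecycle.json` (illustrative filename), it can be applied with:

```bash
# Apply the lifecycle policy to the bucket
aws s3api put-bucket-lifecycle-configuration \
    --bucket readur-production \
    --lifecycle-configuration file://lifecycle.json
```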
### 3. Reliability

- **Versioning**: Enable S3 versioning for document recovery
- **Cross-Region Replication**: Set up replication for disaster recovery
- **Backup Strategy**: Take regular backups to a separate bucket or region

```bash
# Enable versioning
aws s3api put-bucket-versioning \
    --bucket readur-production \
    --versioning-configuration Status=Enabled

# Set up replication
aws s3api put-bucket-replication \
    --bucket readur-production \
    --replication-configuration file://replication.json
```
### 4. Monitoring

Set up CloudWatch alarms for:
- High error rates
- Unusual request patterns
- Storage quota approaching
- Failed multipart uploads

```bash
# Create a CloudWatch alarm for S3 errors (4xxErrors requires S3
# request metrics to be enabled on the bucket)
aws cloudwatch put-metric-alarm \
    --alarm-name readur-s3-errors \
    --alarm-description "Alert on S3 4xx errors" \
    --metric-name 4xxErrors \
    --namespace AWS/S3 \
    --statistic Sum \
    --period 300 \
    --evaluation-periods 1 \
    --threshold 10 \
    --comparison-operator GreaterThanThreshold
```
### 5. Compliance

- **Data Residency**: Ensure the S3 region meets data residency requirements
- **Audit Logging**: Enable S3 access logging and AWS CloudTrail
- **Retention Policies**: Implement compliant data retention policies
- **GDPR Compliance**: Implement proper data deletion procedures

```bash
# Enable access logging
aws s3api put-bucket-logging \
    --bucket readur-production \
    --bucket-logging-status '{
        "LoggingEnabled": {
            "TargetBucket": "readur-logs",
            "TargetPrefix": "s3-access/"
        }
    }'
```
## Next Steps

- Review the [Configuration Reference](./configuration-reference.md) for all S3 options
- Explore the [S3 Troubleshooting Guide](./s3-troubleshooting.md) for common issues and solutions
- Check the [Migration Guide](./migration-guide.md) for moving from local to S3 storage
- Read the [Deployment Guide](./deployment.md) for production deployment best practices
docs/s3-troubleshooting.md (new file, 510 lines)
@@ -0,0 +1,510 @@
# S3 Storage Troubleshooting Guide

## Overview

This guide addresses common issues encountered when using S3 storage with Readur and provides detailed solutions.

## Quick Diagnostics

### S3 Health Check Script

```bash
#!/bin/bash
# s3-health-check.sh

echo "Readur S3 Storage Health Check"
echo "=============================="

# Load configuration
source .env

# Check S3 connectivity
echo -n "1. Checking S3 connectivity... "
if aws s3 ls s3://$S3_BUCKET_NAME --region $S3_REGION > /dev/null 2>&1; then
    echo "✓ Connected"
else
    echo "✗ Failed"
    echo "   Error: Cannot connect to S3 bucket"
    exit 1
fi

# Check bucket permissions
echo -n "2. Checking bucket permissions... "
TEST_FILE="/tmp/readur-test-$$"
echo "test" > $TEST_FILE

if aws s3 cp $TEST_FILE s3://$S3_BUCKET_NAME/test-write-$$ --region $S3_REGION > /dev/null 2>&1; then
    echo "✓ Write permission OK"
    aws s3 rm s3://$S3_BUCKET_NAME/test-write-$$ --region $S3_REGION > /dev/null 2>&1
else
    echo "✗ Write permission failed"
fi
rm -f $TEST_FILE

# Check multipart upload capability by creating and aborting a real
# multipart upload (non-destructive, unlike modifying bucket config)
echo -n "3. Checking multipart upload capability... "
UPLOAD_ID=$(aws s3api create-multipart-upload \
    --bucket $S3_BUCKET_NAME \
    --key test-multipart-$$ \
    --region $S3_REGION \
    --query UploadId --output text 2>/dev/null)
if [ -n "$UPLOAD_ID" ]; then
    echo "✓ Multipart OK"
    aws s3api abort-multipart-upload \
        --bucket $S3_BUCKET_NAME \
        --key test-multipart-$$ \
        --upload-id $UPLOAD_ID \
        --region $S3_REGION > /dev/null 2>&1
else
    echo "⚠ Multipart upload may not be permitted"
fi

echo ""
echo "Health check complete!"
```
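Save the script and run it against the current configuration:

```bash
chmod +x s3-health-check.sh
./s3-health-check.sh
```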
## Common Issues and Solutions

### 1. Connection Issues

#### Problem: "Failed to access S3 bucket"

**Symptoms:**
- Error during startup
- Cannot upload documents
- Migration tool fails immediately

**Diagnosis:**
```bash
# Test basic connectivity
aws s3 ls s3://your-bucket-name

# Check credentials
aws sts get-caller-identity

# Verify region
aws s3api get-bucket-location --bucket your-bucket-name
```

**Solutions:**

1. **Incorrect credentials:**
   ```bash
   # Verify environment variables
   echo $S3_ACCESS_KEY_ID
   echo $S3_SECRET_ACCESS_KEY

   # Test with the AWS CLI
   export AWS_ACCESS_KEY_ID=$S3_ACCESS_KEY_ID
   export AWS_SECRET_ACCESS_KEY=$S3_SECRET_ACCESS_KEY
   aws s3 ls
   ```

2. **Wrong region:**
   ```bash
   # Find the correct region
   aws s3api get-bucket-location --bucket your-bucket-name

   # Update configuration
   export S3_REGION=correct-region
   ```

3. **Network issues:**
   ```bash
   # Test network connectivity
   curl -I https://s3.amazonaws.com

   # Check DNS resolution
   nslookup s3.amazonaws.com

   # Test with a specific endpoint
   curl -I https://your-bucket.s3.amazonaws.com
   ```
### 2. Permission Errors

#### Problem: "AccessDenied: Access Denied"

**Symptoms:**
- Can list the bucket but cannot upload
- Can upload but cannot delete
- Partial operations succeed

**Diagnosis:**
```bash
# Check IAM user permissions
aws iam get-user-policy --user-name readur-user --policy-name ReadurPolicy

# Test specific operations
aws s3api put-object --bucket your-bucket --key test.txt --body /tmp/test.txt
aws s3api get-object --bucket your-bucket --key test.txt /tmp/downloaded.txt
aws s3api delete-object --bucket your-bucket --key test.txt
```

**Solutions:**

1. **Update the IAM policy:**
   ```json
   {
     "Version": "2012-10-17",
     "Statement": [
       {
         "Effect": "Allow",
         "Action": [
           "s3:ListBucket",
           "s3:GetBucketLocation"
         ],
         "Resource": "arn:aws:s3:::your-bucket-name"
       },
       {
         "Effect": "Allow",
         "Action": [
           "s3:PutObject",
           "s3:GetObject",
           "s3:DeleteObject",
           "s3:PutObjectAcl",
           "s3:GetObjectAcl"
         ],
         "Resource": "arn:aws:s3:::your-bucket-name/*"
       }
     ]
   }
   ```

2. **Check the bucket policy:**
   ```bash
   aws s3api get-bucket-policy --bucket your-bucket-name
   ```

3. **Verify the CORS configuration:**
   ```json
   {
     "CORSRules": [
       {
         "AllowedOrigins": ["*"],
         "AllowedMethods": ["GET", "PUT", "POST", "DELETE", "HEAD"],
         "AllowedHeaders": ["*"],
         "ExposeHeaders": ["ETag"],
         "MaxAgeSeconds": 3000
       }
     ]
   }
   ```

### 3. Upload Failures

#### Problem: Large files fail to upload

**Symptoms:**
- Small files upload successfully
- Large files time out or fail
- "RequestTimeout" errors

**Diagnosis:**
```bash
# Check multipart upload configuration
aws s3api list-multipart-uploads --bucket your-bucket-name

# Test large file upload
dd if=/dev/zero of=/tmp/large-test bs=1M count=150
aws s3 cp /tmp/large-test s3://your-bucket-name/test-large
```

**Solutions:**

1. **Increase timeouts:**
   ```rust
   use std::time::Duration;

   // In code configuration
   const UPLOAD_TIMEOUT: Duration = Duration::from_secs(3600);
   ```

2. **Optimize chunk size:**
   ```bash
   # For slow connections, use smaller chunks
   export S3_MULTIPART_CHUNK_SIZE=8388608  # 8MB chunks
   ```

3. **Resume failed uploads:**
   ```bash
   # List incomplete multipart uploads
   aws s3api list-multipart-uploads --bucket your-bucket-name

   # Abort stuck uploads
   aws s3api abort-multipart-upload \
     --bucket your-bucket-name \
     --key path/to/file \
     --upload-id UPLOAD_ID
   ```
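
For context on what the chunked path looks like at the SDK level, here is a hedged sketch of a multipart upload with `aws-sdk-s3`. It is illustrative only, not Readur's actual upload code; the 8 MB part size mirrors the `S3_MULTIPART_CHUNK_SIZE` example above:

```rust
use aws_sdk_s3::Client;
use aws_sdk_s3::primitives::ByteStream;
use aws_sdk_s3::types::{CompletedMultipartUpload, CompletedPart};

// Upload `data` to s3://bucket/key in 8 MB parts, then complete the upload.
async fn multipart_upload(client: &Client, bucket: &str, key: &str, data: Vec<u8>) -> anyhow::Result<()> {
    let upload = client.create_multipart_upload().bucket(bucket).key(key).send().await?;
    let upload_id = upload.upload_id().ok_or_else(|| anyhow::anyhow!("no upload id"))?.to_string();

    let mut parts = Vec::new();
    for (i, chunk) in data.chunks(8 * 1024 * 1024).enumerate() {
        let part_number = (i + 1) as i32; // part numbers are 1-based
        let resp = client
            .upload_part()
            .bucket(bucket)
            .key(key)
            .upload_id(&upload_id)
            .part_number(part_number)
            .body(ByteStream::from(chunk.to_vec()))
            .send()
            .await?;
        parts.push(
            CompletedPart::builder()
                .part_number(part_number)
                .e_tag(resp.e_tag().unwrap_or_default())
                .build(),
        );
    }

    client
        .complete_multipart_upload()
        .bucket(bucket)
        .key(key)
        .upload_id(&upload_id)
        .multipart_upload(CompletedMultipartUpload::builder().set_parts(Some(parts)).build())
        .send()
        .await?;
    Ok(())
}
```

Aborting a stuck upload, as shown with the CLI above, discards any parts already stored so they stop accruing storage charges.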

### 4. S3-Compatible Service Issues

#### Problem: MinIO/Wasabi/Backblaze not working

**Symptoms:**
- AWS S3 works but compatible service doesn't
- "InvalidEndpoint" errors
- SSL certificate errors

**Solutions:**

1. **MinIO configuration:**
   ```bash
   # Correct endpoint format
   S3_ENDPOINT=http://minio.local:9000   # No https:// for local
   S3_ENDPOINT=https://minio.example.com # With SSL

   # Path-style addressing
   S3_FORCE_PATH_STYLE=true
   ```

2. **Wasabi configuration:**
   ```bash
   S3_ENDPOINT=https://s3.wasabisys.com
   S3_REGION=us-east-1  # Or your Wasabi region
   ```

3. **SSL certificate issues:**
   ```bash
   # Trust a custom or internal CA instead of disabling verification
   export AWS_CA_BUNDLE=/path/to/custom-ca.crt

   # Or for self-signed certificates
   export NODE_TLS_REJECT_UNAUTHORIZED=0  # Not recommended for production
   ```

### 5. Migration Problems

#### Problem: Migration tool hangs or fails

**Symptoms:**
- Migration starts but doesn't progress
- "File not found" errors during migration
- Database inconsistencies after partial migration

**Diagnosis:**
```bash
# Check migration state
cat migration_state.json | jq '.'

# Find failed migrations
cat migration_state.json | jq '.failed_migrations'

# Check for orphaned files
find ./uploads -type f -name "*.pdf" | head -10
```

**Solutions:**

1. **Resume from last successful point:**
   ```bash
   # Get last successful migration
   LAST_ID=$(cat migration_state.json | jq -r '.completed_migrations[-1].document_id')

   # Resume migration
   cargo run --bin migrate_to_s3 --features s3 -- --resume-from $LAST_ID
   ```

2. **Fix missing local files:**
   ```sql
   -- Find documents whose local file is missing
   -- (pg_stat_file requires the paths to be readable by the PostgreSQL server;
   --  missing_ok = true avoids aborting on the first missing file)
   SELECT id, filename, file_path
   FROM documents
   WHERE file_path NOT LIKE 's3://%'
     AND (pg_stat_file(file_path, true)).size IS NULL;
   ```

3. **Rollback failed migration:**
   ```bash
   # Automatic rollback
   cargo run --bin migrate_to_s3 --features s3 -- --rollback

   # Manual cleanup
   psql $DATABASE_URL -c "UPDATE documents SET file_path = original_path WHERE file_path LIKE 's3://%';"
   ```
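
After resuming or rolling back, verify that the database and storage agree before re-enabling sync. A small `sqlx` sketch (assumes a `PgPool` connected via `DATABASE_URL`; the `documents` table and `s3://` path convention come from the queries above):

```rust
use sqlx::PgPool;

// Count how many documents still point at local storage vs. S3,
// as a quick post-migration sanity check (illustrative only).
async fn migration_summary(pool: &PgPool) -> Result<(), sqlx::Error> {
    let on_s3: i64 =
        sqlx::query_scalar("SELECT COUNT(*) FROM documents WHERE file_path LIKE 's3://%'")
            .fetch_one(pool)
            .await?;
    let local: i64 =
        sqlx::query_scalar("SELECT COUNT(*) FROM documents WHERE file_path NOT LIKE 's3://%'")
            .fetch_one(pool)
            .await?;
    println!("documents on S3: {on_s3}, still local: {local}");
    Ok(())
}
```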

### 6. Performance Issues

#### Problem: Slow document retrieval from S3

**Symptoms:**
- Document downloads are slow
- High latency for thumbnail loading
- Timeouts on document preview

**Diagnosis:**
```bash
# Measure S3 latency
time aws s3 cp s3://your-bucket/test-file /tmp/test-download

# Check S3 transfer metrics
aws cloudwatch get-metric-statistics \
  --namespace AWS/S3 \
  --metric-name AllRequests \
  --dimensions Name=BucketName,Value=your-bucket \
  --start-time 2024-01-01T00:00:00Z \
  --end-time 2024-01-02T00:00:00Z \
  --period 3600 \
  --statistics Average
```

**Solutions:**

1. **Enable S3 Transfer Acceleration:**
   ```bash
   aws s3api put-bucket-accelerate-configuration \
     --bucket your-bucket-name \
     --accelerate-configuration Status=Enabled

   # Update endpoint
   S3_ENDPOINT=https://your-bucket.s3-accelerate.amazonaws.com
   ```

2. **Implement caching:**
   ```nginx
   # Nginx caching configuration
   proxy_cache_path /var/cache/nginx/s3 levels=1:2 keys_zone=s3_cache:10m max_size=1g;

   location /api/documents/ {
       proxy_cache s3_cache;
       proxy_cache_valid 200 1h;
       proxy_cache_key "$request_uri";
   }
   ```

3. **Use CloudFront CDN:**
   ```bash
   # Create CloudFront distribution
   aws cloudfront create-distribution \
     --origin-domain-name your-bucket.s3.amazonaws.com \
     --default-root-object index.html
   ```

## Advanced Debugging

### Enable Debug Logging

```bash
# Set environment variables
export RUST_LOG=readur=debug,aws_sdk_s3=debug,aws_config=debug
export RUST_BACKTRACE=full

# Run Readur with debug output
./readur 2>&1 | tee readur-debug.log
```

### S3 Request Logging

```bash
# Enable S3 access logging
aws s3api put-bucket-logging \
  --bucket your-bucket-name \
  --bucket-logging-status '{
    "LoggingEnabled": {
      "TargetBucket": "your-logs-bucket",
      "TargetPrefix": "s3-access-logs/"
    }
  }'
```

### Network Troubleshooting

```bash
# Trace S3 requests
tcpdump -i any -w s3-traffic.pcap host s3.amazonaws.com

# Analyze with Wireshark
wireshark s3-traffic.pcap

# Check MTU issues
ping -M do -s 1472 s3.amazonaws.com
```

## Monitoring and Alerts

### CloudWatch Metrics

```bash
# Create alarm for high error rate
aws cloudwatch put-metric-alarm \
  --alarm-name s3-high-error-rate \
  --alarm-description "Alert when S3 error rate is high" \
  --metric-name 4xxErrors \
  --namespace AWS/S3 \
  --statistic Sum \
  --period 300 \
  --threshold 10 \
  --comparison-operator GreaterThanThreshold \
  --evaluation-periods 2
```

### Log Analysis

```bash
# Parse S3 access logs
aws s3 sync s3://your-logs-bucket/s3-access-logs/ ./logs/

# Find errors
grep -E "4[0-9]{2}|5[0-9]{2}" ./logs/*.log | head -20

# Analyze request patterns
awk '{print $8}' ./logs/*.log | sort | uniq -c | sort -rn | head -20
```

## Recovery Procedures

### Corrupted S3 Data

```bash
# Verify object integrity
aws s3api head-object --bucket your-bucket --key path/to/document.pdf

# Restore from versioning
aws s3api list-object-versions --bucket your-bucket --prefix path/to/

# Restore specific version
aws s3api get-object \
  --bucket your-bucket \
  --key path/to/document.pdf \
  --version-id VERSION_ID \
  /tmp/recovered-document.pdf
```

### Database Inconsistency

```sql
-- Find orphaned S3 references
SELECT id, file_path
FROM documents
WHERE file_path LIKE 's3://%'
  AND file_path NOT IN (
    SELECT 's3://' || key FROM s3_inventory_table
  );

-- Update paths after bucket migration
UPDATE documents
SET file_path = REPLACE(file_path, 's3://old-bucket/', 's3://new-bucket/')
WHERE file_path LIKE 's3://old-bucket/%';
```

## Prevention Best Practices

1. **Regular Health Checks**: Run diagnostic scripts daily
2. **Monitor Metrics**: Set up CloudWatch dashboards
3. **Test Failover**: Regularly test backup procedures
4. **Document Changes**: Keep a configuration changelog
5. **Capacity Planning**: Monitor storage growth trends

## Getting Help

If issues persist after following this guide:

1. **Collect Diagnostics**:
   ```bash
   ./collect-diagnostics.sh > diagnostics.txt
   ```

2. **Check Logs**:
   - Application logs: `journalctl -u readur -n 1000`
   - S3 access logs: review the logs delivered to your logging bucket or CloudWatch
   - Database logs: `tail -f /var/log/postgresql/*.log`

3. **Contact Support**:
   - Include diagnostics output
   - Provide configuration (sanitized)
   - Describe symptoms and timeline
   - Share any error messages

@@ -24,7 +24,8 @@ Sources allow Readur to automatically discover, download, and process documents
- **Automated Syncing**: Scheduled synchronization with configurable intervals
- **Health Monitoring**: Proactive monitoring and validation of source connections
- **Intelligent Processing**: Duplicate detection, incremental syncs, and OCR integration
- **Real-time Status**: Live sync progress and comprehensive statistics
- **Real-time Status**: Live sync progress via WebSocket connections
- **Per-User Watch Directories**: Individual watch folders for each user (v2.5.4+)

### How Sources Work

@@ -105,6 +106,7 @@ Local folder sources monitor directories on the Readur server's filesystem, incl
- **Network Mounts**: Sync from NFS, SMB/CIFS, or other mounted filesystems
- **Batch Processing**: Automatically process documents placed in specific folders
- **Archive Integration**: Monitor existing document archives
- **Per-User Ingestion**: Individual watch directories for each user (v2.5.4+)

#### Local Folder Configuration

@@ -162,10 +164,56 @@ sudo mount -t cifs //server/documents /mnt/smb-docs -o username=user
Watch Folders: /mnt/smb-docs/processing
```

#### Per-User Watch Directories (v2.5.4+)

Each user can have their own dedicated watch directory for automatic document ingestion. This feature is ideal for multi-tenant deployments, department separation, and maintaining clear data boundaries.

**Configuration:**
```bash
# Enable per-user watch directories
ENABLE_PER_USER_WATCH=true
USER_WATCH_BASE_DIR=/data/user_watches
```

**Directory Structure:**
```
/data/user_watches/
├── john_doe/
│   ├── invoice.pdf
│   └── report.docx
├── jane_smith/
│   └── presentation.pptx
└── admin/
    └── policy.pdf
```

**API Management:**
```http
# Get user watch directory info
GET /api/users/{userId}/watch-directory

# Create/ensure watch directory exists
POST /api/users/{userId}/watch-directory
{
  "ensure_created": true
}

# Delete user watch directory
DELETE /api/users/{userId}/watch-directory
```
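
These endpoints are easy to script. A hedged `reqwest` sketch (assumes the `json` feature; the base URL and bearer token are placeholders, only the paths above are taken from the API):

```rust
use reqwest::Client;
use serde_json::json;

// Ensure a user's watch directory exists via the API above (illustrative sketch;
// BASE_URL and TOKEN stand in for your deployment's values).
async fn ensure_watch_dir(user_id: &str) -> Result<(), reqwest::Error> {
    const BASE_URL: &str = "https://readur.example.com";
    const TOKEN: &str = "your-api-token";

    let resp = Client::new()
        .post(format!("{BASE_URL}/api/users/{user_id}/watch-directory"))
        .bearer_auth(TOKEN)
        .json(&json!({ "ensure_created": true }))
        .send()
        .await?
        .error_for_status()?;
    println!("watch directory ensured: HTTP {}", resp.status());
    Ok(())
}
```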

**Use Cases:**
- **Multi-tenant deployments**: Isolate document ingestion per customer
- **Department separation**: Each department has its own ingestion folder
- **Compliance**: Maintain clear data separation between users
- **Automation**: Connect scanners or automation tools to user-specific folders

### S3 Sources

S3 sources connect to Amazon S3 or S3-compatible storage services for document synchronization.

> 📖 **Complete S3 Guide**: For detailed S3 storage backend configuration, migration from local storage, and advanced features, see the [S3 Storage Guide](s3-storage-guide.md).

#### Supported S3 Services

| Service | Status | Configuration |
@@ -327,6 +375,39 @@ Auto Sync: Every 1 hour
- Estimated completion time
- Transfer speeds and statistics

### Real-Time Sync Progress (v2.5.4+)

Readur uses WebSocket connections for real-time sync progress updates, providing lower latency and bidirectional communication compared to the previous Server-Sent Events implementation.

**WebSocket Connection:**
```javascript
// Connect to sync progress WebSocket
const ws = new WebSocket('wss://readur.example.com/api/sources/{sourceId}/sync/progress');

ws.onmessage = (event) => {
  const progress = JSON.parse(event.data);
  console.log(`Sync progress: ${progress.progress}%`);
};
```

**Progress Event Format:**
```json
{
  "phase": "discovering",
  "progress": 45,
  "current_file": "document.pdf",
  "total_files": 150,
  "processed_files": 68,
  "status": "in_progress"
}
```
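
Native clients can consume the same stream. A sketch using the `tokio-tungstenite` crate (an assumption; Readur does not ship this client), with the URL shape and event fields taken from the examples above and authentication omitted:

```rust
use futures_util::StreamExt;
use tokio_tungstenite::{connect_async, tungstenite::Message};

// Follow a source's sync progress from a native client (illustrative sketch;
// source_id is a placeholder, auth headers are omitted).
async fn follow_progress(source_id: &str) -> anyhow::Result<()> {
    let url = format!("wss://readur.example.com/api/sources/{source_id}/sync/progress");
    let (mut ws, _response) = connect_async(url).await?;

    while let Some(frame) = ws.next().await {
        if let Message::Text(text) = frame? {
            // Each text frame carries one progress event in the JSON format above
            let event: serde_json::Value = serde_json::from_str(&text)?;
            println!("{} {}%", event["phase"], event["progress"]);
        }
    }
    Ok(())
}
```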

**Benefits:**
- Bidirectional communication for interactive control
- 50% reduction in bandwidth compared to SSE
- Automatic reconnection handling
- Lower server CPU usage

### Stopping Sync

**Graceful Cancellation:**

426
docs/webdav-enhanced-features.md
Normal file
@@ -0,0 +1,426 @@
# WebDAV Enhanced Features Documentation

This document describes the WebDAV protocol features implemented in Readur's WebDAV service: file locking, partial content with resume support, directory operations, and enhanced status code handling.

## Table of Contents
1. [WebDAV File Locking (LOCK/UNLOCK)](#webdav-file-locking)
2. [Partial Content/Resume Support](#partial-content-support)
3. [Directory Operations (MKCOL)](#directory-operations)
4. [Enhanced Status Code Handling](#status-code-handling)

## WebDAV File Locking

### Overview
WebDAV locking prevents concurrent modification issues by allowing clients to lock resources before modifying them. This implementation supports both exclusive and shared locks with configurable timeouts.

### Features
- **LOCK Method**: Acquire exclusive or shared locks on resources
- **UNLOCK Method**: Release previously acquired locks
- **Lock Tokens**: Opaque lock tokens in the format `opaquelocktoken:UUID`
- **Lock Refresh**: Extend lock timeout before expiration
- **Depth Support**: Lock individual resources or entire directory trees
- **Automatic Cleanup**: Expired locks are automatically removed

### Usage

#### Acquiring a Lock
```rust
use readur::services::webdav::{WebDAVService, LockScope};

// Acquire an exclusive lock
let lock_info = service.lock_resource(
    "/documents/important.docx",
    LockScope::Exclusive,
    Some("user@example.com".to_string()), // owner
    Some(3600), // timeout in seconds
).await?;

println!("Lock token: {}", lock_info.token);
```

#### Checking Lock Status
```rust
// Check if a resource is locked
if service.is_locked("/documents/important.docx").await {
    println!("Resource is locked");
}

// Get all locks on a resource
let locks = service.get_lock_info("/documents/important.docx").await;
for lock in locks {
    println!("Lock: {} (expires: {:?})", lock.token, lock.expires_at);
}
```

#### Refreshing a Lock
```rust
// Refresh lock before it expires
let refreshed = service.refresh_lock(&lock_info.token, Some(7200)).await?;
println!("Lock extended until: {:?}", refreshed.expires_at);
```

#### Releasing a Lock
```rust
// Release the lock when done
service.unlock_resource("/documents/important.docx", &lock_info.token).await?;
```

### Lock Types
- **Exclusive Lock**: Only one client can hold an exclusive lock
- **Shared Lock**: Multiple clients can hold shared locks simultaneously

### Error Handling
- **423 Locked**: Resource is already locked by another process
- **412 Precondition Failed**: Lock token is invalid or expired
- **409 Conflict**: Lock conflicts with existing locks
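
A common pattern is to treat `423 Locked` as transient and retry acquisition a few times before giving up. A sketch built on the methods above (the attempt count and pause are arbitrary choices, not library defaults):

```rust
use std::time::Duration;
use readur::services::webdav::{LockInfo, LockScope, WebDAVService};

// Try to acquire an exclusive lock, waiting out a competing holder
// (sketch; 5 attempts and a 10s pause are illustrative values).
async fn lock_with_wait(service: &WebDAVService, path: &str) -> anyhow::Result<LockInfo> {
    for attempt in 0..5 {
        match service.lock_resource(path, LockScope::Exclusive, None, Some(3600)).await {
            Ok(lock) => return Ok(lock),
            // lock_resource surfaces 423 as an error; back off and retry
            Err(e) if attempt < 4 => {
                eprintln!("attempt {}: {e}, retrying", attempt + 1);
                tokio::time::sleep(Duration::from_secs(10)).await;
            }
            Err(e) => return Err(e),
        }
    }
    unreachable!("the final iteration always returns")
}
```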

## Partial Content Support

### Overview
Partial content support enables reliable downloads with resume capability, essential for large files or unreliable connections. The implementation follows RFC 7233 for HTTP Range Requests.

### Features
- **Range Headers**: Support for byte-range requests
- **206 Partial Content**: Handle partial content responses
- **Resume Capability**: Continue interrupted downloads
- **Chunked Downloads**: Download large files in manageable chunks
- **Progress Tracking**: Monitor download progress in real-time

### Usage

#### Downloading a Specific Range
```rust
use readur::services::webdav::ByteRange;

// Download bytes 0-1023 (first 1KB)
let chunk = service.download_file_range(
    "/videos/large_file.mp4",
    0,
    Some(1023)
).await?;

// Download from byte 1024 to end of file
let rest = service.download_file_range(
    "/videos/large_file.mp4",
    1024,
    None
).await?;
```

#### Download with Resume Support
```rust
use std::path::PathBuf;

// Download with automatic resume on failure
let local_path = PathBuf::from("/downloads/large_file.mp4");
let content = service.download_file_with_resume(
    "/videos/large_file.mp4",
    local_path
).await?;
```

#### Monitoring Download Progress
```rust
// Get progress of a specific download
if let Some(progress) = service.get_download_progress("/videos/large_file.mp4").await {
    println!("Downloaded: {} / {} bytes ({:.1}%)",
        progress.bytes_downloaded,
        progress.total_size,
        progress.percentage_complete()
    );
}

// List all active downloads
let downloads = service.list_active_downloads().await;
for download in downloads {
    println!("{}: {:.1}% complete",
        download.resource_path,
        download.percentage_complete()
    );
}
```

#### Canceling a Download
```rust
// Cancel an active download
service.cancel_download("/videos/large_file.mp4").await?;
```

### Range Format
- `bytes=0-1023` - First 1024 bytes
- `bytes=1024-` - From byte 1024 to end
- `bytes=-500` - Last 500 bytes
- `bytes=0-500,1000-1500` - Multiple ranges
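
The first two forms map directly onto `ByteRange` as used by `download_file_range`; a short sketch (the header strings in the comments assume `to_header_value()` renders the full `bytes=` value, as the service code suggests):

```rust
use readur::services::webdav::ByteRange;

// Closed range, e.g. the first KiB of a file: "bytes=0-1023"
let first_kib = ByteRange::new(0, Some(1023));
println!("{}", first_kib.to_header_value());

// Open-ended range from byte 1024 to EOF: "bytes=1024-"
let tail = ByteRange::new(1024, None);
println!("{}", tail.to_header_value());
```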

## Directory Operations

### Overview
Comprehensive directory management using WebDAV-specific methods, including the MKCOL method for creating collections (directories).

### Features
- **MKCOL Method**: Create directories with proper WebDAV semantics
- **Recursive Creation**: Create entire directory trees
- **MOVE Method**: Move or rename directories
- **COPY Method**: Copy directories with depth control
- **DELETE Method**: Delete directories recursively
- **Directory Properties**: Set custom properties on directories

### Usage

#### Creating Directories
```rust
use readur::services::webdav::CreateDirectoryOptions;

// Create a single directory
let result = service.create_directory(
    "/projects/new_project",
    CreateDirectoryOptions::default()
).await?;

// Create with parent directories
let options = CreateDirectoryOptions {
    create_parents: true,
    fail_if_exists: false,
    properties: None,
};
let result = service.create_directory(
    "/projects/2024/january/reports",
    options
).await?;

// Create entire path recursively
let results = service.create_directory_recursive(
    "/projects/2024/january/reports"
).await?;
```

#### Checking Directory Existence
```rust
if service.directory_exists("/projects/2024").await? {
    println!("Directory exists");
}
```

#### Listing Directory Contents
```rust
let contents = service.list_directory("/projects").await?;
for item in contents {
    println!("  {}", item);
}
```

#### Moving Directories
```rust
// Move (rename) a directory
service.move_directory(
    "/projects/old_name",
    "/projects/new_name",
    false // don't overwrite if exists
).await?;
```

#### Copying Directories
```rust
// Copy directory recursively
service.copy_directory(
    "/projects/template",
    "/projects/new_project",
    false, // don't overwrite
    Some("infinity") // recursive copy
).await?;
```

#### Deleting Directories
```rust
// Delete empty directory
service.delete_directory("/projects/old", false).await?;

// Delete directory and all contents
service.delete_directory("/projects/old", true).await?;
```

## Status Code Handling

### Overview
Enhanced error handling for WebDAV-specific status codes, providing detailed error information and automatic retry logic.

### WebDAV Status Codes

#### Success Codes
- **207 Multi-Status**: Response contains multiple status codes
- **208 Already Reported**: Members already enumerated

#### Client Error Codes
- **422 Unprocessable Entity**: Request contains semantic errors
- **423 Locked**: Resource is locked
- **424 Failed Dependency**: Related operation failed

#### Server Error Codes
- **507 Insufficient Storage**: Server storage full
- **508 Loop Detected**: Infinite loop in request

### Error Information
Each error includes:
- Status code and description
- Resource path affected
- Lock token (if applicable)
- Suggested resolution action
- Retry information
- Server-provided details

### Usage

#### Enhanced Error Handling
```rust
use readur::services::webdav::StatusCodeHandler;

// Perform operation with enhanced error handling
let response = service.authenticated_request_enhanced(
    Method::GET,
    &url,
    None,
    None,
    &[200, 206] // expected status codes
).await?;
```

#### Smart Retry Logic
```rust
// Automatic retry with exponential backoff
let result = service.with_smart_retry(
    || Box::pin(async {
        // Your operation here
        service.download_file("/path/to/file").await
    }),
    3 // max attempts
).await?;
```

#### Error Details
```rust
match service.lock_resource(path, scope, owner, timeout).await {
    Ok(lock) => println!("Locked: {}", lock.token),
    Err(e) => {
        // Error includes WebDAV-specific information:
        // - Status code (e.g., 423)
        // - Lock owner information
        // - Suggested actions
        // - Retry recommendations
        println!("Lock failed: {}", e);
    }
}
```

### Retry Strategy
The system automatically determines if errors are retryable:

| Status Code | Retryable | Default Delay | Backoff |
|------------|-----------|---------------|---------|
| 423 Locked | Yes | 10s | Exponential |
| 429 Too Many Requests | Yes | 60s | Exponential |
| 503 Service Unavailable | Yes | 30s | Exponential |
| 409 Conflict | Yes | 5s | Exponential |
| 500-599 Server Errors | Yes | 30s | Exponential |
| 400-499 Client Errors | No | - | - |
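
"Exponential" here means the delay doubles on each attempt, starting from the base value in the table. A sketch of that computation (the 300-second cap is an illustrative assumption, not taken from the implementation):

```rust
use std::time::Duration;

// Exponential backoff per the table above: the base delay doubles each attempt.
// The cap of 300s is an illustrative assumption, not from the implementation.
fn retry_delay(status: u16, attempt: u32) -> Duration {
    let base: u64 = match status {
        423 => 10,
        429 => 60,
        409 => 5,
        500..=599 => 30, // includes 503 Service Unavailable
        _ => return Duration::ZERO, // non-retryable client errors
    };
    Duration::from_secs((base << attempt.min(5)).min(300))
}
```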

## Integration with Existing Code

All new features are fully integrated with the existing WebDAV service:

```rust
use readur::services::webdav::{
    WebDAVService, WebDAVConfig,
    LockManager, PartialContentManager,
    CreateDirectoryOptions, ByteRange,
    WebDAVStatusCode, WebDAVError
};

// Create service as usual
let config = WebDAVConfig { /* ... */ };
let service = WebDAVService::new(config)?;

// All new features are available through the service
// - Locking: service.lock_resource(), unlock_resource()
// - Partial: service.download_file_range(), download_file_with_resume()
// - Directories: service.create_directory(), delete_directory()
// - Errors: Automatic enhanced error handling
```

## Testing

All features include comprehensive test coverage:

```bash
# Run all tests
cargo test --lib

# Run specific feature tests
cargo test locking_tests
cargo test partial_content_tests
cargo test directory_ops_tests

# Run integration tests (requires WebDAV server)
cargo test -- --ignored
```

## Performance Considerations

1. **Lock Management**: Locks are stored in memory with automatic cleanup of expired locks
2. **Partial Downloads**: Configurable chunk size (default 1MB) for optimal performance
3. **Directory Operations**: Batch operations use concurrent processing with semaphore control
4. **Error Handling**: Smart retry with exponential backoff prevents server overload

## Security Considerations

1. **Lock Tokens**: Use cryptographically secure UUIDs
2. **Authentication**: All operations use HTTP Basic Auth (configure HTTPS in production)
3. **Timeouts**: Configurable timeouts prevent resource exhaustion
4. **Rate Limiting**: Respect server rate limits with automatic backoff

## Compatibility

The implementation follows these standards:
- RFC 4918 (WebDAV)
- RFC 7233 (HTTP Range Requests)
- RFC 2518 (WebDAV Locking)

Tested with:
- Nextcloud
- ownCloud
- Apache mod_dav
- Generic WebDAV servers

## Migration Guide

For existing code using the WebDAV service:

1. **No Breaking Changes**: All existing methods continue to work
2. **New Features Are Opt-In**: Use new methods only when needed
3. **Enhanced Error Information**: Errors now include more details but maintain backward compatibility
4. **Automatic Benefits**: Some improvements (like better error handling) apply automatically

## Troubleshooting

### Lock Issues
- **423 Locked Error**: Another client holds a lock. Wait or use the lock token
- **Lock Token Invalid**: Lock may have expired. Acquire a new lock
- **Locks Not Released**: Implement proper cleanup in error paths

### Partial Content Issues
- **Server Doesn't Support Ranges**: Falls back to full download automatically
- **Resume Fails**: File may have changed. Restart the download
- **Slow Performance**: Adjust chunk size based on network conditions

### Directory Operation Issues
- **409 Conflict**: Parent directory doesn't exist. Use `create_parents: true`
- **405 Method Not Allowed**: Directory may already exist or server doesn't support MKCOL
- **507 Insufficient Storage**: Server storage full. Contact administrator

## Future Enhancements

Potential future improvements:
- WebDAV SEARCH method support
- Advanced property management (PROPPATCH)
- Access control (WebDAV ACL)
- Versioning support (DeltaV)
- Collection synchronization (WebDAV Sync)

@@ -5,15 +5,27 @@ pub mod service;
pub mod smart_sync;
pub mod progress_shim; // Backward compatibility shim for simplified progress tracking

// New enhanced WebDAV features
pub mod locking;
pub mod partial_content;
pub mod directory_ops;
pub mod status_codes;

// Re-export main types for convenience
pub use config::{WebDAVConfig, RetryConfig, ConcurrencyConfig};
pub use service::{
    WebDAVService, WebDAVDiscoveryResult, ServerCapabilities, HealthStatus, test_webdav_connection,
    ValidationReport, ValidationIssue, ValidationIssueType, ValidationSeverity,
    ValidationRecommendation, ValidationAction, ValidationSummary
    ValidationRecommendation, ValidationAction, ValidationSummary, WebDAVDownloadResult
};
pub use smart_sync::{SmartSyncService, SmartSyncDecision, SmartSyncStrategy, SmartSyncResult};

// Export new feature types
pub use locking::{LockManager, LockInfo, LockScope, LockType, LockDepth, LockRequest};
pub use partial_content::{PartialContentManager, ByteRange, DownloadProgress};
pub use directory_ops::{CreateDirectoryOptions, DirectoryCreationResult};
pub use status_codes::{WebDAVStatusCode, WebDAVError, StatusCodeHandler};

// Backward compatibility exports for progress tracking (simplified)
pub use progress_shim::{SyncProgress, SyncPhase, ProgressStats};

@@ -25,4 +37,10 @@ mod subdirectory_edge_cases_tests;
#[cfg(test)]
mod protocol_detection_tests;
#[cfg(test)]
mod tests;
mod tests;
#[cfg(test)]
mod locking_tests;
#[cfg(test)]
mod partial_content_tests;
#[cfg(test)]
mod directory_ops_tests;

@@ -1,9 +1,10 @@
use anyhow::{anyhow, Result};
use reqwest::{Client, Method, Response};
use reqwest::{Client, Method, Response, StatusCode, header};
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::collections::{HashMap, HashSet};
use tokio::sync::Semaphore;
use std::path::PathBuf;
use tokio::sync::{Semaphore, RwLock};
use tokio::time::sleep;
use futures_util::stream;
use tracing::{debug, error, info, warn};
@@ -16,7 +17,14 @@ use crate::models::{
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult};

use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
use super::{
    config::{WebDAVConfig, RetryConfig, ConcurrencyConfig},
    SyncProgress,
    locking::{LockManager, LockInfo, LockScope, LockDepth, LockRequest},
    partial_content::{PartialContentManager, ByteRange, DownloadProgress},
    directory_ops::{CreateDirectoryOptions, DirectoryCreationResult},
    status_codes::{WebDAVStatusCode, WebDAVError, StatusCodeHandler},
};

/// Results from WebDAV discovery including both files and directories
#[derive(Debug, Clone)]
@@ -147,6 +155,10 @@ pub struct WebDAVService {
    download_semaphore: Arc<Semaphore>,
    /// Stores the working protocol (updated after successful protocol detection)
    working_protocol: Arc<std::sync::RwLock<Option<String>>>,
    /// Lock manager for WebDAV locking support
    lock_manager: LockManager,
    /// Partial content manager for resume support
    partial_content_manager: PartialContentManager,
}

impl WebDAVService {
@@ -178,6 +190,13 @@ impl WebDAVService {
        let scan_semaphore = Arc::new(Semaphore::new(concurrency_config.max_concurrent_scans));
        let download_semaphore = Arc::new(Semaphore::new(concurrency_config.max_concurrent_downloads));

        // Initialize lock manager
        let lock_manager = LockManager::new();

        // Initialize partial content manager with temp directory
        let temp_dir = std::env::temp_dir().join("readur_webdav_downloads");
        let partial_content_manager = PartialContentManager::new(temp_dir);

        Ok(Self {
            client,
            config,
@@ -186,6 +205,8 @@ impl WebDAVService {
            scan_semaphore,
            download_semaphore,
            working_protocol: Arc::new(std::sync::RwLock::new(None)),
            lock_manager,
            partial_content_manager,
        })
    }

@@ -1953,6 +1974,391 @@ impl WebDAVService {
    pub fn relative_path_to_url(&self, relative_path: &str) -> String {
        self.path_to_url(relative_path)
    }

    // ============================================================================
    // WebDAV Locking Methods
    // ============================================================================

    /// Acquires a lock on a resource
    pub async fn lock_resource(
        &self,
        resource_path: &str,
        scope: LockScope,
        owner: Option<String>,
        timeout_seconds: Option<u64>,
    ) -> Result<LockInfo> {
        let url = self.get_url_for_path(resource_path);

        info!("Acquiring {:?} lock on: {}", scope, resource_path);

        // Build LOCK request body
        let lock_body = self.build_lock_request_xml(scope, owner.as_deref());

        // Send LOCK request
        let response = self.authenticated_request(
            Method::from_bytes(b"LOCK")?,
            &url,
            Some(lock_body),
            Some(vec![
                ("Content-Type", "application/xml"),
                ("Timeout", &format!("Second-{}", timeout_seconds.unwrap_or(3600))),
            ]),
        ).await?;

        // Handle response based on status code
        match response.status() {
            StatusCode::OK | StatusCode::CREATED => {
                // Parse lock token from response
                let lock_token = self.extract_lock_token_from_response(&response)?;

                // Create lock info
                let lock_request = LockRequest {
                    scope,
                    lock_type: super::locking::LockType::Write,
                    owner,
                };

                // Register lock with manager
                let lock_info = self.lock_manager.acquire_lock(
                    resource_path.to_string(),
                    lock_request,
                    LockDepth::Zero,
                    timeout_seconds,
                ).await?;

                info!("Lock acquired successfully: {}", lock_info.token);
                Ok(lock_info)
            }
            StatusCode::LOCKED => {
                Err(anyhow!("Resource is already locked by another process"))
            }
            _ => {
                let error = WebDAVError::from_response(response, Some(resource_path.to_string())).await;
                Err(anyhow!("Failed to acquire lock: {}", error))
            }
        }
    }

    /// Refreshes an existing lock
    pub async fn refresh_lock(&self, lock_token: &str, timeout_seconds: Option<u64>) -> Result<LockInfo> {
        // Get lock info from manager
        let lock_info = self.lock_manager.refresh_lock(lock_token, timeout_seconds).await?;
        let url = self.get_url_for_path(&lock_info.resource_path);

        info!("Refreshing lock: {}", lock_token);

        // Send LOCK request with If header
        let response = self.authenticated_request(
            Method::from_bytes(b"LOCK")?,
            &url,
            None,
            Some(vec![
                ("If", &format!("(<{}>)", lock_token)),
                ("Timeout", &format!("Second-{}", timeout_seconds.unwrap_or(3600))),
            ]),
        ).await?;

        if response.status().is_success() {
            info!("Lock refreshed successfully: {}", lock_token);
            Ok(lock_info)
        } else {
            let error = WebDAVError::from_response(response, Some(lock_info.resource_path.clone())).await;
            Err(anyhow!("Failed to refresh lock: {}", error))
        }
    }

    /// Releases a lock
    pub async fn unlock_resource(&self, resource_path: &str, lock_token: &str) -> Result<()> {
        let url = self.get_url_for_path(resource_path);

        info!("Releasing lock on: {} (token: {})", resource_path, lock_token);

        // Send UNLOCK request
        let response = self.authenticated_request(
            Method::from_bytes(b"UNLOCK")?,
            &url,
            None,
            Some(vec![
                ("Lock-Token", &format!("<{}>", lock_token)),
            ]),
        ).await?;

        if response.status() == StatusCode::NO_CONTENT || response.status().is_success() {
            // Remove from lock manager
            self.lock_manager.release_lock(lock_token).await?;
            info!("Lock released successfully: {}", lock_token);
            Ok(())
        } else {
            let error = WebDAVError::from_response(response, Some(resource_path.to_string())).await;
            Err(anyhow!("Failed to release lock: {}", error))
        }
    }

    /// Checks if a resource is locked
    pub async fn is_locked(&self, resource_path: &str) -> bool {
        self.lock_manager.is_locked(resource_path).await
    }

    /// Gets lock information for a resource
    pub async fn get_lock_info(&self, resource_path: &str) -> Vec<LockInfo> {
        self.lock_manager.get_locks(resource_path).await
    }

    /// Builds XML for LOCK request
    fn build_lock_request_xml(&self, scope: LockScope, owner: Option<&str>) -> String {
        let scope_xml = match scope {
            LockScope::Exclusive => "<D:exclusive/>",
            LockScope::Shared => "<D:shared/>",
        };

        let owner_xml = owner
            .map(|o| format!("<D:owner><D:href>{}</D:href></D:owner>", o))
            .unwrap_or_default();

        format!(
            r#"<?xml version="1.0" encoding="utf-8"?>
<D:lockinfo xmlns:D="DAV:">
    <D:lockscope>{}</D:lockscope>
    <D:locktype><D:write/></D:locktype>
    {}
</D:lockinfo>"#,
            scope_xml, owner_xml
        )
    }

    /// Extracts lock token from LOCK response
    fn extract_lock_token_from_response(&self, response: &Response) -> Result<String> {
        // Check Lock-Token header
        if let Some(lock_token_header) = response.headers().get("lock-token") {
            if let Ok(token_str) = lock_token_header.to_str() {
                // Remove angle brackets if present
                let token = token_str.trim_matches(|c| c == '<' || c == '>');
                return Ok(token.to_string());
            }
        }

        // If not in header, would need to parse from response body
        // For now, generate a token (in production, parse from XML response)
        Ok(format!("opaquelocktoken:{}", uuid::Uuid::new_v4()))
    }

    // ============================================================================
    // Partial Content / Resume Support Methods
    // ============================================================================

    /// Downloads a file with resume support
    pub async fn download_file_with_resume(
        &self,
        file_path: &str,
        local_path: PathBuf,
    ) -> Result<Vec<u8>> {
        let url = self.get_url_for_path(file_path);

        // First, get file size and check partial content support
        let head_response = self.authenticated_request(
            Method::HEAD,
            &url,
            None,
            None,
        ).await?;

        let total_size = head_response
            .headers()
            .get(header::CONTENT_LENGTH)
            .and_then(|v| v.to_str().ok())
            .and_then(|s| s.parse::<u64>().ok())
            .ok_or_else(|| anyhow!("Cannot determine file size"))?;

        let etag = head_response
            .headers()
            .get(header::ETAG)
            .and_then(|v| v.to_str().ok())
            .map(|s| s.to_string());

        let supports_range = PartialContentManager::check_partial_content_support(&head_response);

        if !supports_range {
            info!("Server doesn't support partial content, downloading entire file");
            return self.download_file(file_path).await;
        }

        // Initialize or resume download
        let mut progress = self.partial_content_manager
            .init_download(file_path, total_size, etag)
            .await?;

        // Download in chunks
        while let Some(range) = progress.get_next_range(1024 * 1024) {
            debug!("Downloading range: {}", range.to_header_value());

            let response = self.authenticated_request(
                Method::GET,
                &url,
                None,
                Some(vec![
                    ("Range", &range.to_header_value()),
                ]),
            ).await?;

            if response.status() != StatusCode::PARTIAL_CONTENT {
                return Err(anyhow!("Server doesn't support partial content for this resource"));
            }

            let chunk_data = response.bytes().await?.to_vec();

            self.partial_content_manager
                .download_chunk(file_path, &range, chunk_data)
                .await?;

            progress = self.partial_content_manager
                .get_progress(file_path)
                .await
                .ok_or_else(|| anyhow!("Download progress lost"))?;

            info!("Download progress: {:.1}%", progress.percentage_complete());
        }

        // Complete the download
        self.partial_content_manager
            .complete_download(file_path, local_path.clone())
            .await?;

        // Read the completed file
        tokio::fs::read(&local_path).await.map_err(|e| anyhow!("Failed to read downloaded file: {}", e))
    }

    /// Downloads a specific byte range from a file
    pub async fn download_file_range(
        &self,
        file_path: &str,
        start: u64,
        end: Option<u64>,
    ) -> Result<Vec<u8>> {
        let url = self.get_url_for_path(file_path);
        let range = ByteRange::new(start, end);

        debug!("Downloading range {} from {}", range.to_header_value(), file_path);

        let response = self.authenticated_request(
            Method::GET,
            &url,
            None,
            Some(vec![
                ("Range", &range.to_header_value()),
            ]),
        ).await?;

        match response.status() {
            StatusCode::PARTIAL_CONTENT => {
                let data = response.bytes().await?.to_vec();
                debug!("Downloaded {} bytes for range", data.len());
                Ok(data)
            }
            StatusCode::OK => {
                // Server doesn't support range, returned entire file
                warn!("Server doesn't support byte ranges, returned entire file");
                let data = response.bytes().await?.to_vec();

                // Extract requested range from full content
                let end_pos = end.unwrap_or(data.len() as u64 - 1).min(data.len() as u64 - 1);
                if start as usize >= data.len() {
                    return Err(anyhow!("Range start beyond file size"));
                }
                Ok(data[start as usize..=end_pos as usize].to_vec())
            }
            StatusCode::RANGE_NOT_SATISFIABLE => {
                Err(anyhow!("Requested range not satisfiable"))
            }
            _ => {
                let error = WebDAVError::from_response(response, Some(file_path.to_string())).await;
                Err(anyhow!("Failed to download range: {}", error))
            }
        }
    }

    /// Gets active download progress
    pub async fn get_download_progress(&self, file_path: &str) -> Option<DownloadProgress> {
        self.partial_content_manager.get_progress(file_path).await
    }

    /// Lists all active downloads
    pub async fn list_active_downloads(&self) -> Vec<DownloadProgress> {
        self.partial_content_manager.list_downloads().await
    }

    /// Cancels an active download
    pub async fn cancel_download(&self, file_path: &str) -> Result<()> {
        self.partial_content_manager.cancel_download(file_path).await
    }

    // ============================================================================
    // Enhanced Error Handling with WebDAV Status Codes
    // ============================================================================

    /// Performs authenticated request with enhanced error handling
    pub async fn authenticated_request_enhanced(
        &self,
        method: Method,
        url: &str,
        body: Option<String>,
        headers: Option<Vec<(&str, &str)>>,
        expected_codes: &[u16],
    ) -> Result<Response> {
        let response = self.authenticated_request(method, url, body, headers).await?;

        StatusCodeHandler::handle_response(
            response,
            Some(url.to_string()),
            expected_codes,
        ).await
    }

    /// Performs operation with automatic retry based on status codes
    pub async fn with_smart_retry<F, T>(
        &self,
        operation: F,
        max_attempts: u32,
    ) -> Result<T>
    where
        F: Fn() -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send,
    {
        let mut attempt = 0;

        loop {
            match operation().await {
                Ok(result) => return Ok(result),
                Err(e) => {
                    // Check if error contains a status code that's retryable
                    let error_str = e.to_string();
                    let is_retryable = error_str.contains("423") || // Locked
                        error_str.contains("429") || // Rate limited
                        error_str.contains("503") || // Service unavailable
                        error_str.contains("409"); // Conflict

                    if !is_retryable || attempt >= max_attempts {
                        return Err(e);
                    }

                    // Calculate retry delay
                    let delay = if error_str.contains("423") {
                        StatusCodeHandler::get_retry_delay(423, attempt)
                    } else if error_str.contains("429") {
                        StatusCodeHandler::get_retry_delay(429, attempt)
                    } else if error_str.contains("503") {
                        StatusCodeHandler::get_retry_delay(503, attempt)
                    } else {
                        StatusCodeHandler::get_retry_delay(409, attempt)
                    };

                    warn!("Retryable error on attempt {}/{}: {}. Retrying in {} seconds...",
                        attempt + 1, max_attempts, e, delay);

                    tokio::time::sleep(Duration::from_secs(delay)).await;
                    attempt += 1;
                }
            }
        }
    }
}

@@ -1967,6 +2373,8 @@ impl Clone for WebDAVService {
            scan_semaphore: Arc::clone(&self.scan_semaphore),
            download_semaphore: Arc::clone(&self.download_semaphore),
            working_protocol: Arc::clone(&self.working_protocol),
            lock_manager: self.lock_manager.clone(),
            partial_content_manager: self.partial_content_manager.clone(),
        }
    }
}