Merge pull request #409 from readur/label-filter-search-improvements

feat: Label filter search improvements
This commit is contained in:
Alex
2025-12-16 16:58:42 -08:00
committed by GitHub
11 changed files with 900 additions and 78 deletions

4
Cargo.lock generated
View File

@@ -4907,7 +4907,7 @@ dependencies = [
"getrandom 0.3.3",
"once_cell",
"rustix 1.0.7",
"windows-sys 0.60.2",
"windows-sys 0.61.2",
]
[[package]]
@@ -5746,7 +5746,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@@ -1023,6 +1023,7 @@
"inUse": "Cannot delete label because it is currently assigned to documents. Please remove the label from all documents first.",
"systemDelete": "System labels cannot be deleted. Only user-created labels can be removed.",
"invalidName": "Label name contains invalid characters. Please use only letters, numbers, and basic punctuation.",
"commaNotAllowed": "Label names cannot contain commas.",
"invalidColor": "Invalid color format. Please use a valid hex color like #0969da.",
"maxLabelsReached": "Maximum number of labels reached. Please delete some labels before creating new ones."
}

View File

@@ -12,6 +12,7 @@ import {
Paper,
Tooltip,
} from '@mui/material';
import axios from 'axios';
import Grid from '@mui/material/GridLegacy';
import {
Star as StarIcon,
@@ -121,6 +122,13 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
return;
}
// Disallow commas in label names (breaks comma-separated search filters)
// Also check for URL-encoded commas (%2c) which could cause issues in query parameters
if (formData.name.includes(',') || formData.name.toLowerCase().includes('%2c')) {
setNameError(t('labels.errors.commaNotAllowed'));
return;
}
setLoading(true);
try {
await onSubmit({
@@ -135,7 +143,14 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
onClose();
} catch (error) {
console.error('Failed to save label:', error);
// Could add error handling UI here
// Extract error message from backend JSON response
if (axios.isAxiosError(error) && error.response?.data?.error) {
setNameError(error.response.data.error);
} else if (error instanceof Error) {
setNameError(error.message);
} else {
setNameError(t('labels.errors.serverError'));
}
} finally {
setLoading(false);
}
@@ -176,8 +191,8 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
{editingLabel ? t('labels.create.editTitle') : t('labels.create.title')}
</DialogTitle>
<DialogContent sx={{ pt: 2 }}>
<Grid container spacing={3}>
<DialogContent>
<Grid container spacing={3} sx={{ mt: 0.5 }}>
{/* Name Field */}
<Grid item xs={12}>
<TextField

View File

@@ -62,7 +62,7 @@ import {
TrendingUp as TrendingIcon,
TextFormat as TextFormatIcon,
} from '@mui/icons-material';
import { documentService, SearchRequest } from '../services/api';
import { documentService, SearchRequest, api } from '../services/api';
import SearchGuidance from '../components/SearchGuidance';
import EnhancedSearchGuide from '../components/EnhancedSearchGuide';
import MimeTypeFacetFilter from '../components/MimeTypeFacetFilter';
@@ -250,7 +250,9 @@ const SearchPage: React.FC = () => {
}, []);
const performSearch = useCallback(async (query: string, filters: SearchFilters = {}, page: number = 1): Promise<void> => {
if (!query.trim()) {
const hasFilters = (filters.tags?.length ?? 0) > 0 ||
(filters.mimeTypes?.length ?? 0) > 0;
if (!query.trim() && !hasFilters) {
setSearchResults([]);
setTotalResults(0);
setQueryTime(0);
@@ -271,8 +273,8 @@ const SearchPage: React.FC = () => {
const searchRequest: SearchRequest = {
query: query.trim(),
tags: filters.tags?.length ? filters.tags : undefined,
mime_types: filters.mimeTypes?.length ? filters.mimeTypes : undefined,
tags: filters.tags?.length ? filters.tags.join(',') : undefined,
mime_types: filters.mimeTypes?.length ? filters.mimeTypes.join(',') : undefined,
limit: resultsPerPage,
offset: (page - 1) * resultsPerPage,
include_snippets: advancedSettings.includeSnippets,
@@ -321,11 +323,7 @@ const SearchPage: React.FC = () => {
setTotalResults(response.data.total || results.length);
setQueryTime(response.data.query_time_ms || 0);
setSuggestions(response.data.suggestions || []);
// Extract unique tags for filter options
const tags = [...new Set(results.flatMap(doc => doc.tags || []))].filter(tag => typeof tag === 'string');
setAvailableTags(tags);
// Clear progress after a brief delay
setTimeout(() => setSearchProgress(0), 500);
@@ -356,6 +354,20 @@ const SearchPage: React.FC = () => {
[generateQuickSuggestions]
);
// Load available tags from labels API on component mount
useEffect(() => {
const loadLabels = async () => {
try {
const response = await api.get('/labels?include_counts=true');
const labelNames = (response.data || []).map((label: any) => label.name);
setAvailableTags(labelNames);
} catch (error) {
console.error('Failed to load labels:', error);
}
};
loadLabels();
}, []);
// Handle URL search params
useEffect(() => {
const queryFromUrl = searchParams.get('q');

View File

@@ -69,8 +69,8 @@ export interface Document {
export interface SearchRequest {
query: string
tags?: string[]
mime_types?: string[]
tags?: string // Comma-separated label names (e.g., "important,work")
mime_types?: string // Comma-separated MIME types (e.g., "application/pdf,image/png")
limit?: number
offset?: number
include_snippets?: boolean

View File

@@ -166,20 +166,25 @@ impl Database {
}).collect())
}
/// Gets tag facets (aggregated counts by tag)
pub async fn get_tag_facets(&self, user_id: Uuid, user_role: UserRole) -> Result<Vec<FacetItem>> {
let mut query = QueryBuilder::<Postgres>::new(
"SELECT unnest(tags) as value, COUNT(*) as count FROM documents WHERE 1=1"
);
/// Gets tag facets (aggregated counts by label)
pub async fn get_tag_facets(&self, user_id: Uuid, _user_role: UserRole) -> Result<Vec<FacetItem>> {
let query = sqlx::query_as::<_, (String, i64)>(
r#"
SELECT l.name as value, COUNT(DISTINCT dl.document_id) as count
FROM labels l
LEFT JOIN document_labels dl ON l.id = dl.label_id
WHERE (l.user_id = $1 OR l.is_system = true)
GROUP BY l.id, l.name
ORDER BY count DESC, l.name
"#
)
.bind(user_id);
apply_role_based_filter(&mut query, user_id, user_role);
query.push(" GROUP BY unnest(tags) ORDER BY count DESC, value");
let rows = query.fetch_all(&self.pool).await?;
let rows = query.build().fetch_all(&self.pool).await?;
Ok(rows.into_iter().map(|row| FacetItem {
value: row.get("value"),
count: row.get("count"),
Ok(rows.into_iter().map(|(value, count)| FacetItem {
value,
count,
}).collect())
}

View File

@@ -23,11 +23,12 @@ impl Database {
query.push("))");
}
// Add tag filtering
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND tags && ");
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
@@ -128,11 +129,12 @@ impl Database {
}
}
// Add filtering
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND tags && ");
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
@@ -256,4 +258,66 @@ impl Database {
snippets.truncate(5);
snippets
}
/// Counts total matching documents for pagination (without applying LIMIT/OFFSET)
pub async fn count_search_documents(&self, user_id: Uuid, user_role: UserRole, search_request: &SearchRequest) -> Result<i64> {
let search_query = search_request.query.trim();
let mut query = QueryBuilder::<Postgres>::new("SELECT COUNT(*) FROM documents WHERE 1=1");
apply_role_based_filter(&mut query, user_id, user_role);
// Add search conditions (same as enhanced_search_documents_with_role)
if !search_query.is_empty() {
match search_request.search_mode.as_ref().unwrap_or(&SearchMode::Simple) {
SearchMode::Simple => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ plainto_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ plainto_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Phrase => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ phraseto_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ phraseto_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Boolean => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ to_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ to_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Fuzzy => {
query.push(" AND similarity(COALESCE(content, '') || ' ' || COALESCE(ocr_text, ''), ");
query.push_bind(search_query);
query.push(") > 0.3");
}
}
}
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
// Add MIME type filtering
if let Some(ref mime_types) = search_request.mime_types {
if !mime_types.is_empty() {
query.push(" AND mime_type = ANY(");
query.push_bind(mime_types);
query.push(")");
}
}
let row: (i64,) = query.build_query_as().fetch_one(&self.pool).await?;
Ok(row.0)
}
}

View File

@@ -1,15 +1,51 @@
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize};
use utoipa::{ToSchema, IntoParams};
use super::responses::EnhancedDocumentResponse;
/// Maximum length for comma-separated query parameters (DoS protection)
const MAX_COMMA_SEPARATED_LENGTH: usize = 2000;
/// Maximum number of items in a comma-separated list (DoS protection)
const MAX_COMMA_SEPARATED_ITEMS: usize = 50;
/// Deserializes an optional comma-separated string into `Option<Vec<String>>`.
///
/// `"a, b,,c"` becomes `Some(vec!["a", "b", "c"])`: each piece is trimmed and
/// empty pieces are discarded. Yields `None` when the parameter is absent,
/// when nothing survives filtering, or when the raw input exceeds
/// `MAX_COMMA_SEPARATED_LENGTH` bytes (the whole filter is silently dropped
/// as DoS protection). At most `MAX_COMMA_SEPARATED_ITEMS` values are kept.
fn deserialize_comma_separated<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
where
    D: Deserializer<'de>,
{
    let raw = match Option::<String>::deserialize(deserializer)? {
        Some(s) => s,
        None => return Ok(None),
    };
    // DoS protection: refuse to parse overly long inputs at all.
    if raw.len() > MAX_COMMA_SEPARATED_LENGTH {
        return Ok(None);
    }
    let mut values: Vec<String> = Vec::new();
    for piece in raw.split(',') {
        // Cap the item count; extra pieces are silently ignored.
        if values.len() == MAX_COMMA_SEPARATED_ITEMS {
            break;
        }
        let trimmed = piece.trim();
        if !trimmed.is_empty() {
            values.push(trimmed.to_string());
        }
    }
    Ok(if values.is_empty() { None } else { Some(values) })
}
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
pub struct SearchRequest {
/// Search query text (searches both document content and OCR-extracted text)
#[serde(default)]
pub query: String,
/// Filter by specific tags
/// Filter by specific tags (label names)
#[serde(default, deserialize_with = "deserialize_comma_separated")]
pub tags: Option<Vec<String>>,
/// Filter by MIME types (e.g., "application/pdf", "image/png")
#[serde(default, deserialize_with = "deserialize_comma_separated")]
pub mime_types: Option<Vec<String>>,
/// Maximum number of results to return (default: 25)
pub limit: Option<i64>,
@@ -48,28 +84,20 @@ impl Default for SearchMode {
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchResponse {
/// List of matching documents with enhanced metadata and snippets
pub documents: Vec<EnhancedDocumentResponse>,
/// Total number of documents matching the search criteria
pub total: i64,
/// Time taken to execute the search in milliseconds
pub query_time_ms: u64,
/// Search suggestions for query improvement
pub suggestions: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct FacetItem {
/// The facet value (e.g., mime type or tag)
pub value: String,
/// Number of documents with this value
pub count: i64,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchFacetsResponse {
/// MIME type facets with counts
pub mime_types: Vec<FacetItem>,
/// Tag facets with counts
pub tags: Vec<FacetItem>,
}
}

View File

@@ -12,7 +12,7 @@ use uuid::Uuid;
use chrono::{DateTime, Utc};
use sqlx::{FromRow, Row};
use crate::{auth::AuthUser, AppState};
use crate::{auth::AuthUser, errors::label::LabelError, AppState};
#[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)]
pub struct Label {
@@ -166,22 +166,28 @@ pub async fn create_label(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Json(payload): Json<CreateLabel>,
) -> Result<Json<Label>, StatusCode> {
) -> Result<Json<Label>, LabelError> {
let user_id = auth_user.user.id;
// Validate name is not empty
if payload.name.trim().is_empty() {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_name(payload.name.clone(), "Name cannot be empty".to_string()));
}
// Disallow commas in label names (breaks comma-separated search filters)
// Note: URL-encoded values (%2c) are decoded by serde before reaching here
if payload.name.contains(',') {
return Err(LabelError::invalid_name(payload.name.clone(), "Name cannot contain commas".to_string()));
}
// Validate color format
if !payload.color.starts_with('#') || payload.color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(&payload.color));
}
if let Some(ref bg_color) = payload.background_color {
if !bg_color.starts_with('#') || bg_color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(bg_color));
}
}
@@ -189,14 +195,14 @@ pub async fn create_label(
r#"
INSERT INTO labels (user_id, name, description, color, background_color, icon)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING
id, user_id, name, description, color, background_color, icon,
RETURNING
id, user_id, name, description, color, background_color, icon,
is_system, created_at, updated_at,
0::bigint as document_count, 0::bigint as source_count
"#
)
.bind(user_id)
.bind(payload.name)
.bind(&payload.name)
.bind(payload.description)
.bind(payload.color)
.bind(payload.background_color)
@@ -206,9 +212,9 @@ pub async fn create_label(
.map_err(|e| {
tracing::error!("Failed to create label: {}", e);
if e.to_string().contains("duplicate key") {
StatusCode::CONFLICT
LabelError::duplicate_name(payload.name.clone())
} else {
StatusCode::INTERNAL_SERVER_ERROR
LabelError::invalid_name(payload.name.clone(), e.to_string())
}
})?;
@@ -285,19 +291,31 @@ pub async fn update_label(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Json(payload): Json<UpdateLabel>,
) -> Result<Json<Label>, StatusCode> {
) -> Result<Json<Label>, LabelError> {
let user_id = auth_user.user.id;
// Validate name if provided
if let Some(ref name) = payload.name {
if name.trim().is_empty() {
return Err(LabelError::invalid_name(name.clone(), "Name cannot be empty".to_string()));
}
// Disallow commas in label names (breaks comma-separated search filters)
// Note: URL-encoded values (%2c) are decoded by serde before reaching here
if name.contains(',') {
return Err(LabelError::invalid_name(name.clone(), "Name cannot contain commas".to_string()));
}
}
// Validate color formats if provided
if let Some(ref color) = payload.color {
if !color.starts_with('#') || color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(color));
}
}
if let Some(ref bg_color) = payload.background_color.as_ref() {
if let Some(ref bg_color) = payload.background_color {
if !bg_color.starts_with('#') || bg_color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(bg_color));
}
}
@@ -311,18 +329,18 @@ pub async fn update_label(
.await
.map_err(|e| {
tracing::error!("Failed to check label existence: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
LabelError::NotFound
})?;
if existing.is_none() {
return Err(StatusCode::NOT_FOUND);
return Err(LabelError::NotFound);
}
// Use COALESCE to update only provided fields
let label = sqlx::query_as::<_, Label>(
r#"
UPDATE labels
SET
UPDATE labels
SET
name = COALESCE($2, name),
description = COALESCE($3, description),
color = COALESCE($4, color),
@@ -330,14 +348,14 @@ pub async fn update_label(
icon = COALESCE($6, icon),
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
RETURNING
id, user_id, name, description, color, background_color, icon,
RETURNING
id, user_id, name, description, color, background_color, icon,
is_system, created_at, updated_at,
0::bigint as document_count, 0::bigint as source_count
"#
)
.bind(label_id)
.bind(payload.name)
.bind(&payload.name)
.bind(payload.description)
.bind(payload.color)
.bind(payload.background_color)
@@ -347,9 +365,9 @@ pub async fn update_label(
.map_err(|e| {
tracing::error!("Failed to update label: {}", e);
if e.to_string().contains("duplicate key") {
StatusCode::CONFLICT
LabelError::duplicate_name(payload.name.clone().unwrap_or_default())
} else {
StatusCode::INTERNAL_SERVER_ERROR
LabelError::invalid_name(payload.name.clone().unwrap_or_default(), e.to_string())
}
})?;

View File

@@ -43,8 +43,10 @@ async fn search_documents(
auth_user: AuthUser,
Query(search_request): Query<SearchRequest>,
) -> Result<Json<SearchResponse>, SearchError> {
// Validate query length
if search_request.query.len() < 2 {
// Validate query length (allow empty query if filters are present)
let has_filters = search_request.tags.as_ref().map_or(false, |t| !t.is_empty())
|| search_request.mime_types.as_ref().map_or(false, |m| !m.is_empty());
if search_request.query.len() < 2 && !has_filters {
return Err(SearchError::query_too_short(search_request.query.len(), 2));
}
if search_request.query.len() > 1000 {
@@ -58,18 +60,23 @@ async fn search_documents(
return Err(SearchError::invalid_pagination(offset, limit));
}
// Get total count (without pagination) for proper pagination support
let total = state
.db
.count_search_documents(auth_user.user.id, auth_user.user.role.clone(), &search_request)
.await
.map_err(|e| SearchError::index_unavailable(format!("Count failed: {}", e)))?;
// Check if too many results
if total > 10000 {
return Err(SearchError::too_many_results(total, 10000));
}
let documents = state
.db
.search_documents(auth_user.user.id, &search_request)
.await
.map_err(|e| SearchError::index_unavailable(format!("Search failed: {}", e)))?;
let total = documents.len() as i64;
// Check if too many results
if total > 10000 {
return Err(SearchError::too_many_results(total, 10000));
}
let response = SearchResponse {
documents: documents.into_iter().map(|doc| EnhancedDocumentResponse {
@@ -118,18 +125,32 @@ async fn enhanced_search_documents(
auth_user: AuthUser,
Query(search_request): Query<SearchRequest>,
) -> Result<Json<SearchResponse>, StatusCode> {
// Validate query length (allow empty query if filters are present)
let has_filters = search_request.tags.as_ref().map_or(false, |t| !t.is_empty())
|| search_request.mime_types.as_ref().map_or(false, |m| !m.is_empty());
if search_request.query.len() < 2 && !has_filters {
return Err(StatusCode::BAD_REQUEST);
}
// Generate suggestions before moving search_request
let suggestions = generate_search_suggestions(&search_request.query);
let start_time = std::time::Instant::now();
// Get total count (without pagination) for proper pagination support
let total = state
.db
.count_search_documents(auth_user.user.id, auth_user.user.role.clone(), &search_request)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let documents = state
.db
.enhanced_search_documents_with_role(auth_user.user.id, auth_user.user.role, &search_request)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let query_time = start_time.elapsed().as_millis() as u64;
let total = documents.len() as i64;
let response = SearchResponse {
documents,

View File

@@ -0,0 +1,658 @@
//! Integration tests for search pagination functionality.
//!
//! These tests verify that the `count_search_documents` method returns accurate
//! total counts for pagination, ensuring the fix for the pagination bug doesn't regress.
#[cfg(test)]
mod tests {
use anyhow::Result;
use readur::test_utils::TestContext;
use readur::models::{CreateUser, Document, SearchRequest, UserRole};
use chrono::Utc;
use uuid::Uuid;
use std::collections::HashSet;
use sqlx;
/// Builds registration data for a regular test user. `suffix` plus a
/// timestamp-derived tail keep usernames and emails unique across tests.
fn create_test_user_data(suffix: &str) -> CreateUser {
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos()
        .to_string();
    // Last (up to) 8 digits of the nanosecond timestamp.
    let tail = &nanos[nanos.len().saturating_sub(8)..];
    CreateUser {
        username: format!("testuser_{}_{}", suffix, tail),
        email: format!("test_{}_{}@example.com", suffix, tail),
        password: "password123".to_string(),
        role: Some(UserRole::User),
    }
}
/// Builds registration data for an admin user, used by the role-based
/// access tests. Uniqueness scheme matches `create_test_user_data`.
fn create_admin_user_data(suffix: &str) -> CreateUser {
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos()
        .to_string();
    // Last (up to) 8 digits of the nanosecond timestamp.
    let tail = &nanos[nanos.len().saturating_sub(8)..];
    CreateUser {
        username: format!("admin_{}_{}", suffix, tail),
        email: format!("admin_{}_{}@example.com", suffix, tail),
        password: "password123".to_string(),
        role: Some(UserRole::Admin),
    }
}
/// Builds a fully-populated, OCR-complete document owned by `user_id`.
/// Both `content` and `ocr_text` contain the word "searchable", so the
/// pagination tests can match every generated document with one query.
fn create_searchable_document(user_id: Uuid, index: i32, mime_type: &str) -> Document {
    let name = format!("test_{}.txt", index);
    Document {
        id: Uuid::new_v4(),
        filename: name.clone(),
        original_filename: name,
        file_path: format!("/path/to/test_{}.txt", index),
        file_size: 1024,
        mime_type: mime_type.to_string(),
        content: Some(format!("Document {} with searchable content for pagination testing", index)),
        ocr_text: Some(format!("OCR text {} searchable pagination", index)),
        ocr_confidence: Some(95.0),
        ocr_word_count: Some(10),
        ocr_processing_time_ms: Some(800),
        ocr_status: Some("completed".to_string()),
        ocr_error: None,
        ocr_completed_at: Some(Utc::now()),
        tags: vec!["test".to_string(), "pagination".to_string()],
        created_at: Utc::now(),
        updated_at: Utc::now(),
        user_id,
        // Random hash keeps documents distinct for dedup logic, if any.
        file_hash: Some(format!("{:x}", Uuid::new_v4().as_u128())),
        original_created_at: None,
        original_modified_at: None,
        source_path: None,
        source_type: None,
        source_id: None,
        file_permissions: None,
        file_owner: None,
        file_group: None,
        source_metadata: None,
        ocr_retry_count: None,
        ocr_failure_reason: None,
    }
}
/// The total count must reflect every matching document, not the page size.
#[tokio::test]
async fn test_count_matches_actual_documents() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("count1")).await?;
        // Insert 15 documents that all match the query below.
        for idx in 0..15 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // Page size of 5 — the count must still report all 15 matches.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 15, "Count should be total matching docs (15), not limit (5)");
        assert_eq!(page.len(), 5, "Results should respect the limit of 5");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// The reported total must not change as the caller pages through results.
#[tokio::test]
async fn test_pagination_total_consistent() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("consistent1")).await?;
        // Seed 20 matching documents.
        for idx in 0..20 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // Every page (offsets 0/5/10/15) must report the same total of 20.
        for offset in [0, 5, 10, 15] {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(5),
                offset: Some(offset),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
            assert_eq!(total, 20, "Total should be consistent (20) at offset {}", offset);
        }
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Walking every page must yield each document exactly once — no
/// duplicates across pages and no documents skipped.
#[tokio::test]
async fn test_pagination_fetches_all_documents() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("fetchall1")).await?;
        // 17 documents: deliberately not a multiple of the page size.
        for idx in 0..17 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        let page_size = 5i64;
        let mut seen: HashSet<Uuid> = HashSet::new();
        // Four pages of five cover all 17 documents.
        for page in 0..4 {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(page_size),
                offset: Some(page * page_size),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            for doc in database.search_documents(user.id, &request).await? {
                // A failed insert means the same id showed up on two pages.
                let is_new = seen.insert(doc.id);
                assert!(is_new, "Document {} appeared on multiple pages", doc.id);
            }
        }
        assert_eq!(seen.len(), 17, "Should have fetched all 17 documents exactly once");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// MIME-type filtering must be applied to the count, not just the results.
#[tokio::test]
async fn test_pagination_with_mime_filter() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("mime1")).await?;
        // 10 plain-text documents...
        for idx in 0..10 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // ...plus 5 PDFs.
        for idx in 10..15 {
            database
                .create_document(create_searchable_document(user.id, idx, "application/pdf"))
                .await?;
        }
        // Counting with a text/plain filter must exclude the PDFs.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: Some(vec!["text/plain".to_string()]),
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let plain_count = database.count_search_documents(user.id, UserRole::User, &request).await?;
        assert_eq!(plain_count, 10, "Count should be 10 (only text/plain), not 15 (all docs)");
        // And the inverse filter must count only the PDFs.
        let request_pdf = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: Some(vec!["application/pdf".to_string()]),
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let pdf_count = database.count_search_documents(user.id, UserRole::User, &request_pdf).await?;
        assert_eq!(pdf_count, 5, "Count should be 5 (only PDFs)");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// A query with no matches must produce a zero count and an empty page.
#[tokio::test]
async fn test_pagination_empty_results() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("empty1")).await?;
        // Seed documents whose text cannot match the query below.
        for idx in 0..5 {
            let mut doc = create_searchable_document(user.id, idx, "text/plain");
            doc.content = Some("This content has no matching words".to_string());
            doc.ocr_text = Some("OCR text without matches".to_string());
            database.create_document(doc).await?;
        }
        let request = SearchRequest {
            query: "xyznonexistent".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(10),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 0, "Count should be 0 when no matches");
        assert_eq!(page.len(), 0, "Results should be empty when no matches");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// The final partial page must return only the remaining documents.
#[tokio::test]
async fn test_pagination_boundary_last_page() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("boundary1")).await?;
        // 13 documents with a page size of 5 leaves 3 on the last page.
        for idx in 0..13 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(5),
            offset: Some(10),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 13, "Total count should still be 13");
        assert_eq!(page.len(), 3, "Last page should have 3 remaining documents");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// `count_search_documents` must be independent of the limit/offset values
/// supplied in the request.
#[tokio::test]
async fn test_count_ignores_limit_offset() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("ignore1")).await?;
        // Seed 25 matching documents.
        for idx in 0..25 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // (limit, offset) pairs: tiny limit, large limit, first page,
        // near the last page, and past the end of the result set.
        for (limit, offset) in [(1, 0), (100, 0), (5, 0), (5, 20), (5, 100)] {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(limit),
                offset: Some(offset),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
            assert_eq!(total, 25, "Count should always be 25 regardless of limit={}, offset={}", limit, offset);
        }
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Regular users count only their own documents; admins count everyone's.
#[tokio::test]
async fn test_admin_sees_all_user_sees_own() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        // Two regular users plus one admin.
        let user_a = database.create_user(create_test_user_data("usera")).await?;
        let user_b = database.create_user(create_test_user_data("userb")).await?;
        let admin = database.create_user(create_admin_user_data("admin")).await?;
        // 10 documents owned by A, 5 owned by B.
        for idx in 0..10 {
            database
                .create_document(create_searchable_document(user_a.id, idx, "text/plain"))
                .await?;
        }
        for idx in 10..15 {
            database
                .create_document(create_searchable_document(user_b.id, idx, "text/plain"))
                .await?;
        }
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(100),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count_a = database.count_search_documents(user_a.id, UserRole::User, &request).await?;
        assert_eq!(count_a, 10, "User A should see only their 10 documents");
        let count_b = database.count_search_documents(user_b.id, UserRole::User, &request).await?;
        assert_eq!(count_b, 5, "User B should see only their 5 documents");
        let count_admin = database.count_search_documents(admin.id, UserRole::Admin, &request).await?;
        assert_eq!(count_admin, 15, "Admin should see all 15 documents");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Checks that a text query filters both the total count and the paged
/// result set: six "apple" documents and four "orange" documents are
/// counted separately, and the returned page still honors the requested
/// limit.
#[tokio::test]
async fn test_pagination_with_text_query() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("textq1")).await?;

        // Documents 0-5 mention "apple"; documents 6-9 mention "orange".
        for i in 0..10 {
            let mut doc = create_searchable_document(user.id, i, "text/plain");
            doc.content = Some(if i < 6 {
                format!("Document {} contains the word apple", i)
            } else {
                format!("Document {} contains the word orange", i)
            });
            db.create_document(doc).await?;
        }

        // Builds a request that differs only in query text and page limit.
        let build_request = |query: &str, limit| SearchRequest {
            query: query.to_string(),
            tags: None,
            mime_types: None,
            limit: Some(limit),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };

        // "apple": total count is 6, but the returned page is capped at 3.
        let request_apple = build_request("apple", 3);
        let count_apple = db.count_search_documents(user.id, UserRole::User, &request_apple).await?;
        let results_apple = db.search_documents(user.id, &request_apple).await?;
        assert_eq!(count_apple, 6, "Should find 6 documents with 'apple'");
        assert_eq!(results_apple.len(), 3, "Should return 3 (limit)");

        // "orange": all 4 matches fit within the limit of 10.
        let request_orange = build_request("orange", 10);
        let count_orange = db.count_search_documents(user.id, UserRole::User, &request_orange).await?;
        assert_eq!(count_orange, 4, "Should find 4 documents with 'orange'");

        Ok(())
    }
    .await;

    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
/// Test that count correctly filters by labels
///
/// Seeds one user-defined label ("important"), attaches it to 6 of 10
/// documents, and verifies that `count_search_documents` reflects the label
/// filter (6, not 10) while `search_documents` still honors the page limit.
/// Also confirms that filtering by a non-existent label name yields 0.
#[tokio::test]
async fn test_pagination_with_label_filter() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("label1")).await?;

        // Create a label using direct SQL (no db.create_label method exists).
        let label_id = Uuid::new_v4();
        sqlx::query(
            r#"
            INSERT INTO labels (id, user_id, name, description, color, is_system)
            VALUES ($1, $2, $3, $4, $5, $6)
            "#
        )
        .bind(label_id)
        .bind(user.id)
        .bind("important")
        .bind("Important documents")
        .bind("#ff0000")
        .bind(false) // user-created label, not a system label
        .execute(db.get_pool())
        .await?;

        // Create 10 documents, assign the label to the first 6 of them so the
        // labeled subset (6) differs from the full set (10).
        for i in 0..10 {
            let doc = db.create_document(create_searchable_document(user.id, i, "text/plain")).await?;
            if i < 6 {
                // Assign label to first 6 documents (again via direct SQL).
                sqlx::query(
                    "INSERT INTO document_labels (document_id, label_id, assigned_by) VALUES ($1, $2, $3)"
                )
                .bind(doc.id)
                .bind(label_id)
                .bind(user.id)
                .execute(db.get_pool())
                .await?;
            }
        }

        // Filter by label name: count must honor the label filter, while the
        // result page is still capped by `limit`.
        // NOTE(review): the label filter travels in the `tags` field here —
        // presumably tags and labels share this request field; verify against
        // the SearchRequest definition.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: Some(vec!["important".to_string()]),
            mime_types: None,
            limit: Some(3),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count = db.count_search_documents(user.id, UserRole::User, &request).await?;
        let results = db.search_documents(user.id, &request).await?;
        assert_eq!(count, 6, "Count should be 6 (only labeled docs), not 10 (all docs)");
        assert_eq!(results.len(), 3, "Results should respect limit of 3");

        // Test with non-existent label: no document carries it, so the
        // filtered count must be zero rather than falling back to all docs.
        let request_none = SearchRequest {
            query: "searchable".to_string(),
            tags: Some(vec!["nonexistent".to_string()]),
            mime_types: None,
            limit: Some(3),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count_none = db.count_search_documents(user.id, UserRole::User, &request_none).await?;
        assert_eq!(count_none, 0, "Count should be 0 for non-existent label");

        Ok(())
    }.await;

    // Best-effort cleanup: report failures but do not mask the test result.
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
/// Exercises filter-only search: an empty text query combined with a
/// MIME-type filter should count exactly the matching documents.
#[tokio::test]
async fn test_pagination_filter_only_no_query() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("filteronly1")).await?;

        // Seed 8 plain-text documents and 4 PNG images.
        for (range, mime) in [(0..8, "text/plain"), (8..12, "image/png")] {
            for i in range {
                db.create_document(create_searchable_document(user.id, i, mime)).await?;
            }
        }

        // No text query at all — only a MIME-type constraint.
        let request = SearchRequest {
            query: String::new(),
            tags: None,
            mime_types: Some(vec!["image/png".to_string()]),
            limit: Some(2),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };

        let count = db.count_search_documents(user.id, UserRole::User, &request).await?;
        assert_eq!(count, 4, "Should count 4 PNG images with empty query");

        Ok(())
    }
    .await;

    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
}