Merge pull request #409 from readur/label-filter-search-improvements

feat: Label filter search improvements
This commit is contained in:
Alex
2025-12-16 16:58:42 -08:00
committed by GitHub
11 changed files with 900 additions and 78 deletions

4
Cargo.lock generated
View File

@@ -4907,7 +4907,7 @@ dependencies = [
"getrandom 0.3.3",
"once_cell",
"rustix 1.0.7",
"windows-sys 0.60.2",
"windows-sys 0.61.2",
]
[[package]]
@@ -5746,7 +5746,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@@ -1023,6 +1023,7 @@
"inUse": "Cannot delete label because it is currently assigned to documents. Please remove the label from all documents first.",
"systemDelete": "System labels cannot be deleted. Only user-created labels can be removed.",
"invalidName": "Label name contains invalid characters. Please use only letters, numbers, and basic punctuation.",
"commaNotAllowed": "Label names cannot contain commas.",
"invalidColor": "Invalid color format. Please use a valid hex color like #0969da.",
"maxLabelsReached": "Maximum number of labels reached. Please delete some labels before creating new ones."
}

View File

@@ -12,6 +12,7 @@ import {
Paper,
Tooltip,
} from '@mui/material';
import axios from 'axios';
import Grid from '@mui/material/GridLegacy';
import {
Star as StarIcon,
@@ -121,6 +122,13 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
return;
}
// Disallow commas in label names (breaks comma-separated search filters)
// Also check for URL-encoded commas (%2c) which could cause issues in query parameters
if (formData.name.includes(',') || formData.name.toLowerCase().includes('%2c')) {
setNameError(t('labels.errors.commaNotAllowed'));
return;
}
setLoading(true);
try {
await onSubmit({
@@ -135,7 +143,14 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
onClose();
} catch (error) {
console.error('Failed to save label:', error);
// Could add error handling UI here
// Extract error message from backend JSON response
if (axios.isAxiosError(error) && error.response?.data?.error) {
setNameError(error.response.data.error);
} else if (error instanceof Error) {
setNameError(error.message);
} else {
setNameError(t('labels.errors.serverError'));
}
} finally {
setLoading(false);
}
@@ -176,8 +191,8 @@ const LabelCreateDialog: React.FC<LabelCreateDialogProps> = ({
{editingLabel ? t('labels.create.editTitle') : t('labels.create.title')}
</DialogTitle>
<DialogContent sx={{ pt: 2 }}>
<Grid container spacing={3}>
<DialogContent>
<Grid container spacing={3} sx={{ mt: 0.5 }}>
{/* Name Field */}
<Grid item xs={12}>
<TextField

View File

@@ -62,7 +62,7 @@ import {
TrendingUp as TrendingIcon,
TextFormat as TextFormatIcon,
} from '@mui/icons-material';
import { documentService, SearchRequest } from '../services/api';
import { documentService, SearchRequest, api } from '../services/api';
import SearchGuidance from '../components/SearchGuidance';
import EnhancedSearchGuide from '../components/EnhancedSearchGuide';
import MimeTypeFacetFilter from '../components/MimeTypeFacetFilter';
@@ -250,7 +250,9 @@ const SearchPage: React.FC = () => {
}, []);
const performSearch = useCallback(async (query: string, filters: SearchFilters = {}, page: number = 1): Promise<void> => {
if (!query.trim()) {
const hasFilters = (filters.tags?.length ?? 0) > 0 ||
(filters.mimeTypes?.length ?? 0) > 0;
if (!query.trim() && !hasFilters) {
setSearchResults([]);
setTotalResults(0);
setQueryTime(0);
@@ -271,8 +273,8 @@ const SearchPage: React.FC = () => {
const searchRequest: SearchRequest = {
query: query.trim(),
tags: filters.tags?.length ? filters.tags : undefined,
mime_types: filters.mimeTypes?.length ? filters.mimeTypes : undefined,
tags: filters.tags?.length ? filters.tags.join(',') : undefined,
mime_types: filters.mimeTypes?.length ? filters.mimeTypes.join(',') : undefined,
limit: resultsPerPage,
offset: (page - 1) * resultsPerPage,
include_snippets: advancedSettings.includeSnippets,
@@ -321,11 +323,7 @@ const SearchPage: React.FC = () => {
setTotalResults(response.data.total || results.length);
setQueryTime(response.data.query_time_ms || 0);
setSuggestions(response.data.suggestions || []);
// Extract unique tags for filter options
const tags = [...new Set(results.flatMap(doc => doc.tags || []))].filter(tag => typeof tag === 'string');
setAvailableTags(tags);
// Clear progress after a brief delay
setTimeout(() => setSearchProgress(0), 500);
@@ -356,6 +354,20 @@ const SearchPage: React.FC = () => {
[generateQuickSuggestions]
);
// Load available tags from labels API on component mount
useEffect(() => {
const loadLabels = async () => {
try {
const response = await api.get('/labels?include_counts=true');
const labelNames = (response.data || []).map((label: any) => label.name);
setAvailableTags(labelNames);
} catch (error) {
console.error('Failed to load labels:', error);
}
};
loadLabels();
}, []);
// Handle URL search params
useEffect(() => {
const queryFromUrl = searchParams.get('q');

View File

@@ -69,8 +69,8 @@ export interface Document {
export interface SearchRequest {
query: string
tags?: string[]
mime_types?: string[]
tags?: string // Comma-separated label names (e.g., "important,work")
mime_types?: string // Comma-separated MIME types (e.g., "application/pdf,image/png")
limit?: number
offset?: number
include_snippets?: boolean

View File

@@ -166,20 +166,25 @@ impl Database {
}).collect())
}
/// Gets tag facets (aggregated counts by tag)
pub async fn get_tag_facets(&self, user_id: Uuid, user_role: UserRole) -> Result<Vec<FacetItem>> {
let mut query = QueryBuilder::<Postgres>::new(
"SELECT unnest(tags) as value, COUNT(*) as count FROM documents WHERE 1=1"
);
/// Gets tag facets (aggregated counts by label)
pub async fn get_tag_facets(&self, user_id: Uuid, _user_role: UserRole) -> Result<Vec<FacetItem>> {
let query = sqlx::query_as::<_, (String, i64)>(
r#"
SELECT l.name as value, COUNT(DISTINCT dl.document_id) as count
FROM labels l
LEFT JOIN document_labels dl ON l.id = dl.label_id
WHERE (l.user_id = $1 OR l.is_system = true)
GROUP BY l.id, l.name
ORDER BY count DESC, l.name
"#
)
.bind(user_id);
apply_role_based_filter(&mut query, user_id, user_role);
query.push(" GROUP BY unnest(tags) ORDER BY count DESC, value");
let rows = query.fetch_all(&self.pool).await?;
let rows = query.build().fetch_all(&self.pool).await?;
Ok(rows.into_iter().map(|row| FacetItem {
value: row.get("value"),
count: row.get("count"),
Ok(rows.into_iter().map(|(value, count)| FacetItem {
value,
count,
}).collect())
}

View File

@@ -23,11 +23,12 @@ impl Database {
query.push("))");
}
// Add tag filtering
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND tags && ");
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
@@ -128,11 +129,12 @@ impl Database {
}
}
// Add filtering
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND tags && ");
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
@@ -256,4 +258,66 @@ impl Database {
snippets.truncate(5);
snippets
}
/// Counts total matching documents for pagination (without applying LIMIT/OFFSET)
pub async fn count_search_documents(&self, user_id: Uuid, user_role: UserRole, search_request: &SearchRequest) -> Result<i64> {
let search_query = search_request.query.trim();
let mut query = QueryBuilder::<Postgres>::new("SELECT COUNT(*) FROM documents WHERE 1=1");
apply_role_based_filter(&mut query, user_id, user_role);
// Add search conditions (same as enhanced_search_documents_with_role)
if !search_query.is_empty() {
match search_request.search_mode.as_ref().unwrap_or(&SearchMode::Simple) {
SearchMode::Simple => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ plainto_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ plainto_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Phrase => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ phraseto_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ phraseto_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Boolean => {
query.push(" AND (to_tsvector('english', COALESCE(content, '')) @@ to_tsquery('english', ");
query.push_bind(search_query);
query.push(") OR to_tsvector('english', COALESCE(ocr_text, '')) @@ to_tsquery('english', ");
query.push_bind(search_query);
query.push("))");
}
SearchMode::Fuzzy => {
query.push(" AND similarity(COALESCE(content, '') || ' ' || COALESCE(ocr_text, ''), ");
query.push_bind(search_query);
query.push(") > 0.3");
}
}
}
// Add label filtering (tags param contains label names)
if let Some(ref tags) = search_request.tags {
if !tags.is_empty() {
query.push(" AND documents.id IN (SELECT dl.document_id FROM document_labels dl JOIN labels l ON dl.label_id = l.id WHERE l.name = ANY(");
query.push_bind(tags);
query.push("))");
}
}
// Add MIME type filtering
if let Some(ref mime_types) = search_request.mime_types {
if !mime_types.is_empty() {
query.push(" AND mime_type = ANY(");
query.push_bind(mime_types);
query.push(")");
}
}
let row: (i64,) = query.build_query_as().fetch_one(&self.pool).await?;
Ok(row.0)
}
}

View File

@@ -1,15 +1,51 @@
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize};
use utoipa::{ToSchema, IntoParams};
use super::responses::EnhancedDocumentResponse;
/// Maximum length for comma-separated query parameters (DoS protection)
const MAX_COMMA_SEPARATED_LENGTH: usize = 2000;
/// Maximum number of items in a comma-separated list (DoS protection)
const MAX_COMMA_SEPARATED_ITEMS: usize = 50;
/// Deserializes an optional comma-separated string into `Option<Vec<String>>`.
///
/// `"a, b,,c"` becomes `Some(vec!["a", "b", "c"])`: each piece is trimmed and
/// empty pieces are discarded. Yields `None` when the parameter is absent,
/// when nothing survives filtering, or when the raw input exceeds
/// `MAX_COMMA_SEPARATED_LENGTH` bytes (the whole filter is silently dropped
/// as DoS protection). At most `MAX_COMMA_SEPARATED_ITEMS` values are kept.
fn deserialize_comma_separated<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
where
    D: Deserializer<'de>,
{
    let raw = match Option::<String>::deserialize(deserializer)? {
        Some(s) => s,
        None => return Ok(None),
    };
    // DoS protection: refuse to parse overly long inputs at all.
    if raw.len() > MAX_COMMA_SEPARATED_LENGTH {
        return Ok(None);
    }
    let mut values: Vec<String> = Vec::new();
    for piece in raw.split(',') {
        // Cap the item count; extra pieces are silently ignored.
        if values.len() == MAX_COMMA_SEPARATED_ITEMS {
            break;
        }
        let trimmed = piece.trim();
        if !trimmed.is_empty() {
            values.push(trimmed.to_string());
        }
    }
    Ok(if values.is_empty() { None } else { Some(values) })
}
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
pub struct SearchRequest {
/// Search query text (searches both document content and OCR-extracted text)
#[serde(default)]
pub query: String,
/// Filter by specific tags
/// Filter by specific tags (label names)
#[serde(default, deserialize_with = "deserialize_comma_separated")]
pub tags: Option<Vec<String>>,
/// Filter by MIME types (e.g., "application/pdf", "image/png")
#[serde(default, deserialize_with = "deserialize_comma_separated")]
pub mime_types: Option<Vec<String>>,
/// Maximum number of results to return (default: 25)
pub limit: Option<i64>,
@@ -48,28 +84,20 @@ impl Default for SearchMode {
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchResponse {
/// List of matching documents with enhanced metadata and snippets
pub documents: Vec<EnhancedDocumentResponse>,
/// Total number of documents matching the search criteria
pub total: i64,
/// Time taken to execute the search in milliseconds
pub query_time_ms: u64,
/// Search suggestions for query improvement
pub suggestions: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct FacetItem {
/// The facet value (e.g., mime type or tag)
pub value: String,
/// Number of documents with this value
pub count: i64,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchFacetsResponse {
/// MIME type facets with counts
pub mime_types: Vec<FacetItem>,
/// Tag facets with counts
pub tags: Vec<FacetItem>,
}
}

View File

@@ -12,7 +12,7 @@ use uuid::Uuid;
use chrono::{DateTime, Utc};
use sqlx::{FromRow, Row};
use crate::{auth::AuthUser, AppState};
use crate::{auth::AuthUser, errors::label::LabelError, AppState};
#[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)]
pub struct Label {
@@ -166,22 +166,28 @@ pub async fn create_label(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Json(payload): Json<CreateLabel>,
) -> Result<Json<Label>, StatusCode> {
) -> Result<Json<Label>, LabelError> {
let user_id = auth_user.user.id;
// Validate name is not empty
if payload.name.trim().is_empty() {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_name(payload.name.clone(), "Name cannot be empty".to_string()));
}
// Disallow commas in label names (breaks comma-separated search filters)
// Note: URL-encoded values (%2c) are decoded by serde before reaching here
if payload.name.contains(',') {
return Err(LabelError::invalid_name(payload.name.clone(), "Name cannot contain commas".to_string()));
}
// Validate color format
if !payload.color.starts_with('#') || payload.color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(&payload.color));
}
if let Some(ref bg_color) = payload.background_color {
if !bg_color.starts_with('#') || bg_color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(bg_color));
}
}
@@ -189,14 +195,14 @@ pub async fn create_label(
r#"
INSERT INTO labels (user_id, name, description, color, background_color, icon)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING
id, user_id, name, description, color, background_color, icon,
RETURNING
id, user_id, name, description, color, background_color, icon,
is_system, created_at, updated_at,
0::bigint as document_count, 0::bigint as source_count
"#
)
.bind(user_id)
.bind(payload.name)
.bind(&payload.name)
.bind(payload.description)
.bind(payload.color)
.bind(payload.background_color)
@@ -206,9 +212,9 @@ pub async fn create_label(
.map_err(|e| {
tracing::error!("Failed to create label: {}", e);
if e.to_string().contains("duplicate key") {
StatusCode::CONFLICT
LabelError::duplicate_name(payload.name.clone())
} else {
StatusCode::INTERNAL_SERVER_ERROR
LabelError::invalid_name(payload.name.clone(), e.to_string())
}
})?;
@@ -285,19 +291,31 @@ pub async fn update_label(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Json(payload): Json<UpdateLabel>,
) -> Result<Json<Label>, StatusCode> {
) -> Result<Json<Label>, LabelError> {
let user_id = auth_user.user.id;
// Validate name if provided
if let Some(ref name) = payload.name {
if name.trim().is_empty() {
return Err(LabelError::invalid_name(name.clone(), "Name cannot be empty".to_string()));
}
// Disallow commas in label names (breaks comma-separated search filters)
// Note: URL-encoded values (%2c) are decoded by serde before reaching here
if name.contains(',') {
return Err(LabelError::invalid_name(name.clone(), "Name cannot contain commas".to_string()));
}
}
// Validate color formats if provided
if let Some(ref color) = payload.color {
if !color.starts_with('#') || color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(color));
}
}
if let Some(ref bg_color) = payload.background_color.as_ref() {
if let Some(ref bg_color) = payload.background_color {
if !bg_color.starts_with('#') || bg_color.len() != 7 {
return Err(StatusCode::BAD_REQUEST);
return Err(LabelError::invalid_color(bg_color));
}
}
@@ -311,18 +329,18 @@ pub async fn update_label(
.await
.map_err(|e| {
tracing::error!("Failed to check label existence: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
LabelError::NotFound
})?;
if existing.is_none() {
return Err(StatusCode::NOT_FOUND);
return Err(LabelError::NotFound);
}
// Use COALESCE to update only provided fields
let label = sqlx::query_as::<_, Label>(
r#"
UPDATE labels
SET
UPDATE labels
SET
name = COALESCE($2, name),
description = COALESCE($3, description),
color = COALESCE($4, color),
@@ -330,14 +348,14 @@ pub async fn update_label(
icon = COALESCE($6, icon),
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
RETURNING
id, user_id, name, description, color, background_color, icon,
RETURNING
id, user_id, name, description, color, background_color, icon,
is_system, created_at, updated_at,
0::bigint as document_count, 0::bigint as source_count
"#
)
.bind(label_id)
.bind(payload.name)
.bind(&payload.name)
.bind(payload.description)
.bind(payload.color)
.bind(payload.background_color)
@@ -347,9 +365,9 @@ pub async fn update_label(
.map_err(|e| {
tracing::error!("Failed to update label: {}", e);
if e.to_string().contains("duplicate key") {
StatusCode::CONFLICT
LabelError::duplicate_name(payload.name.clone().unwrap_or_default())
} else {
StatusCode::INTERNAL_SERVER_ERROR
LabelError::invalid_name(payload.name.clone().unwrap_or_default(), e.to_string())
}
})?;

View File

@@ -43,8 +43,10 @@ async fn search_documents(
auth_user: AuthUser,
Query(search_request): Query<SearchRequest>,
) -> Result<Json<SearchResponse>, SearchError> {
// Validate query length
if search_request.query.len() < 2 {
// Validate query length (allow empty query if filters are present)
let has_filters = search_request.tags.as_ref().map_or(false, |t| !t.is_empty())
|| search_request.mime_types.as_ref().map_or(false, |m| !m.is_empty());
if search_request.query.len() < 2 && !has_filters {
return Err(SearchError::query_too_short(search_request.query.len(), 2));
}
if search_request.query.len() > 1000 {
@@ -58,18 +60,23 @@ async fn search_documents(
return Err(SearchError::invalid_pagination(offset, limit));
}
// Get total count (without pagination) for proper pagination support
let total = state
.db
.count_search_documents(auth_user.user.id, auth_user.user.role.clone(), &search_request)
.await
.map_err(|e| SearchError::index_unavailable(format!("Count failed: {}", e)))?;
// Check if too many results
if total > 10000 {
return Err(SearchError::too_many_results(total, 10000));
}
let documents = state
.db
.search_documents(auth_user.user.id, &search_request)
.await
.map_err(|e| SearchError::index_unavailable(format!("Search failed: {}", e)))?;
let total = documents.len() as i64;
// Check if too many results
if total > 10000 {
return Err(SearchError::too_many_results(total, 10000));
}
let response = SearchResponse {
documents: documents.into_iter().map(|doc| EnhancedDocumentResponse {
@@ -118,18 +125,32 @@ async fn enhanced_search_documents(
auth_user: AuthUser,
Query(search_request): Query<SearchRequest>,
) -> Result<Json<SearchResponse>, StatusCode> {
// Validate query length (allow empty query if filters are present)
let has_filters = search_request.tags.as_ref().map_or(false, |t| !t.is_empty())
|| search_request.mime_types.as_ref().map_or(false, |m| !m.is_empty());
if search_request.query.len() < 2 && !has_filters {
return Err(StatusCode::BAD_REQUEST);
}
// Generate suggestions before moving search_request
let suggestions = generate_search_suggestions(&search_request.query);
let start_time = std::time::Instant::now();
// Get total count (without pagination) for proper pagination support
let total = state
.db
.count_search_documents(auth_user.user.id, auth_user.user.role.clone(), &search_request)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let documents = state
.db
.enhanced_search_documents_with_role(auth_user.user.id, auth_user.user.role, &search_request)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let query_time = start_time.elapsed().as_millis() as u64;
let total = documents.len() as i64;
let response = SearchResponse {
documents,

View File

@@ -0,0 +1,658 @@
//! Integration tests for search pagination functionality.
//!
//! These tests verify that the `count_search_documents` method returns accurate
//! total counts for pagination, ensuring the fix for the pagination bug doesn't regress.
#[cfg(test)]
mod tests {
use anyhow::Result;
use readur::test_utils::TestContext;
use readur::models::{CreateUser, Document, SearchRequest, UserRole};
use chrono::Utc;
use uuid::Uuid;
use std::collections::HashSet;
use sqlx;
/// Builds registration data for a regular test user. `suffix` plus a
/// timestamp-derived tail keep usernames and emails unique across tests.
fn create_test_user_data(suffix: &str) -> CreateUser {
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos()
        .to_string();
    // Last (up to) 8 digits of the nanosecond timestamp.
    let tail = &nanos[nanos.len().saturating_sub(8)..];
    CreateUser {
        username: format!("testuser_{}_{}", suffix, tail),
        email: format!("test_{}_{}@example.com", suffix, tail),
        password: "password123".to_string(),
        role: Some(UserRole::User),
    }
}
/// Builds registration data for an admin user, used by the role-based
/// access tests. Uniqueness scheme matches `create_test_user_data`.
fn create_admin_user_data(suffix: &str) -> CreateUser {
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos()
        .to_string();
    // Last (up to) 8 digits of the nanosecond timestamp.
    let tail = &nanos[nanos.len().saturating_sub(8)..];
    CreateUser {
        username: format!("admin_{}_{}", suffix, tail),
        email: format!("admin_{}_{}@example.com", suffix, tail),
        password: "password123".to_string(),
        role: Some(UserRole::Admin),
    }
}
/// Builds a fully-populated, OCR-complete document owned by `user_id`.
/// Both `content` and `ocr_text` contain the word "searchable", so the
/// pagination tests can match every generated document with one query.
fn create_searchable_document(user_id: Uuid, index: i32, mime_type: &str) -> Document {
    let name = format!("test_{}.txt", index);
    Document {
        id: Uuid::new_v4(),
        filename: name.clone(),
        original_filename: name,
        file_path: format!("/path/to/test_{}.txt", index),
        file_size: 1024,
        mime_type: mime_type.to_string(),
        content: Some(format!("Document {} with searchable content for pagination testing", index)),
        ocr_text: Some(format!("OCR text {} searchable pagination", index)),
        ocr_confidence: Some(95.0),
        ocr_word_count: Some(10),
        ocr_processing_time_ms: Some(800),
        ocr_status: Some("completed".to_string()),
        ocr_error: None,
        ocr_completed_at: Some(Utc::now()),
        tags: vec!["test".to_string(), "pagination".to_string()],
        created_at: Utc::now(),
        updated_at: Utc::now(),
        user_id,
        // Random hash keeps documents distinct for dedup logic, if any.
        file_hash: Some(format!("{:x}", Uuid::new_v4().as_u128())),
        original_created_at: None,
        original_modified_at: None,
        source_path: None,
        source_type: None,
        source_id: None,
        file_permissions: None,
        file_owner: None,
        file_group: None,
        source_metadata: None,
        ocr_retry_count: None,
        ocr_failure_reason: None,
    }
}
/// The total count must reflect every matching document, not the page size.
#[tokio::test]
async fn test_count_matches_actual_documents() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("count1")).await?;
        // Insert 15 documents that all match the query below.
        for idx in 0..15 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // Page size of 5 — the count must still report all 15 matches.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 15, "Count should be total matching docs (15), not limit (5)");
        assert_eq!(page.len(), 5, "Results should respect the limit of 5");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// The reported total must not change as the caller pages through results.
#[tokio::test]
async fn test_pagination_total_consistent() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("consistent1")).await?;
        // Seed 20 matching documents.
        for idx in 0..20 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // Every page (offsets 0/5/10/15) must report the same total of 20.
        for offset in [0, 5, 10, 15] {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(5),
                offset: Some(offset),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
            assert_eq!(total, 20, "Total should be consistent (20) at offset {}", offset);
        }
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Walking every page must yield each document exactly once — no
/// duplicates across pages and no documents skipped.
#[tokio::test]
async fn test_pagination_fetches_all_documents() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("fetchall1")).await?;
        // 17 documents: deliberately not a multiple of the page size.
        for idx in 0..17 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        let page_size = 5i64;
        let mut seen: HashSet<Uuid> = HashSet::new();
        // Four pages of five cover all 17 documents.
        for page in 0..4 {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(page_size),
                offset: Some(page * page_size),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            for doc in database.search_documents(user.id, &request).await? {
                // A failed insert means the same id showed up on two pages.
                let is_new = seen.insert(doc.id);
                assert!(is_new, "Document {} appeared on multiple pages", doc.id);
            }
        }
        assert_eq!(seen.len(), 17, "Should have fetched all 17 documents exactly once");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// MIME-type filtering must be applied to the count, not just the results.
#[tokio::test]
async fn test_pagination_with_mime_filter() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("mime1")).await?;
        // 10 plain-text documents...
        for idx in 0..10 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // ...plus 5 PDFs.
        for idx in 10..15 {
            database
                .create_document(create_searchable_document(user.id, idx, "application/pdf"))
                .await?;
        }
        // Counting with a text/plain filter must exclude the PDFs.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: Some(vec!["text/plain".to_string()]),
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let plain_count = database.count_search_documents(user.id, UserRole::User, &request).await?;
        assert_eq!(plain_count, 10, "Count should be 10 (only text/plain), not 15 (all docs)");
        // And the inverse filter must count only the PDFs.
        let request_pdf = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: Some(vec!["application/pdf".to_string()]),
            limit: Some(5),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let pdf_count = database.count_search_documents(user.id, UserRole::User, &request_pdf).await?;
        assert_eq!(pdf_count, 5, "Count should be 5 (only PDFs)");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// A query with no matches must produce a zero count and an empty page.
#[tokio::test]
async fn test_pagination_empty_results() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("empty1")).await?;
        // Seed documents whose text cannot match the query below.
        for idx in 0..5 {
            let mut doc = create_searchable_document(user.id, idx, "text/plain");
            doc.content = Some("This content has no matching words".to_string());
            doc.ocr_text = Some("OCR text without matches".to_string());
            database.create_document(doc).await?;
        }
        let request = SearchRequest {
            query: "xyznonexistent".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(10),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 0, "Count should be 0 when no matches");
        assert_eq!(page.len(), 0, "Results should be empty when no matches");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// The final partial page must return only the remaining documents.
#[tokio::test]
async fn test_pagination_boundary_last_page() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("boundary1")).await?;
        // 13 documents with a page size of 5 leaves 3 on the last page.
        for idx in 0..13 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(5),
            offset: Some(10),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
        let page = database.search_documents(user.id, &request).await?;
        assert_eq!(total, 13, "Total count should still be 13");
        assert_eq!(page.len(), 3, "Last page should have 3 remaining documents");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// `count_search_documents` must be independent of the limit/offset values
/// supplied in the request.
#[tokio::test]
async fn test_count_ignores_limit_offset() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        let user = database.create_user(create_test_user_data("ignore1")).await?;
        // Seed 25 matching documents.
        for idx in 0..25 {
            database
                .create_document(create_searchable_document(user.id, idx, "text/plain"))
                .await?;
        }
        // (limit, offset) pairs: tiny limit, large limit, first page,
        // near the last page, and past the end of the result set.
        for (limit, offset) in [(1, 0), (100, 0), (5, 0), (5, 20), (5, 100)] {
            let request = SearchRequest {
                query: "searchable".to_string(),
                tags: None,
                mime_types: None,
                limit: Some(limit),
                offset: Some(offset),
                include_snippets: Some(false),
                snippet_length: None,
                search_mode: None,
            };
            let total = database.count_search_documents(user.id, UserRole::User, &request).await?;
            assert_eq!(total, 25, "Count should always be 25 regardless of limit={}, offset={}", limit, offset);
        }
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Regular users count only their own documents; admins count everyone's.
#[tokio::test]
async fn test_admin_sees_all_user_sees_own() {
    let ctx = TestContext::new().await;
    let outcome: Result<()> = async {
        let database = &ctx.state.db;
        // Two regular users plus one admin.
        let user_a = database.create_user(create_test_user_data("usera")).await?;
        let user_b = database.create_user(create_test_user_data("userb")).await?;
        let admin = database.create_user(create_admin_user_data("admin")).await?;
        // 10 documents owned by A, 5 owned by B.
        for idx in 0..10 {
            database
                .create_document(create_searchable_document(user_a.id, idx, "text/plain"))
                .await?;
        }
        for idx in 10..15 {
            database
                .create_document(create_searchable_document(user_b.id, idx, "text/plain"))
                .await?;
        }
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: None,
            mime_types: None,
            limit: Some(100),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count_a = database.count_search_documents(user_a.id, UserRole::User, &request).await?;
        assert_eq!(count_a, 10, "User A should see only their 10 documents");
        let count_b = database.count_search_documents(user_b.id, UserRole::User, &request).await?;
        assert_eq!(count_b, 5, "User B should see only their 5 documents");
        let count_admin = database.count_search_documents(admin.id, UserRole::Admin, &request).await?;
        assert_eq!(count_admin, 15, "Admin should see all 15 documents");
        Ok(())
    }
    .await;
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    outcome.unwrap();
}
/// Checks that a text query filters both the total count and the paged
/// result set: six "apple" documents and four "orange" documents are
/// counted separately, and the returned page still honors the requested
/// limit.
#[tokio::test]
async fn test_pagination_with_text_query() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("textq1")).await?;

        // Documents 0-5 mention "apple"; documents 6-9 mention "orange".
        for i in 0..10 {
            let mut doc = create_searchable_document(user.id, i, "text/plain");
            doc.content = Some(if i < 6 {
                format!("Document {} contains the word apple", i)
            } else {
                format!("Document {} contains the word orange", i)
            });
            db.create_document(doc).await?;
        }

        // Builds a request that differs only in query text and page limit.
        let build_request = |query: &str, limit| SearchRequest {
            query: query.to_string(),
            tags: None,
            mime_types: None,
            limit: Some(limit),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };

        // "apple": total count is 6, but the returned page is capped at 3.
        let request_apple = build_request("apple", 3);
        let count_apple = db.count_search_documents(user.id, UserRole::User, &request_apple).await?;
        let results_apple = db.search_documents(user.id, &request_apple).await?;
        assert_eq!(count_apple, 6, "Should find 6 documents with 'apple'");
        assert_eq!(results_apple.len(), 3, "Should return 3 (limit)");

        // "orange": all 4 matches fit within the limit of 10.
        let request_orange = build_request("orange", 10);
        let count_orange = db.count_search_documents(user.id, UserRole::User, &request_orange).await?;
        assert_eq!(count_orange, 4, "Should find 4 documents with 'orange'");

        Ok(())
    }
    .await;

    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
/// Test that count correctly filters by labels
///
/// Seeds one user-defined label ("important"), attaches it to 6 of 10
/// documents, and verifies that `count_search_documents` reflects the label
/// filter (6, not 10) while `search_documents` still honors the page limit.
/// Also confirms that filtering by a non-existent label name yields 0.
#[tokio::test]
async fn test_pagination_with_label_filter() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("label1")).await?;

        // Create a label using direct SQL (no db.create_label method exists).
        let label_id = Uuid::new_v4();
        sqlx::query(
            r#"
            INSERT INTO labels (id, user_id, name, description, color, is_system)
            VALUES ($1, $2, $3, $4, $5, $6)
            "#
        )
        .bind(label_id)
        .bind(user.id)
        .bind("important")
        .bind("Important documents")
        .bind("#ff0000")
        .bind(false) // user-created label, not a system label
        .execute(db.get_pool())
        .await?;

        // Create 10 documents, assign the label to the first 6 of them so the
        // labeled subset (6) differs from the full set (10).
        for i in 0..10 {
            let doc = db.create_document(create_searchable_document(user.id, i, "text/plain")).await?;
            if i < 6 {
                // Assign label to first 6 documents (again via direct SQL).
                sqlx::query(
                    "INSERT INTO document_labels (document_id, label_id, assigned_by) VALUES ($1, $2, $3)"
                )
                .bind(doc.id)
                .bind(label_id)
                .bind(user.id)
                .execute(db.get_pool())
                .await?;
            }
        }

        // Filter by label name: count must honor the label filter, while the
        // result page is still capped by `limit`.
        // NOTE(review): the label filter travels in the `tags` field here —
        // presumably tags and labels share this request field; verify against
        // the SearchRequest definition.
        let request = SearchRequest {
            query: "searchable".to_string(),
            tags: Some(vec!["important".to_string()]),
            mime_types: None,
            limit: Some(3),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count = db.count_search_documents(user.id, UserRole::User, &request).await?;
        let results = db.search_documents(user.id, &request).await?;
        assert_eq!(count, 6, "Count should be 6 (only labeled docs), not 10 (all docs)");
        assert_eq!(results.len(), 3, "Results should respect limit of 3");

        // Test with non-existent label: no document carries it, so the
        // filtered count must be zero rather than falling back to all docs.
        let request_none = SearchRequest {
            query: "searchable".to_string(),
            tags: Some(vec!["nonexistent".to_string()]),
            mime_types: None,
            limit: Some(3),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };
        let count_none = db.count_search_documents(user.id, UserRole::User, &request_none).await?;
        assert_eq!(count_none, 0, "Count should be 0 for non-existent label");

        Ok(())
    }.await;

    // Best-effort cleanup: report failures but do not mask the test result.
    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
/// Exercises filter-only search: an empty text query combined with a
/// MIME-type filter should count exactly the matching documents.
#[tokio::test]
async fn test_pagination_filter_only_no_query() {
    let ctx = TestContext::new().await;
    let result: Result<()> = async {
        let db = &ctx.state.db;
        let user = db.create_user(create_test_user_data("filteronly1")).await?;

        // Seed 8 plain-text documents and 4 PNG images.
        for (range, mime) in [(0..8, "text/plain"), (8..12, "image/png")] {
            for i in range {
                db.create_document(create_searchable_document(user.id, i, mime)).await?;
            }
        }

        // No text query at all — only a MIME-type constraint.
        let request = SearchRequest {
            query: String::new(),
            tags: None,
            mime_types: Some(vec!["image/png".to_string()]),
            limit: Some(2),
            offset: Some(0),
            include_snippets: Some(false),
            snippet_length: None,
            search_mode: None,
        };

        let count = db.count_search_documents(user.id, UserRole::User, &request).await?;
        assert_eq!(count, 4, "Should count 4 PNG images with empty query");

        Ok(())
    }
    .await;

    if let Err(e) = ctx.cleanup_and_close().await {
        eprintln!("Warning: Test cleanup failed: {}", e);
    }
    result.unwrap();
}
}