Add configurable duplicate detection fields for CSV client import

- Add duplicate_detection_fields parameter to import_csv_clients function
- Allow users to specify which fields to use for duplicate detection (name, custom fields, or both)
- Update API route to accept duplicate_detection_fields query parameter
- Add UI controls for selecting duplicate detection fields:
  - Checkbox to include/exclude client name
  - Text input for custom field names (comma-separated)
- Default behavior remains backward compatible (checks name + all custom fields if not specified)
- Enables use cases such as detecting duplicates by debtor_number only, which allows multiple clients with the same name but different debtor numbers
This commit is contained in:
Dries Peeters
2025-12-01 14:38:33 +01:00
parent de266dbf7d
commit 73b4129662
3 changed files with 104 additions and 20 deletions

View File

@@ -591,7 +591,12 @@ def import_csv_clients_route():
Import clients from CSV file
Expected multipart/form-data with 'file' field
Optional query parameter: skip_duplicates (default: true)
Optional query parameters:
- skip_duplicates (default: true): Whether to skip duplicate clients
- duplicate_detection_fields: Comma-separated list of fields to use for duplicate detection.
Can include 'name' for client name, or custom field names (e.g., 'debtor_number').
Examples: "debtor_number", "name,debtor_number", "erp_id"
If not provided, defaults to checking by name and all custom fields found in CSV.
"""
try:
if "file" not in request.files:
@@ -606,6 +611,13 @@ def import_csv_clients_route():
return jsonify({"error": "File must be a CSV"}), 400
skip_duplicates = request.args.get("skip_duplicates", "true").lower() == "true"
# Parse duplicate detection fields
duplicate_detection_fields = None
duplicate_fields_param = request.args.get("duplicate_detection_fields", "").strip()
if duplicate_fields_param:
# Support comma-separated list: "debtor_number,erp_id" or "name,debtor_number"
duplicate_detection_fields = [field.strip() for field in duplicate_fields_param.split(",") if field.strip()]
# Read file content
file_bytes = file.read()
@@ -637,7 +649,11 @@ def import_csv_clients_route():
# Perform import
try:
summary = import_csv_clients(
user_id=current_user.id, csv_content=csv_content, import_record=import_record, skip_duplicates=skip_duplicates
user_id=current_user.id,
csv_content=csv_content,
import_record=import_record,
skip_duplicates=skip_duplicates,
duplicate_detection_fields=duplicate_detection_fields
)
response = jsonify({"success": True, "import_id": import_record.id, "summary": summary})
response.headers["Content-Type"] = "application/json"

View File

@@ -57,11 +57,23 @@
<i class="fas fa-download mr-1"></i>{{ _('Download Template') }}
</a>
</div>
<div class="mt-2">
<div class="mt-2 space-y-2">
<label class="flex items-center space-x-2 text-sm text-gray-600 dark:text-gray-400">
<input type="checkbox" id="skip-duplicates" checked class="rounded">
<span>{{ _('Skip duplicate clients (by name or custom field)') }}</span>
<span>{{ _('Skip duplicate clients') }}</span>
</label>
<div id="duplicate-detection-fields" class="ml-6 space-y-2 text-sm text-gray-600 dark:text-gray-400">
<div class="font-medium">{{ _('Use these fields for duplicate detection:') }}</div>
<label class="flex items-center space-x-2">
<input type="checkbox" id="dup-field-name" class="rounded dup-field-checkbox">
<span>{{ _('Client Name') }}</span>
</label>
<div class="flex items-center space-x-2">
<label for="dup-custom-fields" class="text-sm">{{ _('Custom fields (comma-separated):') }}</label>
<input type="text" id="dup-custom-fields" placeholder="{{ _('e.g., debtor_number, erp_id') }}" class="flex-1 px-2 py-1 text-sm border border-gray-300 dark:border-gray-600 rounded-md dark:bg-gray-700 dark:text-white">
</div>
<div class="text-xs text-gray-500 dark:text-gray-500">{{ _('Example: Enter "debtor_number" to detect duplicates by debtor number only (not by name)') }}</div>
</div>
</div>
<div id="csv-clients-upload-status" class="mt-2 text-sm"></div>
</div>
@@ -328,8 +340,23 @@ async function handleCsvClientsUpload(input) {
formData.append('csrf_token', csrfToken);
}
// Collect duplicate detection fields
const duplicateFields = [];
if (document.getElementById('dup-field-name').checked) {
duplicateFields.push('name');
}
const customFieldsInput = document.getElementById('dup-custom-fields').value.trim();
if (customFieldsInput) {
// Split by comma and add each field
const customFields = customFieldsInput.split(',').map(f => f.trim()).filter(f => f);
duplicateFields.push(...customFields);
}
try {
const url = `/api/import/csv/clients?skip_duplicates=${skipDuplicates}`;
let url = `/api/import/csv/clients?skip_duplicates=${skipDuplicates}`;
if (duplicateFields.length > 0) {
url += `&duplicate_detection_fields=${encodeURIComponent(duplicateFields.join(','))}`;
}
const response = await fetch(url, {
method: 'POST',
body: formData
@@ -647,6 +674,25 @@ function showFilteredExportForm() {
// Initialize page data and wire up the duplicate-detection UI once the DOM is ready.
document.addEventListener('DOMContentLoaded', function() {
    loadImportHistory();
    loadExportHistory();

    // Show the duplicate-detection field controls only while the
    // "skip duplicates" checkbox is checked.
    const skipDuplicatesCheckbox = document.getElementById('skip-duplicates');
    const duplicateDetectionFields = document.getElementById('duplicate-detection-fields');

    // Guard once for missing elements instead of re-checking inside the
    // handler on every 'change' event.
    if (skipDuplicatesCheckbox && duplicateDetectionFields) {
        const toggleDuplicateFields = function() {
            duplicateDetectionFields.style.display =
                skipDuplicatesCheckbox.checked ? 'block' : 'none';
        };
        skipDuplicatesCheckbox.addEventListener('change', toggleDuplicateFields);
        toggleDuplicateFields(); // set the correct initial visibility on page load
    }
});
</script>
{% endblock %}

View File

@@ -560,7 +560,7 @@ def _parse_datetime(datetime_str):
return None
def import_csv_clients(user_id, csv_content, import_record, skip_duplicates=True):
def import_csv_clients(user_id, csv_content, import_record, skip_duplicates=True, duplicate_detection_fields=None):
"""
Import clients from CSV file
@@ -579,7 +579,11 @@ def import_csv_clients(user_id, csv_content, import_record, skip_duplicates=True
user_id: ID of the user importing data
csv_content: String content of CSV file
import_record: DataImport model instance to track progress
skip_duplicates: If True, skip clients that already exist (by name or custom field match)
skip_duplicates: If True, skip clients that already exist
duplicate_detection_fields: List of field names to use for duplicate detection.
Can include 'name' for client name, or custom field names (e.g., 'debtor_number').
If None, defaults to ['name'] plus all custom fields found in the CSV.
Examples: ['debtor_number'], ['name', 'debtor_number'], ['erp_id']
Returns:
Dictionary with import statistics
@@ -617,24 +621,42 @@ def import_csv_clients(user_id, csv_content, import_record, skip_duplicates=True
# Check for duplicates if skip_duplicates is True
if skip_duplicates:
existing_client = Client.query.filter_by(name=client_name).first()
existing_client = None
# Also check by custom fields if provided (e.g., ERP ID)
if not existing_client:
# Look for common custom field keys that might indicate duplicates
# Determine which fields to use for duplicate detection
if duplicate_detection_fields is not None:
# Use explicitly specified fields
detection_fields = duplicate_detection_fields
else:
# Default: check by name + all custom fields found in CSV
detection_fields = ['name']
# Add all custom fields found in CSV
for key in row.keys():
if key.startswith("custom_field_"):
field_name = key.replace("custom_field_", "")
field_value = row.get(key, "").strip()
if field_value:
# Check if any client has this custom field value
all_clients = Client.query.all()
for client in all_clients:
if client.custom_fields and client.custom_fields.get(field_name) == field_value:
existing_client = client
break
if existing_client:
if field_name not in detection_fields:
detection_fields.append(field_name)
# Check each specified field for duplicates
for field in detection_fields:
if field == 'name':
# Check by client name
existing_client = Client.query.filter_by(name=client_name).first()
if existing_client:
break
else:
# Check by custom field
csv_key = f"custom_field_{field}"
field_value = row.get(csv_key, "").strip()
if field_value:
# Check if any client has this custom field value
all_clients = Client.query.all()
for client in all_clients:
if client.custom_fields and client.custom_fields.get(field) == field_value:
existing_client = client
break
if existing_client:
break
if existing_client:
skipped += 1