fix(bookdrop): improve pattern extractor date extraction (#2032)

* fix: add support for yyyy-MM formats and add tests for new edge case

* fix: add support for MM-yyyy formats
This commit is contained in:
CounterClops
2025-12-29 20:29:55 +08:00
committed by GitHub
parent d9ff9468bd
commit 91f0e86b00
2 changed files with 113 additions and 2 deletions

View File

@@ -13,6 +13,8 @@ import org.springframework.transaction.annotation.Transactional;
import jakarta.annotation.PreDestroy;
import java.time.LocalDate;
import java.time.Year;
import java.time.YearMonth;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
@@ -63,6 +65,8 @@ public class FilenamePatternExtractor {
// Four consecutive digits.
private static final Pattern FOUR_DIGIT_YEAR_PATTERN = Pattern.compile("\\d{4}");
// Two consecutive digits.
private static final Pattern TWO_DIGIT_YEAR_PATTERN = Pattern.compile("\\d{2}");
// Eight consecutive digits with no separators.
private static final Pattern COMPACT_DATE_PATTERN = Pattern.compile("\\d{8}");
// Four digits, one non-digit separator, then one or two digits (e.g. "2012-05").
// Groups: 1 = year, 2 = separator, 3 = month.
private static final Pattern YEAR_MONTH_PATTERN = Pattern.compile("(\\d{4})([^\\d])(\\d{1,2})");
// One or two digits, one non-digit separator, then four digits (e.g. "05-2012").
// Groups: 1 = month, 2 = separator, 3 = year.
private static final Pattern MONTH_YEAR_PATTERN = Pattern.compile("(\\d{1,2})([^\\d])(\\d{4})");
// Three numeric parts joined by the same non-digit separator; the back-reference \2
// forces the second separator to equal the first.
private static final Pattern FLEXIBLE_DATE_PATTERN = Pattern.compile("(\\d{1,4})([^\\d])(\\d{1,2})\\2(\\d{1,4})");
@Transactional
@@ -456,18 +460,32 @@ public class FilenamePatternExtractor {
}
try {
if ("yyyy".equals(detectedFormat) || "yy".equals(detectedFormat)) {
if ("yyyy".equals(detectedFormat)) {
Year year = Year.parse(value, DateTimeFormatter.ofPattern("yyyy"));
metadata.setPublishedDate(year.atMonthDay(java.time.MonthDay.of(1, 1)));
return;
}
if ("yy".equals(detectedFormat)) {
int year = Integer.parseInt(value);
if ("yy".equals(detectedFormat) && year < 100) {
if (year < 100) {
year += (year < TWO_DIGIT_YEAR_CUTOFF) ? 2000 : TWO_DIGIT_YEAR_CENTURY_BASE;
}
metadata.setPublishedDate(LocalDate.of(year, 1, 1));
return;
}
if (isYearMonthFormat(detectedFormat)) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(detectedFormat);
YearMonth yearMonth = YearMonth.parse(value, formatter);
metadata.setPublishedDate(yearMonth.atDay(1));
return;
}
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(detectedFormat);
LocalDate date = LocalDate.parse(value, formatter);
metadata.setPublishedDate(date);
return;
} catch (NumberFormatException e) {
log.warn("Failed to parse year value '{}': {}", value, e.getMessage());
} catch (DateTimeParseException e) {
@@ -497,6 +515,22 @@ public class FilenamePatternExtractor {
return "yyyyMMdd";
}
Matcher yearMonthMatcher = YEAR_MONTH_PATTERN.matcher(trimmed);
if (yearMonthMatcher.matches()) {
String separator = yearMonthMatcher.group(2);
String monthPart = yearMonthMatcher.group(3);
String monthFormat = monthPart.length() == 1 ? "M" : "MM";
return "yyyy" + separator + monthFormat;
}
Matcher monthYearMatcher = MONTH_YEAR_PATTERN.matcher(trimmed);
if (monthYearMatcher.matches()) {
String monthPart = monthYearMatcher.group(1);
String separator = monthYearMatcher.group(2);
String monthFormat = monthPart.length() == 1 ? "M" : "MM";
return monthFormat + separator + "yyyy";
}
Matcher flexibleMatcher = FLEXIBLE_DATE_PATTERN.matcher(trimmed);
if (flexibleMatcher.matches()) {
String separator = flexibleMatcher.group(2);
@@ -619,6 +653,13 @@ public class FilenamePatternExtractor {
bookdropFileRepository.saveAll(filesToSave);
}
}
/**
 * Tells whether a date-format pattern describes a year and a month but no day.
 *
 * <p>Such patterns cannot be parsed into a {@code LocalDate} directly because the
 * day is missing; callers parse them as a {@code YearMonth} instead.
 *
 * <p>Recognised year letters are {@code y} (year-of-era), {@code u} (proleptic year)
 * and {@code Y} (week-based year); recognised month letters are {@code M} and the
 * standalone form {@code L}. A pattern containing {@code d} (day-of-month) or
 * {@code D} (day-of-year) is never treated as year-month.
 *
 * @param format the date-format pattern to inspect; may be {@code null}
 * @return {@code true} if the pattern has a year letter and a month letter and no
 *         day letter; {@code false} otherwise, including for {@code null}
 */
private static boolean isYearMonthFormat(String format) {
    if (format == null) {
        return false;
    }
    boolean hasYear = format.contains("y") || format.contains("u") || format.contains("Y");
    boolean hasMonth = format.contains("M") || format.contains("L");
    boolean hasDay = format.contains("d") || format.contains("D");
    return hasYear && hasMonth && !hasDay;
}
// Immutable pair of a regex string and a metadata field name.
// NOTE(review): usage is not visible in this section — presumably the regex captures a
// placeholder's value destined for the named metadata field; confirm against callers.
private record PlaceholderConfig(String regex, String metadataField) {}

View File

@@ -507,6 +507,48 @@ class FilenamePatternExtractorTest {
assertEquals(15, result.getPublishedDate().getDayOfMonth());
}
@Test
void extractFromFilename_WithPublishedYearMonth_ShouldExtractAndDefaultToFirstDay() {
    // An explicit yyyy-MM format has no day part; the expected date is the 1st of that month.
    BookMetadata extracted = extractor.extractFromFilename(
            "The Lost City (2012-05).epub",
            "{Title} ({Published:yyyy-MM})");

    assertNotNull(extracted);
    assertEquals("The Lost City", extracted.getTitle());

    var published = extracted.getPublishedDate();
    assertEquals(2012, published.getYear());
    assertEquals(5, published.getMonthValue());
    assertEquals(1, published.getDayOfMonth());
}
@Test
void extractFromFilename_WithPublishedYearMonthDots_ShouldExtractAndDefaultToFirstDay() {
    // Same as the dash variant, but with a dot between year and month.
    BookMetadata extracted = extractor.extractFromFilename(
            "Chronicles of Tomorrow (2025.12).epub",
            "{Title} ({Published:yyyy.MM})");

    assertNotNull(extracted);
    assertEquals("Chronicles of Tomorrow", extracted.getTitle());

    var published = extracted.getPublishedDate();
    assertEquals(2025, published.getYear());
    assertEquals(12, published.getMonthValue());
    assertEquals(1, published.getDayOfMonth());
}
@Test
void extractFromFilename_WithPublishedMonthYear_ShouldExtractAndDefaultToFirstDay() {
    // Month-first ordering (MM-yyyy) must yield the same date as year-first.
    BookMetadata extracted = extractor.extractFromFilename(
            "The Lost City (05-2012).epub",
            "{Title} ({Published:MM-yyyy})");

    assertNotNull(extracted);
    assertEquals("The Lost City", extracted.getTitle());

    var published = extracted.getPublishedDate();
    assertEquals(2012, published.getYear());
    assertEquals(5, published.getMonthValue());
    assertEquals(1, published.getDayOfMonth());
}
@Test
void extractFromFilename_PublishedWithoutFormat_AutoDetectsISODate() {
String filename = "The Lost City (2023-05-15).epub";
@@ -561,6 +603,34 @@ class FilenamePatternExtractorTest {
assertEquals(1999, result.getPublishedDate().getYear());
}
@Test
void extractFromFilename_PublishedWithoutFormat_AutoDetectsYearMonth() {
    // No format is given in the placeholder, so the extractor has to detect yyyy-MM itself.
    BookMetadata extracted = extractor.extractFromFilename(
            "The Lost City (2012-05).epub",
            "{Title} ({Published})");

    assertNotNull(extracted);
    assertEquals("The Lost City", extracted.getTitle());

    var published = extracted.getPublishedDate();
    assertEquals(2012, published.getYear());
    assertEquals(5, published.getMonthValue());
    assertEquals(1, published.getDayOfMonth());
}
@Test
void extractFromFilename_PublishedWithoutFormat_AutoDetectsMonthYear() {
    // No format is given in the placeholder, so the extractor has to detect MM-yyyy itself.
    BookMetadata extracted = extractor.extractFromFilename(
            "Chronicles of Earth (05-2012).epub",
            "{Title} ({Published})");

    assertNotNull(extracted);
    assertEquals("Chronicles of Earth", extracted.getTitle());

    var published = extracted.getPublishedDate();
    assertEquals(2012, published.getYear());
    assertEquals(5, published.getMonthValue());
    assertEquals(1, published.getDayOfMonth());
}
@Test
void extractFromFilename_PublishedWithoutFormat_AutoDetectsFlexibleFormat() {
String filename = "Tomorrow (15|05|2023).epub";