mirror of
https://github.com/adityachandelgit/BookLore.git
synced 2026-01-06 05:59:45 -06:00
fix(bookdrop): improve pattern extractor date extraction (#2032)
* fix: add support for yyyy-MM formats and add tests for new edge case
* fix: add support for MM-yyyy formats
This commit is contained in:
@@ -13,6 +13,8 @@ import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import java.time.LocalDate;
|
||||
import java.time.Year;
|
||||
import java.time.YearMonth;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.*;
|
||||
@@ -63,6 +65,8 @@ public class FilenamePatternExtractor {
|
||||
// Four consecutive digits (a full year such as "1999").
private static final Pattern FOUR_DIGIT_YEAR_PATTERN = Pattern.compile("\\d{4}");
|
||||
// Two consecutive digits (an abbreviated year).
private static final Pattern TWO_DIGIT_YEAR_PATTERN = Pattern.compile("\\d{2}");
|
||||
// Eight consecutive digits with no separators.
private static final Pattern COMPACT_DATE_PATTERN = Pattern.compile("\\d{8}");
|
||||
// Four-digit year, one non-digit separator, then a 1-2 digit month (e.g. "2012-05").
// Groups: 1 = year, 2 = separator, 3 = month.
private static final Pattern YEAR_MONTH_PATTERN = Pattern.compile("(\\d{4})([^\\d])(\\d{1,2})");
|
||||
// 1-2 digit month, one non-digit separator, then a four-digit year (e.g. "05-2012").
// Groups: 1 = month, 2 = separator, 3 = year.
private static final Pattern MONTH_YEAR_PATTERN = Pattern.compile("(\\d{1,2})([^\\d])(\\d{4})");
|
||||
// Three numeric parts joined by the same single non-digit separator; the back-reference \2
// forces the second separator to equal the first.
private static final Pattern FLEXIBLE_DATE_PATTERN = Pattern.compile("(\\d{1,4})([^\\d])(\\d{1,2})\\2(\\d{1,4})");
|
||||
|
||||
@Transactional
|
||||
@@ -456,18 +460,32 @@ public class FilenamePatternExtractor {
|
||||
}
|
||||
|
||||
try {
|
||||
if ("yyyy".equals(detectedFormat) || "yy".equals(detectedFormat)) {
|
||||
if ("yyyy".equals(detectedFormat)) {
|
||||
Year year = Year.parse(value, DateTimeFormatter.ofPattern("yyyy"));
|
||||
metadata.setPublishedDate(year.atMonthDay(java.time.MonthDay.of(1, 1)));
|
||||
return;
|
||||
}
|
||||
|
||||
if ("yy".equals(detectedFormat)) {
|
||||
int year = Integer.parseInt(value);
|
||||
if ("yy".equals(detectedFormat) && year < 100) {
|
||||
if (year < 100) {
|
||||
year += (year < TWO_DIGIT_YEAR_CUTOFF) ? 2000 : TWO_DIGIT_YEAR_CENTURY_BASE;
|
||||
}
|
||||
metadata.setPublishedDate(LocalDate.of(year, 1, 1));
|
||||
return;
|
||||
}
|
||||
|
||||
if (isYearMonthFormat(detectedFormat)) {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(detectedFormat);
|
||||
YearMonth yearMonth = YearMonth.parse(value, formatter);
|
||||
metadata.setPublishedDate(yearMonth.atDay(1));
|
||||
return;
|
||||
}
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(detectedFormat);
|
||||
LocalDate date = LocalDate.parse(value, formatter);
|
||||
metadata.setPublishedDate(date);
|
||||
return;
|
||||
} catch (NumberFormatException e) {
|
||||
log.warn("Failed to parse year value '{}': {}", value, e.getMessage());
|
||||
} catch (DateTimeParseException e) {
|
||||
@@ -497,6 +515,22 @@ public class FilenamePatternExtractor {
|
||||
return "yyyyMMdd";
|
||||
}
|
||||
|
||||
Matcher yearMonthMatcher = YEAR_MONTH_PATTERN.matcher(trimmed);
|
||||
if (yearMonthMatcher.matches()) {
|
||||
String separator = yearMonthMatcher.group(2);
|
||||
String monthPart = yearMonthMatcher.group(3);
|
||||
String monthFormat = monthPart.length() == 1 ? "M" : "MM";
|
||||
return "yyyy" + separator + monthFormat;
|
||||
}
|
||||
|
||||
Matcher monthYearMatcher = MONTH_YEAR_PATTERN.matcher(trimmed);
|
||||
if (monthYearMatcher.matches()) {
|
||||
String monthPart = monthYearMatcher.group(1);
|
||||
String separator = monthYearMatcher.group(2);
|
||||
String monthFormat = monthPart.length() == 1 ? "M" : "MM";
|
||||
return monthFormat + separator + "yyyy";
|
||||
}
|
||||
|
||||
Matcher flexibleMatcher = FLEXIBLE_DATE_PATTERN.matcher(trimmed);
|
||||
if (flexibleMatcher.matches()) {
|
||||
String separator = flexibleMatcher.group(2);
|
||||
@@ -619,6 +653,13 @@ public class FilenamePatternExtractor {
|
||||
bookdropFileRepository.saveAll(filesToSave);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isYearMonthFormat(String format) {
|
||||
return format != null &&
|
||||
(format.contains("y") || format.contains("Y")) &&
|
||||
(format.contains("M")) &&
|
||||
!(format.contains("d") || format.contains("D"));
|
||||
}
|
||||
|
||||
private record PlaceholderConfig(String regex, String metadataField) {}
|
||||
|
||||
|
||||
@@ -507,6 +507,48 @@ class FilenamePatternExtractorTest {
|
||||
assertEquals(15, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_WithPublishedYearMonth_ShouldExtractAndDefaultToFirstDay() {
|
||||
String filename = "The Lost City (2012-05).epub";
|
||||
String pattern = "{Title} ({Published:yyyy-MM})";
|
||||
|
||||
BookMetadata result = extractor.extractFromFilename(filename, pattern);
|
||||
|
||||
assertNotNull(result);
|
||||
assertEquals("The Lost City", result.getTitle());
|
||||
assertEquals(2012, result.getPublishedDate().getYear());
|
||||
assertEquals(5, result.getPublishedDate().getMonthValue());
|
||||
assertEquals(1, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_WithPublishedYearMonthDots_ShouldExtractAndDefaultToFirstDay() {
|
||||
String filename = "Chronicles of Tomorrow (2025.12).epub";
|
||||
String pattern = "{Title} ({Published:yyyy.MM})";
|
||||
|
||||
BookMetadata result = extractor.extractFromFilename(filename, pattern);
|
||||
|
||||
assertNotNull(result);
|
||||
assertEquals("Chronicles of Tomorrow", result.getTitle());
|
||||
assertEquals(2025, result.getPublishedDate().getYear());
|
||||
assertEquals(12, result.getPublishedDate().getMonthValue());
|
||||
assertEquals(1, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_WithPublishedMonthYear_ShouldExtractAndDefaultToFirstDay() {
|
||||
String filename = "The Lost City (05-2012).epub";
|
||||
String pattern = "{Title} ({Published:MM-yyyy})";
|
||||
|
||||
BookMetadata result = extractor.extractFromFilename(filename, pattern);
|
||||
|
||||
assertNotNull(result);
|
||||
assertEquals("The Lost City", result.getTitle());
|
||||
assertEquals(2012, result.getPublishedDate().getYear());
|
||||
assertEquals(5, result.getPublishedDate().getMonthValue());
|
||||
assertEquals(1, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_PublishedWithoutFormat_AutoDetectsISODate() {
|
||||
String filename = "The Lost City (2023-05-15).epub";
|
||||
@@ -561,6 +603,34 @@ class FilenamePatternExtractorTest {
|
||||
assertEquals(1999, result.getPublishedDate().getYear());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_PublishedWithoutFormat_AutoDetectsYearMonth() {
|
||||
String filename = "The Lost City (2012-05).epub";
|
||||
String pattern = "{Title} ({Published})";
|
||||
|
||||
BookMetadata result = extractor.extractFromFilename(filename, pattern);
|
||||
|
||||
assertNotNull(result);
|
||||
assertEquals("The Lost City", result.getTitle());
|
||||
assertEquals(2012, result.getPublishedDate().getYear());
|
||||
assertEquals(5, result.getPublishedDate().getMonthValue());
|
||||
assertEquals(1, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_PublishedWithoutFormat_AutoDetectsMonthYear() {
|
||||
String filename = "Chronicles of Earth (05-2012).epub";
|
||||
String pattern = "{Title} ({Published})";
|
||||
|
||||
BookMetadata result = extractor.extractFromFilename(filename, pattern);
|
||||
|
||||
assertNotNull(result);
|
||||
assertEquals("Chronicles of Earth", result.getTitle());
|
||||
assertEquals(2012, result.getPublishedDate().getYear());
|
||||
assertEquals(5, result.getPublishedDate().getMonthValue());
|
||||
assertEquals(1, result.getPublishedDate().getDayOfMonth());
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractFromFilename_PublishedWithoutFormat_AutoDetectsFlexibleFormat() {
|
||||
String filename = "Tomorrow (15|05|2023).epub";
|
||||
|
||||
Reference in New Issue
Block a user