fix(metadata): fall back to filename for title extraction in PDF and EPUB metadata (#1603)

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs
2025-11-24 15:17:23 +01:00
committed by GitHub
parent 0190ba8677
commit dddc6d4acf
9 changed files with 115 additions and 2 deletions
@@ -25,5 +25,6 @@ public class KoboSnapshotBookEntity {
private Long bookId;
@Column(nullable = false)
@Builder.Default
private boolean synced = false;
}
@@ -23,9 +23,11 @@ public class KoboUserSettingsEntity {
private String token;
@Column(name = "sync_enabled")
@Builder.Default
private boolean syncEnabled = true;
@Column(name = "progress_mark_as_reading_threshold")
@Builder.Default
private Float progressMarkAsReadingThreshold = 1f;
@Column(name = "progress_mark_as_finished_threshold")
@@ -28,12 +28,14 @@ public class KoreaderUserEntity {
private String passwordMD5;
@Column(name = "created_at", nullable = false, updatable = false)
@Builder.Default
private Instant createdAt = Instant.now();
@Column(name = "updated_at")
private Instant updatedAt;
@Column(name = "sync_enabled", nullable = false)
@Builder.Default
private boolean syncEnabled = false;
@OneToOne(fetch = FetchType.LAZY)
@@ -37,9 +37,11 @@ public class MagicShelfEntity {
private boolean isPublic = false;
@Column(name = "created_at", nullable = false, updatable = false)
@lombok.Builder.Default
private LocalDateTime createdAt = LocalDateTime.now();
@Column(name = "updated_at", nullable = false)
@lombok.Builder.Default
private LocalDateTime updatedAt = LocalDateTime.now();
@PreUpdate
@@ -29,6 +29,7 @@ public class RefreshTokenEntity {
private Instant expiryDate;
@Column(nullable = false)
@Builder.Default
private boolean revoked = false;
@Column(name = "revocation_date")
@@ -21,30 +21,39 @@ public class UserPermissionsEntity {
private BookLoreUserEntity user;
@Column(name = "permission_upload", nullable = false)
@Builder.Default
private boolean permissionUpload = false;
@Column(name = "permission_download", nullable = false)
@Builder.Default
private boolean permissionDownload = false;
@Column(name = "permission_edit_metadata", nullable = false)
@Builder.Default
private boolean permissionEditMetadata = false;
@Column(name = "permission_manipulate_library", nullable = false)
@Builder.Default
private boolean permissionManipulateLibrary = false;
@Column(name = "permission_email_book", nullable = false)
@Builder.Default
private boolean permissionEmailBook = false;
@Column(name = "permission_delete_book", nullable = false)
@Builder.Default
private boolean permissionDeleteBook = false;
@Column(name = "permission_sync_koreader", nullable = false)
@Builder.Default
private boolean permissionSyncKoreader = false;
@Column(name = "permission_access_opds", nullable = false)
@Builder.Default
private boolean permissionAccessOpds = false;
@Column(name = "permission_sync_kobo", nullable = false)
@Builder.Default
private boolean permissionSyncKobo = false;
@Column(name = "permission_admin", nullable = false)
@@ -6,6 +6,7 @@ import io.documentnode.epub4j.epub.EpubReader;
import lombok.extern.slf4j.Slf4j;
import net.lingala.zip4j.ZipFile;
import net.lingala.zip4j.model.FileHeader;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.boot.configurationprocessor.json.JSONException;
import org.springframework.boot.configurationprocessor.json.JSONObject;
@@ -201,7 +202,15 @@ public class EpubMetadataExtractor implements FileMetadataExtractor {
builderMeta.authors(authors);
builderMeta.categories(categories);
return builderMeta.build();
BookMetadata extractedMetadata = builderMeta.build();
// Fall back to the filename if no title is found in the EPUB metadata
if (StringUtils.isBlank(extractedMetadata.getTitle())) {
builderMeta.title(FilenameUtils.getBaseName(epubFile.getName()));
extractedMetadata = builderMeta.build();
}
return extractedMetadata;
}
}
@@ -3,6 +3,7 @@ package com.adityachandel.booklore.service.metadata.extractor;
import com.adityachandel.booklore.model.dto.BookMetadata;
import com.adityachandel.booklore.util.FileUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
@@ -73,7 +74,7 @@ public class PdfMetadataExtractor implements FileMetadataExtractor {
if (StringUtils.isNotBlank(info.getTitle())) {
metadataBuilder.title(info.getTitle());
} else {
metadataBuilder.title(file.getName());
metadataBuilder.title(FilenameUtils.getBaseName(file.getName()));
}
if (StringUtils.isNotBlank(info.getAuthor())) {
@@ -0,0 +1,86 @@
package com.adityachandel.booklore.service.metadata.extractor;
import com.adityachandel.booklore.model.dto.BookMetadata;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Verifies which title {@link PdfMetadataExtractor} reports for a PDF:
 * the title embedded in the document information when one is set, and
 * otherwise the file name with its {@code .pdf} extension removed.
 */
class PdfMetadataExtractorTest {

    /** Per-test scratch directory supplied by JUnit; every fixture PDF is written here. */
    @TempDir
    Path tempDir;

    private PdfMetadataExtractor extractor;

    @BeforeEach
    void setUp() {
        extractor = new PdfMetadataExtractor();
    }

    @Test
    void extractMetadata_shouldUseTitleFromMetadata_whenAvailable() throws IOException {
        // The file name deliberately differs from the embedded title.
        File source = writeSinglePagePdf("ignored-filename.pdf", "The Real Book Title");

        BookMetadata extracted = extractor.extractMetadata(source);

        // The embedded title wins over the file name.
        assertEquals("The Real Book Title", extracted.getTitle());
    }

    @Test
    void extractMetadata_shouldUseFilenameWithoutExtension_whenMetadataMissing() throws IOException {
        // No document information is attached, so only the name "Dune.pdf" is available.
        File source = writeSinglePagePdf("Dune.pdf", null);

        BookMetadata extracted = extractor.extractMetadata(source);

        // The ".pdf" extension must not appear in the title.
        assertEquals("Dune", extracted.getTitle());
    }

    @Test
    void extractMetadata_shouldHandleSpacesAndSpecialCharsInFilename() throws IOException {
        File source = writeSinglePagePdf("Harry Potter and the Sorcerer's Stone.pdf", null);

        BookMetadata extracted = extractor.extractMetadata(source);

        // Spaces and the apostrophe survive; only the extension is dropped.
        assertEquals("Harry Potter and the Sorcerer's Stone", extracted.getTitle());
    }

    /**
     * Writes a one-page PDF into {@link #tempDir}.
     *
     * @param fileName      name of the file to create inside the temp directory
     * @param embeddedTitle title to store in the document information, or {@code null}
     *                      to save the document without setting any document information
     * @return the file that was written
     * @throws IOException if the document cannot be saved
     */
    private File writeSinglePagePdf(String fileName, String embeddedTitle) throws IOException {
        File target = tempDir.resolve(fileName).toFile();
        try (PDDocument document = new PDDocument()) {
            document.addPage(new PDPage());
            if (embeddedTitle != null) {
                PDDocumentInformation details = new PDDocumentInformation();
                details.setTitle(embeddedTitle);
                document.setDocumentInformation(details);
            }
            document.save(target);
        }
        return target;
    }
}