mirror of
https://github.com/adityachandelgit/BookLore.git
synced 2026-01-06 06:50:42 -06:00
fix(epub): fix and improve cover image extraction from EPUB files (specifically support for: EPUB 3 specification) (#1773)
* fix(epub): improve cover image extraction from EPUB files Signed-off-by: Balázs Szücs <bszucs1209@gmail.com> * fix(epub): enhance cover image extraction and path resolution Signed-off-by: Balázs Szücs <bszucs1209@gmail.com> --------- Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
@@ -14,14 +14,13 @@ import com.adityachandel.booklore.service.metadata.MetadataMatchService;
|
||||
import com.adityachandel.booklore.service.metadata.extractor.EpubMetadataExtractor;
|
||||
import com.adityachandel.booklore.util.FileService;
|
||||
import com.adityachandel.booklore.util.FileUtils;
|
||||
import io.documentnode.epub4j.domain.Resource;
|
||||
import io.documentnode.epub4j.epub.EpubReader;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.*;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
@@ -63,34 +62,28 @@ public class EpubProcessor extends AbstractFileProcessor implements BookFileProc
|
||||
public boolean generateCover(BookEntity bookEntity) {
|
||||
try {
|
||||
File epubFile = new File(FileUtils.getBookFullPath(bookEntity));
|
||||
io.documentnode.epub4j.domain.Book epub;
|
||||
try (FileInputStream fis = new FileInputStream(epubFile)) {
|
||||
epub = new EpubReader().readEpub(fis);
|
||||
}
|
||||
Resource coverImage = epub.getCoverImage();
|
||||
byte[] coverData = epubMetadataExtractor.extractCover(epubFile);
|
||||
|
||||
if (coverImage == null) {
|
||||
for (Resource res : epub.getResources().getAll()) {
|
||||
String id = res.getId();
|
||||
String href = res.getHref();
|
||||
if ((id != null && id.toLowerCase().contains("cover")) ||
|
||||
(href != null && href.toLowerCase().contains("cover"))) {
|
||||
if (res.getMediaType() != null && res.getMediaType().getName().startsWith("image")) {
|
||||
coverImage = res;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (coverImage == null) {
|
||||
if (coverData == null) {
|
||||
log.warn("No cover image found in EPUB '{}'", bookEntity.getFileName());
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean saved = saveCoverImage(coverImage, bookEntity.getId());
|
||||
bookEntity.getMetadata().setCoverUpdatedOn(Instant.now());
|
||||
bookMetadataRepository.save(bookEntity.getMetadata());
|
||||
boolean saved;
|
||||
try (ByteArrayInputStream bais = new ByteArrayInputStream(coverData)) {
|
||||
BufferedImage originalImage = ImageIO.read(bais);
|
||||
if (originalImage == null) {
|
||||
log.warn("Cover image found but could not be decoded (possibly SVG or unsupported format) in EPUB '{}'", bookEntity.getFileName());
|
||||
return false;
|
||||
}
|
||||
saved = fileService.saveCoverImages(originalImage, bookEntity.getId());
|
||||
originalImage.flush();
|
||||
}
|
||||
|
||||
if (saved) {
|
||||
bookEntity.getMetadata().setCoverUpdatedOn(Instant.now());
|
||||
bookMetadataRepository.save(bookEntity.getMetadata());
|
||||
}
|
||||
return saved;
|
||||
|
||||
} catch (Exception e) {
|
||||
@@ -148,15 +141,4 @@ public class EpubProcessor extends AbstractFileProcessor implements BookFileProc
|
||||
bookCreatorService.addCategoriesToBook(validSubjects, bookEntity);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean saveCoverImage(Resource coverImage, long bookId) throws IOException {
|
||||
BufferedImage originalImage = ImageIO.read(new ByteArrayInputStream(coverImage.getData()));
|
||||
try {
|
||||
return fileService.saveCoverImages(originalImage, bookId);
|
||||
} finally {
|
||||
if (originalImage != null) {
|
||||
originalImage.flush(); // Release resources after processing
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -41,6 +41,14 @@ public class EpubMetadataExtractor implements FileMetadataExtractor {
|
||||
Book epub = new EpubReader().readEpub(fis);
|
||||
io.documentnode.epub4j.domain.Resource coverImage = epub.getCoverImage();
|
||||
|
||||
if (coverImage == null) {
|
||||
String coverHref = findCoverImageHrefInOpf(epubFile);
|
||||
if (coverHref != null) {
|
||||
byte[] data = extractFileFromZip(epubFile, coverHref);
|
||||
if (data != null) return data;
|
||||
}
|
||||
}
|
||||
|
||||
if (coverImage == null) {
|
||||
for (io.documentnode.epub4j.domain.Resource res : epub.getResources().getAll()) {
|
||||
String id = res.getId();
|
||||
@@ -297,4 +305,82 @@ public class EpubMetadataExtractor implements FileMetadataExtractor {
|
||||
log.warn("Failed to parse date from string: {}", value);
|
||||
return null;
|
||||
}
|
||||
|
||||
private String findCoverImageHrefInOpf(File epubFile) {
|
||||
try (ZipFile zip = new ZipFile(epubFile)) {
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
dbf.setNamespaceAware(true);
|
||||
dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
|
||||
DocumentBuilder builder = dbf.newDocumentBuilder();
|
||||
|
||||
FileHeader containerHdr = zip.getFileHeader("META-INF/container.xml");
|
||||
if (containerHdr == null) return null;
|
||||
|
||||
try (InputStream cis = zip.getInputStream(containerHdr)) {
|
||||
Document containerDoc = builder.parse(cis);
|
||||
NodeList roots = containerDoc.getElementsByTagName("rootfile");
|
||||
if (roots.getLength() == 0) return null;
|
||||
|
||||
String opfPath = ((Element) roots.item(0)).getAttribute("full-path");
|
||||
if (StringUtils.isBlank(opfPath)) return null;
|
||||
|
||||
FileHeader opfHdr = zip.getFileHeader(opfPath);
|
||||
if (opfHdr == null) return null;
|
||||
|
||||
try (InputStream in = zip.getInputStream(opfHdr)) {
|
||||
Document doc = builder.parse(in);
|
||||
NodeList manifestItems = doc.getElementsByTagName("item");
|
||||
|
||||
for (int i = 0; i < manifestItems.getLength(); i++) {
|
||||
Element item = (Element) manifestItems.item(i);
|
||||
String properties = item.getAttribute("properties");
|
||||
if (properties != null && properties.contains("cover-image")) {
|
||||
String href = item.getAttribute("href");
|
||||
return resolvePath(opfPath, href);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to find cover image in OPF: {}", e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String resolvePath(String opfPath, String href) {
|
||||
if (href == null || href.isEmpty()) return null;
|
||||
|
||||
// If href is absolute within the zip (starts with /), return it without leading /
|
||||
if (href.startsWith("/")) return href.substring(1);
|
||||
|
||||
int lastSlash = opfPath.lastIndexOf('/');
|
||||
String basePath = (lastSlash == -1) ? "" : opfPath.substring(0, lastSlash + 1);
|
||||
|
||||
String combined = basePath + href;
|
||||
|
||||
// Normalize path components to handle ".." and "."
|
||||
java.util.LinkedList<String> parts = new java.util.LinkedList<>();
|
||||
for (String part : combined.split("/")) {
|
||||
if (part.equals("..")) {
|
||||
if (!parts.isEmpty()) parts.removeLast();
|
||||
} else if (!part.equals(".") && !part.isEmpty()) {
|
||||
parts.add(part);
|
||||
}
|
||||
}
|
||||
|
||||
return String.join("/", parts);
|
||||
}
|
||||
|
||||
private byte[] extractFileFromZip(File epubFile, String path) {
|
||||
try (ZipFile zip = new ZipFile(epubFile)) {
|
||||
FileHeader header = zip.getFileHeader(path);
|
||||
if (header == null) return null;
|
||||
try (InputStream is = zip.getInputStream(header)) {
|
||||
return is.readAllBytes();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to extract file {} from zip", path);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -412,6 +412,20 @@ class EpubMetadataExtractorTest {
|
||||
|
||||
assertNull(cover);
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should extract cover declared with properties='cover-image' even if ID/href doesn't contain 'cover'")
|
||||
void extractCover_propertiesCoverImage_returnsCoverBytes() throws IOException {
|
||||
byte[] pngImage = createMinimalPngImage();
|
||||
// Use an ID and HREF that do not contain "cover"
|
||||
File epubFile = createEpubWithPropertiesCover(pngImage, "image123", "images/img001.png");
|
||||
|
||||
byte[] cover = extractor.extractCover(epubFile);
|
||||
|
||||
assertNotNull(cover, "Cover should be extracted");
|
||||
assertTrue(cover.length > 0);
|
||||
assertEquals(pngImage.length, cover.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Nested
|
||||
@@ -662,5 +676,50 @@ class EpubMetadataExtractorTest {
|
||||
|
||||
return epubFile;
|
||||
}
|
||||
|
||||
private File createEpubWithPropertiesCover(byte[] coverImageData, String id, String href) throws IOException {
|
||||
String opfContent = String.format("""
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
|
||||
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<dc:title>Book with Properties Cover</dc:title>
|
||||
</metadata>
|
||||
<manifest>
|
||||
<item id="%s" href="%s" media-type="image/png" properties="cover-image"/>
|
||||
</manifest>
|
||||
</package>
|
||||
""", id, href);
|
||||
|
||||
File epubFile = tempDir.resolve("test-prop-cover-" + System.nanoTime() + ".epub").toFile();
|
||||
|
||||
String containerXml = """
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
||||
<rootfiles>
|
||||
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
|
||||
</rootfiles>
|
||||
</container>
|
||||
""";
|
||||
|
||||
try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(epubFile))) {
|
||||
zos.putNextEntry(new ZipEntry("mimetype"));
|
||||
zos.write("application/epub+zip".getBytes(StandardCharsets.UTF_8));
|
||||
zos.closeEntry();
|
||||
|
||||
zos.putNextEntry(new ZipEntry("META-INF/container.xml"));
|
||||
zos.write(containerXml.getBytes(StandardCharsets.UTF_8));
|
||||
zos.closeEntry();
|
||||
|
||||
zos.putNextEntry(new ZipEntry("OEBPS/content.opf"));
|
||||
zos.write(opfContent.getBytes(StandardCharsets.UTF_8));
|
||||
zos.closeEntry();
|
||||
|
||||
zos.putNextEntry(new ZipEntry("OEBPS/" + href));
|
||||
zos.write(coverImageData);
|
||||
zos.closeEntry();
|
||||
}
|
||||
|
||||
return epubFile;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user