diff --git a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/EpubMetadataExtractor.java b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/EpubMetadataExtractor.java index 1c51ff3d2..d83502866 100644 --- a/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/EpubMetadataExtractor.java +++ b/booklore-api/src/main/java/com/adityachandel/booklore/service/metadata/extractor/EpubMetadataExtractor.java @@ -2,6 +2,9 @@ package com.adityachandel.booklore.service.metadata.extractor; import com.adityachandel.booklore.model.dto.BookMetadata; import io.documentnode.epub4j.domain.Book; +import io.documentnode.epub4j.domain.MediaType; +import io.documentnode.epub4j.domain.MediaTypes; +import io.documentnode.epub4j.domain.Resource; import io.documentnode.epub4j.epub.EpubReader; import lombok.extern.slf4j.Slf4j; import net.lingala.zip4j.ZipFile; @@ -20,13 +23,16 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStream; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.OffsetDateTime; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; @@ -38,39 +44,54 @@ public class EpubMetadataExtractor implements FileMetadataExtractor { private static final Pattern YEAR_ONLY_PATTERN = Pattern.compile("^\\d{4}$"); private static final String OPF_NS = "http://www.idpf.org/2007/opf"; + // List of all media types that epub4j has so we can lazy load them. + // Note that we have to add in null to handle files without extensions like mimetype. 
+ private static List MEDIA_TYPES = new ArrayList<>(); + static { + for (int i = 0; i < MediaTypes.mediaTypes.length; i++) { + MEDIA_TYPES.add(MediaTypes.mediaTypes[i]); + } + MEDIA_TYPES.add(null); + } + @Override public byte[] extractCover(File epubFile) { - try (FileInputStream fis = new FileInputStream(epubFile)) { - Book epub = new EpubReader().readEpub(fis); - io.documentnode.epub4j.domain.Resource coverImage = epub.getCoverImage(); + try (ZipFile zip = new ZipFile(epubFile)) { + Book epub = new EpubReader().readEpubLazy(zip, "UTF-8", MEDIA_TYPES); - if (coverImage == null) { - String coverHref = findCoverImageHrefInOpf(epubFile); - if (coverHref != null) { - byte[] data = extractFileFromZip(epubFile, coverHref); - if (data != null) return data; + // First we read the cover image from the epub4j reader. + // We filter to only images since it will default to the first page. + byte[] image = getImageFromEpubResource(epub.getCoverImage()); + if (image != null) { + return image; + } + + // We fall back to reading the image based on the cover-image property. + String coverHref = findCoverImageHrefInOpf(epubFile); + if (coverHref != null) { + image = extractFileFromZip(epubFile, coverHref); + if (image != null) { + return image; } } - if (coverImage == null) { - for (io.documentnode.epub4j.domain.Resource res : epub.getResources().getAll()) { - String id = res.getId(); - String href = res.getHref(); - if ((id != null && id.toLowerCase().contains("cover")) || - (href != null && href.toLowerCase().contains("cover"))) { - if (res.getMediaType() != null && res.getMediaType().getName().startsWith("image")) { - coverImage = res; - break; - } + // As a last resort we look at all of the files in the epub for something cover related. 
+ for (Resource res : epub.getResources().getAll()) { + String id = res.getId(); + String href = res.getHref(); + if ((id != null && id.toLowerCase().contains("cover")) || + (href != null && href.toLowerCase().contains("cover"))) { + image = getImageFromEpubResource(res); + if (image != null) { + return image; } } } - - return (coverImage != null) ? coverImage.getData() : null; } catch (Exception e) { log.warn("Failed to extract cover from EPUB: {}", epubFile.getName(), e); - return null; } + + return null; } @Override @@ -332,6 +353,24 @@ public class EpubMetadataExtractor implements FileMetadataExtractor { return null; } + private byte[] getImageFromEpubResource(Resource res) { + if (res == null) { + return null; + } + + MediaType mt = res.getMediaType(); + if (mt == null || !mt.getName().startsWith("image")) { + return null; + } + + try { + return res.getData(); + } catch (IOException e) { + log.warn("Failed to read data for resource", e); + return null; + } + } + private String findCoverImageHrefInOpf(File epubFile) { try (ZipFile zip = new ZipFile(epubFile)) { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();