mirror of
https://github.com/adityachandelgit/BookLore.git
synced 2026-01-06 09:50:32 -06:00
fix CBX cover extraction (#1155)
This commit is contained in:
@@ -126,6 +126,13 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
private Document buildSecureDocument(InputStream is) throws Exception {
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
|
||||
try {
|
||||
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
|
||||
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
|
||||
} catch (Exception ex) {
|
||||
log.debug("XML factory secure feature not supported: {}", ex.getMessage());
|
||||
}
|
||||
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
|
||||
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
|
||||
factory.setExpandEntityReferences(false);
|
||||
@@ -282,17 +289,23 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
// CBZ path
|
||||
if (lowerName.endsWith(".cbz")) {
|
||||
try (ZipFile zipFile = new ZipFile(file)) {
|
||||
// Try front cover via ComicInfo
|
||||
ZipEntry coverEntry = findFrontCoverEntry(zipFile);
|
||||
if (coverEntry != null) {
|
||||
try (InputStream is = zipFile.getInputStream(coverEntry)) {
|
||||
return is.readAllBytes();
|
||||
byte[] bytes = is.readAllBytes();
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
} else {
|
||||
// Fallback: first image after sorting alphabetically
|
||||
ZipEntry firstImage = findFirstAlphabeticalImageEntry(zipFile);
|
||||
if (firstImage != null) {
|
||||
try (InputStream is2 = zipFile.getInputStream(firstImage)) {
|
||||
return is2.readAllBytes();
|
||||
}
|
||||
// Fallback: iterate images alphabetically until a decodable one is found
|
||||
ZipEntry firstImage = findFirstAlphabeticalImageEntry(zipFile);
|
||||
if (firstImage != null) {
|
||||
// Build a sorted list and iterate for decodable formats
|
||||
java.util.List<ZipEntry> images = listZipImageEntries(zipFile);
|
||||
for (ZipEntry e : images) {
|
||||
try (InputStream is = zipFile.getInputStream(e)) {
|
||||
byte[] bytes = is.readAllBytes();
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -316,13 +329,22 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
if (imageName != null) {
|
||||
SevenZArchiveEntry byName = findSevenZEntryByName(sevenZ, imageName);
|
||||
if (byName != null) {
|
||||
return readSevenZEntryBytes(sevenZ, byName);
|
||||
byte[] bytes = readSevenZEntryBytes(sevenZ, byName);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
try {
|
||||
int index = Integer.parseInt(imageName);
|
||||
SevenZArchiveEntry byIndex = findSevenZImageEntryByIndex(sevenZ, index);
|
||||
if (byIndex != null) {
|
||||
return readSevenZEntryBytes(sevenZ, byIndex);
|
||||
byte[] bytes = readSevenZEntryBytes(sevenZ, byIndex);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
if (index > 0) {
|
||||
SevenZArchiveEntry offByOne = findSevenZImageEntryByIndex(sevenZ, index - 1);
|
||||
if (offByOne != null) {
|
||||
byte[] bytes = readSevenZEntryBytes(sevenZ, offByOne);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException ignore) {
|
||||
// continue to fallback
|
||||
@@ -332,10 +354,14 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: first image alphabetically
|
||||
// Fallback: iterate images alphabetically until a decodable one is found
|
||||
SevenZArchiveEntry first = findFirstAlphabeticalSevenZImageEntry(sevenZ);
|
||||
if (first != null) {
|
||||
return readSevenZEntryBytes(sevenZ, first);
|
||||
java.util.List<SevenZArchiveEntry> images = listSevenZImageEntries(sevenZ);
|
||||
for (SevenZArchiveEntry e : images) {
|
||||
byte[] bytes = readSevenZEntryBytes(sevenZ, e);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to extract cover image from CB7", e);
|
||||
@@ -359,13 +385,22 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
if (imageName != null) {
|
||||
FileHeader byName = findRarHeaderByName(archive, imageName);
|
||||
if (byName != null) {
|
||||
return readRarEntryBytes(archive, byName);
|
||||
byte[] bytes = readRarEntryBytes(archive, byName);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
try {
|
||||
int index = Integer.parseInt(imageName);
|
||||
FileHeader byIndex = findRarImageHeaderByIndex(archive, index);
|
||||
if (byIndex != null) {
|
||||
return readRarEntryBytes(archive, byIndex);
|
||||
byte[] bytes = readRarEntryBytes(archive, byIndex);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
if (index > 0) {
|
||||
FileHeader offByOne = findRarImageHeaderByIndex(archive, index - 1);
|
||||
if (offByOne != null) {
|
||||
byte[] bytes = readRarEntryBytes(archive, offByOne);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException ignore) {
|
||||
// ignore and continue fallback
|
||||
@@ -375,10 +410,14 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: first image in alphabetical order
|
||||
// Fallback: iterate images alphabetically until a decodable one is found
|
||||
FileHeader firstImage = findFirstAlphabeticalImageHeader(archive);
|
||||
if (firstImage != null) {
|
||||
return readRarEntryBytes(archive, firstImage);
|
||||
java.util.List<FileHeader> images = listRarImageHeaders(archive);
|
||||
for (FileHeader fh : images) {
|
||||
byte[] bytes = readRarEntryBytes(archive, fh);
|
||||
if (canDecode(bytes)) return bytes;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to extract cover image from CBR", e);
|
||||
@@ -390,6 +429,16 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
return generatePlaceholderCover(250, 350);
|
||||
}
|
||||
|
||||
private boolean canDecode(byte[] bytes) {
|
||||
if (bytes == null || bytes.length == 0) return false;
|
||||
try (ByteArrayInputStream bais = new ByteArrayInputStream(bytes)) {
|
||||
BufferedImage img = ImageIO.read(bais);
|
||||
return img != null;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private ZipEntry findFrontCoverEntry(ZipFile zipFile) {
|
||||
ZipEntry comicInfoEntry = findComicInfoEntry(zipFile);
|
||||
if (comicInfoEntry != null) {
|
||||
@@ -401,12 +450,27 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
if (byName != null) {
|
||||
return byName;
|
||||
}
|
||||
// also try base-name match for archives with directories or odd encodings
|
||||
String imageBase = baseName(imageName);
|
||||
java.util.Enumeration<? extends ZipEntry> it = zipFile.entries();
|
||||
while (it.hasMoreElements()) {
|
||||
ZipEntry e = it.nextElement();
|
||||
if (!e.isDirectory() && isImageEntry(e.getName())) {
|
||||
if (baseName(e.getName()).equalsIgnoreCase(imageBase)) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
}
|
||||
try {
|
||||
int index = Integer.parseInt(imageName);
|
||||
ZipEntry byIndex = findImageEntryByIndex(zipFile, index);
|
||||
if (byIndex != null) {
|
||||
return byIndex;
|
||||
}
|
||||
if (index > 0) {
|
||||
ZipEntry offByOne = findImageEntryByIndex(zipFile, index - 1);
|
||||
if (offByOne != null) return offByOne;
|
||||
}
|
||||
} catch (NumberFormatException ignore) {
|
||||
// ignore
|
||||
}
|
||||
@@ -415,8 +479,10 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
log.warn("Failed to parse ComicInfo.xml for cover", e);
|
||||
}
|
||||
}
|
||||
ZipEntry firstImage = findFirstAlphabeticalImageEntry(zipFile);
|
||||
return firstImage;
|
||||
// Heuristic filenames before generic fallback
|
||||
ZipEntry heuristic = findHeuristicCover(zipFile);
|
||||
if (heuristic != null) return heuristic;
|
||||
return findFirstAlphabeticalImageEntry(zipFile);
|
||||
}
|
||||
|
||||
private ZipEntry findImageEntryByIndex(ZipFile zipFile, int index) {
|
||||
@@ -457,6 +523,7 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
|
||||
private boolean isImageEntry(String name) {
|
||||
if (!isContentEntry(name)) return false;
|
||||
String lower = name.toLowerCase();
|
||||
return (
|
||||
lower.endsWith(".jpg") ||
|
||||
@@ -468,6 +535,16 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
);
|
||||
}
|
||||
|
||||
private boolean isContentEntry(String name) {
|
||||
if (name == null) return false;
|
||||
String norm = name.replace('\\', '/');
|
||||
if (norm.startsWith("__MACOSX/") || norm.contains("/__MACOSX/")) return false;
|
||||
String base = baseName(norm);
|
||||
if (base.startsWith(".")) return false;
|
||||
if (base.equalsIgnoreCase(".ds_store")) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private byte[] generatePlaceholderCover(int width, int height) {
|
||||
BufferedImage image = new BufferedImage(
|
||||
width,
|
||||
@@ -574,10 +651,7 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
}
|
||||
if (images.isEmpty()) return null;
|
||||
images.sort(Comparator.comparing(
|
||||
fh -> fh.getFileName() == null ? "" : fh.getFileName(),
|
||||
String.CASE_INSENSITIVE_ORDER
|
||||
));
|
||||
images.sort((a, b) -> naturalCompare(a.getFileName(), b.getFileName()));
|
||||
return images.get(0);
|
||||
}
|
||||
|
||||
@@ -591,10 +665,7 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
}
|
||||
if (images.isEmpty()) return null;
|
||||
images.sort(Comparator.comparing(
|
||||
entry -> entry.getName() == null ? "" : entry.getName(),
|
||||
String.CASE_INSENSITIVE_ORDER
|
||||
));
|
||||
images.sort((a, b) -> naturalCompare(a.getName(), b.getName()));
|
||||
return images.get(0);
|
||||
}
|
||||
|
||||
@@ -642,10 +713,7 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
}
|
||||
}
|
||||
if (images.isEmpty()) return null;
|
||||
images.sort(Comparator.comparing(
|
||||
entry -> entry.getName() == null ? "" : entry.getName(),
|
||||
String.CASE_INSENSITIVE_ORDER
|
||||
));
|
||||
images.sort((a, b) -> naturalCompare(a.getName(), b.getName()));
|
||||
return images.get(0);
|
||||
}
|
||||
|
||||
@@ -657,4 +725,76 @@ public class CbxMetadataExtractor implements FileMetadataExtractor {
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private java.util.List<ZipEntry> listZipImageEntries(ZipFile zipFile) {
|
||||
java.util.List<ZipEntry> images = new java.util.ArrayList<>();
|
||||
java.util.Enumeration<? extends ZipEntry> en = zipFile.entries();
|
||||
while (en.hasMoreElements()) {
|
||||
ZipEntry e = en.nextElement();
|
||||
if (!e.isDirectory() && isImageEntry(e.getName())) images.add(e);
|
||||
}
|
||||
images.sort((a, b) -> naturalCompare(a.getName(), b.getName()));
|
||||
// Heuristic preferred names first
|
||||
images.sort((a, b) -> Boolean.compare(!likelyCoverName(baseName(a.getName())), !likelyCoverName(baseName(b.getName()))));
|
||||
return images;
|
||||
}
|
||||
|
||||
private java.util.List<SevenZArchiveEntry> listSevenZImageEntries(SevenZFile sevenZ) throws IOException {
|
||||
java.util.List<SevenZArchiveEntry> images = new java.util.ArrayList<>();
|
||||
for (SevenZArchiveEntry e : sevenZ.getEntries()) {
|
||||
if (!e.isDirectory() && isImageEntry(e.getName())) images.add(e);
|
||||
}
|
||||
images.sort((a, b) -> naturalCompare(a.getName(), b.getName()));
|
||||
images.sort((a, b) -> Boolean.compare(!likelyCoverName(baseName(a.getName())), !likelyCoverName(baseName(b.getName()))));
|
||||
return images;
|
||||
}
|
||||
|
||||
private java.util.List<FileHeader> listRarImageHeaders(Archive archive) {
|
||||
java.util.List<FileHeader> images = new java.util.ArrayList<>();
|
||||
for (FileHeader fh : archive.getFileHeaders()) {
|
||||
if (fh != null && !fh.isDirectory() && isImageEntry(fh.getFileName())) images.add(fh);
|
||||
}
|
||||
images.sort((a, b) -> naturalCompare(a.getFileName(), b.getFileName()));
|
||||
images.sort((a, b) -> Boolean.compare(!likelyCoverName(baseName(a.getFileName())), !likelyCoverName(baseName(b.getFileName()))));
|
||||
return images;
|
||||
}
|
||||
|
||||
private boolean likelyCoverName(String base) {
|
||||
if (base == null) return false;
|
||||
String n = base.toLowerCase();
|
||||
return n.startsWith("cover") || n.equals("folder") || n.startsWith("front");
|
||||
}
|
||||
|
||||
private int naturalCompare(String a, String b) {
|
||||
if (a == null) return b == null ? 0 : -1;
|
||||
if (b == null) return 1;
|
||||
String s1 = a.toLowerCase();
|
||||
String s2 = b.toLowerCase();
|
||||
int i = 0, j = 0, n1 = s1.length(), n2 = s2.length();
|
||||
while (i < n1 && j < n2) {
|
||||
char c1 = s1.charAt(i);
|
||||
char c2 = s2.charAt(j);
|
||||
if (Character.isDigit(c1) && Character.isDigit(c2)) {
|
||||
int i1 = i; while (i1 < n1 && Character.isDigit(s1.charAt(i1))) i1++;
|
||||
int j1 = j; while (j1 < n2 && Character.isDigit(s2.charAt(j1))) j1++;
|
||||
String num1 = s1.substring(i, i1).replaceFirst("^0+", "");
|
||||
String num2 = s2.substring(j, j1).replaceFirst("^0+", "");
|
||||
int cmp = Integer.compare(num1.isEmpty() ? 0 : Integer.parseInt(num1), num2.isEmpty() ? 0 : Integer.parseInt(num2));
|
||||
if (cmp != 0) return cmp;
|
||||
i = i1; j = j1;
|
||||
} else {
|
||||
if (c1 != c2) return Character.compare(c1, c2);
|
||||
i++; j++;
|
||||
}
|
||||
}
|
||||
return Integer.compare(n1 - i, n2 - j);
|
||||
}
|
||||
|
||||
private ZipEntry findHeuristicCover(ZipFile zipFile) {
|
||||
java.util.List<ZipEntry> images = listZipImageEntries(zipFile);
|
||||
for (ZipEntry e : images) {
|
||||
if (likelyCoverName(baseName(e.getName()))) return e;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user