mirror of
https://github.com/revenz/FileFlowsPlugins.git
synced 2025-12-20 14:59:31 -06:00
FF-1507: Comic PDF Extractor now tries to read the image type from the PDF and falls back to JPG if it cannot
This commit is contained in:
@@ -2,8 +2,14 @@
|
||||
using Docnet.Core.Editors;
|
||||
using Docnet.Core.Models;
|
||||
using SixLabors.ImageSharp;
|
||||
using SixLabors.ImageSharp.Formats;
|
||||
using SixLabors.ImageSharp.Formats.Bmp;
|
||||
using SixLabors.ImageSharp.Formats.Gif;
|
||||
using SixLabors.ImageSharp.Formats.Jpeg;
|
||||
using SixLabors.ImageSharp.Formats.Png;
|
||||
using SixLabors.ImageSharp.Formats.Tiff;
|
||||
using SixLabors.ImageSharp.Formats.Webp;
|
||||
using SixLabors.ImageSharp.PixelFormats;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace FileFlows.ComicNodes.Helpers;
|
||||
|
||||
@@ -27,8 +33,26 @@ internal class PdfHelper
|
||||
var height = pageReader.GetPageHeight();
|
||||
|
||||
using var image = Image.LoadPixelData<Bgra32>(rawBytes, width, height);
|
||||
string file = Path.Combine(destinationDirectory, filePrefix + "-" + i.ToString(new String('0', pageCount.ToString().Length)) + ".png");
|
||||
image.SaveAsPng(file);
|
||||
|
||||
// Infer the image format
|
||||
(IImageFormat? imageFormat, string? fileExtension) = InferImageFormat(rawBytes);
|
||||
if (imageFormat == null)
|
||||
{
|
||||
args?.Logger?.WLog("Failed to inter image type from PDF, failing back to JPG");
|
||||
imageFormat = JpegFormat.Instance;
|
||||
fileExtension = "jpg";
|
||||
}
|
||||
else
|
||||
{
|
||||
args?.Logger?.ILog("File Extension of image: " + fileExtension);
|
||||
}
|
||||
|
||||
var file = Path.Combine(destinationDirectory, filePrefix + "-" + i.ToString(new string('0', pageCount.ToString().Length))) + "." + fileExtension;
|
||||
|
||||
using (var outputStream = File.Create(file + "." + fileExtension))
|
||||
{
|
||||
image.Save(outputStream, imageFormat);
|
||||
}
|
||||
|
||||
if (args?.PartPercentageUpdate != null)
|
||||
{
|
||||
@@ -44,6 +68,41 @@ internal class PdfHelper
|
||||
args?.PartPercentageUpdate(halfProgress ? 50 : 0);
|
||||
}
|
||||
|
||||
/// <summary>
/// Infers the image format based on the first few bytes of the image data.
/// </summary>
/// <remarks>
/// Known signatures (magic numbers) are compared byte-by-byte in file order, so the
/// result does not depend on the endianness of the host. If no signature matches,
/// the decision is delegated to <c>Image.DetectFormat</c>.
/// </remarks>
/// <param name="bytes">The image data bytes.</param>
/// <returns>
/// The inferred image format and file extension (without a leading dot).
/// Both values are null when the format could not be determined.
/// </returns>
private static (IImageFormat? Format, string? Extension) InferImageFormat(byte[] bytes)
{
    // Nothing to inspect; let the caller apply its own fallback.
    if (bytes is null || bytes.Length == 0)
        return (null, null);

    // Try to infer image format based on magic numbers
    if (HasSignature(0, 0xFF, 0xD8)) // JPEG
        return (JpegFormat.Instance, "jpg");
    if (HasSignature(0, 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A)) // PNG
        return (PngFormat.Instance, "png");
    if (HasSignature(0, 0x47, 0x49, 0x46, 0x38)) // GIF ("GIF8")
        return (GifFormat.Instance, "gif");
    // WebP: "RIFF" at offset 0 and "WEBP" at offset 8, so 12 bytes are required
    if (HasSignature(0, 0x52, 0x49, 0x46, 0x46) && HasSignature(8, 0x57, 0x45, 0x42, 0x50))
        return (WebpFormat.Instance, "webp");
    // TIFF: "II*\0" (little-endian) or "MM\0*" (big-endian)
    if (HasSignature(0, 0x49, 0x49, 0x2A, 0x00) || HasSignature(0, 0x4D, 0x4D, 0x00, 0x2A))
        return (TiffFormat.Instance, "tiff");
    if (HasSignature(0, 0x42, 0x4D)) // BMP ("BM")
        return (BmpFormat.Instance, "bmp");

    // If none of the known formats are detected, fall back to Image.DetectFormat()
    try
    {
        IImageFormat? format = Image.DetectFormat(bytes);
        if (format is null)
            return (null, null);

        // Use the MIME subtype (e.g. "image/png" -> "png") as the extension
        string extension = format.DefaultMimeType?.Split('/')[1] ?? "png";
        return (format, extension);
    }
    catch (Exception)
    {
        // Detection is best-effort: any failure means "unknown" and the caller falls back
        return (null, null);
    }

    // Returns true when the data contains the given signature bytes starting at the offset.
    // The length check comes first so short buffers can never be indexed out of range.
    bool HasSignature(int offset, params byte[] signature) =>
        bytes.Length >= offset + signature.Length &&
        bytes.AsSpan(offset, signature.Length).SequenceEqual(signature);
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Creates a PDF from images
|
||||
|
||||
@@ -12,10 +12,10 @@ public class ExtractTests
|
||||
public void Extract_Pdf()
|
||||
{
|
||||
var logger = new TestLogger();
|
||||
var args = new NodeParameters(@"D:\comics\testfiles\fp1.pdf", logger, false, string.Empty, null);
|
||||
var args = new NodeParameters(@"/home/john/Comics/unprocessed/Ghosts Book 1 Excerpt.pdf", logger, false, string.Empty, null);
|
||||
|
||||
var node = new ComicExtractor();
|
||||
node.DestinationPath = @"D:\comics\converted\pdf";
|
||||
node.DestinationPath = @"/home/john/Comics/extracted";
|
||||
if (Directory.Exists(node.DestinationPath))
|
||||
Directory.Delete(node.DestinationPath, true);
|
||||
Directory.CreateDirectory(node.DestinationPath);
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user