mirror of
https://github.com/revenz/FileFlowsPlugins.git
synced 2026-01-06 09:29:33 -06:00
various updates
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using FileFlows.Web.Helpers;
|
||||
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
@@ -35,8 +36,6 @@ public class Downloader : Node
|
||||
[TextVariable(1)]
|
||||
public string Url { get; set; } = null!;
|
||||
|
||||
private static HttpClient? client;
|
||||
|
||||
/// <inheritdoc />
|
||||
public override int Execute(NodeParameters args)
|
||||
{
|
||||
@@ -48,7 +47,7 @@ public class Downloader : Node
|
||||
return -1;
|
||||
}
|
||||
|
||||
var result = Download(args.Logger!, url, args.TempPath, (percent) =>
|
||||
var result = DownloadHelper.Download(args.Logger!, url, args.TempPath, (percent) =>
|
||||
{
|
||||
args.PartPercentageUpdate?.Invoke(percent);
|
||||
});
|
||||
@@ -64,196 +63,4 @@ public class Downloader : Node
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// <summary>
/// Downloads a URL into a file inside the destination folder
/// </summary>
/// <remarks>
/// The file name is taken from the Content-Disposition header when the server sends one (after sanitizing).
/// Otherwise a GUID is used as the name, with an extension derived from the content type, or from the
/// first bytes of the body when the content type is not recognized (falling back to ".html").
/// </remarks>
/// <param name="logger">the logger to use</param>
/// <param name="url">the URL to download</param>
/// <param name="destinationPath">the folder the downloaded file is written to</param>
/// <param name="percentUpdate">
/// callback invoked with bytes written divided by the Content-Length (a value from 0 to 1),
/// only called when the server reports a content length
/// </param>
/// <returns>the full path of the downloaded file if successful, otherwise an error</returns>
public Result<string> Download(ILogger logger, string url, string destinationPath, Action<float> percentUpdate)
{
    // The shared client is fully configured before it is stored in the static field,
    // so no caller can ever observe a client that is missing its User-Agent header.
    var http = client ??= CreateHttpClient();

    try
    {
        var tempFile = Path.Combine(destinationPath, Guid.NewGuid().ToString());

        // GetAwaiter().GetResult() is used instead of .Result so a failure surfaces the real
        // exception message rather than an AggregateException wrapper.
        using (var response = http.GetAsync(url, HttpCompletionOption.ResponseHeadersRead).GetAwaiter().GetResult())
        {
            if (!response.IsSuccessStatusCode)
            {
                return Result<string>.Fail($"Failed to download URL: {url}. Status code: {response.StatusCode}");
            }

            var contentType = response.Content.Headers.ContentType?.MediaType;
            if (string.IsNullOrEmpty(contentType) == false)
                logger?.ILog("ContentType: " + contentType);
            var fileExtension = GetFileExtensionFromContentType(contentType);

            // The response body is a forward-only network stream: open it exactly once and never seek it.
            using var contentStream = response.Content.ReadAsStreamAsync().GetAwaiter().GetResult();

            // Bytes consumed while sniffing the file type; they must be written to the file first.
            var sniffed = Array.Empty<byte>();

            // Check if the URL response contains a filename
            var dispositionName = response.Content.Headers.ContentDisposition?.FileName;
            if (dispositionName != null)
            {
                tempFile = Path.Combine(destinationPath, SanitizeFileName(dispositionName.Trim('"')));
            }
            else
            {
                if (fileExtension == null)
                {
                    // Content type not recognized, look at the leading bytes of the body instead
                    sniffed = ReadLeadingBytes(contentStream, 512);
                    fileExtension = GetFileExtensionFromHeader(sniffed) ?? ".html";
                }

                tempFile += fileExtension;
            }

            using (var fileStream = new FileStream(tempFile, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                var totalBytes = response.Content.Headers.ContentLength ?? -1L;
                var totalRead = 0L;

                void ReportProgress()
                {
                    // Progress can only be calculated when the server told us the total size
                    if (totalBytes > 0)
                        percentUpdate?.Invoke((float)totalRead / totalBytes);
                }

                if (sniffed.Length > 0)
                {
                    fileStream.Write(sniffed, 0, sniffed.Length);
                    totalRead += sniffed.Length;
                    ReportProgress();
                }

                var buffer = new byte[8192];
                int read;
                while ((read = contentStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fileStream.Write(buffer, 0, read);
                    totalRead += read;
                    ReportProgress();
                }
            }
        }

        logger?.ILog($"Downloaded file saved to: {tempFile}");
        return tempFile;
    }
    catch (Exception ex)
    {
        return Result<string>.Fail($"Exception during download: {ex.Message}");
    }
}

/// <summary>
/// Creates the HTTP client used for downloads, following redirects, decompressing
/// gzip/deflate responses and sending a browser User-Agent
/// </summary>
/// <returns>the configured HTTP client</returns>
private static HttpClient CreateHttpClient()
{
    var handler = new HttpClientHandler
    {
        AllowAutoRedirect = true,
        AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
    };
    var httpClient = new HttpClient(handler);
    httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
    return httpClient;
}

/// <summary>
/// Reads up to the requested number of bytes from the start of a stream
/// </summary>
/// <param name="stream">the stream to read from</param>
/// <param name="count">the maximum number of bytes to read</param>
/// <returns>the bytes actually read, shorter than count when the stream ends early</returns>
private static byte[] ReadLeadingBytes(Stream stream, int count)
{
    var buffer = new byte[count];
    var filled = 0;
    // A single Read may return fewer bytes than requested, so keep reading until full or end of stream
    while (filled < count)
    {
        var read = stream.Read(buffer, filled, count - filled);
        if (read == 0)
            break;
        filled += read;
    }

    if (filled < count)
        Array.Resize(ref buffer, filled);
    return buffer;
}
|
||||
|
||||
|
||||
/// <summary>
/// Gets the file extension from the content type.
/// </summary>
/// <remarks>
/// Media types are case-insensitive, so the value is lower-cased before the lookup.
/// Any parameters after a ';' (for example "; charset=utf-8") and surrounding whitespace are ignored.
/// </remarks>
/// <param name="contentType">The content type, may be null.</param>
/// <returns>The corresponding file extension, or null if not recognized.</returns>
private string? GetFileExtensionFromContentType(string? contentType)
{
    // Split always yields at least one element, so [0] is safe even for an empty string
    var mediaType = contentType?.Split(';')[0].Trim().ToLowerInvariant();

    return mediaType switch
    {
        "text/html" => ".html",
        "image/jpeg" => ".jpg",
        "image/png" => ".png",
        "image/gif" => ".gif",
        "application/pdf" => ".pdf",
        "application/zip" => ".zip",
        "application/json" => ".json",
        "text/plain" => ".txt",
        "audio/mpeg" => ".mp3",
        "video/mp4" => ".mp4",
        "application/vnd.ms-excel" => ".xls",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => ".xlsx",
        "application/msword" => ".doc",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => ".docx",
        "application/vnd.ms-powerpoint" => ".ppt",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation" => ".pptx",
        "application/x-rar-compressed" => ".rar",
        "application/x-tar" => ".tar",
        "application/x-7z-compressed" => ".7z",
        // Add more content types and their corresponding file extensions as needed
        _ => null
    };
}
|
||||
|
||||
/// <summary>
/// Gets the file extension from the file header bytes.
/// </summary>
/// <remarks>
/// Only the leading bytes are inspected, so the buffer may hold just the start of a larger file.
/// </remarks>
/// <param name="fileHeader">The first few bytes of the file to identify its type.</param>
/// <returns>The corresponding file extension, or null if not recognized or fewer than 4 bytes are supplied.</returns>
private string? GetFileExtensionFromHeader(byte[] fileHeader)
{
    if (fileHeader is null || fileHeader.Length < 4)
        return null;

    byte b0 = fileHeader[0], b1 = fileHeader[1], b2 = fileHeader[2], b3 = fileHeader[3];

    // PDF file signature: "%PDF"
    if (b0 == 0x25 && b1 == 0x50 && b2 == 0x44 && b3 == 0x46)
        return ".pdf";

    // ZIP file signatures: "PK" followed by 03 04 (local file header),
    // 05 06 (empty archive) or 07 08 (spanned archive)
    if (b0 == 0x50 && b1 == 0x4B &&
        ((b2 == 0x03 && b3 == 0x04) || (b2 == 0x05 && b3 == 0x06) || (b2 == 0x07 && b3 == 0x08)))
        return ".zip";

    // PNG file signature: 0x89 "PNG"
    if (b0 == 0x89 && b1 == 0x50 && b2 == 0x4E && b3 == 0x47)
        return ".png";

    // JPEG file signature: start-of-image marker FF D8 followed by the next marker's FF.
    // The end-of-image marker (FF D9) sits at the end of the file, not the end of a header buffer,
    // so it cannot be used to identify a partially read file.
    if (b0 == 0xFF && b1 == 0xD8 && b2 == 0xFF)
        return ".jpg";

    return null;
}
|
||||
|
||||
|
||||
/// <summary>
/// Produces a filename that is safe to combine with a folder path.
/// </summary>
/// <remarks>
/// Path separators and "../" / "..\" sequences are removed outright, then every character that is not
/// an ASCII letter, digit, underscore, hyphen or dot is replaced with an underscore.
/// </remarks>
/// <param name="fileName">The filename to sanitize.</param>
/// <returns>The sanitized filename.</returns>
private string SanitizeFileName(string fileName)
{
    // Step 1: strip traversal sequences and directory separators
    var withoutSeparators = Regex.Replace(fileName, @"\.\.\/|\\|\.\.\\|\/", string.Empty);

    // Step 2: anything outside the safe character set becomes an underscore
    return Regex.Replace(withoutSeparators, @"[^a-zA-Z0-9_\-\.]", "_");
}
|
||||
}
|
||||
51
Web/FlowElements/InputUrl.cs
Normal file
51
Web/FlowElements/InputUrl.cs
Normal file
@@ -0,0 +1,51 @@
|
||||
using FileFlows.Web.Helpers;
|
||||
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
/// <summary>
/// Input flow element whose working file is a URL, with the option of downloading it
/// </summary>
public class InputUrl : Node
{
    /// <inheritdoc />
    public override FlowElementType Type => FlowElementType.Input;

    /// <inheritdoc />
    public override int Outputs => 1;

    /// <inheritdoc />
    public override string Group => "Web";

    /// <inheritdoc />
    public override string Icon => "fas fa-globe";

    /// <inheritdoc />
    public override string HelpUrl => "https://fileflows.com/docs/plugins/web/input-url";

    /// <summary>
    /// Gets or sets if the URL should be downloaded and the downloaded file used as the working file
    /// </summary>
    [Boolean(1)]
    public bool Download { get; set; }

    /// <summary>
    /// Stores the working file as the "Url" variable and, when <see cref="Download"/> is set,
    /// downloads it into the temp path and makes the downloaded file the working file
    /// </summary>
    /// <param name="args">the node parameters</param>
    /// <returns>1 on success, or -1 if the download failed</returns>
    public override int Execute(NodeParameters args)
    {
        // The working file of this input is the URL itself
        string sourceUrl = args.WorkingFile;
        args.Variables["Url"] = sourceUrl;

        if (!Download)
        {
            return 1;
        }

        var download = DownloadHelper.Download(
            args.Logger!,
            sourceUrl,
            args.TempPath,
            percent => args.PartPercentageUpdate?.Invoke(percent));

        if (download.Failed(out var error))
        {
            args.FailureReason = error;
            args.Logger?.ELog(error);
            return -1;
        }

        args.SetWorkingFile(download.Value);
        return 1;
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
@@ -33,7 +34,10 @@ public class HtmlImageParser : HtmlParser
|
||||
protected override string VariableName => "ImageUrls";
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override List<string> ParseHtml(ILogger? logger, string html)
|
||||
protected override List<string> ParseHtml(NodeParameters args, string html)
|
||||
=> ParseHtmlForUrls(args, html, ["img"], ["src", "content"]);
|
||||
|
||||
private List<string> ParseHtmlOld(ILogger? logger, string html)
|
||||
{
|
||||
var imageUrls = new List<string>();
|
||||
var regex = new Regex("<img[^>]+src=(\"([^\"]*)\"|'([^']*)'|([^\\s>]+))", RegexOptions.IgnoreCase);
|
||||
|
||||
@@ -33,18 +33,53 @@ public class HtmlLinkParser : HtmlParser
|
||||
protected override string VariableName => "Links";
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override List<string> ParseHtml(ILogger? logger, string html)
|
||||
protected override List<string> ParseHtml(NodeParameters args, string html)
|
||||
=> ParseHtmlForUrls(args, html, ["a"], ["href"]);
|
||||
|
||||
private List<string> ParseHtmlOld(ILogger? logger, string html)
|
||||
{
|
||||
var urls = new List<string>();
|
||||
var regex = new Regex("<a[^>]+href=(\"([^\"]*)\"|'([^']*)'|([^\\s>]+))", RegexOptions.IgnoreCase);
|
||||
var matches = regex.Matches(html);
|
||||
|
||||
string? baseUrl = null;
|
||||
if (Variables.TryGetValue("Url", out var oUrl) && oUrl is string sBaseUrl)
|
||||
{
|
||||
try
|
||||
{
|
||||
var uri = new Uri(sBaseUrl);
|
||||
|
||||
// Get the absolute path without the query parameters
|
||||
baseUrl = uri.GetLeftPart(UriPartial.Path);
|
||||
|
||||
// Ensure the path ends with a slash
|
||||
if (baseUrl.EndsWith("/") == false)
|
||||
baseUrl += "/";
|
||||
|
||||
// Use the folderPath as needed
|
||||
logger?.ILog("Base URL: " + baseUrl);
|
||||
}
|
||||
catch (Exception)
|
||||
{
|
||||
// Ignored
|
||||
}
|
||||
}
|
||||
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups.Count > 1)
|
||||
{
|
||||
var url = match.Groups[1].Value.TrimStart('"', '\'').TrimEnd('"', '\'');
|
||||
urls.Add(WebUtility.HtmlDecode(url));
|
||||
url = WebUtility.HtmlDecode(url);
|
||||
if (baseUrl != null && Regex.IsMatch(url, "^http(s)://", RegexOptions.IgnoreCase) == false)
|
||||
{
|
||||
logger?.ILog("Relative URL: " + url);
|
||||
if (url.StartsWith("/"))
|
||||
url = url[1..];
|
||||
url = baseUrl + url;
|
||||
logger?.ILog("Absolute URL: " + url);
|
||||
}
|
||||
urls.Add(url);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
@@ -50,7 +51,7 @@ public abstract class HtmlParser : Node
|
||||
|
||||
var html = result.Value;
|
||||
|
||||
var list = ParseHtml(args.Logger, html);
|
||||
var list = ParseHtml(args, html);
|
||||
|
||||
var pattern = args.ReplaceVariables(Pattern ?? string.Empty, stripMissing: true);
|
||||
if (string.IsNullOrWhiteSpace(pattern) == false)
|
||||
@@ -87,7 +88,8 @@ public abstract class HtmlParser : Node
|
||||
args.Logger?.ILog("Found item: " + item);
|
||||
}
|
||||
|
||||
args.Variables[VariableName] = list;
|
||||
if(string.IsNullOrWhiteSpace(VariableName) == false)
|
||||
args.Variables[VariableName] = list;
|
||||
// current list is the default current list FileFlows will use in a list flow element if no list is specified
|
||||
args.Variables["CurrentList"] = list;
|
||||
|
||||
@@ -97,10 +99,10 @@ public abstract class HtmlParser : Node
|
||||
/// <summary>
|
||||
/// Parses the HTML
|
||||
/// </summary>
|
||||
/// <param name="logger">the logger to use</param>
|
||||
/// <param name="args">the node parameters</param>
|
||||
/// <param name="html">the HTML to parse</param>
|
||||
/// <returns>the items found while parsing</returns>
|
||||
protected abstract List<string> ParseHtml(ILogger? logger, string html);
|
||||
protected abstract List<string> ParseHtml(NodeParameters args, string html);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the file content
|
||||
@@ -138,4 +140,60 @@ public abstract class HtmlParser : Node
|
||||
|
||||
return File.ReadAllText(localFileResult.Value);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Parses the HTML for the specified tags and attributes
/// </summary>
/// <remarks>
/// Attribute values starting with "http" are returned unchanged. Any other value is resolved against
/// the "Url" variable when that variable holds a valid absolute URL, otherwise it is skipped.
/// Duplicates are not removed.
/// </remarks>
/// <param name="args">the node parameters</param>
/// <param name="html">the HTML to parse</param>
/// <param name="tags">the HTML tags to look for</param>
/// <param name="attributes">the attributes to look for</param>
/// <returns>a list of matching URLs</returns>
protected List<string> ParseHtmlForUrls(NodeParameters args, string html, string[] tags, string[] attributes)
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(html);

    Uri? baseUri = null;
    if (args.Variables.TryGetValue("Url", out var oUrl) && oUrl is string sBaseUrl)
    {
        // TryCreate instead of the Uri constructor: a malformed "Url" variable must not abort
        // parsing with a UriFormatException, it only means relative URLs cannot be resolved.
        if (Uri.TryCreate(sBaseUrl, UriKind.Absolute, out var parsedBaseUri))
        {
            baseUri = parsedBaseUri;
            args.Logger?.ILog("Base URL: " + baseUri);
        }
        else
        {
            args.Logger?.ILog("Base URL is not a valid absolute URL, relative URLs will be skipped: " + sBaseUrl);
        }
    }

    List<string> results = new();

    foreach (var tag in tags)
    {
        // SelectNodes returns null rather than an empty collection when nothing matches
        var nodes = htmlDoc.DocumentNode.SelectNodes($"//{tag}");
        if (nodes == null) continue;

        foreach (var ele in nodes)
        {
            foreach (var att in attributes)
            {
                var srcValue = ele.GetAttributeValue(att, string.Empty);
                if (string.IsNullOrEmpty(srcValue))
                    continue;

                if (srcValue.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                {
                    // Already absolute, use as is
                    results.Add(srcValue);
                    continue;
                }

                if (baseUri == null)
                    continue; // nothing to resolve a relative URL against

                if (Uri.TryCreate(srcValue, UriKind.Relative, out var relativeSrcUri))
                {
                    var absoluteSrcUri = new Uri(baseUri, relativeSrcUri);
                    results.Add(absoluteSrcUri.ToString());
                }
            }
        }
    }

    return results;
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
public class UrlToRelativePath : Node
|
||||
{
|
||||
public override int Inputs => 1;
|
||||
public override int Outputs => 2;
|
||||
public override string Icon =>
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the URL to get a path for
|
||||
/// </summary>
|
||||
[TextVariable(1)]
|
||||
public string Url { get; set; } = null!;
|
||||
|
||||
public override int Execute(NodeParameters args)
|
||||
{
|
||||
|
||||
// Create a Uri object from the URL
|
||||
var uri = new Uri(x);
|
||||
|
||||
// Get the path without the query
|
||||
var path = uri.AbsolutePath;
|
||||
|
||||
// Get the query part and replace the '=' and '&' with '-'
|
||||
var query = uri.Query.TrimStart('?').Replace('=', '-').Replace('&', '/');
|
||||
|
||||
// Combine the path and the modified query
|
||||
var fakePath = path.TrimEnd('/') + (string.IsNullOrEmpty(query) ? string.Empty : "/" + query);
|
||||
|
||||
// Remove leading slash
|
||||
if (fakePath.StartsWith("/"))
|
||||
{
|
||||
fakePath = fakePath.Substring(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace FileFlows.Web;
|
||||
namespace FileFlows.Web.FlowElements;
|
||||
|
||||
using FileFlows.Plugin;
|
||||
using FileFlows.Plugin.Attributes;
|
||||
@@ -22,6 +22,8 @@ public class WebRequest : Node
|
||||
public override string Icon => "fas fa-globe";
|
||||
/// <inheritdoc />
|
||||
public override string HelpUrl => "https://fileflows.com/docs/plugins/web/web-request";
|
||||
/// <inheritdoc />
|
||||
public override string Group => "Web";
|
||||
|
||||
/// <summary>
|
||||
/// Gets or sets the URL
|
||||
|
||||
Reference in New Issue
Block a user