diff --git a/Web/FlowElements/Downloader.cs b/Web/FlowElements/Downloader.cs
index 988f3a30..c1a1045c 100644
--- a/Web/FlowElements/Downloader.cs
+++ b/Web/FlowElements/Downloader.cs
@@ -1,5 +1,6 @@
using System.Net;
using System.Text.RegularExpressions;
+using FileFlows.Web.Helpers;
namespace FileFlows.Web.FlowElements;
@@ -35,8 +36,6 @@ public class Downloader : Node
[TextVariable(1)]
public string Url { get; set; } = null!;
- private static HttpClient? client;
-
///
public override int Execute(NodeParameters args)
{
@@ -48,7 +47,7 @@ public class Downloader : Node
return -1;
}
- var result = Download(args.Logger!, url, args.TempPath, (percent) =>
+ var result = DownloadHelper.Download(args.Logger!, url, args.TempPath, (percent) =>
{
args.PartPercentageUpdate?.Invoke(percent);
});
@@ -64,196 +63,4 @@ public class Downloader : Node
return 1;
}
-
- ///
- /// Performs the download
- ///
- /// the logger to use
- /// the URL to download
- /// the destination path
- /// the percent update
- /// the name of the file if successful, otherwise an error
- public Result Download(ILogger logger, string url, string destinationPath, Action percentUpdate)
- {
- if (client == null)
- {
-
- var handler = new HttpClientHandler
- {
- AllowAutoRedirect = true,
- AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
- };
- client = new HttpClient(handler);
- client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
- }
-
-
- try
- {
- var tempFile = Path.Combine(destinationPath, Guid.NewGuid().ToString());
-
- using (var response = client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead).Result)
- {
- if (!response.IsSuccessStatusCode)
- {
- return Result.Fail($"Failed to download URL: {url}. Status code: {response.StatusCode}");
- }
-
- var contentType = response.Content.Headers.ContentType?.MediaType;
- if(string.IsNullOrEmpty(contentType) == false)
- logger?.ILog("ContentType: " + contentType);
- var fileExtension = GetFileExtensionFromContentType(contentType);
-
- // Check if the URL response contains a filename
- if (response.Content.Headers.ContentDisposition?.FileName != null)
- {
- var sanitizedFileName = SanitizeFileName(response.Content.Headers.ContentDisposition.FileName.Trim('"'));
- tempFile = Path.Combine(destinationPath, sanitizedFileName);
- }
- else
- {
- if (fileExtension == null)
- {
- // Check for common file headers if the content type is not recognized
- var buffer = new byte[512];
- using (var contentStream = response.Content.ReadAsStreamAsync().Result)
- {
- contentStream.Read(buffer, 0, buffer.Length);
- fileExtension = GetFileExtensionFromHeader(buffer) ?? ".html";
- contentStream.Position = 0; // Reset stream position for reading again
- }
- }
-
- tempFile += fileExtension;
- }
-
- using (var contentStream = response.Content.ReadAsStreamAsync().Result)
- using (var fileStream = new FileStream(tempFile, FileMode.Create, FileAccess.Write, FileShare.None))
- {
- var totalBytes = response.Content.Headers.ContentLength ?? -1L;
- var totalRead = 0L;
- var buffer = new byte[8192];
- var isMoreToRead = true;
-
- while (isMoreToRead)
- {
- var read = contentStream.ReadAsync(buffer, 0, buffer.Length).Result;
- if (read == 0)
- {
- isMoreToRead = false;
- continue;
- }
-
- fileStream.WriteAsync(buffer, 0, read).Wait();
- totalRead += read;
-
- if (totalBytes != -1)
- {
- var progress = (float)totalRead / totalBytes;
- percentUpdate?.Invoke(progress);
- }
- }
- }
- }
-
- logger?.ILog($"Downloaded file saved to: {tempFile}");
- return tempFile;
- }
- catch (Exception ex)
- {
- return Result.Fail($"Exception during download: {ex.Message}");
- }
- }
-
-
- ///
- /// Gets the file extension from the content type.
- ///
- /// The content type.
- /// The corresponding file extension, or null if not recognized.
- private string? GetFileExtensionFromContentType(string? contentType)
- {
- switch (contentType)
- {
- case "text/html": return ".html";
- case "image/jpeg": return ".jpg";
- case "image/png": return ".png";
- case "image/gif": return ".gif";
- case "application/pdf": return ".pdf";
- case "application/zip": return ".zip";
- case "application/json": return ".json";
- case "text/plain": return ".txt";
- case "audio/mpeg": return ".mp3";
- case "video/mp4": return ".mp4";
- case "application/vnd.ms-excel": return ".xls";
- case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": return ".xlsx";
- case "application/msword": return ".doc";
- case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": return ".docx";
- case "application/vnd.ms-powerpoint": return ".ppt";
- case "application/vnd.openxmlformats-officedocument.presentationml.presentation": return ".pptx";
- case "application/x-rar-compressed": return ".rar";
- case "application/x-tar": return ".tar";
- case "application/x-7z-compressed": return ".7z";
- // Add more content types and their corresponding file extensions as needed
- default: return null;
- }
- }
-
- ///
- /// Gets the file extension from the file header bytes.
- ///
- /// The first few bytes of the file to identify its type.
- /// The corresponding file extension, or null if not recognized.
- private string? GetFileExtensionFromHeader(byte[] fileHeader)
- {
- // Implement logic to identify file types based on header bytes
- // Example: Check for common file signatures
- if (fileHeader.Length >= 4)
- {
- // PDF file signature
- if (fileHeader[0] == 0x25 && fileHeader[1] == 0x50 && fileHeader[2] == 0x44 && fileHeader[3] == 0x46)
- {
- return ".pdf";
- }
-
- // ZIP file signature
- if (fileHeader[0] == 0x50 && fileHeader[1] == 0x4B &&
- (fileHeader[2] == 0x03 || fileHeader[2] == 0x05 || fileHeader[2] == 0x07) && fileHeader[3] == 0x08)
- {
- return ".zip";
- }
-
- // PNG file signature
- if (fileHeader[0] == 0x89 && fileHeader[1] == 0x50 && fileHeader[2] == 0x4E && fileHeader[3] == 0x47)
- {
- return ".png";
- }
-
- // JPEG file signature
- if (fileHeader[0] == 0xFF && fileHeader[1] == 0xD8 && fileHeader[fileHeader.Length - 2] == 0xFF &&
- fileHeader[fileHeader.Length - 1] == 0xD9)
- {
- return ".jpg";
- }
- }
-
- return null;
- }
-
-
- ///
- /// Sanitizes the filename to ensure it does not contain any path traversal characters or invalid characters.
- ///
- /// The filename to sanitize.
- /// The sanitized filename.
- private string SanitizeFileName(string fileName)
- {
- // Remove any path traversal characters
- fileName = Regex.Replace(fileName, @"\.\.\/|\\|\.\.\\|\/", string.Empty);
-
- // Only allow safe characters in the filename
- fileName = Regex.Replace(fileName, @"[^a-zA-Z0-9_\-\.]", "_");
-
- return fileName;
- }
}
\ No newline at end of file
diff --git a/Web/FlowElements/InputUrl.cs b/Web/FlowElements/InputUrl.cs
new file mode 100644
index 00000000..339c652c
--- /dev/null
+++ b/Web/FlowElements/InputUrl.cs
@@ -0,0 +1,51 @@
+using FileFlows.Web.Helpers;
+
+namespace FileFlows.Web.FlowElements;
+
+/// <summary>
+/// Flow input element whose working file is a URL.
+/// The URL is published as the "Url" variable and, when <see cref="Download"/> is set,
+/// fetched into the temporary path so the downloaded file becomes the working file.
+/// </summary>
+public class InputUrl : Node
+{
+    /// <inheritdoc />
+    public override int Outputs => 1;
+
+    /// <inheritdoc />
+    public override FlowElementType Type => FlowElementType.Input;
+
+    /// <inheritdoc />
+    public override string Icon => "fas fa-globe";
+
+    /// <inheritdoc />
+    public override string HelpUrl => "https://fileflows.com/docs/plugins/web/input-url";
+
+    /// <inheritdoc />
+    public override string Group => "Web";
+
+    /// <summary>
+    /// Gets or sets if this should download the URL
+    /// </summary>
+    [Boolean(1)]
+    public bool Download { get; set; }
+
+    /// <inheritdoc />
+    public override int Execute(NodeParameters args)
+    {
+        // The working file of this input is the URL itself; expose it to later flow elements.
+        string sourceUrl = args.WorkingFile;
+        args.Variables["Url"] = sourceUrl;
+
+        if (!Download)
+        {
+            return 1;
+        }
+
+        var download = DownloadHelper.Download(
+            args.Logger!,
+            sourceUrl,
+            args.TempPath,
+            percent => args.PartPercentageUpdate?.Invoke(percent));
+
+        if (download.Failed(out var reason))
+        {
+            args.FailureReason = reason;
+            args.Logger?.ELog(reason);
+            return -1;
+        }
+
+        // From here on the flow operates on the downloaded file rather than the URL.
+        args.SetWorkingFile(download.Value);
+        return 1;
+    }
+}
\ No newline at end of file
diff --git a/Web/FlowElements/Parsers/HtmlImageParser.cs b/Web/FlowElements/Parsers/HtmlImageParser.cs
index 072a6cc5..503200f3 100644
--- a/Web/FlowElements/Parsers/HtmlImageParser.cs
+++ b/Web/FlowElements/Parsers/HtmlImageParser.cs
@@ -1,5 +1,6 @@
using System.Net;
using System.Text.RegularExpressions;
+using HtmlAgilityPack;
namespace FileFlows.Web.FlowElements;
@@ -33,7 +34,10 @@ public class HtmlImageParser : HtmlParser
protected override string VariableName => "ImageUrls";
///
- protected override List ParseHtml(ILogger? logger, string html)
+ // Delegates to the shared ParseHtmlForUrls helper: img elements, reading their "src" and "content" attributes.
+ protected override List ParseHtml(NodeParameters args, string html)
+ => ParseHtmlForUrls(args, html, ["img"], ["src", "content"]);
+
+ private List ParseHtmlOld(ILogger? logger, string html)
{
var imageUrls = new List();
var regex = new Regex("
]+src=(\"([^\"]*)\"|'([^']*)'|([^\\s>]+))", RegexOptions.IgnoreCase);
diff --git a/Web/FlowElements/Parsers/HtmlLinkParser.cs b/Web/FlowElements/Parsers/HtmlLinkParser.cs
index 7e63d101..8211f7d8 100644
--- a/Web/FlowElements/Parsers/HtmlLinkParser.cs
+++ b/Web/FlowElements/Parsers/HtmlLinkParser.cs
@@ -33,18 +33,53 @@ public class HtmlLinkParser : HtmlParser
protected override string VariableName => "Links";
///
- protected override List ParseHtml(ILogger? logger, string html)
+ // Delegates to the shared ParseHtmlForUrls helper: anchor (a) elements, reading their "href" attribute.
+ protected override List ParseHtml(NodeParameters args, string html)
+ => ParseHtmlForUrls(args, html, ["a"], ["href"]);
+
+ private List ParseHtmlOld(ILogger? logger, string html)
{
var urls = new List();
var regex = new Regex("]+href=(\"([^\"]*)\"|'([^']*)'|([^\\s>]+))", RegexOptions.IgnoreCase);
var matches = regex.Matches(html);
+ string? baseUrl = null;
+ if (Variables.TryGetValue("Url", out var oUrl) && oUrl is string sBaseUrl)
+ {
+ try
+ {
+ var uri = new Uri(sBaseUrl);
+
+ // Get the absolute path without the query parameters
+ baseUrl = uri.GetLeftPart(UriPartial.Path);
+
+ // Ensure the path ends with a slash
+ if (baseUrl.EndsWith("/") == false)
+ baseUrl += "/";
+
+ // Use the folderPath as needed
+ logger?.ILog("Base URL: " + baseUrl);
+ }
+ catch (Exception)
+ {
+ // Ignored
+ }
+ }
+
foreach (Match match in matches)
{
if (match.Groups.Count > 1)
{
var url = match.Groups[1].Value.TrimStart('"', '\'').TrimEnd('"', '\'');
- urls.Add(WebUtility.HtmlDecode(url));
+ url = WebUtility.HtmlDecode(url);
+ if (baseUrl != null && Regex.IsMatch(url, "^http(s)://", RegexOptions.IgnoreCase) == false)
+ {
+ logger?.ILog("Relative URL: " + url);
+ if (url.StartsWith("/"))
+ url = url[1..];
+ url = baseUrl + url;
+ logger?.ILog("Absolute URL: " + url);
+ }
+ urls.Add(url);
}
}
diff --git a/Web/FlowElements/Parsers/HtmlParser.cs b/Web/FlowElements/Parsers/HtmlParser.cs
index 8163fe68..f502328d 100644
--- a/Web/FlowElements/Parsers/HtmlParser.cs
+++ b/Web/FlowElements/Parsers/HtmlParser.cs
@@ -1,5 +1,6 @@
using System.Net;
using System.Text.RegularExpressions;
+using HtmlAgilityPack;
namespace FileFlows.Web.FlowElements;
@@ -50,7 +51,7 @@ public abstract class HtmlParser : Node
var html = result.Value;
- var list = ParseHtml(args.Logger, html);
+ var list = ParseHtml(args, html);
var pattern = args.ReplaceVariables(Pattern ?? string.Empty, stripMissing: true);
if (string.IsNullOrWhiteSpace(pattern) == false)
@@ -87,7 +88,8 @@ public abstract class HtmlParser : Node
args.Logger?.ILog("Found item: " + item);
}
- args.Variables[VariableName] = list;
+ if(string.IsNullOrWhiteSpace(VariableName) == false)
+ args.Variables[VariableName] = list;
// current list is the default current list FileFLows will use in a list flow element if no list is specified
args.Variables["CurrentList"] = list;
@@ -97,10 +99,10 @@ public abstract class HtmlParser : Node
///
/// Parses the HTML
///
- /// the logger to use
+ /// the node parameters
/// the HTML to parse
/// the items found while pasrsing
- protected abstract List ParseHtml(ILogger? logger, string html);
+ protected abstract List ParseHtml(NodeParameters args, string html);
///
/// Gets the file content
@@ -138,4 +140,60 @@ public abstract class HtmlParser : Node
return File.ReadAllText(localFileResult.Value);
}
+
+
+    /// <summary>
+    /// Parses the HTML for the specified tags and attributes.
+    /// Values that already start with http:// or https:// are returned as they are (entity-decoded and trimmed);
+    /// any other value is resolved against the "Url" variable when that variable holds a valid absolute URI,
+    /// and is skipped otherwise.
+    /// </summary>
+    /// <param name="args">the node parameters</param>
+    /// <param name="html">the HTML to parse</param>
+    /// <param name="tags">the HTML tags to look for</param>
+    /// <param name="attributes">the attributes to look for</param>
+    /// <returns>a list of matching URLs</returns>
+    protected List<string> ParseHtmlForUrls(NodeParameters args, string html, string[] tags, string[] attributes)
+    {
+        var htmlDoc = new HtmlDocument();
+        htmlDoc.LoadHtml(html);
+
+        Uri? baseUri = null;
+        if (args.Variables.TryGetValue("Url", out var oUrl) && oUrl is string sBaseUrl)
+        {
+            // TryCreate instead of the constructor: a "Url" variable that is not an absolute URI
+            // must not abort the whole parse with a UriFormatException.
+            if (Uri.TryCreate(sBaseUrl, UriKind.Absolute, out var parsedBase))
+            {
+                baseUri = parsedBase;
+                args.Logger?.ILog("Base URL: " + baseUri);
+            }
+            else
+            {
+                args.Logger?.ILog("Base URL is not a valid absolute URL, relative URLs will be skipped: " + sBaseUrl);
+            }
+        }
+
+        List<string> results = new();
+
+        foreach (var tag in tags)
+        {
+            // SelectNodes returns null (not an empty collection) when nothing matches
+            var nodes = htmlDoc.DocumentNode.SelectNodes($"//{tag}");
+            if (nodes == null)
+                continue;
+
+            foreach (var ele in nodes)
+            {
+                foreach (var att in attributes)
+                {
+                    var rawValue = ele.GetAttributeValue(att, string.Empty);
+                    if (string.IsNullOrWhiteSpace(rawValue))
+                        continue;
+
+                    // Attribute values come back entity-encoded (e.g. "&amp;" in query strings); decode them
+                    var value = WebUtility.HtmlDecode(rawValue).Trim();
+
+                    // Require the full scheme prefix so relative paths such as "httpdocs/a.png" are not mistaken for absolute URLs
+                    if (value.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
+                        value.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
+                    {
+                        results.Add(value);
+                        continue;
+                    }
+
+                    if (baseUri == null)
+                        continue;
+
+                    // Values with another scheme (mailto:, javascript:, data:) fail the relative parse and are skipped
+                    if (Uri.TryCreate(value, UriKind.Relative, out var relativeUri) &&
+                        Uri.TryCreate(baseUri, relativeUri, out var absoluteUri))
+                    {
+                        results.Add(absoluteUri.ToString());
+                    }
+                }
+            }
+        }
+
+        return results;
+    }
}
\ No newline at end of file
diff --git a/Web/FlowElements/UrlToPath.cs b/Web/FlowElements/UrlToPath.cs
deleted file mode 100644
index 3ac8bae0..00000000
--- a/Web/FlowElements/UrlToPath.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-namespace FileFlows.Web.FlowElements;
-
-///
-///
-///
-public class UrlToRelativePath : Node
-{
- public override int Inputs => 1;
- public override int Outputs => 2;
- public override string Icon =>
-
- ///
- /// Gets or sets the URL to get a path for
- ///
- [TextVariable(1)]
- public string Url { get; set; } = null!;
-
- public override int Execute(NodeParameters args)
- {
-
- // Create a Uri object from the URL
- var uri = new Uri(x);
-
- // Get the path without the query
- var path = uri.AbsolutePath;
-
- // Get the query part and replace the '=' and '&' with '-'
- var query = uri.Query.TrimStart('?').Replace('=', '-').Replace('&', '/');
-
- // Combine the path and the modified query
- var fakePath = path.TrimEnd('/') + (string.IsNullOrEmpty(query) ? string.Empty : "/" + query);
-
- // Remove leading slash
- if (fakePath.StartsWith("/"))
- {
- fakePath = fakePath.Substring(1);
- }
- }
-}
\ No newline at end of file
diff --git a/Web/FlowElements/WebRequest.cs b/Web/FlowElements/WebRequest.cs
index 72172ce4..8d7bd177 100644
--- a/Web/FlowElements/WebRequest.cs
+++ b/Web/FlowElements/WebRequest.cs
@@ -1,4 +1,4 @@
-namespace FileFlows.Web;
+namespace FileFlows.Web.FlowElements;
using FileFlows.Plugin;
using FileFlows.Plugin.Attributes;
@@ -22,6 +22,8 @@ public class WebRequest : Node
public override string Icon => "fas fa-globe";
///
public override string HelpUrl => "https://fileflows.com/docs/plugins/web/web-request";
+ /// <inheritdoc />
+ public override string Group => "Web";
///
/// Gets or sets the URL
diff --git a/Web/Helpers/DownloadHelper.cs b/Web/Helpers/DownloadHelper.cs
new file mode 100644
index 00000000..7e0e5884
--- /dev/null
+++ b/Web/Helpers/DownloadHelper.cs
@@ -0,0 +1,207 @@
+using System.Net;
+using System.Text.RegularExpressions;
+
+namespace FileFlows.Web.Helpers;
+
+/// <summary>
+/// Helper to do a download
+/// </summary>
+public static class DownloadHelper
+{
+    /// <summary>
+    /// The shared HttpClient, built once by the type initializer so concurrent first calls cannot race
+    /// </summary>
+    private static readonly HttpClient client = CreateClient();
+
+    /// <summary>
+    /// Creates the HttpClient used for every download
+    /// </summary>
+    /// <returns>a client that follows redirects, decompresses gzip/deflate responses and sends a browser user agent</returns>
+    private static HttpClient CreateClient()
+    {
+        var handler = new HttpClientHandler
+        {
+            AllowAutoRedirect = true,
+            AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
+        };
+        var httpClient = new HttpClient(handler);
+        httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
+        return httpClient;
+    }
+
+    /// <summary>
+    /// Performs the download
+    /// </summary>
+    /// <param name="logger">the logger to use</param>
+    /// <param name="url">the URL to download</param>
+    /// <param name="destinationPath">the destination path</param>
+    /// <param name="percentUpdate">the percent update, invoked with totalRead / totalBytes when the content length is known</param>
+    /// <returns>the name of the file if successful, otherwise an error</returns>
+    public static Result<string> Download(ILogger logger, string url, string destinationPath, Action<float> percentUpdate)
+    {
+        try
+        {
+            // GetAwaiter().GetResult() rethrows the real exception, so the failure message below is not
+            // just the AggregateException wrapper text ("One or more errors occurred")
+            using var response = client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead).GetAwaiter().GetResult();
+            if (!response.IsSuccessStatusCode)
+            {
+                return Result<string>.Fail($"Failed to download URL: {url}. Status code: {response.StatusCode}");
+            }
+
+            var contentType = response.Content.Headers.ContentType?.MediaType;
+            if (string.IsNullOrEmpty(contentType) == false)
+                logger?.ILog("ContentType: " + contentType);
+
+            // The body is a forward-only network stream: open it exactly once and never seek it
+            using var contentStream = response.Content.ReadAsStreamAsync().GetAwaiter().GetResult();
+
+            // Bytes consumed while sniffing the file type; they are written to the file before the rest of the body
+            var sniffed = new byte[512];
+            var sniffedLength = 0;
+
+            string tempFile;
+            var suggestedName = GetSuggestedFileName(response);
+            if (suggestedName != null)
+            {
+                tempFile = Path.Combine(destinationPath, suggestedName);
+            }
+            else
+            {
+                var fileExtension = GetFileExtensionFromContentType(contentType);
+                if (fileExtension == null)
+                {
+                    // Check for common file headers if the content type is not recognized
+                    sniffedLength = ReadFully(contentStream, sniffed);
+                    fileExtension = GetFileExtensionFromHeader(sniffed, sniffedLength) ?? ".html";
+                }
+
+                tempFile = Path.Combine(destinationPath, Guid.NewGuid().ToString() + fileExtension);
+            }
+
+            var totalBytes = response.Content.Headers.ContentLength ?? -1L;
+            var totalRead = 0L;
+
+            using (var fileStream = new FileStream(tempFile, FileMode.Create, FileAccess.Write, FileShare.None))
+            {
+                // Writes a chunk and reports progress when the total size is known
+                void Store(byte[] data, int count)
+                {
+                    fileStream.Write(data, 0, count);
+                    totalRead += count;
+                    if (totalBytes > 0)
+                        percentUpdate?.Invoke((float)totalRead / totalBytes);
+                }
+
+                if (sniffedLength > 0)
+                    Store(sniffed, sniffedLength);
+
+                var buffer = new byte[8192];
+                int read;
+                while ((read = contentStream.Read(buffer, 0, buffer.Length)) > 0)
+                {
+                    Store(buffer, read);
+                }
+            }
+
+            logger?.ILog($"Downloaded file saved to: {tempFile}");
+            return tempFile;
+        }
+        catch (Exception ex)
+        {
+            return Result<string>.Fail($"Exception during download: {ex.Message}");
+        }
+    }
+
+    /// <summary>
+    /// Gets the sanitized filename suggested by the Content-Disposition header of a response
+    /// </summary>
+    /// <param name="response">the response to inspect</param>
+    /// <returns>the sanitized filename, or null if none was sent or nothing usable is left after sanitizing</returns>
+    private static string? GetSuggestedFileName(HttpResponseMessage response)
+    {
+        var disposition = response.Content.Headers.ContentDisposition;
+        // filename* (RFC 5987 encoded) takes precedence over the plain filename when both are present
+        var rawName = disposition?.FileNameStar ?? disposition?.FileName;
+        if (rawName == null)
+            return null;
+
+        var sanitized = SanitizeFileName(rawName.Trim('"'));
+
+        // "", "." and ".." would make the target path a directory rather than a file
+        return sanitized.Trim('.').Length == 0 ? null : sanitized;
+    }
+
+    /// <summary>
+    /// Reads from the stream until the buffer is full or the stream ends
+    /// </summary>
+    /// <param name="stream">the stream to read from</param>
+    /// <param name="buffer">the buffer to fill</param>
+    /// <returns>the number of bytes placed in the buffer</returns>
+    private static int ReadFully(Stream stream, byte[] buffer)
+    {
+        var total = 0;
+        while (total < buffer.Length)
+        {
+            // A single Read may return fewer bytes than requested, so keep going until full or end of stream
+            var read = stream.Read(buffer, total, buffer.Length - total);
+            if (read == 0)
+                break;
+            total += read;
+        }
+
+        return total;
+    }
+
+    /// <summary>
+    /// Gets the file extension from the content type.
+    /// </summary>
+    /// <param name="contentType">The content type.</param>
+    /// <returns>The corresponding file extension, or null if not recognized.</returns>
+    private static string? GetFileExtensionFromContentType(string? contentType)
+    {
+        // Media types are case-insensitive, normalise before matching
+        switch (contentType?.Trim().ToLowerInvariant())
+        {
+            case "text/html": return ".html";
+            case "image/jpeg": return ".jpg";
+            case "image/png": return ".png";
+            case "image/gif": return ".gif";
+            case "application/pdf": return ".pdf";
+            case "application/zip": return ".zip";
+            case "application/json": return ".json";
+            case "text/plain": return ".txt";
+            case "audio/mpeg": return ".mp3";
+            case "video/mp4": return ".mp4";
+            case "application/vnd.ms-excel": return ".xls";
+            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": return ".xlsx";
+            case "application/msword": return ".doc";
+            case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": return ".docx";
+            case "application/vnd.ms-powerpoint": return ".ppt";
+            case "application/vnd.openxmlformats-officedocument.presentationml.presentation": return ".pptx";
+            case "application/x-rar-compressed": return ".rar";
+            case "application/x-tar": return ".tar";
+            case "application/x-7z-compressed": return ".7z";
+            // Add more content types and their corresponding file extensions as needed
+            default: return null;
+        }
+    }
+
+    /// <summary>
+    /// Gets the file extension from the file header bytes.
+    /// </summary>
+    /// <param name="fileHeader">The buffer holding the first few bytes of the file.</param>
+    /// <param name="length">The number of valid bytes in the buffer.</param>
+    /// <returns>The corresponding file extension, or null if not recognized.</returns>
+    private static string? GetFileExtensionFromHeader(byte[] fileHeader, int length)
+    {
+        // PDF file signature: "%PDF"
+        if (HasSignature(fileHeader, length, 0x25, 0x50, 0x44, 0x46))
+            return ".pdf";
+
+        // ZIP file signatures: local file header, empty archive, spanned archive
+        if (HasSignature(fileHeader, length, 0x50, 0x4B, 0x03, 0x04) ||
+            HasSignature(fileHeader, length, 0x50, 0x4B, 0x05, 0x06) ||
+            HasSignature(fileHeader, length, 0x50, 0x4B, 0x07, 0x08))
+            return ".zip";
+
+        // PNG file signature
+        if (HasSignature(fileHeader, length, 0x89, 0x50, 0x4E, 0x47))
+            return ".png";
+
+        // JPEG file signature: start-of-image marker followed by the next marker prefix.
+        // The end-of-image marker sits at the end of the file, not of this buffer, so it cannot be checked here.
+        if (HasSignature(fileHeader, length, 0xFF, 0xD8, 0xFF))
+            return ".jpg";
+
+        return null;
+    }
+
+    /// <summary>
+    /// Tests if the data starts with the given signature
+    /// </summary>
+    /// <param name="data">the buffer to test</param>
+    /// <param name="length">the number of valid bytes in the buffer</param>
+    /// <param name="signature">the expected leading bytes</param>
+    /// <returns>true if the first bytes match the signature</returns>
+    private static bool HasSignature(byte[] data, int length, params byte[] signature)
+    {
+        if (length < signature.Length)
+            return false;
+
+        for (var i = 0; i < signature.Length; i++)
+        {
+            if (data[i] != signature[i])
+                return false;
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Sanitizes the filename to ensure it does not contain any path traversal characters or invalid characters.
+    /// </summary>
+    /// <param name="fileName">The filename to sanitize.</param>
+    /// <returns>The sanitized filename.</returns>
+    private static string SanitizeFileName(string fileName)
+    {
+        // Remove any path traversal characters
+        fileName = Regex.Replace(fileName, @"\.\.\/|\\|\.\.\\|\/", string.Empty);
+
+        // Only allow safe characters in the filename
+        fileName = Regex.Replace(fileName, @"[^a-zA-Z0-9_\-\.]", "_");
+
+        return fileName;
+    }
+}
\ No newline at end of file
diff --git a/Web/Helpers/HtmlHelper.cs b/Web/Helpers/HtmlHelper.cs
new file mode 100644
index 00000000..d37a54e7
--- /dev/null
+++ b/Web/Helpers/HtmlHelper.cs
@@ -0,0 +1,57 @@
+using HtmlAgilityPack;
+
+namespace FileFlows.Web.Helpers;
+
+/// <summary>
+/// HTML Helper
+/// </summary>
+public class HtmlHelper
+{
+    /// <summary>
+    /// Converts all relative URLs in the provided HTML content to absolute URLs based on the given base URL.
+    /// This method processes the href attribute of &lt;a&gt; tags and the src and content attributes of &lt;img&gt; tags.
+    /// </summary>
+    /// <param name="htmlContent">The HTML content containing relative URLs.</param>
+    /// <param name="baseUrl">The base URL to convert relative URLs to absolute URLs.</param>
+    /// <returns>The HTML content with all relative URLs converted to absolute URLs.</returns>
+    /// <exception cref="UriFormatException">Thrown when the base URL is not in a valid format.</exception>
+    public static string ConvertRelativeUrlsToAbsolute(string htmlContent, string baseUrl)
+    {
+        var htmlDoc = new HtmlDocument();
+        htmlDoc.LoadHtml(htmlContent);
+
+        var uri = new Uri(baseUrl);
+
+        // Convert relative URLs in <a> tags (href attribute)
+        MakeAttributesAbsolute(htmlDoc, "//a[@href]", uri, "href");
+
+        // Convert relative URLs in <img> tags (src and content attributes)
+        MakeAttributesAbsolute(htmlDoc, "//img[@src or @content]", uri, "src", "content");
+
+        return htmlDoc.DocumentNode.OuterHtml;
+    }
+
+    /// <summary>
+    /// Rewrites the given attributes of every node matching the XPath so relative values become absolute
+    /// </summary>
+    /// <param name="htmlDoc">the document to update in place</param>
+    /// <param name="xpath">the XPath selecting the nodes to process</param>
+    /// <param name="baseUri">the base URI relative values are resolved against</param>
+    /// <param name="attributes">the names of the attributes to rewrite</param>
+    private static void MakeAttributesAbsolute(HtmlDocument htmlDoc, string xpath, Uri baseUri, params string[] attributes)
+    {
+        // SelectNodes returns null (not an empty collection) when nothing matches
+        var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
+        if (nodes == null)
+            return;
+
+        foreach (var node in nodes)
+        {
+            foreach (var attribute in attributes)
+            {
+                // Only rewrite attributes that exist: a missing attribute would otherwise read as an empty
+                // string, parse as a relative URI and be added to the element with the base URL as its value
+                if (node.Attributes[attribute] == null)
+                    continue;
+
+                var value = node.GetAttributeValue(attribute, string.Empty);
+                if (Uri.TryCreate(value, UriKind.Relative, out var relativeUri))
+                {
+                    var absoluteUri = new Uri(baseUri, relativeUri);
+                    node.SetAttributeValue(attribute, absoluteUri.ToString());
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Web/Web.csproj b/Web/Web.csproj
index f4ee527b..8a7d6322 100644
--- a/Web/Web.csproj
+++ b/Web/Web.csproj
@@ -33,5 +33,8 @@
..\FileFlows.Plugin.dll
+
+
+
diff --git a/Web/i18n/en.json b/Web/i18n/en.json
index 35c462a9..1a60fbaa 100644
--- a/Web/i18n/en.json
+++ b/Web/i18n/en.json
@@ -40,6 +40,9 @@
"Pattern-Help": "An optional case insensitive regular expression to match the results against, only results matching this will be accepted."
}
},
+ "InputUrl": {
+ "Label": "Input URL"
+ },
"WebRequest": {
"Description": "Allows you to send a web request",
"Outputs": {