using System.Net;
using System.Text.RegularExpressions;
using FileFlows.Plugin.Helpers;

namespace FileFlows.Web.Helpers;

/// <summary>
/// Helper to do a download
/// </summary>
public static class DownloadHelper
{
    /// <summary>
    /// Size, in bytes, of the buffer used when copying the response body to disk
    /// </summary>
    private const int BufferSize = 8192;

    /// <summary>
    /// The HttpClient, created on the first download and reused afterwards
    /// </summary>
    private static HttpClient? client;

    /// <summary>
    /// Performs the download
    /// </summary>
    /// <param name="logger">the logger to use, may be null</param>
    /// <param name="url">the URL to download</param>
    /// <param name="destinationPath">the directory the file is written into</param>
    /// <param name="percentUpdate">
    /// optional callback invoked after every chunk with bytesRead / Content-Length
    /// (a fraction, not multiplied by 100); it is not invoked when the response has no Content-Length
    /// </param>
    /// <returns>the full path of the downloaded file if successful, otherwise an error</returns>
    public static Result Download(ILogger logger, string url, string destinationPath, Action<float> percentUpdate)
    {
        client ??= CreateClient();

        try
        {
            logger?.ILog("Downloading: " + url);
            string filename = GetFilenameFromUrl(logger, url)?.EmptyAsNull() ?? Guid.NewGuid().ToString();
            logger?.ILog("Filename: " + filename);
            var tempFile = Path.Combine(destinationPath, filename);
            logger?.ILog("Temp File: " + tempFile);

            // GetAwaiter().GetResult() rethrows the real exception rather than an AggregateException,
            // so the failure message built in the catch block below names the actual cause
            using (var response = client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead)
                       .GetAwaiter().GetResult())
            {
                if (!response.IsSuccessStatusCode)
                {
                    return Result.Fail($"Failed to download URL: {url}. Status code: {response.StatusCode}");
                }

                var contentType = response.Content.Headers.ContentType?.MediaType;
                if (!string.IsNullOrEmpty(contentType))
                {
                    logger?.ILog("ContentType: " + contentType);
                }

                var fileExtension = GetFileExtensionFromContentType(contentType);

                // Check if the URL response contains a filename
                var dispositionName = response.Content.Headers.ContentDisposition?.FileName;
                var sanitizedFileName = dispositionName == null
                    ? string.Empty
                    : SanitizeFileName(dispositionName.Trim('"'));

                if (sanitizedFileName.Length > 0)
                {
                    tempFile = Path.Combine(destinationPath, sanitizedFileName);
                }
                else if (!string.IsNullOrWhiteSpace(fileExtension))
                {
                    // No usable server supplied name: keep the URL derived name and
                    // make its extension agree with the reported content type
                    if (!string.IsNullOrWhiteSpace(FileHelper.GetExtension(tempFile)))
                    {
                        tempFile = FileHelper.ChangeExtension(tempFile, fileExtension);
                    }
                    else
                    {
                        tempFile += fileExtension;
                    }
                }

                using (var contentStream = response.Content.ReadAsStreamAsync().GetAwaiter().GetResult())
                using (var fileStream = new FileStream(tempFile, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    var totalBytes = response.Content.Headers.ContentLength ?? -1L;
                    var totalRead = 0L;
                    var buffer = new byte[BufferSize];

                    int read;
                    while ((read = contentStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fileStream.Write(buffer, 0, read);
                        totalRead += read;

                        // Progress can only be computed when the server reported a length
                        if (totalBytes > 0)
                        {
                            percentUpdate?.Invoke((float)totalRead / totalBytes);
                        }
                    }
                }
            }

            logger?.ILog($"Downloaded file saved to: {tempFile}");
            return tempFile;
        }
        catch (Exception ex)
        {
            return Result.Fail($"Exception during download: {ex.Message}{Environment.NewLine}{ex.StackTrace}");
        }
    }

    /// <summary>
    /// Creates the HttpClient used for downloads: follows redirects, transparently
    /// decompresses gzip/deflate responses and sends a desktop browser user agent
    /// </summary>
    /// <returns>the configured client</returns>
    private static HttpClient CreateClient()
    {
        var handler = new HttpClientHandler
        {
            AllowAutoRedirect = true,
            AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
        };
        var httpClient = new HttpClient(handler);
        httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
        return httpClient;
    }

    /// <summary>
    /// Extracts the filename from a URL, excluding any query parameters.
    /// </summary>
    /// <param name="logger">the logger to use, may be null</param>
    /// <param name="url">The URL from which to extract the filename.</param>
    /// <returns>The filename if present; otherwise, an empty string.</returns>
    public static string GetFilenameFromUrl(ILogger logger, string url)
    {
        try
        {
            // AbsolutePath excludes the query string and fragment
            var lastSegment = Path.GetFileName(new Uri(url).AbsolutePath);

            // If the last segment contains a '.', it's likely a file, otherwise, it's a directory
            if (lastSegment.Contains('.'))
            {
                return lastSegment;
            }
        }
        catch (Exception ex)
        {
            // An unparsable URL is not fatal, the caller falls back to a generated name
            logger?.WLog($"Error parsing URL: {ex.Message}");
        }

        return string.Empty;
    }

    /// <summary>
    /// Gets the file extension from the content type.
    /// </summary>
    /// <param name="contentType">The content type.</param>
    /// <returns>The corresponding file extension, or null if not recognized.</returns>
    private static string? GetFileExtensionFromContentType(string? contentType) => contentType switch
    {
        "text/html" => ".html",
        "image/jpeg" => ".jpg",
        "image/png" => ".png",
        "image/gif" => ".gif",
        "image/webp" => ".webp",
        "application/pdf" => ".pdf",
        "application/zip" => ".zip",
        "application/json" => ".json",
        "text/plain" => ".txt",
        "audio/mpeg" => ".mp3",
        "video/mp4" => ".mp4",
        "application/vnd.ms-excel" => ".xls",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => ".xlsx",
        "application/msword" => ".doc",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => ".docx",
        "application/vnd.ms-powerpoint" => ".ppt",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation" => ".pptx",
        "application/x-rar-compressed" => ".rar",
        "application/x-tar" => ".tar",
        "application/x-7z-compressed" => ".7z",
        // Add more content types and their corresponding file extensions as needed
        _ => null
    };

    /// <summary>
    /// Gets the file extension from the file header bytes.
    /// </summary>
    /// <param name="fileHeader">The first few bytes of the file to identify its type.</param>
    /// <returns>The corresponding file extension, or null if not recognized.</returns>
    private static string? GetFileExtensionFromHeader(byte[] fileHeader)
    {
        // Every signature below is matched against the first four bytes
        if (fileHeader is not { Length: >= 4 })
        {
            return null;
        }

        byte b0 = fileHeader[0], b1 = fileHeader[1], b2 = fileHeader[2], b3 = fileHeader[3];

        // PDF file signature: "%PDF"
        if (b0 == 0x25 && b1 == 0x50 && b2 == 0x44 && b3 == 0x46)
        {
            return ".pdf";
        }

        // ZIP file signature: "PK" followed by 03 04 (local file header),
        // 05 06 (empty archive) or 07 08 (spanned archive)
        if (b0 == 0x50 && b1 == 0x4B &&
            ((b2 == 0x03 && b3 == 0x04) || (b2 == 0x05 && b3 == 0x06) || (b2 == 0x07 && b3 == 0x08)))
        {
            return ".zip";
        }

        // PNG file signature: 0x89 "PNG"
        if (b0 == 0x89 && b1 == 0x50 && b2 == 0x4E && b3 == 0x47)
        {
            return ".png";
        }

        // JPEG file signature: start-of-image marker FF D8 followed by the FF that opens the next marker.
        // The end-of-image marker (FF D9) sits at the end of the file, so it cannot be tested on a header.
        if (b0 == 0xFF && b1 == 0xD8 && b2 == 0xFF)
        {
            return ".jpg";
        }

        return null;
    }

    /// <summary>
    /// Sanitizes the filename to ensure it does not contain any path traversal characters or invalid characters.
    /// </summary>
    /// <param name="fileName">The filename to sanitize.</param>
    /// <returns>
    /// The sanitized filename, or an empty string when nothing usable is left
    /// (the result would be empty or consist only of dots, such as "." or "..").
    /// </returns>
    private static string SanitizeFileName(string fileName)
    {
        // Remove any path traversal characters
        fileName = Regex.Replace(fileName, @"\.\.\/|\\|\.\.\\|\/", string.Empty);

        // Only allow safe characters in the filename
        fileName = Regex.Replace(fileName, @"[^a-zA-Z0-9_\-\.]", "_");

        // "", "." and ".." would make Path.Combine point at a directory instead of a file
        return fileName.Trim('.').Length == 0 ? string.Empty : fileName;
    }
}