From 353f3cf45df13275d76b0b12fcabdccad622f040 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 09:59:32 +0200 Subject: [PATCH] Sparse file optimizations --- libs/lume/scripts/ghcr/push-ghcr.sh | 389 +++++- .../ImageContainerRegistry.swift | 1238 ++++++++++++++--- libs/lume/src/Errors/Errors.swift | 68 +- 3 files changed, 1445 insertions(+), 250 deletions(-) diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index fbe4ab7e..33874122 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -9,6 +9,8 @@ folder_path="" image_name="" image_versions="" chunk_size="500M" # Default chunk size for splitting large files +# Define the OCI media type for the compressed disk layer +oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format # Parse the command line arguments while [[ $# -gt 0 ]]; do @@ -41,6 +43,7 @@ while [[ $# -gt 0 ]]; do echo " --image-name : Name of the image to publish (required)" echo " --image-versions : Comma separated list of versions of the image to publish (required)" echo " --chunk-size : Size of chunks for large files (e.g., 500M, default: 500M)" + echo "Note: The script will automatically resume from the last attempt if available" exit 0 ;; *) @@ -69,7 +72,7 @@ if [[ ! -d "$folder_path" ]]; then fi # Check and install required tools -for tool in "oras" "split" "pv" "gzip"; do +for tool in "oras" "split" "pv" "jq"; do if ! command -v "$tool" &> /dev/null; then echo "$tool is not installed. Installing using Homebrew..." if ! command -v brew &> /dev/null; then @@ -80,80 +83,252 @@ for tool in "oras" "split" "pv" "gzip"; do fi done +# Check if Apple Archive is available +if ! command -v compression_tool &> /dev/null; then + echo "Error: Apple Archive (compression_tool) is required but not found" + echo "This script requires macOS with Apple Archive support" + exit 1 +fi + +echo "Apple Archive detected - will use for optimal sparse file handling" +compressed_ext=".aa" + # Authenticate with GitHub Container Registry echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin # Use the source folder path as the working directory and get its absolute path work_dir=$(cd "$folder_path" && pwd) -echo "Working directory (persistent cache): $work_dir" +echo "Working directory: $work_dir" -# Change to the working directory -cd "$work_dir" +# Function to find the most recent cache directory +find_latest_cache() { + local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1) + if [ -n "$latest_cache" ]; then + echo "$latest_cache" + else + echo "" + fi +} + +# Function to check if a cache directory is valid for resuming +is_valid_cache() { + local cache_dir="$1" + # Check if it contains the necessary files + [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ + [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1 +} + +# Always try to find and use an existing cache +existing_cache=$(find_latest_cache) +if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then + cache_dir="$existing_cache" + + # Check if the cache contains old gzip format + if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then + echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format." 
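(If a legacy cache is too expensive to rebuild, the artifact can in principle be migrated by hand before rerunning — a sketch assuming a single unsplit disk.img.gz; split .gz.part.* caches would need their parts concatenated first, and the cache path below is a placeholder:

    cache=".ghcr_cache_YYYYMMDD_HHMMSS"
    gunzip "$cache/disk.img.gz"                # recover the raw image
    compression_tool -encode -i "$cache/disk.img" -o "$cache/disk.img.aa" -a lzfse
    rm "$cache/disk.img"                       # keep only the Apple Archive artifact
)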
+        echo "Please delete the cache directory and start fresh: $cache_dir"
+        exit 1
+    fi
+
+    echo "Resuming from existing cache: $cache_dir"
+else
+    echo "No valid cache found. Starting fresh."
+    cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)"
+    mkdir -p "$cache_dir"
+fi
+
+echo "Using cache directory: $cache_dir"
+
+# Display space information
+echo "=== DISK SPACE INFORMATION ==="
+df -h "$cache_dir" | head -1
+df -h "$cache_dir" | grep -v "Filesystem"
+echo
+
+# Change to the cache directory
+cd "$cache_dir"
 
 files=() # Initialize files array here
 
+# Determine the uncompressed disk.img size up front so the config.json
+# annotation below can embed it (disk.img itself is processed later)
+original_disk_size=""
+if [ -f "$folder_path/disk.img" ]; then
+    original_disk_size=$(stat -f%z "$folder_path/disk.img")
+fi
+
-# Copy config.json if it exists
-if [ -f "$folder_path/config.json" ]; then
-    echo "Copying config.json..."
-    cp "$folder_path/config.json" config.json
+# Function to check if a version was already pushed
+version_pushed() {
+    local version="$1"
+    local version_file="$cache_dir/.pushed_$version"
+    [ -f "$version_file" ]
+}
+
+# Function to mark a version as pushed
+mark_version_pushed() {
+    local version="$1"
+    touch "$cache_dir/.pushed_$version"
+}
+
+# Copy config.json if it exists and is not already in the cache
+config_json_source="$folder_path/config.json"
+config_json_dest="$cache_dir/config.json"
+if [ -f "$config_json_source" ]; then
+    if [ ! -f "$config_json_dest" ]; then
+        echo "Copying config.json..."
+        # Add the uncompressed disk size annotation if disk.img exists and jq is available
+        if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then
+            echo "Adding uncompressed disk size annotation: $original_disk_size bytes"
+            jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \
+                (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fall back to a plain copy if jq fails
+        else
+            cp "$config_json_source" "$config_json_dest"
+        fi
+    fi
+fi
+if [ -f "$config_json_dest" ]; then
     files+=("config.json:application/vnd.oci.image.config.v1+json")
 fi
 
-# Copy nvram.bin if it exists
-nvram_bin="$folder_path/nvram.bin"
-if [ -f "$nvram_bin" ]; then
+# Copy nvram.bin if it exists and is not already in the cache
+if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then
     echo "Copying nvram.bin..."
-    cp "$nvram_bin" nvram.bin
+    cp "$folder_path/nvram.bin" nvram.bin
+fi
+if [ -f "$cache_dir/nvram.bin" ]; then
     files+=("nvram.bin:application/octet-stream")
 fi
 
 # Process disk.img if it exists
-disk_img_orig="disk.img" # Already in work_dir
+disk_img_orig="$folder_path/disk.img"
 if [ -f "$disk_img_orig" ]; then
-    # --- Compression Step ---
-    echo "Compressing $disk_img_orig..."
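(The sparseness accounting computed just below compares the image's logical length with its allocated blocks; the same check can be run standalone with the commands this script already relies on — stat -f%z and du in their macOS forms:

    logical=$(stat -f%z disk.img)                      # sparse (logical) size in bytes
    actual=$(( $(du -k disk.img | cut -f1) * 1024 ))   # bytes actually allocated on disk
    echo "scale=2; $logical / $actual" | bc            # sparseness ratio, e.g. 12.50
)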
- compressed_ext=".gz" - compressor="gzip" - compress_opts="-k -f" + # Get original size *before* compression + original_disk_size=$(stat -f%z "$disk_img_orig") + + # Get real (non-sparse) size + real_size=$(du -k "$disk_img_orig" | cut -f1) + real_size_bytes=$((real_size * 1024)) + sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc) + echo "Disk image: $disk_img_orig" + echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))" + echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" + echo " Sparseness ratio: ${sparseness_ratio}:1" + + # Check if we already have compressed files in the cache compressed_disk_img="disk.img${compressed_ext}" - pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img" - compressed_size=$(stat -f%z "$compressed_disk_img") - echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" - # --- End Compression Step --- - - # Check if splitting is needed based on *compressed* size - if [ $compressed_size -gt 524288000 ]; then # 500MB threshold - echo "Splitting compressed file: $compressed_disk_img" - split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part." - # Keep the compressed file and parts in work_dir - - # --- Adjust part processing --- - parts_files=() - total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ') - part_num=0 - for part in "$compressed_disk_img.part."*; do - part_num=$((part_num + 1)) - # *** IMPORTANT: Use the *compressed* OCI media type with part info *** - parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts") - echo "Part $part: $(du -h "$part" | cut -f1)" - done - # Combine non-disk files with disk parts - files+=("${parts_files[@]}") - # --- End Adjust part processing --- - - else - # Add the single compressed file to the list - # *** IMPORTANT: Use the *compressed* OCI media type *** - files+=("$compressed_disk_img:${oci_layer_media_type}") + already_compressed=false + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + already_compressed=true + echo "Using existing compressed file from cache: $compressed_disk_img" + elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then + already_compressed=true + echo "Using existing compressed parts from cache" fi - # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) --- + # Only compress if not already compressed in cache + if [ "$already_compressed" = false ]; then + # Check for free disk space before compression + avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}') + avail_space_bytes=$((avail_space * 1024)) + # Assume compressed size is roughly 30% of real size as a safe estimate + estimated_compressed=$((real_size_bytes * 30 / 100)) + + if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then + echo "WARNING: Possibly insufficient disk space for compression!" + echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB" + read -p "Continue anyway? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Exiting. Free up some space and try again." + exit 1 + fi + fi + + # --- Compression Step --- + echo "Compressing $disk_img_orig with Apple Archive..." + + # Apple Archive compression + echo "Starting compression with Apple Archive (showing output file growth)..." + compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse & + COMP_PID=$! 
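(The encode just launched pairs with a symmetric decode; a minimal round-trip sketch — the -decode flag is assumed from compression_tool's usual interface, since this script only exercises -encode and the Swift pull path shells out to /usr/bin/aa instead:

    compression_tool -encode -i disk.img -o disk.img.aa -a lzfse
    compression_tool -decode -i disk.img.aa -o disk.img.restored
    cmp disk.img disk.img.restored   # round-trip sanity check
)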
+ + sleep 1 # Give compression a moment to start + + # Display progress based on output file growth + while kill -0 $COMP_PID 2>/dev/null; do + if [ -f "$compressed_disk_img" ]; then + current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0) + percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc) + echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r" + else + echo -ne "Preparing compression...\r" + fi + sleep 2 + done + + wait $COMP_PID + echo -e "\nCompression complete!" + + compressed_size=$(stat -f%z "$compressed_disk_img") + echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" + echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%" + # --- End Compression Step --- + + # Check if splitting is needed based on *compressed* size + if [ $compressed_size -gt 524288000 ]; then # 500MB threshold + echo "Splitting compressed file into chunks of $chunk_size..." + pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part." + rm -f "$compressed_disk_img" # Remove the unsplit compressed file + # Verify that parts were created + echo "Verifying split parts..." + ls -la "$cache_dir"/disk.img${compressed_ext}.part.* + fi + else + echo "Using existing compressed/split files from cache" + fi + + # --- Adjust part processing --- + echo "Looking for compressed files in $cache_dir..." + + # List all files in the cache directory for debugging + ls -la "$cache_dir" + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + echo "Found single compressed file: $compressed_disk_img" + # Add the single compressed file to the list + files+=("$compressed_disk_img:${oci_layer_media_type}") + else + # Look for split parts + part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo "")) + if [ ${#part_files[@]} -gt 0 ]; then + echo "Found ${#part_files[@]} split parts" + parts_files=() + part_num=0 + + for part in "${part_files[@]}"; do + part_num=$((part_num + 1)) + part_basename=$(basename "$part") + parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}") + echo "Part $part_num: $(du -h "$part" | cut -f1)" + done + + files+=("${parts_files[@]}") + else + echo "ERROR: No compressed files found in cache directory: $cache_dir" + echo "Contents of cache directory:" + find "$cache_dir" -type f | sort + exit 1 + fi + fi + + # --- Push Logic --- push_pids=() IFS=',' read -ra versions <<< "$image_versions" for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting + # Trim whitespace if any from version splitting version=$(echo "$version" | xargs) if [[ -z "$version" ]]; then continue; fi + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + echo "Pushing version $version..." ( # Use process substitution to feed file list safely if it gets long @@ -161,6 +336,7 @@ if [ -f "$disk_img_orig" ]; then "ghcr.io/$organization/$image_name:$version" \ "${files[@]}" echo "Completed push for version $version" + mark_version_pushed "$version" ) & push_pids+=($!) done @@ -170,37 +346,108 @@ if [ -f "$disk_img_orig" ]; then wait "$pid" done - # --- Cleanup compressed files after successful push --- - echo "Push successful, cleaning up compressed artifacts..." 
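(Each entry appended to the files array above uses oras's path:mediatype syntax, with the chunk metadata that the Swift puller's extractPartInfo regex later matches carried in the media type suffix. For example — the .aa/.ab suffixes come from split's default naming, and the part total is illustrative:

    disk.img.aa.part.aa:application/octet-stream+lzfse;part.number=1;part.total=38
    disk.img.aa.part.ab:application/octet-stream+lzfse;part.number=2;part.total=38
    config.json:application/vnd.oci.image.config.v1+json
)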
- # Check if parts exist first - parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null) - if [ -n "$parts_exist" ]; then - echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img" - rm -f "$compressed_disk_img.part."* - # Also remove the original compressed file that was split - rm -f "$compressed_disk_img" - elif [ -f "$compressed_disk_img" ]; then - echo "Removing compressed file: $compressed_disk_img" - rm -f "$compressed_disk_img" + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" fi - # --- End Push Logic --- else echo "Warning: $disk_img_orig not found." # Push only config/nvram if they exist if [ ${#files[@]} -gt 0 ]; then - # (Add push logic here too if you want to push even without disk.img) - echo "Pushing non-disk files..." - # ... (similar push loop as above) ... + echo "Pushing non-disk files..." + push_pids=() + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + + echo "Pushing version $version (config/nvram only)..." + ( + oras push --disable-path-validation \ + "ghcr.io/$organization/$image_name:$version" \ + "${files[@]}" + echo "Completed push for version $version" + mark_version_pushed "$version" + ) & + push_pids+=($!) + done + + # Wait for all pushes to complete + for pid in "${push_pids[@]}"; do + wait "$pid" + done + + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All non-disk versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" + fi else echo "No files found to push." 
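(All of the resume behavior above reduces to the per-version marker pattern below, condensed from the definitions earlier in the script; do_push stands in for the oras invocation:

    version_pushed()      { [ -f "$cache_dir/.pushed_$1" ]; }
    mark_version_pushed() { touch "$cache_dir/.pushed_$1"; }

    for v in "${versions[@]}"; do
        version_pushed "$v" && continue          # already uploaded on a prior run
        do_push "$v" && mark_version_pushed "$v" # mark only after a successful push
    done
)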
+ cd "$work_dir" + rm -rf "$cache_dir" exit 1 fi fi -for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo "Upload complete: ghcr.io/$organization/$image_name:$version" -done +# Determine final status based on the success check *before* potential cleanup +echo # Add a newline for better readability +if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + done +else + echo "Final upload status:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + # Check the marker file only if the overall process failed (cache preserved) + if version_pushed "$version"; then + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + else + echo " Upload failed: ghcr.io/$organization/$image_name:$version" + fi + done + # Exit with error code if any version failed + exit 1 +fi diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9ef32355..ac7453ca 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -3,6 +3,18 @@ import Darwin import Foundation import Swift +// Define struct to decode relevant parts of config.json +struct OCIConfig: Codable { + struct Annotations: Codable { + let uncompressedSize: String? // Use optional String + + enum CodingKeys: String, CodingKey { + case uncompressedSize = "com.trycua.lume.disk.uncompressed_size" + } + } + let annotations: Annotations? 
// Optional annotations +} + struct Layer: Codable, Equatable { let mediaType: String let digest: String @@ -178,7 +190,7 @@ actor ProgressTracker { fflush(stdout) } - private func createProgressBar(progress: Double, width: Int = 20) -> String { + private func createProgressBar(progress: Double, width: Int = 30) -> String { let completedWidth = Int(progress * Double(width)) let remainingWidth = width - completedWidth @@ -279,6 +291,17 @@ class ImageContainerRegistry: @unchecked Sendable { private var activeDownloads: [String] = [] private let cachingEnabled: Bool + // Add the createProgressBar function here as a private method + private func createProgressBar(progress: Double, width: Int = 30) -> String { + let completedWidth = Int(progress * Double(width)) + let remainingWidth = width - completedWidth + + let completed = String(repeating: "█", count: completedWidth) + let remaining = String(repeating: "░", count: remainingWidth) + + return "[\(completed)\(remaining)]" + } + init(registry: String, organization: String) { self.registry = registry self.organization = organization @@ -716,7 +739,8 @@ class ImageContainerRegistry: @unchecked Sendable { let outputURL: URL switch mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") @@ -787,33 +811,127 @@ class ImageContainerRegistry: @unchecked Sendable { let stats = await progress.getDownloadStats() Logger.info(stats.formattedSummary()) + // Parse config.json to get uncompressed size *before* reassembly + let configURL = tempDownloadDir.appendingPathComponent("config.json") + let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) + + // Now also try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { + do { + let configData = try Data(contentsOf: configURL) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info("Found diskSize from VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse VM config.json for diskSize: \(error)") + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + // Handle disk parts if present if !diskParts.isEmpty { Logger.info("Reassembling disk image using sparse file technique...") let outputURL = tempVMDir.appendingPathComponent("disk.img") - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true + ) + + // 2. 
Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) + else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: error) } - // Calculate expected size from the manifest layers - let expectedTotalSize = UInt64( + // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) + let expectedCompressedTotalSize = UInt64( manifest.layers.filter { extractPartInfo(from: $0.mediaType) != nil }.reduce(0) { $0 + $1.size } ) Logger.info( - "Expected download size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be significantly lower)" + "Total compressed parts size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedCompressedTotalSize), countStyle: .file))" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) + // Calculate fallback size (sum of compressed parts) + let _: UInt64 = diskParts.reduce(UInt64(0)) { + (acc: UInt64, element) -> UInt64 in + let fileSize = + (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] + as? UInt64 ?? 0) ?? 0 + return acc + fileSize + } + + // Use: annotation size > VM config diskSize > fallback size + let sizeForTruncate: UInt64 + if let size = uncompressedSize { + Logger.info("Using uncompressed size from annotation: \(size) bytes") + sizeForTruncate = size + } else if let size = vmConfigDiskSize { + Logger.info("Using diskSize from VM config: \(size) bytes") + sizeForTruncate = size + } else { + Logger.error( + "Missing both uncompressed size annotation and VM config diskSize for multi-part image." + ) + throw PullError.missingUncompressedSizeAnnotation + } + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) + try outputHandle.truncate(atOffset: sizeForTruncate) + + // Verify the sparse file was created with the correct size + let initialSize = + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Sparse file initialized with size: \(ByteCountFormatter.string(fromByteCount: Int64(initialSize), countStyle: .file))" + ) + + // Add a simple test pattern at the beginning and end of the file to verify it's writable + try outputHandle.seek(toOffset: 0) + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.write(contentsOf: testPattern) + + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + Logger.info("Test patterns written to sparse file. 
File is ready for writing.") var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -849,69 +967,252 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info( + "Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", partURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists( + atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem( + atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = Double(currentOffset + partDecompressedSize) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling/Decompressing") - } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size - - } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") - let partSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 0 - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? 
Data() - } - if data.isEmpty { break } - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + / Double(expectedCompressedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } + + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size + } else { + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + // Get part size + let partSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -922,23 +1223,99 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) ?? 
0
             Logger.info(
                 "Final disk image size (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))"
             )
-            Logger.info(
-                "Note: Actual disk usage will be much lower due to macOS sparse file system")
 
-            if finalSize != expectedTotalSize {
+            if finalSize != sizeForTruncate {
                 Logger.info(
-                    "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(expectedTotalSize) bytes), but this doesn't affect functionality"
+                    "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality"
                 )
             }
 
-            Logger.info("Disk image reassembled successfully using sparse file technique")
+            // Decompress the assembled disk image if it's in LZFSE compressed format
+            Logger.info(
+                "Checking if disk image is LZFSE compressed and decompressing if needed...")
+            decompressLZFSEImage(inputPath: outputURL.path)
+
+            // Create a properly formatted disk image
+            Logger.info("Converting assembled data to proper disk image format...")
+
+            // Get actual disk usage of the assembled file
+            let assembledUsage = getActualDiskUsage(path: outputURL.path)
+            let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024  // 2GB buffer
+            let requiredSpace = assembledUsage + bufferBytes
+
+            // Check available disk space in the destination directory
+            let fileManager = FileManager.default
+            let availableSpace =
+                try? fileManager.attributesOfFileSystem(
+                    forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize]
+                as? UInt64
+
+            if let available = availableSpace, available < requiredSpace {
+                Logger.error(
+                    "Insufficient disk space to convert disk image format. Skipping conversion.",
+                    metadata: [
+                        "available": ByteCountFormatter.string(
+                            fromByteCount: Int64(available), countStyle: .file),
+                        "required": ByteCountFormatter.string(
+                            fromByteCount: Int64(requiredSpace), countStyle: .file),
+                    ]
+                )
+            } else {
+                // Prioritize SPARSE format for better sparse file handling
+                Logger.info("Attempting conversion to SPARSE format...")
+                let process = Process()
+                process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil")
+                process.arguments = [
+                    "convert",
+                    outputURL.path,  // Source: our assembled file
+                    "-format", "SPARSE",  // Format: SPARSE (best for sparse images)
+                    "-o", outputURL.path,  // Output: overwrite with converted image
+                ]
+
+                let errorPipe = Pipe()
+                process.standardError = errorPipe
+                process.standardOutput = errorPipe
+
+                try process.run()
+                process.waitUntilExit()
+
+                // Check for errors
+                let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile()
+                if !outputData.isEmpty,
+                    let outputString = String(data: outputData, encoding: .utf8)
+                {
+                    Logger.info("hdiutil output: \(outputString)")
+                }
+
+                if process.terminationStatus == 0 {
+                    // Find the potentially renamed formatted file
+                    let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL
+                    // If the output path is different, remove the original and move the new one
+                    if formattedFile.path != outputURL.path {
+                        try? FileManager.default.removeItem(at: outputURL)
+                        try FileManager.default.moveItem(at: formattedFile, to: outputURL)
+                    }
+                    Logger.info("Successfully converted disk image to proper format (SPARSE)")
+                } else {
+                    Logger.error(
+                        "Failed to convert disk image to SPARSE format. VM might not start properly."
+ ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } else { // Copy single disk image if it exists let diskURL = tempDownloadDir.appendingPathComponent("disk.img") @@ -996,9 +1373,9 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") + var diskPartSources: [(Int, URL)] = [] var totalParts = 0 - var expectedTotalSize: UInt64 = 0 // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1009,11 +1386,10 @@ class ImageContainerRegistry: @unchecked Sendable { totalParts = total // Just store the reference to source instead of copying diskPartSources.append((partNum, cachedLayer)) - expectedTotalSize += UInt64(layer.size) } else { let fileName: String switch layer.mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", "application/octet-stream+gzip": fileName = "disk.img" case "application/vnd.oci.image.config.v1+json": fileName = "config.json" @@ -1032,14 +1408,76 @@ class ImageContainerRegistry: @unchecked Sendable { // Reassemble disk parts if needed if !diskPartSources.isEmpty { + // Get the uncompressed size from cached config + let configDigest = manifest.config?.digest + let cachedConfigPath = + configDigest != nil + ? getCachedLayerPath(manifestId: manifestId, digest: configDigest!) : nil + let uncompressedSize = cachedConfigPath.flatMap { + getUncompressedSizeFromConfig(configPath: $0) + } + + // Try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil { + // Find config.json in the copied files + let vmConfigPath = destination.appendingPathComponent("config.json") + if FileManager.default.fileExists(atPath: vmConfigPath.path) { + do { + let configData = try Data(contentsOf: vmConfigPath) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info( + "Found diskSize from cached VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse cached VM config.json for diskSize: \(error)") + } + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + Logger.info( "Reassembling disk image from cached parts using sparse file technique..." ) let outputURL = destination.appendingPathComponent("disk.img") - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + + // 2. Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. 
Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) } // Calculate expected total size from the cached files @@ -1054,13 +1492,6 @@ class ImageContainerRegistry: @unchecked Sendable { "Expected download size from cache: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be lower)" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) - defer { try? outputHandle.close() } - - // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -1090,68 +1521,245 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info("Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", sourceURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? 
inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists(atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem(atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = - Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize) + Double(currentOffset + partDecompressedSize) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + // Get part size let partSize = (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] as? UInt64) ?? 
0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { let data = autoreleasepool { try! inputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } + try outputHandle.write(contentsOf: data) bytesWritten += UInt64(data.count) + chunkCount += 1 // Update progress let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + Double(currentOffset + bytesWritten) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, - context: "Reassembling") + context: "Direct copying") } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -1162,10 +1770,13 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) - ?? 0 + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 Logger.info( "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) @@ -1176,8 +1787,79 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - Logger.info( - "Disk image reassembled successfully from cache using sparse file technique") + // Decompress the assembled disk image if it's in LZFSE compressed format + Logger.info("Checking if disk image is LZFSE compressed and decompressing if needed...") + decompressLZFSEImage(inputPath: outputURL.path) + + // Create a properly formatted disk image + Logger.info("Converting assembled data to proper disk image format...") + + // Get actual disk usage of the assembled file + let assembledUsage = getActualDiskUsage(path: outputURL.path) + let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer + let requiredSpace = assembledUsage + bufferBytes + + // Check available disk space in the destination directory + let fileManager = FileManager.default + let availableSpace = + try? fileManager.attributesOfFileSystem( + forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] as? UInt64 + + if let available = availableSpace, available < requiredSpace { + Logger.error( + "Insufficient disk space to convert disk image format. 
Skipping conversion.", + metadata: [ + "available": ByteCountFormatter.string( + fromByteCount: Int64(available), countStyle: .file), + "required": ByteCountFormatter.string( + fromByteCount: Int64(requiredSpace), countStyle: .file), + ] + ) + } else { + // Prioritize SPARSE format for better sparse file handling + Logger.info("Attempting conversion to SPARSE format...") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + process.arguments = [ + "convert", + outputURL.path, // Source: our assembled file + "-format", "SPARSE", // Format: SPARSE (best for sparse images) + "-o", outputURL.path, // Output: overwrite with converted image + ] + + let errorPipe = Pipe() + process.standardError = errorPipe + process.standardOutput = errorPipe + + try process.run() + process.waitUntilExit() + + // Check for errors + let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !outputData.isEmpty, let outputString = String(data: outputData, encoding: .utf8) + { + Logger.info("hdiutil output: \(outputString)") + } + + if process.terminationStatus == 0 { + // Find the potentially renamed formatted file + let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL + // If the output path is different, remove the original and move the new one + if formattedFile.path != outputURL.path { + try? FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: formattedFile, to: outputURL) + } + Logger.info("Successfully converted disk image to proper format (SPARSE)") + } else { + Logger.error( + "Failed to convert disk image to SPARSE format. VM might not start properly." + ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } Logger.info("Cache copy complete") @@ -1307,70 +1989,6 @@ class ImageContainerRegistry: @unchecked Sendable { throw lastError ?? PullError.layerDownloadFailed(digest) } - private func decompressGzipFile(at source: URL, to destination: URL) throws { - Logger.info("Decompressing \(source.lastPathComponent)...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/gunzip") - process.arguments = ["-c"] - - let inputPipe = Pipe() - let outputPipe = Pipe() - process.standardInput = inputPipe - process.standardOutput = outputPipe - - try process.run() - - // Read and pipe the gzipped file in chunks to avoid memory issues - let inputHandle = try FileHandle(forReadingFrom: source) - let outputHandle = try FileHandle(forWritingTo: destination) - defer { - try? inputHandle.close() - try? 
-    private func decompressGzipFile(at source: URL, to destination: URL) throws {
-        Logger.info("Decompressing \(source.lastPathComponent)...")
-        let process = Process()
-        process.executableURL = URL(fileURLWithPath: "/usr/bin/gunzip")
-        process.arguments = ["-c"]
-
-        let inputPipe = Pipe()
-        let outputPipe = Pipe()
-        process.standardInput = inputPipe
-        process.standardOutput = outputPipe
-
-        try process.run()
-
-        // Read and pipe the gzipped file in chunks to avoid memory issues
-        let inputHandle = try FileHandle(forReadingFrom: source)
-        let outputHandle = try FileHandle(forWritingTo: destination)
-        defer {
-            try? inputHandle.close()
-            try? outputHandle.close()
-        }
-
-        // Create the output file
-        FileManager.default.createFile(atPath: destination.path, contents: nil)
-
-        // Process with optimal chunk size
-        let chunkSize = getOptimalChunkSize()
-        while let chunk = try inputHandle.read(upToCount: chunkSize) {
-            try autoreleasepool {
-                try inputPipe.fileHandleForWriting.write(contentsOf: chunk)
-
-                // Read and write output in chunks as well
-                while let decompressedChunk = try outputPipe.fileHandleForReading.read(
-                    upToCount: chunkSize)
-                {
-                    try outputHandle.write(contentsOf: decompressedChunk)
-                }
-            }
-        }
-
-        try inputPipe.fileHandleForWriting.close()
-
-        // Read any remaining output
-        while let decompressedChunk = try outputPipe.fileHandleForReading.read(upToCount: chunkSize)
-        {
-            try autoreleasepool {
-                try outputHandle.write(contentsOf: decompressedChunk)
-            }
-        }
-
-        process.waitUntilExit()
-
-        if process.terminationStatus != 0 {
-            throw PullError.decompressionFailed(source.lastPathComponent)
-        }
-
-        // Verify the decompressed size
-        let decompressedSize =
-            try FileManager.default.attributesOfItem(atPath: destination.path)[.size] as? UInt64
-                ?? 0
-        Logger.info(
-            "Decompressed size: \(ByteCountFormatter.string(fromByteCount: Int64(decompressedSize), countStyle: .file))"
-        )
-    }
-
     private func extractPartInfo(from mediaType: String) -> (partNum: Int, total: Int)? {
         let pattern = #"part\.number=(\d+);part\.total=(\d+)"#
         guard let regex = try? NSRegularExpression(pattern: pattern),
@@ -1699,19 +2317,47 @@ class ImageContainerRegistry: @unchecked Sendable {

     // Add helper to check media type and get decompress command
     private func getDecompressionCommand(for mediaType: String) -> String? {
-        if mediaType.hasSuffix("+gzip") {
-            return "/usr/bin/gunzip -c"  // -c writes to stdout
-        } else if mediaType.hasSuffix("+zstd") {
-            // Check if zstd exists, otherwise handle error?
-            // Assuming brew install zstd -> /opt/homebrew/bin/zstd or /usr/local/bin/zstd
-            let zstdPath = findExecutablePath(named: "zstd") ?? "/usr/local/bin/zstd"
"/usr/local/bin/zstd" - return "\(zstdPath) -dc" // -d decompress, -c stdout + // Determine appropriate decompression command based on layer media type + Logger.info("Determining decompression command for media type: \(mediaType)") + + // For the specific format that appears in our GHCR repository, skip decompression attempts + // These files are labeled +lzfse but aren't actually in Apple Archive format + if mediaType.contains("+lzfse;part.number=") { + Logger.info("Detected LZFSE part file, using direct copy instead of decompression") + return nil + } + + // Check for LZFSE or Apple Archive format anywhere in the media type string + // The format may include part information like: application/octet-stream+lzfse;part.number=1;part.total=38 + if mediaType.contains("+lzfse") || mediaType.contains("+aa") { + // Apple Archive format requires special handling + if let aaPath = findExecutablePath(for: "aa") { + Logger.info("Found Apple Archive tool at: \(aaPath)") + return "apple_archive:\(aaPath)" + } else { + Logger.error( + "Apple Archive tool (aa) not found in PATH, falling back to default path") + + // Check if the default path exists + let defaultPath = "/usr/bin/aa" + if FileManager.default.isExecutableFile(atPath: defaultPath) { + Logger.info("Default Apple Archive tool exists at: \(defaultPath)") + } else { + Logger.error("Default Apple Archive tool not found at: \(defaultPath)") + } + + return "apple_archive:/usr/bin/aa" + } + } else { + Logger.info( + "Unsupported media type: \(mediaType) - only Apple Archive (+lzfse/+aa) is supported" + ) + return nil } - return nil // Not compressed or unknown compression } // Helper to find executables (optional, or hardcode paths) - private func findExecutablePath(named executableName: String) -> String? { + private func findExecutablePath(for executableName: String) -> String? { let pathEnv = ProcessInfo.processInfo.environment["PATH"] ?? "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin" @@ -1726,4 +2372,250 @@ class ImageContainerRegistry: @unchecked Sendable { } return nil } + + // Helper function to extract uncompressed disk size from config.json + private func getUncompressedSizeFromConfig(configPath: URL) -> UInt64? { + guard FileManager.default.fileExists(atPath: configPath.path) else { + Logger.info("Config file not found: \(configPath.path)") + return nil + } + + do { + let configData = try Data(contentsOf: configPath) + let decoder = JSONDecoder() + let ociConfig = try decoder.decode(OCIConfig.self, from: configData) + + if let sizeString = ociConfig.annotations?.uncompressedSize, + let size = UInt64(sizeString) + { + Logger.info("Found uncompressed disk size annotation: \(size) bytes") + return size + } else { + Logger.info("No uncompressed disk size annotation found in config.json") + return nil + } + } catch { + Logger.error("Failed to parse config.json for uncompressed size: \(error)") + return nil + } + } + + // Helper function to find formatted file with potential extensions + private func findFormattedFile(tempFormatted: URL) -> URL? 
+
+    // Helper function to find the formatted file under its potential extensions
+    private func findFormattedFile(tempFormatted: URL) -> URL? {
+        // Check for the exact path first
+        if FileManager.default.fileExists(atPath: tempFormatted.path) {
+            return tempFormatted
+        }
+
+        // Check with the .dmg extension
+        let dmgPath = tempFormatted.path + ".dmg"
+        if FileManager.default.fileExists(atPath: dmgPath) {
+            return URL(fileURLWithPath: dmgPath)
+        }
+
+        // Check with the .sparseimage extension
+        let sparsePath = tempFormatted.path + ".sparseimage"
+        if FileManager.default.fileExists(atPath: sparsePath) {
+            return URL(fileURLWithPath: sparsePath)
+        }
+
+        // Try to find any file with the same basename
+        do {
+            let files = try FileManager.default.contentsOfDirectory(
+                at: tempFormatted.deletingLastPathComponent(),
+                includingPropertiesForKeys: nil)
+            if let matchingFile = files.first(where: {
+                $0.lastPathComponent.starts(with: tempFormatted.lastPathComponent)
+            }) {
+                return matchingFile
+            }
+        } catch {
+            Logger.error("Failed to list directory contents: \(error)")
+        }
+
+        return nil
+    }
+
+    // Helper function to decompress an LZFSE compressed disk image
+    @discardableResult
+    private func decompressLZFSEImage(inputPath: String, outputPath: String? = nil) -> Bool {
+        Logger.info("Attempting to decompress LZFSE compressed disk image using sparse pipe...")
+
+        let finalOutputPath = outputPath ?? inputPath  // If outputPath is nil, we overwrite the input
+        let tempFinalPath = finalOutputPath + ".ddsparse.tmp"  // Temporary name during the dd operation
+
+        // Ensure the temporary file doesn't exist from a previous failed run
+        try? FileManager.default.removeItem(atPath: tempFinalPath)
+
+        // Process 1: compression_tool
+        let process1 = Process()
+        process1.executableURL = URL(fileURLWithPath: "/usr/bin/compression_tool")
+        process1.arguments = [
+            "-decode",
+            "-i", inputPath,
+            "-o", "/dev/stdout",  // Write to standard output
+        ]
+
+        // Process 2: dd
+        let process2 = Process()
+        process2.executableURL = URL(fileURLWithPath: "/bin/dd")
+        process2.arguments = [
+            "if=/dev/stdin",  // Read from standard input
+            "of=\(tempFinalPath)",  // Write to the temporary final path
+            "conv=sparse",  // Skip writing all-zero blocks so the output stays sparse
+            "bs=1m",  // Use a reasonable block size (1MB)
+        ]
+
+        // Create pipes
+        let pipe = Pipe()  // Connects process1 stdout to process2 stdin
+        let errorPipe1 = Pipe()
+        let errorPipe2 = Pipe()
+
+        process1.standardOutput = pipe
+        process1.standardError = errorPipe1
+
+        process2.standardInput = pipe
+        process2.standardError = errorPipe2
+
+        do {
+            Logger.info("Starting decompression pipe: compression_tool | dd conv=sparse...")
+            // Start both processes
+            try process1.run()
+            try process2.run()
+
+            // Closing the pipe's write end for process2 would guard against hanging,
+            // but accessing fileHandleForWriting after run() can be tricky, so we rely
+            // on process1 exiting to signal EOF to process2.
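+            // If dd were ever observed to hang waiting for EOF, one possible remedy (an
+            // untested sketch, not part of this change) would be to drop the parent's
+            // reference to the write end explicitly once both processes have started:
+            //   try? pipe.fileHandleForWriting.close()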
+
+            process1.waitUntilExit()
+            process2.waitUntilExit()  // Wait for dd to finish processing the stream
+
+            // --- Check for errors ---
+            let errorData1 = errorPipe1.fileHandleForReading.readDataToEndOfFile()
+            if !errorData1.isEmpty,
+                let errorString = String(data: errorData1, encoding: .utf8)?.trimmingCharacters(
+                    in: .whitespacesAndNewlines), !errorString.isEmpty
+            {
+                Logger.error("compression_tool stderr: \(errorString)")
+            }
+            let errorData2 = errorPipe2.fileHandleForReading.readDataToEndOfFile()
+            if !errorData2.isEmpty,
+                let errorString = String(data: errorData2, encoding: .utf8)?.trimmingCharacters(
+                    in: .whitespacesAndNewlines), !errorString.isEmpty
+            {
+                // dd routinely reports blocks in/out on stderr; log it rather than filter it
+                Logger.info("dd stderr: \(errorString)")
+            }
+
+            // Check termination statuses
+            let status1 = process1.terminationStatus
+            let status2 = process2.terminationStatus
+
+            if status1 != 0 || status2 != 0 {
+                Logger.error(
+                    "Pipe command failed. compression_tool status: \(status1), dd status: \(status2)"
+                )
+                try? FileManager.default.removeItem(atPath: tempFinalPath)  // Clean up the failed attempt
+                return false
+            }
+
+            // --- Validation ---
+            if FileManager.default.fileExists(atPath: tempFinalPath) {
+                let fileSize =
+                    (try? FileManager.default.attributesOfItem(atPath: tempFinalPath)[.size]
+                        as? UInt64) ?? 0
+                let actualUsage = getActualDiskUsage(path: tempFinalPath)
+                Logger.info(
+                    "Piped decompression successful - Allocated: \(ByteCountFormatter.string(fromByteCount: Int64(fileSize), countStyle: .file)), Actual Usage: \(ByteCountFormatter.string(fromByteCount: Int64(actualUsage), countStyle: .file))"
+                )
+
+                // Basic header validation: look for the 0x55AA boot signature at offset 510
+                var isValid = false
+                if let fileHandle = FileHandle(forReadingAtPath: tempFinalPath) {
+                    if let data = try? fileHandle.read(upToCount: 512), data.count >= 512,
+                        data[510] == 0x55 && data[511] == 0xAA
+                    {
+                        isValid = true
+                    }
+                    // Ensure the handle is closed regardless of the validation outcome
+                    try? fileHandle.close()
+                } else {
+                    Logger.error(
+                        "Validation Error: Could not open decompressed file handle for reading.")
+                }
+
+                if isValid {
+                    Logger.info("Decompressed file appears to be a valid disk image.")
+
+                    // Move the final file into place.
+                    // If outputPath was nil, we need to replace the original inputPath.
+                    if outputPath == nil {
+                        // Back up the original only if it's different from the temp path
+                        if inputPath != tempFinalPath {
+                            try? FileManager.default.copyItem(
+                                at: URL(fileURLWithPath: inputPath),
+                                to: URL(fileURLWithPath: inputPath + ".compressed.bak"))
+                            try? FileManager.default.removeItem(at: URL(fileURLWithPath: inputPath))
+                        }
+                        try FileManager.default.moveItem(
+                            at: URL(fileURLWithPath: tempFinalPath),
+                            to: URL(fileURLWithPath: inputPath))
+                        Logger.info("Replaced original file with sparsely decompressed version.")
+                    } else {
+                        // If outputPath was specified, move the file there (overwriting if needed)
+                        try? FileManager.default.removeItem(
+                            at: URL(fileURLWithPath: finalOutputPath))  // Remove any existing file
+                        try FileManager.default.moveItem(
+                            at: URL(fileURLWithPath: tempFinalPath),
+                            to: URL(fileURLWithPath: finalOutputPath))
+                        Logger.info("Moved sparsely decompressed file to: \(finalOutputPath)")
+                    }
+                    return true
+                } else {
+                    Logger.error(
+                        "Validation failed: Decompressed file header is invalid or file couldn't be read. Cleaning up."
+                    )
+                    try? FileManager.default.removeItem(atPath: tempFinalPath)
+                    return false
+                }
+            } else {
+                Logger.error(
+                    "Piped decompression failed: Output file '\(tempFinalPath)' not found after dd completed."
+                )
+                return false
+            }
+
+        } catch {
+            Logger.error("Error running decompression pipe command: \(error)")
+            try? FileManager.default.removeItem(atPath: tempFinalPath)  // Clean up on error
+            return false
+        }
+    }
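+
+    // To sanity-check the sparse result by hand (illustrative commands, not executed
+    // by this code): `du -h disk.img` should report far less than `ls -lh disk.img`,
+    // since dd's conv=sparse skips writing all-zero blocks.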
+
+    // Helper function to get the actual on-disk usage of a file (allocated blocks,
+    // not the logical size)
+    private func getActualDiskUsage(path: String) -> UInt64 {
+        let task = Process()
+        task.executableURL = URL(fileURLWithPath: "/usr/bin/du")
+        task.arguments = ["-k", path]  // -k reports usage in 1024-byte blocks
+
+        let pipe = Pipe()
+        task.standardOutput = pipe
+
+        do {
+            try task.run()
+            task.waitUntilExit()
+
+            let data = pipe.fileHandleForReading.readDataToEndOfFile()
+            if let output = String(data: data, encoding: .utf8),
+                let size = UInt64(output.split(separator: "\t").first ?? "0")
+            {
+                return size * 1024  // Convert from KB to bytes
+            }
+        } catch {
+            Logger.error("Failed to get actual disk usage: \(error)")
+        }
+
+        return 0
+    }
 }
diff --git a/libs/lume/src/Errors/Errors.swift b/libs/lume/src/Errors/Errors.swift
index 91b04950..b6568c10 100644
--- a/libs/lume/src/Errors/Errors.swift
+++ b/libs/lume/src/Errors/Errors.swift
@@ -5,6 +5,15 @@ enum HomeError: Error, LocalizedError {
     case directoryAccessDenied(path: String)
     case invalidHomeDirectory
     case directoryAlreadyExists(path: String)
+    case homeNotFound
+    case defaultStorageNotDefined
+    case storageLocationNotFound(String)
+    case storageLocationNotADirectory(String)
+    case storageLocationNotWritable(String)
+    case invalidStorageLocation(String)
+    case cannotCreateDirectory(String)
+    case cannotGetVMsDirectory
+    case vmDirectoryNotFound(String)

     var errorDescription: String? {
         switch self {
@@ -16,6 +25,24 @@ enum HomeError: Error, LocalizedError {
             return "Invalid home directory configuration"
         case .directoryAlreadyExists(let path):
             return "Directory already exists at path: \(path)"
+        case .homeNotFound:
+            return "Home directory not found."
+        case .defaultStorageNotDefined:
+            return "Default storage location is not defined."
+        case .storageLocationNotFound(let path):
+            return "Storage location not found: \(path)"
+        case .storageLocationNotADirectory(let path):
+            return "Storage location is not a directory: \(path)"
+        case .storageLocationNotWritable(let path):
+            return "Storage location is not writable: \(path)"
+        case .invalidStorageLocation(let path):
+            return "Invalid storage location specified: \(path)"
+        case .cannotCreateDirectory(let path):
+            return "Cannot create directory: \(path)"
+        case .cannotGetVMsDirectory:
+            return "Cannot determine the VMs directory."
+        case .vmDirectoryNotFound(let path):
+            return "VM directory not found: \(path)"
         }
     }
 }
@@ -28,23 +55,32 @@ enum PullError: Error, LocalizedError {
     case missingPart(Int)
     case decompressionFailed(String)
     case reassemblyFailed(String)
+    case fileCreationFailed(String)
+    case reassemblySetupFailed(path: String, underlyingError: Error)
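+    // Raised when the image's config.json lacks the uncompressed disk size annotation
+    // (written by push-ghcr.sh as com.trycua.lume.disk.uncompressed_size)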
+    case missingUncompressedSizeAnnotation

     var errorDescription: String? {
         switch self {
         case .invalidImageFormat:
             return "Invalid image format. Expected format: name:tag"
         case .tokenFetchFailed:
-            return "Failed to obtain authentication token"
+            return "Failed to fetch authentication token from registry."
         case .manifestFetchFailed:
-            return "Failed to fetch manifest"
+            return "Failed to fetch image manifest from registry."
         case .layerDownloadFailed(let digest):
             return "Failed to download layer: \(digest)"
-        case .missingPart(let number):
-            return "Missing disk image part \(number)"
-        case .decompressionFailed(let filename):
-            return "Failed to decompress file: \(filename)"
+        case .missingPart(let partNum):
+            return "Missing required part number \(partNum) for reassembly."
+        case .decompressionFailed(let file):
+            return "Failed to decompress file: \(file)"
         case .reassemblyFailed(let reason):
             return "Disk image reassembly failed: \(reason)."
+        case .fileCreationFailed(let path):
+            return "Failed to create the necessary file at path: \(path)"
+        case .reassemblySetupFailed(let path, let underlyingError):
+            return "Failed to set up for reassembly at path: \(path). Underlying error: \(underlyingError.localizedDescription)"
+        case .missingUncompressedSizeAnnotation:
+            return "Could not find the required uncompressed disk size annotation in the image config.json."
         }
     }
 }
@@ -165,4 +201,24 @@ enum VMError: Error, LocalizedError {
         return "Invalid display resolution: \(resolution)"
         }
     }
-}
\ No newline at end of file
+}
+
+enum ResticError: Error {
+    case snapshotFailed(String)
+    case restoreFailed(String)
+    case genericError(String)
+}
+
+enum VmrunError: Error, LocalizedError {
+    case commandNotFound
+    case operationFailed(command: String, output: String?)
+
+    var errorDescription: String? {
+        switch self {
+        case .commandNotFound:
+            return "vmrun command not found. Ensure VMware Fusion is installed and in the system PATH."
+        case .operationFailed(let command, let output):
+            return "vmrun command '\(command)' failed. Output: \(output ?? "No output")"
+        }
+    }
+}
\ No newline at end of file