From 18f92c6a85f19ff3fd5763c488fd849d4a366461 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 15 Apr 2025 10:02:13 -0700 Subject: [PATCH 01/28] Use sparse files --- libs/lume/scripts/ghcr/push-ghcr.sh | 182 ++++++----- .../ImageContainerRegistry.swift | 296 +++++++++++------- 2 files changed, 278 insertions(+), 200 deletions(-) diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index c204f97b..fbe4ab7e 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -83,19 +83,19 @@ done # Authenticate with GitHub Container Registry echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin -# Create a temporary directory for processing files -work_dir=$(mktemp -d) -echo "Working directory: $work_dir" -trap 'rm -rf "$work_dir"' EXIT +# Use the source folder path as the working directory and get its absolute path +work_dir=$(cd "$folder_path" && pwd) +echo "Working directory (persistent cache): $work_dir" -# Create a directory for all files -mkdir -p "$work_dir/files" -cd "$work_dir/files" +# Change to the working directory +cd "$work_dir" +files=() # Initialize files array here # Copy config.json if it exists if [ -f "$folder_path/config.json" ]; then echo "Copying config.json..." cp "$folder_path/config.json" config.json + files+=("config.json:application/vnd.oci.image.config.v1+json") fi # Copy nvram.bin if it exists @@ -103,106 +103,104 @@ nvram_bin="$folder_path/nvram.bin" if [ -f "$nvram_bin" ]; then echo "Copying nvram.bin..." cp "$nvram_bin" nvram.bin + files+=("nvram.bin:application/octet-stream") fi -# Process disk.img if it exists and needs splitting -disk_img="$folder_path/disk.img" -if [ -f "$disk_img" ]; then - file_size=$(stat -f%z "$disk_img") - if [ $file_size -gt 524288000 ]; then # 500MB in bytes - echo "Splitting large file: disk.img" - echo "Original disk.img size: $(du -h "$disk_img" | cut -f1)" - - # Copy and split the file with progress monitoring - echo "Copying disk image..." - pv "$disk_img" > disk.img - - echo "Splitting file..." - split -b "$chunk_size" disk.img disk.img.part. - rm disk.img +# Process disk.img if it exists +disk_img_orig="disk.img" # Already in work_dir +if [ -f "$disk_img_orig" ]; then + # --- Compression Step --- + echo "Compressing $disk_img_orig..." + compressed_ext=".gz" + compressor="gzip" + compress_opts="-k -f" + compressed_disk_img="disk.img${compressed_ext}" + pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img" + compressed_size=$(stat -f%z "$compressed_disk_img") + echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" + # --- End Compression Step --- - # Get original file size for verification - original_size=$(stat -f%z "$disk_img") - echo "Original disk.img size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" + # Check if splitting is needed based on *compressed* size + if [ $compressed_size -gt 524288000 ]; then # 500MB threshold + echo "Splitting compressed file: $compressed_disk_img" + split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part." 
+ # Keep the compressed file and parts in work_dir - # Verify split parts total size - total_size=0 - total_parts=$(ls disk.img.part.* | wc -l | tr -d ' ') + # --- Adjust part processing --- + parts_files=() + total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ') part_num=0 - - # Create array for files and their annotations - files=() - for part in disk.img.part.*; do - part_size=$(stat -f%z "$part") - total_size=$((total_size + part_size)) + for part in "$compressed_disk_img.part."*; do part_num=$((part_num + 1)) - echo "Part $part: $(awk -v size=$part_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - files+=("$part:application/vnd.oci.image.layer.v1.tar;part.number=$part_num;part.total=$total_parts") + # *** IMPORTANT: Use the *compressed* OCI media type with part info *** + parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts") + echo "Part $part: $(du -h "$part" | cut -f1)" done + # Combine non-disk files with disk parts + files+=("${parts_files[@]}") + # --- End Adjust part processing --- - echo "Total size of parts: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - - # Verify total size matches original - if [ $total_size -ne $original_size ]; then - echo "ERROR: Size mismatch!" - echo "Original file size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - echo "Sum of parts size: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - echo "Difference: $(awk -v orig=$original_size -v total=$total_size 'BEGIN {printf "%.2f GB", (orig-total)/1024/1024/1024}')" - exit 1 - fi - - # Add remaining files - if [ -f "config.json" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") - fi - - if [ -f "nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") - fi - - # Push versions in parallel - push_pids=() - for version in $image_versions; do - ( - echo "Pushing version $version..." - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done else - # Push disk.img directly if it's small enough - echo "Copying disk image..." - pv "$disk_img" > disk.img - - # Push all files together - echo "Pushing all files..." - files=("disk.img:application/vnd.oci.image.layer.v1.tar") - - if [ -f "config.json" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") - fi - - if [ -f "nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") - fi + # Add the single compressed file to the list + # *** IMPORTANT: Use the *compressed* OCI media type *** + files+=("$compressed_disk_img:${oci_layer_media_type}") + fi - for version in $image_versions; do - # Push all files in one command + # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) --- + push_pids=() + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + + echo "Pushing version $version..." + ( + # Use process substitution to feed file list safely if it gets long oras push --disable-path-validation \ "ghcr.io/$organization/$image_name:$version" \ "${files[@]}" - done + echo "Completed push for version $version" + ) & + push_pids+=($!) 
+ done + + # Wait for all pushes to complete + for pid in "${push_pids[@]}"; do + wait "$pid" + done + + # --- Cleanup compressed files after successful push --- + echo "Push successful, cleaning up compressed artifacts..." + # Check if parts exist first + parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null) + if [ -n "$parts_exist" ]; then + echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img" + rm -f "$compressed_disk_img.part."* + # Also remove the original compressed file that was split + rm -f "$compressed_disk_img" + elif [ -f "$compressed_disk_img" ]; then + echo "Removing compressed file: $compressed_disk_img" + rm -f "$compressed_disk_img" + fi + # --- End Push Logic --- + +else + echo "Warning: $disk_img_orig not found." + # Push only config/nvram if they exist + if [ ${#files[@]} -gt 0 ]; then + # (Add push logic here too if you want to push even without disk.img) + echo "Pushing non-disk files..." + # ... (similar push loop as above) ... + else + echo "No files found to push." + exit 1 fi fi -for version in $image_versions; do +for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi echo "Upload complete: ghcr.io/$organization/$image_name:$version" done diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index cd0f8fb8..9ef32355 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -809,94 +809,118 @@ class ImageContainerRegistry: @unchecked Sendable { ) // Create sparse file of the required size - FileManager.default.createFile(atPath: outputURL.path, contents: nil) let outputHandle = try FileHandle(forWritingTo: outputURL) + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: expectedTotalSize) var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var processedSize: UInt64 = 0 + var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - // Process each part in order for partNum in 1...totalParts { - guard let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) else { + // Find the original layer info for this part number + guard + let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }), + let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) + else { throw PullError.missingPart(partNum) } + let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( "Processing part \(partNum) of \(totalParts): \(partURL.lastPathComponent)") - // Get part file size - let partAttributes = try FileManager.default.attributesOfItem( - atPath: partURL.path) - let partSize = partAttributes[.size] as? UInt64 ?? 0 - - // Calculate the offset in the final file (parts are sequential) - let partOffset = processedSize - - // Open input file let inputHandle = try FileHandle(forReadingFrom: partURL) defer { try? inputHandle.close() - // Don't delete the part file if it's from cache + // Clean up temp downloaded part if not from cache if !partURL.path.contains(cacheDirectory.path) { try? 
FileManager.default.removeItem(at: partURL) } } - // Seek to the appropriate offset in output file - try outputHandle.seek(toOffset: partOffset) + // Seek to the correct offset in the output sparse file + try outputHandle.seek(toOffset: currentOffset) - // Copy data in chunks to avoid memory issues - let chunkSize: UInt64 = - determineIfMemoryConstrained() ? 256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB) - var bytesWritten: UInt64 = 0 + if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType + Logger.info("Decompressing part \(partNum)...") + let process = Process() + let pipe = Pipe() + process.executableURL = URL(fileURLWithPath: "/bin/sh") + process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection + process.standardOutput = pipe // Capture decompressed output - while bytesWritten < partSize { - // Use Foundation's autoreleasepool for proper memory management - Foundation.autoreleasepool { - let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten) - if let chunk = try? inputHandle.read(upToCount: Int(readSize)) { - if !chunk.isEmpty { - try? outputHandle.write(contentsOf: chunk) - bytesWritten += UInt64(chunk.count) + try process.run() - // Update progress less frequently to reduce overhead - if bytesWritten % (chunkSize * 4) == 0 - || bytesWritten == partSize - { - let totalProgress = - Double(processedSize + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling disk image") - } - } + let reader = pipe.fileHandleForReading + var partDecompressedSize: UInt64 = 0 + + // Read decompressed data in chunks and write to sparse file + while true { + let data = autoreleasepool { // Help manage memory with large files + reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks } + if data.isEmpty { break } // End of stream - // Add a small delay every few MB to allow memory cleanup - if bytesWritten % (chunkSize * 16) == 0 && bytesWritten > 0 { - // Use Thread.sleep for now, but ideally this would use a non-blocking approach - // that is appropriate for the context (sync/async) - Thread.sleep(forTimeInterval: 0.01) - } + try outputHandle.write(contentsOf: data) + partDecompressedSize += UInt64(data.count) + + // Update progress based on decompressed size written + let totalProgress = + Double(currentOffset + partDecompressedSize) + / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling/Decompressing") } + process.waitUntilExit() + if process.terminationStatus != 0 { + throw PullError.decompressionFailed("Part \(partNum)") + } + currentOffset += partDecompressedSize // Advance offset by decompressed size + + } else { + // --- Handle non-compressed parts (if any, or the single file case) --- + // This part is similar to your original copy logic, writing directly + // from inputHandle to outputHandle at currentOffset + Logger.info("Copying non-compressed part \(partNum)...") + let partSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 + while bytesWritten < partSize { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") + } + currentOffset += bytesWritten + // --- End non-compressed handling --- } - // Update processed size - processedSize += partSize + // Ensure data is written before processing next part (optional but safer) + try outputHandle.synchronize() } - // Finalize progress - reassemblyProgressLogger.logProgress( - current: 1.0, context: "Reassembling disk image") - Logger.info("") // Newline after progress - - // Close the output file - try outputHandle.synchronize() - try outputHandle.close() + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline // Verify final size let finalSize = @@ -1031,86 +1055,112 @@ class ImageContainerRegistry: @unchecked Sendable { ) // Create sparse file of the required size - FileManager.default.createFile(atPath: outputURL.path, contents: nil) let outputHandle = try FileHandle(forWritingTo: outputURL) + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: expectedTotalSize) var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var processedSize: UInt64 = 0 + var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - // Process each part in order for partNum in 1...totalParts { - guard let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) else { + // Find the original layer info for this part number + guard + let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }), + let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) + else { throw PullError.missingPart(partNum) } + let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( "Processing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent)" ) - // Get part file size - let partAttributes = try FileManager.default.attributesOfItem( - atPath: sourceURL.path) - let partSize = partAttributes[.size] as? UInt64 ?? 0 - - // Calculate the offset in the final file (parts are sequential) - let partOffset = processedSize - - // Open input file let inputHandle = try FileHandle(forReadingFrom: sourceURL) defer { try? inputHandle.close() } - // Seek to the appropriate offset in output file - try outputHandle.seek(toOffset: partOffset) + // Seek to the correct offset in the output sparse file + try outputHandle.seek(toOffset: currentOffset) - // Copy data in chunks to avoid memory issues - let chunkSize: UInt64 = determineIfMemoryConstrained() ? 
256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB) - var bytesWritten: UInt64 = 0 + if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType + Logger.info("Decompressing part \(partNum)...") + let process = Process() + let pipe = Pipe() + process.executableURL = URL(fileURLWithPath: "/bin/sh") + process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection + process.standardOutput = pipe // Capture decompressed output - while bytesWritten < partSize { - // Use Foundation's autoreleasepool for proper memory management - Foundation.autoreleasepool { - let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten) - if let chunk = try? inputHandle.read(upToCount: Int(readSize)) { - if !chunk.isEmpty { - try? outputHandle.write(contentsOf: chunk) - bytesWritten += UInt64(chunk.count) + try process.run() - // Update progress less frequently to reduce overhead - if bytesWritten % (chunkSize * 4) == 0 || bytesWritten == partSize { - let totalProgress = - Double(processedSize + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling disk image from cache") - } - } + let reader = pipe.fileHandleForReading + var partDecompressedSize: UInt64 = 0 + + // Read decompressed data in chunks and write to sparse file + while true { + let data = autoreleasepool { // Help manage memory with large files + reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks } + if data.isEmpty { break } // End of stream - // Add a small delay every few MB to allow memory cleanup - if bytesWritten % (chunkSize * 16) == 0 && bytesWritten > 0 { - // Use Thread.sleep for now, but ideally this would use a non-blocking approach - // that is appropriate for the context (sync/async) - Thread.sleep(forTimeInterval: 0.01) - } + try outputHandle.write(contentsOf: data) + partDecompressedSize += UInt64(data.count) + + // Update progress based on decompressed size written + let totalProgress = + Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") } + process.waitUntilExit() + if process.terminationStatus != 0 { + throw PullError.decompressionFailed("Part \(partNum)") + } + currentOffset += partDecompressedSize // Advance offset by decompressed size + + } else { + // --- Handle non-compressed parts (if any, or the single file case) --- + // This part is similar to your original copy logic, writing directly + // from inputHandle to outputHandle at currentOffset + Logger.info("Copying non-compressed part \(partNum)...") + let partSize = + (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] + as? UInt64) ?? 0 + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 + while bytesWritten < partSize { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") + } + currentOffset += bytesWritten + // --- End non-compressed handling --- } - // Update processed size - processedSize += partSize + // Ensure data is written before processing next part (optional but safer) + try outputHandle.synchronize() } - // Finalize progress - reassemblyProgressLogger.logProgress( - current: 1.0, context: "Reassembling disk image from cache") - Logger.info("") // Newline after progress - - // Close the output file - try outputHandle.synchronize() - try outputHandle.close() + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline // Verify final size let finalSize = @@ -1646,4 +1696,34 @@ class ImageContainerRegistry: @unchecked Sendable { return nil } + + // Add helper to check media type and get decompress command + private func getDecompressionCommand(for mediaType: String) -> String? { + if mediaType.hasSuffix("+gzip") { + return "/usr/bin/gunzip -c" // -c writes to stdout + } else if mediaType.hasSuffix("+zstd") { + // Check if zstd exists, otherwise handle error? + // Assuming brew install zstd -> /opt/homebrew/bin/zstd or /usr/local/bin/zstd + let zstdPath = findExecutablePath(named: "zstd") ?? "/usr/local/bin/zstd" + return "\(zstdPath) -dc" // -d decompress, -c stdout + } + return nil // Not compressed or unknown compression + } + + // Helper to find executables (optional, or hardcode paths) + private func findExecutablePath(named executableName: String) -> String? { + let pathEnv = + ProcessInfo.processInfo.environment["PATH"] + ?? 
"/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin" + let paths = pathEnv.split(separator: ":") + for path in paths { + let executablePath = URL(fileURLWithPath: String(path)).appendingPathComponent( + executableName + ).path + if FileManager.default.isExecutableFile(atPath: executablePath) { + return executablePath + } + } + return nil + } } From 353f3cf45df13275d76b0b12fcabdccad622f040 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 09:59:32 +0200 Subject: [PATCH 02/28] Sparse file optimizations --- libs/lume/scripts/ghcr/push-ghcr.sh | 389 +++++- .../ImageContainerRegistry.swift | 1238 ++++++++++++++--- libs/lume/src/Errors/Errors.swift | 68 +- 3 files changed, 1445 insertions(+), 250 deletions(-) diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index fbe4ab7e..33874122 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -9,6 +9,8 @@ folder_path="" image_name="" image_versions="" chunk_size="500M" # Default chunk size for splitting large files +# Define the OCI media type for the compressed disk layer +oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format # Parse the command line arguments while [[ $# -gt 0 ]]; do @@ -41,6 +43,7 @@ while [[ $# -gt 0 ]]; do echo " --image-name : Name of the image to publish (required)" echo " --image-versions : Comma separated list of versions of the image to publish (required)" echo " --chunk-size : Size of chunks for large files (e.g., 500M, default: 500M)" + echo "Note: The script will automatically resume from the last attempt if available" exit 0 ;; *) @@ -69,7 +72,7 @@ if [[ ! -d "$folder_path" ]]; then fi # Check and install required tools -for tool in "oras" "split" "pv" "gzip"; do +for tool in "oras" "split" "pv" "jq"; do if ! command -v "$tool" &> /dev/null; then echo "$tool is not installed. Installing using Homebrew..." if ! command -v brew &> /dev/null; then @@ -80,80 +83,252 @@ for tool in "oras" "split" "pv" "gzip"; do fi done +# Check if Apple Archive is available +if ! 
command -v compression_tool &> /dev/null; then + echo "Error: Apple Archive (compression_tool) is required but not found" + echo "This script requires macOS with Apple Archive support" + exit 1 +fi + +echo "Apple Archive detected - will use for optimal sparse file handling" +compressed_ext=".aa" + # Authenticate with GitHub Container Registry echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin # Use the source folder path as the working directory and get its absolute path work_dir=$(cd "$folder_path" && pwd) -echo "Working directory (persistent cache): $work_dir" +echo "Working directory: $work_dir" -# Change to the working directory -cd "$work_dir" +# Function to find the most recent cache directory +find_latest_cache() { + local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1) + if [ -n "$latest_cache" ]; then + echo "$latest_cache" + else + echo "" + fi +} + +# Function to check if a cache directory is valid for resuming +is_valid_cache() { + local cache_dir="$1" + # Check if it contains the necessary files + [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ + [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1 +} + +# Always try to find and use an existing cache +existing_cache=$(find_latest_cache) +if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then + cache_dir="$existing_cache" + + # Check if the cache contains old gzip format + if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then + echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format." + echo "Please delete the cache directory and start fresh: $cache_dir" + exit 1 + fi + + echo "Resuming from existing cache: $cache_dir" +else + echo "No valid cache found. Starting fresh." + cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)" + mkdir -p "$cache_dir" +fi + +echo "Using cache directory: $cache_dir" + +# Display space information +echo "=== DISK SPACE INFORMATION ===" +df -h "$cache_dir" | head -1 +df -h "$cache_dir" | grep -v "Filesystem" +echo + +# Change to the cache directory +cd "$cache_dir" files=() # Initialize files array here -# Copy config.json if it exists -if [ -f "$folder_path/config.json" ]; then - echo "Copying config.json..." - cp "$folder_path/config.json" config.json +# Function to check if a version was already pushed +version_pushed() { + local version="$1" + local version_file="$cache_dir/.pushed_$version" + [ -f "$version_file" ] +} + +# Function to mark a version as pushed +mark_version_pushed() { + local version="$1" + touch "$cache_dir/.pushed_$version" +} + +# Copy config.json if it exists and not already in cache +config_json_source="$folder_path/config.json" +config_json_dest="$cache_dir/config.json" +if [ -f "$config_json_source" ]; then + if [ ! -f "$config_json_dest" ]; then + echo "Copying config.json..." 
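# Illustrative sketch (not part of the patch): the hunk below records the logical
# (uncompressed) disk size as an OCI annotation in config.json so the pull side can
# truncate its sparse file to the right length. A minimal standalone version of the
# same jq merge, with placeholder paths and a defensive default for a missing
# "annotations" object, assuming jq 1.5+:
size=$(stat -f%z disk.img)   # logical size in bytes, not allocated blocks
jq --arg size "$size" \
   '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' \
   config.json > config.json.tmp && mv config.json.tmp config.json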
+ # Add the uncompressed disk size annotation if disk.img exists and jq is available + if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then + echo "Adding uncompressed disk size annotation: $original_disk_size bytes" + jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \ + (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails + else + cp "$config_json_source" "$config_json_dest" + fi + fi +fi +if [ -f "$config_json_dest" ]; then files+=("config.json:application/vnd.oci.image.config.v1+json") fi -# Copy nvram.bin if it exists -nvram_bin="$folder_path/nvram.bin" -if [ -f "$nvram_bin" ]; then +# Copy nvram.bin if it exists and not already in cache +if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then echo "Copying nvram.bin..." - cp "$nvram_bin" nvram.bin + cp "$folder_path/nvram.bin" nvram.bin +fi +if [ -f "$cache_dir/nvram.bin" ]; then files+=("nvram.bin:application/octet-stream") fi # Process disk.img if it exists -disk_img_orig="disk.img" # Already in work_dir +disk_img_orig="$folder_path/disk.img" +original_disk_size="" if [ -f "$disk_img_orig" ]; then - # --- Compression Step --- - echo "Compressing $disk_img_orig..." - compressed_ext=".gz" - compressor="gzip" - compress_opts="-k -f" + # Get original size *before* compression + original_disk_size=$(stat -f%z "$disk_img_orig") + + # Get real (non-sparse) size + real_size=$(du -k "$disk_img_orig" | cut -f1) + real_size_bytes=$((real_size * 1024)) + sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc) + echo "Disk image: $disk_img_orig" + echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))" + echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" + echo " Sparseness ratio: ${sparseness_ratio}:1" + + # Check if we already have compressed files in the cache compressed_disk_img="disk.img${compressed_ext}" - pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img" - compressed_size=$(stat -f%z "$compressed_disk_img") - echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" - # --- End Compression Step --- - - # Check if splitting is needed based on *compressed* size - if [ $compressed_size -gt 524288000 ]; then # 500MB threshold - echo "Splitting compressed file: $compressed_disk_img" - split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part." 
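# Illustrative sketch (not part of the patch): the sparseness ratio computed above is
# simply "logical size / allocated size". Both numbers can be read directly for any
# file on APFS; the file name below is a placeholder:
logical=$(stat -f%z disk.img)                  # apparent size in bytes
allocated=$(( $(stat -f%b disk.img) * 512 ))   # allocated 512-byte blocks
echo "logical=$logical allocated=$allocated"
du -h disk.img    # human-readable allocated size
ls -lh disk.img   # human-readable logical size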
- # Keep the compressed file and parts in work_dir - - # --- Adjust part processing --- - parts_files=() - total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ') - part_num=0 - for part in "$compressed_disk_img.part."*; do - part_num=$((part_num + 1)) - # *** IMPORTANT: Use the *compressed* OCI media type with part info *** - parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts") - echo "Part $part: $(du -h "$part" | cut -f1)" - done - # Combine non-disk files with disk parts - files+=("${parts_files[@]}") - # --- End Adjust part processing --- - - else - # Add the single compressed file to the list - # *** IMPORTANT: Use the *compressed* OCI media type *** - files+=("$compressed_disk_img:${oci_layer_media_type}") + already_compressed=false + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + already_compressed=true + echo "Using existing compressed file from cache: $compressed_disk_img" + elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then + already_compressed=true + echo "Using existing compressed parts from cache" fi - # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) --- + # Only compress if not already compressed in cache + if [ "$already_compressed" = false ]; then + # Check for free disk space before compression + avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}') + avail_space_bytes=$((avail_space * 1024)) + # Assume compressed size is roughly 30% of real size as a safe estimate + estimated_compressed=$((real_size_bytes * 30 / 100)) + + if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then + echo "WARNING: Possibly insufficient disk space for compression!" + echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB" + read -p "Continue anyway? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Exiting. Free up some space and try again." + exit 1 + fi + fi + + # --- Compression Step --- + echo "Compressing $disk_img_orig with Apple Archive..." + + # Apple Archive compression + echo "Starting compression with Apple Archive (showing output file growth)..." + compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse & + COMP_PID=$! + + sleep 1 # Give compression a moment to start + + # Display progress based on output file growth + while kill -0 $COMP_PID 2>/dev/null; do + if [ -f "$compressed_disk_img" ]; then + current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0) + percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc) + echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r" + else + echo -ne "Preparing compression...\r" + fi + sleep 2 + done + + wait $COMP_PID + echo -e "\nCompression complete!" + + compressed_size=$(stat -f%z "$compressed_disk_img") + echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" + echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%" + # --- End Compression Step --- + + # Check if splitting is needed based on *compressed* size + if [ $compressed_size -gt 524288000 ]; then # 500MB threshold + echo "Splitting compressed file into chunks of $chunk_size..." + pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part." + rm -f "$compressed_disk_img" # Remove the unsplit compressed file + # Verify that parts were created + echo "Verifying split parts..." 
+ ls -la "$cache_dir"/disk.img${compressed_ext}.part.* + fi + else + echo "Using existing compressed/split files from cache" + fi + + # --- Adjust part processing --- + echo "Looking for compressed files in $cache_dir..." + + # List all files in the cache directory for debugging + ls -la "$cache_dir" + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + echo "Found single compressed file: $compressed_disk_img" + # Add the single compressed file to the list + files+=("$compressed_disk_img:${oci_layer_media_type}") + else + # Look for split parts + part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo "")) + if [ ${#part_files[@]} -gt 0 ]; then + echo "Found ${#part_files[@]} split parts" + parts_files=() + part_num=0 + + for part in "${part_files[@]}"; do + part_num=$((part_num + 1)) + part_basename=$(basename "$part") + parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}") + echo "Part $part_num: $(du -h "$part" | cut -f1)" + done + + files+=("${parts_files[@]}") + else + echo "ERROR: No compressed files found in cache directory: $cache_dir" + echo "Contents of cache directory:" + find "$cache_dir" -type f | sort + exit 1 + fi + fi + + # --- Push Logic --- push_pids=() IFS=',' read -ra versions <<< "$image_versions" for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting + # Trim whitespace if any from version splitting version=$(echo "$version" | xargs) if [[ -z "$version" ]]; then continue; fi + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + echo "Pushing version $version..." ( # Use process substitution to feed file list safely if it gets long @@ -161,6 +336,7 @@ if [ -f "$disk_img_orig" ]; then "ghcr.io/$organization/$image_name:$version" \ "${files[@]}" echo "Completed push for version $version" + mark_version_pushed "$version" ) & push_pids+=($!) done @@ -170,37 +346,108 @@ if [ -f "$disk_img_orig" ]; then wait "$pid" done - # --- Cleanup compressed files after successful push --- - echo "Push successful, cleaning up compressed artifacts..." - # Check if parts exist first - parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null) - if [ -n "$parts_exist" ]; then - echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img" - rm -f "$compressed_disk_img.part."* - # Also remove the original compressed file that was split - rm -f "$compressed_disk_img" - elif [ -f "$compressed_disk_img" ]; then - echo "Removing compressed file: $compressed_disk_img" - rm -f "$compressed_disk_img" + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" fi - # --- End Push Logic --- else echo "Warning: $disk_img_orig not found." # Push only config/nvram if they exist if [ ${#files[@]} -gt 0 ]; then - # (Add push logic here too if you want to push even without disk.img) - echo "Pushing non-disk files..." - # ... (similar push loop as above) ... 
+ echo "Pushing non-disk files..." + push_pids=() + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + + echo "Pushing version $version (config/nvram only)..." + ( + oras push --disable-path-validation \ + "ghcr.io/$organization/$image_name:$version" \ + "${files[@]}" + echo "Completed push for version $version" + mark_version_pushed "$version" + ) & + push_pids+=($!) + done + + # Wait for all pushes to complete + for pid in "${push_pids[@]}"; do + wait "$pid" + done + + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All non-disk versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" + fi else echo "No files found to push." + cd "$work_dir" + rm -rf "$cache_dir" exit 1 fi fi -for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo "Upload complete: ghcr.io/$organization/$image_name:$version" -done +# Determine final status based on the success check *before* potential cleanup +echo # Add a newline for better readability +if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + done +else + echo "Final upload status:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + # Check the marker file only if the overall process failed (cache preserved) + if version_pushed "$version"; then + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + else + echo " Upload failed: ghcr.io/$organization/$image_name:$version" + fi + done + # Exit with error code if any version failed + exit 1 +fi diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9ef32355..ac7453ca 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -3,6 +3,18 @@ import Darwin import Foundation import Swift +// Define struct to decode relevant parts of config.json +struct OCIConfig: Codable { + struct Annotations: Codable { + let uncompressedSize: String? // Use optional String + + enum CodingKeys: String, CodingKey { + case uncompressedSize = "com.trycua.lume.disk.uncompressed_size" + } + } + let annotations: Annotations? 
// Optional annotations +} + struct Layer: Codable, Equatable { let mediaType: String let digest: String @@ -178,7 +190,7 @@ actor ProgressTracker { fflush(stdout) } - private func createProgressBar(progress: Double, width: Int = 20) -> String { + private func createProgressBar(progress: Double, width: Int = 30) -> String { let completedWidth = Int(progress * Double(width)) let remainingWidth = width - completedWidth @@ -279,6 +291,17 @@ class ImageContainerRegistry: @unchecked Sendable { private var activeDownloads: [String] = [] private let cachingEnabled: Bool + // Add the createProgressBar function here as a private method + private func createProgressBar(progress: Double, width: Int = 30) -> String { + let completedWidth = Int(progress * Double(width)) + let remainingWidth = width - completedWidth + + let completed = String(repeating: "█", count: completedWidth) + let remaining = String(repeating: "░", count: remainingWidth) + + return "[\(completed)\(remaining)]" + } + init(registry: String, organization: String) { self.registry = registry self.organization = organization @@ -716,7 +739,8 @@ class ImageContainerRegistry: @unchecked Sendable { let outputURL: URL switch mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") @@ -787,33 +811,127 @@ class ImageContainerRegistry: @unchecked Sendable { let stats = await progress.getDownloadStats() Logger.info(stats.formattedSummary()) + // Parse config.json to get uncompressed size *before* reassembly + let configURL = tempDownloadDir.appendingPathComponent("config.json") + let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) + + // Now also try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { + do { + let configData = try Data(contentsOf: configURL) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info("Found diskSize from VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse VM config.json for diskSize: \(error)") + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + // Handle disk parts if present if !diskParts.isEmpty { Logger.info("Reassembling disk image using sparse file technique...") let outputURL = tempVMDir.appendingPathComponent("disk.img") - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true + ) + + // 2. 
Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) + else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: error) } - // Calculate expected size from the manifest layers - let expectedTotalSize = UInt64( + // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) + let expectedCompressedTotalSize = UInt64( manifest.layers.filter { extractPartInfo(from: $0.mediaType) != nil }.reduce(0) { $0 + $1.size } ) Logger.info( - "Expected download size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be significantly lower)" + "Total compressed parts size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedCompressedTotalSize), countStyle: .file))" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) + // Calculate fallback size (sum of compressed parts) + let _: UInt64 = diskParts.reduce(UInt64(0)) { + (acc: UInt64, element) -> UInt64 in + let fileSize = + (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] + as? UInt64 ?? 0) ?? 0 + return acc + fileSize + } + + // Use: annotation size > VM config diskSize > fallback size + let sizeForTruncate: UInt64 + if let size = uncompressedSize { + Logger.info("Using uncompressed size from annotation: \(size) bytes") + sizeForTruncate = size + } else if let size = vmConfigDiskSize { + Logger.info("Using diskSize from VM config: \(size) bytes") + sizeForTruncate = size + } else { + Logger.error( + "Missing both uncompressed size annotation and VM config diskSize for multi-part image." + ) + throw PullError.missingUncompressedSizeAnnotation + } + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) + try outputHandle.truncate(atOffset: sizeForTruncate) + + // Verify the sparse file was created with the correct size + let initialSize = + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Sparse file initialized with size: \(ByteCountFormatter.string(fromByteCount: Int64(initialSize), countStyle: .file))" + ) + + // Add a simple test pattern at the beginning and end of the file to verify it's writable + try outputHandle.seek(toOffset: 0) + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.write(contentsOf: testPattern) + + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + Logger.info("Test patterns written to sparse file. 
File is ready for writing.") var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -849,69 +967,252 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info( + "Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", partURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists( + atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem( + atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = Double(currentOffset + partDecompressedSize) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling/Decompressing") - } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size - - } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") - let partSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 0 - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? 
Data() - } - if data.isEmpty { break } - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + / Double(expectedCompressedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } + + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size + } else { + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + // Get part size + let partSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -922,23 +1223,99 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) ?? 
0 Logger.info( - "Final disk image size (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) - Logger.info( - "Note: Actual disk usage will be much lower due to macOS sparse file system") - if finalSize != expectedTotalSize { + if finalSize != sizeForTruncate { Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(expectedTotalSize) bytes), but this doesn't affect functionality" + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" ) } - Logger.info("Disk image reassembled successfully using sparse file technique") + // Decompress the assembled disk image if it's in LZFSE compressed format + Logger.info( + "Checking if disk image is LZFSE compressed and decompressing if needed...") + decompressLZFSEImage(inputPath: outputURL.path) + + // Create a properly formatted disk image + Logger.info("Converting assembled data to proper disk image format...") + + // Get actual disk usage of the assembled file + let assembledUsage = getActualDiskUsage(path: outputURL.path) + let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer + let requiredSpace = assembledUsage + bufferBytes + + // Check available disk space in the destination directory + let fileManager = FileManager.default + let availableSpace = + try? fileManager.attributesOfFileSystem( + forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] + as? UInt64 + + if let available = availableSpace, available < requiredSpace { + Logger.error( + "Insufficient disk space to convert disk image format. Skipping conversion.", + metadata: [ + "available": ByteCountFormatter.string( + fromByteCount: Int64(available), countStyle: .file), + "required": ByteCountFormatter.string( + fromByteCount: Int64(requiredSpace), countStyle: .file), + ] + ) + } else { + // Prioritize SPARSE format for better sparse file handling + Logger.info("Attempting conversion to SPARSE format...") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + process.arguments = [ + "convert", + outputURL.path, // Source: our assembled file + "-format", "SPARSE", // Format: SPARSE (best for sparse images) + "-o", outputURL.path, // Output: overwrite with converted image + ] + + let errorPipe = Pipe() + process.standardError = errorPipe + process.standardOutput = errorPipe + + try process.run() + process.waitUntilExit() + + // Check for errors + let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !outputData.isEmpty, + let outputString = String(data: outputData, encoding: .utf8) + { + Logger.info("hdiutil output: \(outputString)") + } + + if process.terminationStatus == 0 { + // Find the potentially renamed formatted file + let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL + // If the output path is different, remove the original and move the new one + if formattedFile.path != outputURL.path { + try? FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: formattedFile, to: outputURL) + } + Logger.info("Successfully converted disk image to proper format (SPARSE)") + } else { + Logger.error( + "Failed to convert disk image to SPARSE format. VM might not start properly." 
+ ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } else { // Copy single disk image if it exists let diskURL = tempDownloadDir.appendingPathComponent("disk.img") @@ -996,9 +1373,9 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") + var diskPartSources: [(Int, URL)] = [] var totalParts = 0 - var expectedTotalSize: UInt64 = 0 // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1009,11 +1386,10 @@ class ImageContainerRegistry: @unchecked Sendable { totalParts = total // Just store the reference to source instead of copying diskPartSources.append((partNum, cachedLayer)) - expectedTotalSize += UInt64(layer.size) } else { let fileName: String switch layer.mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", "application/octet-stream+gzip": fileName = "disk.img" case "application/vnd.oci.image.config.v1+json": fileName = "config.json" @@ -1032,14 +1408,76 @@ class ImageContainerRegistry: @unchecked Sendable { // Reassemble disk parts if needed if !diskPartSources.isEmpty { + // Get the uncompressed size from cached config + let configDigest = manifest.config?.digest + let cachedConfigPath = + configDigest != nil + ? getCachedLayerPath(manifestId: manifestId, digest: configDigest!) : nil + let uncompressedSize = cachedConfigPath.flatMap { + getUncompressedSizeFromConfig(configPath: $0) + } + + // Try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil { + // Find config.json in the copied files + let vmConfigPath = destination.appendingPathComponent("config.json") + if FileManager.default.fileExists(atPath: vmConfigPath.path) { + do { + let configData = try Data(contentsOf: vmConfigPath) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info( + "Found diskSize from cached VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse cached VM config.json for diskSize: \(error)") + } + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + Logger.info( "Reassembling disk image from cached parts using sparse file technique..." ) let outputURL = destination.appendingPathComponent("disk.img") - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + + // 2. Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. 
Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) } // Calculate expected total size from the cached files @@ -1054,13 +1492,6 @@ class ImageContainerRegistry: @unchecked Sendable { "Expected download size from cache: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be lower)" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) - defer { try? outputHandle.close() } - - // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -1090,68 +1521,245 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info("Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", sourceURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? 
inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists(atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem(atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = - Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize) + Double(currentOffset + partDecompressedSize) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + // Get part size let partSize = (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] as? UInt64) ?? 
0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { let data = autoreleasepool { try! inputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } + try outputHandle.write(contentsOf: data) bytesWritten += UInt64(data.count) + chunkCount += 1 // Update progress let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + Double(currentOffset + bytesWritten) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, - context: "Reassembling") + context: "Direct copying") } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -1162,10 +1770,13 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) - ?? 0 + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 Logger.info( "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) @@ -1176,8 +1787,79 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - Logger.info( - "Disk image reassembled successfully from cache using sparse file technique") + // Decompress the assembled disk image if it's in LZFSE compressed format + Logger.info("Checking if disk image is LZFSE compressed and decompressing if needed...") + decompressLZFSEImage(inputPath: outputURL.path) + + // Create a properly formatted disk image + Logger.info("Converting assembled data to proper disk image format...") + + // Get actual disk usage of the assembled file + let assembledUsage = getActualDiskUsage(path: outputURL.path) + let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer + let requiredSpace = assembledUsage + bufferBytes + + // Check available disk space in the destination directory + let fileManager = FileManager.default + let availableSpace = + try? fileManager.attributesOfFileSystem( + forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] as? UInt64 + + if let available = availableSpace, available < requiredSpace { + Logger.error( + "Insufficient disk space to convert disk image format. 
Skipping conversion.", + metadata: [ + "available": ByteCountFormatter.string( + fromByteCount: Int64(available), countStyle: .file), + "required": ByteCountFormatter.string( + fromByteCount: Int64(requiredSpace), countStyle: .file), + ] + ) + } else { + // Prioritize SPARSE format for better sparse file handling + Logger.info("Attempting conversion to SPARSE format...") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + process.arguments = [ + "convert", + outputURL.path, // Source: our assembled file + "-format", "SPARSE", // Format: SPARSE (best for sparse images) + "-o", outputURL.path, // Output: overwrite with converted image + ] + + let errorPipe = Pipe() + process.standardError = errorPipe + process.standardOutput = errorPipe + + try process.run() + process.waitUntilExit() + + // Check for errors + let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !outputData.isEmpty, let outputString = String(data: outputData, encoding: .utf8) + { + Logger.info("hdiutil output: \(outputString)") + } + + if process.terminationStatus == 0 { + // Find the potentially renamed formatted file + let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL + // If the output path is different, remove the original and move the new one + if formattedFile.path != outputURL.path { + try? FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: formattedFile, to: outputURL) + } + Logger.info("Successfully converted disk image to proper format (SPARSE)") + } else { + Logger.error( + "Failed to convert disk image to SPARSE format. VM might not start properly." + ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } Logger.info("Cache copy complete") @@ -1307,70 +1989,6 @@ class ImageContainerRegistry: @unchecked Sendable { throw lastError ?? PullError.layerDownloadFailed(digest) } - private func decompressGzipFile(at source: URL, to destination: URL) throws { - Logger.info("Decompressing \(source.lastPathComponent)...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/gunzip") - process.arguments = ["-c"] - - let inputPipe = Pipe() - let outputPipe = Pipe() - process.standardInput = inputPipe - process.standardOutput = outputPipe - - try process.run() - - // Read and pipe the gzipped file in chunks to avoid memory issues - let inputHandle = try FileHandle(forReadingFrom: source) - let outputHandle = try FileHandle(forWritingTo: destination) - defer { - try? inputHandle.close() - try? 
outputHandle.close() - } - - // Create the output file - FileManager.default.createFile(atPath: destination.path, contents: nil) - - // Process with optimal chunk size - let chunkSize = getOptimalChunkSize() - while let chunk = try inputHandle.read(upToCount: chunkSize) { - try autoreleasepool { - try inputPipe.fileHandleForWriting.write(contentsOf: chunk) - - // Read and write output in chunks as well - while let decompressedChunk = try outputPipe.fileHandleForReading.read( - upToCount: chunkSize) - { - try outputHandle.write(contentsOf: decompressedChunk) - } - } - } - - try inputPipe.fileHandleForWriting.close() - - // Read any remaining output - while let decompressedChunk = try outputPipe.fileHandleForReading.read(upToCount: chunkSize) - { - try autoreleasepool { - try outputHandle.write(contentsOf: decompressedChunk) - } - } - - process.waitUntilExit() - - if process.terminationStatus != 0 { - throw PullError.decompressionFailed(source.lastPathComponent) - } - - // Verify the decompressed size - let decompressedSize = - try FileManager.default.attributesOfItem(atPath: destination.path)[.size] as? UInt64 - ?? 0 - Logger.info( - "Decompressed size: \(ByteCountFormatter.string(fromByteCount: Int64(decompressedSize), countStyle: .file))" - ) - } - private func extractPartInfo(from mediaType: String) -> (partNum: Int, total: Int)? { let pattern = #"part\.number=(\d+);part\.total=(\d+)"# guard let regex = try? NSRegularExpression(pattern: pattern), @@ -1699,19 +2317,47 @@ class ImageContainerRegistry: @unchecked Sendable { // Add helper to check media type and get decompress command private func getDecompressionCommand(for mediaType: String) -> String? { - if mediaType.hasSuffix("+gzip") { - return "/usr/bin/gunzip -c" // -c writes to stdout - } else if mediaType.hasSuffix("+zstd") { - // Check if zstd exists, otherwise handle error? - // Assuming brew install zstd -> /opt/homebrew/bin/zstd or /usr/local/bin/zstd - let zstdPath = findExecutablePath(named: "zstd") ?? 
"/usr/local/bin/zstd" - return "\(zstdPath) -dc" // -d decompress, -c stdout + // Determine appropriate decompression command based on layer media type + Logger.info("Determining decompression command for media type: \(mediaType)") + + // For the specific format that appears in our GHCR repository, skip decompression attempts + // These files are labeled +lzfse but aren't actually in Apple Archive format + if mediaType.contains("+lzfse;part.number=") { + Logger.info("Detected LZFSE part file, using direct copy instead of decompression") + return nil + } + + // Check for LZFSE or Apple Archive format anywhere in the media type string + // The format may include part information like: application/octet-stream+lzfse;part.number=1;part.total=38 + if mediaType.contains("+lzfse") || mediaType.contains("+aa") { + // Apple Archive format requires special handling + if let aaPath = findExecutablePath(for: "aa") { + Logger.info("Found Apple Archive tool at: \(aaPath)") + return "apple_archive:\(aaPath)" + } else { + Logger.error( + "Apple Archive tool (aa) not found in PATH, falling back to default path") + + // Check if the default path exists + let defaultPath = "/usr/bin/aa" + if FileManager.default.isExecutableFile(atPath: defaultPath) { + Logger.info("Default Apple Archive tool exists at: \(defaultPath)") + } else { + Logger.error("Default Apple Archive tool not found at: \(defaultPath)") + } + + return "apple_archive:/usr/bin/aa" + } + } else { + Logger.info( + "Unsupported media type: \(mediaType) - only Apple Archive (+lzfse/+aa) is supported" + ) + return nil } - return nil // Not compressed or unknown compression } // Helper to find executables (optional, or hardcode paths) - private func findExecutablePath(named executableName: String) -> String? { + private func findExecutablePath(for executableName: String) -> String? { let pathEnv = ProcessInfo.processInfo.environment["PATH"] ?? "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin" @@ -1726,4 +2372,250 @@ class ImageContainerRegistry: @unchecked Sendable { } return nil } + + // Helper function to extract uncompressed disk size from config.json + private func getUncompressedSizeFromConfig(configPath: URL) -> UInt64? { + guard FileManager.default.fileExists(atPath: configPath.path) else { + Logger.info("Config file not found: \(configPath.path)") + return nil + } + + do { + let configData = try Data(contentsOf: configPath) + let decoder = JSONDecoder() + let ociConfig = try decoder.decode(OCIConfig.self, from: configData) + + if let sizeString = ociConfig.annotations?.uncompressedSize, + let size = UInt64(sizeString) + { + Logger.info("Found uncompressed disk size annotation: \(size) bytes") + return size + } else { + Logger.info("No uncompressed disk size annotation found in config.json") + return nil + } + } catch { + Logger.error("Failed to parse config.json for uncompressed size: \(error)") + return nil + } + } + + // Helper function to find formatted file with potential extensions + private func findFormattedFile(tempFormatted: URL) -> URL? 
{ + // Check for the exact path first + if FileManager.default.fileExists(atPath: tempFormatted.path) { + return tempFormatted + } + + // Check with .dmg extension + let dmgPath = tempFormatted.path + ".dmg" + if FileManager.default.fileExists(atPath: dmgPath) { + return URL(fileURLWithPath: dmgPath) + } + + // Check with .sparseimage extension + let sparsePath = tempFormatted.path + ".sparseimage" + if FileManager.default.fileExists(atPath: sparsePath) { + return URL(fileURLWithPath: sparsePath) + } + + // Try to find any file with the same basename + do { + let files = try FileManager.default.contentsOfDirectory( + at: tempFormatted.deletingLastPathComponent(), + includingPropertiesForKeys: nil) + if let matchingFile = files.first(where: { + $0.lastPathComponent.starts(with: tempFormatted.lastPathComponent) + }) { + return matchingFile + } + } catch { + Logger.error("Failed to list directory contents: \(error)") + } + + return nil + } + + // Helper function to decompress LZFSE compressed disk image + @discardableResult + private func decompressLZFSEImage(inputPath: String, outputPath: String? = nil) -> Bool { + Logger.info("Attempting to decompress LZFSE compressed disk image using sparse pipe...") + + let finalOutputPath = outputPath ?? inputPath // If outputPath is nil, we'll overwrite input + let tempFinalPath = finalOutputPath + ".ddsparse.tmp" // Temporary name during dd operation + + // Ensure the temporary file doesn't exist from a previous failed run + try? FileManager.default.removeItem(atPath: tempFinalPath) + + // Process 1: compression_tool + let process1 = Process() + process1.executableURL = URL(fileURLWithPath: "/usr/bin/compression_tool") + process1.arguments = [ + "-decode", + "-i", inputPath, + "-o", "/dev/stdout", // Write to standard output + ] + + // Process 2: dd + let process2 = Process() + process2.executableURL = URL(fileURLWithPath: "/bin/dd") + process2.arguments = [ + "if=/dev/stdin", // Read from standard input + "of=\(tempFinalPath)", // Write to the temporary final path + "conv=sparse", // Use sparse conversion + "bs=1m", // Use a reasonable block size (e.g., 1MB) + ] + + // Create pipes + let pipe = Pipe() // Connects process1 stdout to process2 stdin + let errorPipe1 = Pipe() + let errorPipe2 = Pipe() + + process1.standardOutput = pipe + process1.standardError = errorPipe1 + + process2.standardInput = pipe + process2.standardError = errorPipe2 + + do { + Logger.info("Starting decompression pipe: compression_tool | dd conv=sparse...") + // Start processes + try process1.run() + try process2.run() + + // Close the write end of the pipe for process2 to prevent hanging + // This might not be strictly necessary if process1 exits cleanly, but safer. + // Note: Accessing fileHandleForWriting after run can be tricky. + // We rely on process1 exiting to signal EOF to process2. 
+ + process1.waitUntilExit() + process2.waitUntilExit() // Wait for dd to finish processing the stream + + // --- Check for errors --- + let errorData1 = errorPipe1.fileHandleForReading.readDataToEndOfFile() + if !errorData1.isEmpty, + let errorString = String(data: errorData1, encoding: .utf8)?.trimmingCharacters( + in: .whitespacesAndNewlines), !errorString.isEmpty + { + Logger.error("compression_tool stderr: \(errorString)") + } + let errorData2 = errorPipe2.fileHandleForReading.readDataToEndOfFile() + if !errorData2.isEmpty, + let errorString = String(data: errorData2, encoding: .utf8)?.trimmingCharacters( + in: .whitespacesAndNewlines), !errorString.isEmpty + { + // dd often reports blocks in/out to stderr, filter that if needed, but log for now + Logger.info("dd stderr: \(errorString)") + } + + // Check termination statuses + let status1 = process1.terminationStatus + let status2 = process2.terminationStatus + + if status1 != 0 || status2 != 0 { + Logger.error( + "Pipe command failed. compression_tool status: \(status1), dd status: \(status2)" + ) + try? FileManager.default.removeItem(atPath: tempFinalPath) // Clean up failed attempt + return false + } + + // --- Validation --- + if FileManager.default.fileExists(atPath: tempFinalPath) { + let fileSize = + (try? FileManager.default.attributesOfItem(atPath: tempFinalPath)[.size] + as? UInt64) ?? 0 + let actualUsage = getActualDiskUsage(path: tempFinalPath) + Logger.info( + "Piped decompression successful - Allocated: \(ByteCountFormatter.string(fromByteCount: Int64(fileSize), countStyle: .file)), Actual Usage: \(ByteCountFormatter.string(fromByteCount: Int64(actualUsage), countStyle: .file))" + ) + + // Basic header validation + var isValid = false + if let fileHandle = FileHandle(forReadingAtPath: tempFinalPath) { + if let data = try? fileHandle.read(upToCount: 512), data.count >= 512, + data[510] == 0x55 && data[511] == 0xAA + { + isValid = true + } + // Ensure handle is closed regardless of validation outcome + try? fileHandle.close() + } else { + Logger.error( + "Validation Error: Could not open decompressed file handle for reading.") + } + + if isValid { + Logger.info("Decompressed file appears to be a valid disk image.") + + // Move the final file into place + // If outputPath was nil, we need to replace the original inputPath + if outputPath == nil { + // Backup original only if it's different from the temp path + if inputPath != tempFinalPath { + try? FileManager.default.copyItem( + at: URL(fileURLWithPath: inputPath), + to: URL(fileURLWithPath: inputPath + ".compressed.bak")) + try? FileManager.default.removeItem(at: URL(fileURLWithPath: inputPath)) + } + try FileManager.default.moveItem( + at: URL(fileURLWithPath: tempFinalPath), + to: URL(fileURLWithPath: inputPath)) + Logger.info("Replaced original file with sparsely decompressed version.") + } else { + // If outputPath was specified, move it there (overwrite if needed) + try? FileManager.default.removeItem( + at: URL(fileURLWithPath: finalOutputPath)) // Remove existing if overwriting + try FileManager.default.moveItem( + at: URL(fileURLWithPath: tempFinalPath), + to: URL(fileURLWithPath: finalOutputPath)) + Logger.info("Moved sparsely decompressed file to: \(finalOutputPath)") + } + return true + } else { + Logger.error( + "Validation failed: Decompressed file header is invalid or file couldn't be read. Cleaning up." + ) + try? 
FileManager.default.removeItem(atPath: tempFinalPath) + return false + } + } else { + Logger.error( + "Piped decompression failed: Output file '\(tempFinalPath)' not found after dd completed." + ) + return false + } + + } catch { + Logger.error("Error running decompression pipe command: \(error)") + try? FileManager.default.removeItem(atPath: tempFinalPath) // Clean up on error + return false + } + } + + // Helper function to get actual disk usage of a file + private func getActualDiskUsage(path: String) -> UInt64 { + let task = Process() + task.executableURL = URL(fileURLWithPath: "/usr/bin/du") + task.arguments = ["-k", path] // -k for 1024-byte blocks + + let pipe = Pipe() + task.standardOutput = pipe + + do { + try task.run() + task.waitUntilExit() + + let data = pipe.fileHandleForReading.readDataToEndOfFile() + if let output = String(data: data, encoding: .utf8), + let size = UInt64(output.split(separator: "\t").first ?? "0") + { + return size * 1024 // Convert from KB to bytes + } + } catch { + Logger.error("Failed to get actual disk usage: \(error)") + } + + return 0 + } } diff --git a/libs/lume/src/Errors/Errors.swift b/libs/lume/src/Errors/Errors.swift index 91b04950..b6568c10 100644 --- a/libs/lume/src/Errors/Errors.swift +++ b/libs/lume/src/Errors/Errors.swift @@ -5,6 +5,15 @@ enum HomeError: Error, LocalizedError { case directoryAccessDenied(path: String) case invalidHomeDirectory case directoryAlreadyExists(path: String) + case homeNotFound + case defaultStorageNotDefined + case storageLocationNotFound(String) + case storageLocationNotADirectory(String) + case storageLocationNotWritable(String) + case invalidStorageLocation(String) + case cannotCreateDirectory(String) + case cannotGetVMsDirectory + case vmDirectoryNotFound(String) var errorDescription: String? { switch self { @@ -16,6 +25,24 @@ enum HomeError: Error, LocalizedError { return "Invalid home directory configuration" case .directoryAlreadyExists(let path): return "Directory already exists at path: \(path)" + case .homeNotFound: + return "Home directory not found." + case .defaultStorageNotDefined: + return "Default storage location is not defined." + case .storageLocationNotFound(let path): + return "Storage location not found: \(path)" + case .storageLocationNotADirectory(let path): + return "Storage location is not a directory: \(path)" + case .storageLocationNotWritable(let path): + return "Storage location is not writable: \(path)" + case .invalidStorageLocation(let path): + return "Invalid storage location specified: \(path)" + case .cannotCreateDirectory(let path): + return "Cannot create directory: \(path)" + case .cannotGetVMsDirectory: + return "Cannot determine the VMs directory." + case .vmDirectoryNotFound(let path): + return "VM directory not found: \(path)" } } } @@ -28,23 +55,32 @@ enum PullError: Error, LocalizedError { case missingPart(Int) case decompressionFailed(String) case reassemblyFailed(String) + case fileCreationFailed(String) + case reassemblySetupFailed(path: String, underlyingError: Error) + case missingUncompressedSizeAnnotation var errorDescription: String? { switch self { case .invalidImageFormat: return "Invalid image format. Expected format: name:tag" case .tokenFetchFailed: - return "Failed to obtain authentication token" + return "Failed to fetch authentication token from registry." case .manifestFetchFailed: - return "Failed to fetch manifest" + return "Failed to fetch image manifest from registry." 
case .layerDownloadFailed(let digest): return "Failed to download layer: \(digest)" - case .missingPart(let number): - return "Missing disk image part \(number)" - case .decompressionFailed(let filename): - return "Failed to decompress file: \(filename)" + case .missingPart(let partNum): + return "Missing required part number \(partNum) for reassembly." + case .decompressionFailed(let file): + return "Failed to decompress file: \(file)" case .reassemblyFailed(let reason): return "Disk image reassembly failed: \(reason)." + case .fileCreationFailed(let path): + return "Failed to create the necessary file at path: \(path)" + case .reassemblySetupFailed(let path, let underlyingError): + return "Failed to set up for reassembly at path: \(path). Underlying error: \(underlyingError.localizedDescription)" + case .missingUncompressedSizeAnnotation: + return "Could not find the required uncompressed disk size annotation in the image config.json." } } } @@ -165,4 +201,24 @@ enum VMError: Error, LocalizedError { return "Invalid display resolution: \(resolution)" } } +} + +enum ResticError: Error { + case snapshotFailed(String) + case restoreFailed(String) + case genericError(String) +} + +enum VmrunError: Error, LocalizedError { + case commandNotFound + case operationFailed(command: String, output: String?) + + var errorDescription: String? { + switch self { + case .commandNotFound: + return "vmrun command not found. Ensure VMware Fusion is installed and in the system PATH." + case .operationFailed(let command, let output): + return "vmrun command '\(command)' failed. Output: \(output ?? "No output")" + } + } } \ No newline at end of file From 54c5ae2bd8204879b63a01868a7076ce3585aa83 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 22:15:42 -0700 Subject: [PATCH 03/28] Prevent sparse disk expand --- libs/lume/.cursorignore | 233 +++++ libs/lume/README.md | 26 +- libs/lume/docs/API-Reference.md | 36 + libs/lume/scripts/ghcr/push-ghcr.sh | 550 ++++++++--- libs/lume/src/Commands/Push.swift | 74 ++ .../ImageContainerRegistry.swift | 905 +++++++++++++++++- libs/lume/src/LumeController.swift | 98 ++ libs/lume/src/Server/Handlers.swift | 48 + libs/lume/src/Server/Requests.swift | 28 + libs/lume/src/Server/Responses.swift | 13 + libs/lume/src/Server/Server.swift | 6 + libs/lume/src/Utils/CommandRegistry.swift | 1 + 12 files changed, 1864 insertions(+), 154 deletions(-) create mode 100644 libs/lume/.cursorignore create mode 100644 libs/lume/src/Commands/Push.swift diff --git a/libs/lume/.cursorignore b/libs/lume/.cursorignore new file mode 100644 index 00000000..12e8e403 --- /dev/null +++ b/libs/lume/.cursorignore @@ -0,0 +1,233 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +!libs/lume/scripts/build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Scripts +server/scripts/ + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Conda +.conda/ + +# Local environment +.env.local + +# macOS DS_Store +.DS_Store + +weights/ +weights/icon_detect/ +weights/icon_detect/model.pt +weights/icon_detect/model.pt.zip +weights/icon_detect/model.pt.zip.part* + +libs/omniparser/weights/icon_detect/model.pt + +# Example test data and output +examples/test_data/ +examples/output/ + +/screenshots/ + +/experiments/ + +/logs/ + +# Xcode +# +# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +## User settings +xcuserdata/ + +## Obj-C/Swift specific +*.hmap + +## App packaging +*.ipa +*.dSYM.zip +*.dSYM + +## Playgrounds +timeline.xctimeline +playground.xcworkspace + +# Swift Package Manager +# +# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. +# Packages/ +# Package.pins +# Package.resolved +# *.xcodeproj +# +# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata +# hence it is not needed unless you have added a package configuration file to your project +.swiftpm/ +.build/ + +# CocoaPods +# +# We recommend against adding the Pods directory to your .gitignore. However +# you should judge for yourself, the pros and cons are mentioned at: +# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control +# +# Pods/ +# +# Add this line if you want to avoid checking in source code from the Xcode workspace +# *.xcworkspace + +# Carthage +# +# Add this line if you want to avoid checking in source code from Carthage dependencies. +# Carthage/Checkouts +Carthage/Build/ + +# fastlane +# +# It is recommended to not store the screenshots in the git repo. +# Instead, use fastlane to re-generate the screenshots whenever they are needed. 
+# For more information about the recommended setup visit: +# https://docs.fastlane.tools/best-practices/source-control/#source-control +fastlane/report.xml +fastlane/Preview.html +fastlane/screenshots/**/*.png +fastlane/test_output + +# Ignore folder +ignore + +# .release +.release/ \ No newline at end of file diff --git a/libs/lume/README.md b/libs/lume/README.md index a0da2f49..5d303d2c 100644 --- a/libs/lume/README.md +++ b/libs/lume/README.md @@ -52,6 +52,7 @@ Commands: lume stop Stop a running VM lume delete Delete a VM lume pull Pull a macOS image from container registry + lume push Push a VM image to a container registry lume clone Clone an existing VM lume config Get or set lume configuration lume images List available macOS images in local cache @@ -99,6 +100,16 @@ Command Options: --organization Organization to pull from (default: trycua) --storage VM storage location to use + push: + --additional-tags Additional tags to push the same image to + --registry Container registry URL (default: ghcr.io) + --organization Organization/user to push to (default: trycua) + --storage VM storage location to use + --chunk-size-mb Chunk size for disk image upload in MB (default: 512) + --verbose Enable verbose logging + --dry-run Prepare files and show plan without uploading + --reassemble Verify integrity by reassembling chunks (requires --dry-run) + get: -f, --format Output format (json|text) --storage VM storage location to use @@ -141,18 +152,21 @@ You can also download the `lume.pkg.tar.gz` archive from the [latest release](ht ## Prebuilt Images Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages). + +**Important Note (v0.2.0+):** Images are being re-uploaded with sparse file system optimizations enabled, resulting in significantly lower actual disk usage. Older images (without the `-sparse` suffix) are now **deprecated**. The last version of `lume` fully supporting the non-sparse images was `v0.1.x`. Starting from `lume v0.2.0`, please use the images with the `-sparse` suffix. + These images come with an SSH server pre-configured and auto-login enabled. For the security of your VM, change the default password `lume` immediately after your first login. -| Image | Tag | Description | Size | +| Image | Tag | Description | Logical Size | |-------|------------|-------------|------| -| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB | -| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB | -| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB | -| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | +| `macos-sequoia-vanilla-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB | +| `macos-sequoia-xcode-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB | +| `macos-sequoia-cua-sparse` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB | +| `ubuntu-noble-vanilla-sparse` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | -For additional disk space, resize the VM disk after pulling the image using the `lume set --disk-size ` command. 
+For additional disk space, resize the VM disk after pulling the image using the `lume set --disk-size ` command. Note that the actual disk space used by sparse images will be much lower than the logical size listed. ## Local API Server diff --git a/libs/lume/docs/API-Reference.md b/libs/lume/docs/API-Reference.md index 67ed42a4..7ab9459b 100644 --- a/libs/lume/docs/API-Reference.md +++ b/libs/lume/docs/API-Reference.md @@ -193,6 +193,42 @@ curl --connect-timeout 6000 \ ``` +
+Push Image (Async) - POST /vms/push + +```bash +# Push VM 'my-local-vm' to 'my-org/my-image:latest' and 'my-org/my-image:v1' +curl --connect-timeout 6000 \ + --max-time 5000 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "name": "my-local-vm", + "imageName": "my-image", + "tags": ["latest", "v1"], + "organization": "my-org", + "registry": "ghcr.io", + "chunkSizeMb": 512, + "storage": null + }' \ + http://localhost:3000/lume/vms/push +``` + +**Response (202 Accepted):** + +```json +{ + "message": "Push initiated in background", + "name": "my-local-vm", + "imageName": "my-image", + "tags": [ + "latest", + "v1" + ] +} +``` +
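For clients that prefer code over curl, the request documented above maps onto a small amount of Swift. The sketch below is illustrative only: the `PushRequest` struct and `pushImage` function are hypothetical names, and it assumes the server is listening on the default `http://localhost:3000/lume` base path shown in the example.

```swift
import Foundation

// Hypothetical client-side model mirroring the JSON body documented above.
struct PushRequest: Codable {
    let name: String          // local VM name
    let imageName: String     // target image name in the registry
    let tags: [String]        // e.g. ["latest", "v1"]
    let organization: String  // e.g. "my-org"
    let registry: String      // e.g. "ghcr.io"
    let chunkSizeMb: Int      // disk image chunk size for upload
    let storage: String?      // optional VM storage location
}

// Minimal sketch: POST the request and expect 202 Accepted, since the push runs in the background.
func pushImage(_ request: PushRequest) async throws {
    let url = URL(string: "http://localhost:3000/lume/vms/push")!
    var urlRequest = URLRequest(url: url)
    urlRequest.httpMethod = "POST"
    urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type")
    urlRequest.httpBody = try JSONEncoder().encode(request)

    let (_, response) = try await URLSession.shared.data(for: urlRequest)
    guard let http = response as? HTTPURLResponse, http.statusCode == 202 else {
        throw URLError(.badServerResponse)
    }
    // The server replies immediately; the actual upload continues asynchronously.
}
```

Because the endpoint answers with 202 Accepted ("Push initiated in background"), a successful return here only means the push was started, not that the upload has completed.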
+
Clone VM - POST /vms/:name/clone diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index 33874122..d279be66 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -8,9 +8,11 @@ organization="" folder_path="" image_name="" image_versions="" -chunk_size="500M" # Default chunk size for splitting large files +chunk_size="512M" # Default chunk size for splitting large files +dry_run=true # Default: actually push to registry +reassemble=true # Default: don't reassemble in dry-run mode # Define the OCI media type for the compressed disk layer -oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format +oci_layer_media_type="application/octet-stream+lz4" # LZ4 compression format # Parse the command line arguments while [[ $# -gt 0 ]]; do @@ -35,6 +37,15 @@ while [[ $# -gt 0 ]]; do chunk_size="$2" shift 2 ;; + --dry-run) + dry_run=true + shift 1 + ;; + --reassemble) + reassemble=true + dry_run=true # Reassemble implies dry-run + shift 1 + ;; --help) echo "Usage: $0 [options]" echo "Options:" @@ -42,7 +53,9 @@ while [[ $# -gt 0 ]]; do echo " --folder-path : Path to the folder to upload (required)" echo " --image-name : Name of the image to publish (required)" echo " --image-versions : Comma separated list of versions of the image to publish (required)" - echo " --chunk-size : Size of chunks for large files (e.g., 500M, default: 500M)" + echo " --chunk-size : Size of chunks for large files (e.g., 512M, default: 512M)" + echo " --dry-run : Prepare files but don't upload to registry" + echo " --reassemble : In dry-run mode, also reassemble chunks to verify integrity" echo "Note: The script will automatically resume from the last attempt if available" exit 0 ;; @@ -54,15 +67,23 @@ while [[ $# -gt 0 ]]; do done # Ensure required arguments -if [[ -z "$organization" || -z "$folder_path" || -z "$image_name" || -z "$image_versions" ]]; then - echo "Error: Missing required arguments. Use --help for usage." +if [[ -z "$folder_path" ]]; then + echo "Error: Missing required folder-path argument. Use --help for usage." exit 1 fi -# Check if the GITHUB_TOKEN variable is set -if [[ -z "$GITHUB_TOKEN" ]]; then - echo "Error: GITHUB_TOKEN is not set." - exit 1 +# Only check organization and other push parameters if not in dry-run mode +if [[ "$dry_run" = false ]]; then + if [[ -z "$organization" || -z "$image_name" || -z "$image_versions" ]]; then + echo "Error: Missing required arguments for push. Use --help for usage." + exit 1 + fi + + # Check if the GITHUB_TOKEN variable is set + if [[ -z "$GITHUB_TOKEN" ]]; then + echo "Error: GITHUB_TOKEN is not set." + exit 1 + fi fi # Ensure the folder exists @@ -72,7 +93,7 @@ if [[ ! -d "$folder_path" ]]; then fi # Check and install required tools -for tool in "oras" "split" "pv" "jq"; do +for tool in "oras" "split" "pv" "jq" "lz4"; do if ! command -v "$tool" &> /dev/null; then echo "$tool is not installed. Installing using Homebrew..." if ! command -v brew &> /dev/null; then @@ -83,19 +104,14 @@ for tool in "oras" "split" "pv" "jq"; do fi done -# Check if Apple Archive is available -if ! 
command -v compression_tool &> /dev/null; then - echo "Error: Apple Archive (compression_tool) is required but not found" - echo "This script requires macOS with Apple Archive support" - exit 1 +echo "LZ4 detected - will use for efficient compression and decompression" +compressed_ext=".lz4" + +# Authenticate with GitHub Container Registry if not in dry-run mode +if [[ "$dry_run" = false ]]; then + echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin fi -echo "Apple Archive detected - will use for optimal sparse file handling" -compressed_ext=".aa" - -# Authenticate with GitHub Container Registry -echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin - # Use the source folder path as the working directory and get its absolute path work_dir=$(cd "$folder_path" && pwd) echo "Working directory: $work_dir" @@ -115,7 +131,7 @@ is_valid_cache() { local cache_dir="$1" # Check if it contains the necessary files [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ - [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1 + [ -f "$cache_dir/disk.img.lz4" ] || ls "$cache_dir"/disk.img.part.* 1>/dev/null 2>&1 } # Always try to find and use an existing cache @@ -123,9 +139,9 @@ existing_cache=$(find_latest_cache) if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then cache_dir="$existing_cache" - # Check if the cache contains old gzip format - if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then - echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format." + # Check if the cache contains old compressed format + if [ -f "$cache_dir/disk.img.gz" ] || [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.*.part.* 1>/dev/null 2>&1; then + echo "Error: Found legacy compressed format in cache. This script uses improved LZ4 format." echo "Please delete the cache directory and start fresh: $cache_dir" exit 1 fi @@ -162,20 +178,24 @@ mark_version_pushed() { touch "$cache_dir/.pushed_$version" } +# Function to calculate sha256 hash +calculate_sha256() { + local file="$1" + if command -v shasum &> /dev/null; then + shasum -a 256 "$file" | awk '{print "sha256:" $1}' + else + echo "sha256:$(openssl dgst -sha256 -binary "$file" | xxd -p | tr -d '\n')" + fi +} + # Copy config.json if it exists and not already in cache config_json_source="$folder_path/config.json" config_json_dest="$cache_dir/config.json" if [ -f "$config_json_source" ]; then if [ ! -f "$config_json_dest" ]; then echo "Copying config.json..." 
- # Add the uncompressed disk size annotation if disk.img exists and jq is available - if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then - echo "Adding uncompressed disk size annotation: $original_disk_size bytes" - jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \ - (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails - else - cp "$config_json_source" "$config_json_dest" - fi + # Copy config.json as is - we'll add annotations later + cp "$config_json_source" "$config_json_dest" fi fi if [ -f "$config_json_dest" ]; then @@ -207,115 +227,363 @@ if [ -f "$disk_img_orig" ]; then echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" echo " Sparseness ratio: ${sparseness_ratio}:1" - # Check if we already have compressed files in the cache - compressed_disk_img="disk.img${compressed_ext}" - already_compressed=false - - if [ -f "$cache_dir/$compressed_disk_img" ]; then - already_compressed=true - echo "Using existing compressed file from cache: $compressed_disk_img" - elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then - already_compressed=true - echo "Using existing compressed parts from cache" + # If we have config.json, update it with the uncompressed disk size annotation + if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then + echo "Adding uncompressed disk size annotation: $original_disk_size bytes" + jq --arg size "$original_disk_size" '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_dest" > "$config_json_dest.tmp" + mv "$config_json_dest.tmp" "$config_json_dest" fi - - # Only compress if not already compressed in cache - if [ "$already_compressed" = false ]; then - # Check for free disk space before compression - avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}') - avail_space_bytes=$((avail_space * 1024)) - # Assume compressed size is roughly 30% of real size as a safe estimate - estimated_compressed=$((real_size_bytes * 30 / 100)) - - if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then - echo "WARNING: Possibly insufficient disk space for compression!" - echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB" - read -p "Continue anyway? (y/n) " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "Exiting. Free up some space and try again." - exit 1 - fi - fi - # --- Compression Step --- - echo "Compressing $disk_img_orig with Apple Archive..." - - # Apple Archive compression - echo "Starting compression with Apple Archive (showing output file growth)..." - compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse & - COMP_PID=$! - - sleep 1 # Give compression a moment to start - - # Display progress based on output file growth - while kill -0 $COMP_PID 2>/dev/null; do - if [ -f "$compressed_disk_img" ]; then - current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0) - percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc) - echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r" - else - echo -ne "Preparing compression...\r" - fi - sleep 2 - done - - wait $COMP_PID - echo -e "\nCompression complete!" 
- - compressed_size=$(stat -f%z "$compressed_disk_img") - echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" - echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%" - # --- End Compression Step --- - - # Check if splitting is needed based on *compressed* size - if [ $compressed_size -gt 524288000 ]; then # 500MB threshold - echo "Splitting compressed file into chunks of $chunk_size..." - pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part." - rm -f "$compressed_disk_img" # Remove the unsplit compressed file - # Verify that parts were created - echo "Verifying split parts..." - ls -la "$cache_dir"/disk.img${compressed_ext}.part.* - fi + # Create a temporary directory for disk processing + tmp_dir="$cache_dir/tmp_processing" + mkdir -p "$tmp_dir" + + # Split the disk image into chunks first (before compression) + split_parts_dir="$tmp_dir/split_parts" + mkdir -p "$split_parts_dir" + + # Check if we already have split parts + if [ -z "$(ls -A "$split_parts_dir" 2>/dev/null)" ]; then + echo "Splitting disk image into chunks of $chunk_size..." + cd "$split_parts_dir" + pv "$disk_img_orig" | split -b "$chunk_size" - "chunk." + cd "$cache_dir" else - echo "Using existing compressed/split files from cache" + echo "Using existing split chunks from previous run" fi - - # --- Adjust part processing --- - echo "Looking for compressed files in $cache_dir..." - # List all files in the cache directory for debugging - ls -la "$cache_dir" + # Process each chunk (compress, calculate digest, etc.) + compressed_parts_dir="$tmp_dir/compressed_parts" + mkdir -p "$compressed_parts_dir" - if [ -f "$cache_dir/$compressed_disk_img" ]; then - echo "Found single compressed file: $compressed_disk_img" - # Add the single compressed file to the list - files+=("$compressed_disk_img:${oci_layer_media_type}") - else - # Look for split parts - part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo "")) - if [ ${#part_files[@]} -gt 0 ]; then - echo "Found ${#part_files[@]} split parts" - parts_files=() - part_num=0 + # Store layer information in an array + layers=() + part_num=0 + total_parts=$(ls "$split_parts_dir"/chunk.* | wc -l) + + for chunk_file in "$split_parts_dir"/chunk.*; do + part_basename=$(basename "$chunk_file") + part_num=$((part_num + 1)) + compressed_file="$compressed_parts_dir/${part_basename}${compressed_ext}" + + if [ ! 
-f "$compressed_file" ]; then + echo "Compressing chunk $part_num of $total_parts: $part_basename" - for part in "${part_files[@]}"; do - part_num=$((part_num + 1)) - part_basename=$(basename "$part") - parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}") - echo "Part $part_num: $(du -h "$part" | cut -f1)" + # Calculate uncompressed content digest before compression + uncompressed_digest=$(calculate_sha256 "$chunk_file") + + # Get uncompressed size + uncompressed_size=$(stat -f%z "$chunk_file") + + # Compress the chunk with LZ4 + lz4 -9 "$chunk_file" "$compressed_file" + + # Get compressed size + compressed_size=$(stat -f%z "$compressed_file") + + echo "Chunk $part_num: Original size: $(du -h "$chunk_file" | cut -f1), Compressed: $(du -h "$compressed_file" | cut -f1)" + else + echo "Using existing compressed chunk $part_num of $total_parts" + + # Need to calculate these values for existing files + uncompressed_digest=$(calculate_sha256 "$chunk_file") + uncompressed_size=$(stat -f%z "$chunk_file") + compressed_size=$(stat -f%z "$compressed_file") + fi + + # Store layer information + layer_info="$compressed_file:${oci_layer_media_type};uncompressed_size=$uncompressed_size;uncompressed_digest=$uncompressed_digest;part.number=$part_num;part.total=$total_parts" + layers+=("$layer_info") + done + + # Generate the files array for ORAS push + for layer_info in "${layers[@]}"; do + files+=("$layer_info") + done + + # --- Reassembly in dry-run mode --- + if [[ "$reassemble" = true ]]; then + echo "=== REASSEMBLY MODE ===" + echo "Reassembling chunks to verify integrity..." + + # Create a directory for reassembly + reassembly_dir="$cache_dir/reassembly" + mkdir -p "$reassembly_dir" + + # Prepare the reassembled file - create a properly sized sparse file first + reassembled_file="$reassembly_dir/reassembled_disk.img" + if [ -f "$reassembled_file" ]; then + echo "Removing previous reassembled file..." + rm -f "$reassembled_file" + fi + + # Get the original disk size from config annotation or directly from image + if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then + config_size=$(jq -r '.annotations."com.trycua.lume.disk.uncompressed_size" // empty' "$config_json_dest") + if [ -n "$config_size" ]; then + original_disk_size_bytes=$config_size + echo "Using uncompressed size from config: $original_disk_size_bytes bytes" + fi + fi + + # Create a sparse file of the exact original size + echo "Pre-allocating sparse file of $(du -h "$disk_img_orig" | cut -f1)..." 
+ dd if=/dev/zero of="$reassembled_file" bs=1 count=0 seek=$original_disk_size + + # Make sure filesystem recognizes this as a sparse file + if [[ "$OSTYPE" == "darwin"* ]]; then + # On macOS, we can use a better sparse file creation method if mkfile is available + if command -v mkfile &> /dev/null; then + rm -f "$reassembled_file" + mkfile -n ${original_disk_size}b "$reassembled_file" + echo "Created sparse file using mkfile" + fi + else + # On Linux systems, ensure sparseness with truncate if available + if command -v truncate &> /dev/null; then + rm -f "$reassembled_file" + truncate -s $original_disk_size "$reassembled_file" + echo "Created sparse file using truncate" + fi + fi + + # Create an offset tracker to keep track of where each chunk should go + current_offset=0 + + # Decompress each chunk and write it at the correct offset + for ((i=1; i<=total_parts; i++)); do + # Find the chunk file for part number i + chunk_pattern="" + chunk_uncompressed_size="" + + for layer_info in "${layers[@]}"; do + if [[ "$layer_info" == *";part.number=$i;"* ]]; then + chunk_pattern="${layer_info%%:*}" + # Extract the uncompressed size from metadata + if [[ "$layer_info" =~ uncompressed_size=([0-9]+) ]]; then + chunk_uncompressed_size="${BASH_REMATCH[1]}" + fi + break + fi done - files+=("${parts_files[@]}") + if [ -z "$chunk_pattern" ]; then + echo "Error: Could not find chunk for part $i" + exit 1 + fi + + echo "Processing part $i/$total_parts: $(basename "$chunk_pattern") at offset $current_offset..." + + # Create temp decompressed file + temp_decompressed="$reassembly_dir/temp_part_$i" + lz4 -d -f "$chunk_pattern" "$temp_decompressed" || { + echo "Error decompressing part $i" + exit 1 + } + + # Check if this chunk is all zeros (sparse data) + # Only check the first 1MB for efficiency + is_likely_sparse=false + if command -v hexdump &> /dev/null; then + # Use hexdump to check a sample of the file for non-zero content + sparse_check=$(hexdump -n 1048576 -v "$temp_decompressed" | grep -v "0000 0000 0000 0000 0000 0000 0000 0000" | head -n 1) + if [ -z "$sparse_check" ]; then + echo "Chunk appears to be all zeros (sparse data)" + is_likely_sparse=true + fi + fi + + # Use dd to write the chunk at the correct offset with sparse file handling + if [ "$is_likely_sparse" = true ]; then + # For sparse chunks, we don't need to write anything - leave as zeros + echo "Skipping write for all-zero chunk (preserving sparseness)" + elif [[ "$OSTYPE" == "darwin"* ]]; then + # macOS dd doesn't support conv=sparse, use standard approach + dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { + echo "Error writing part $i at offset $current_offset" + exit 1 + } + else + # On Linux, use conv=sparse to preserve sparseness during the write + dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=sparse,notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { + echo "Error writing part $i at offset $current_offset" + exit 1 + } + fi + + # Clean up the temporary file + rm -f "$temp_decompressed" + + # Update the offset for the next chunk + current_offset=$((current_offset + chunk_uncompressed_size)) + done + + # After all chunks are processed, ensure sparseness is preserved + if command -v cp &> /dev/null && [[ "$OSTYPE" == "darwin"* ]]; then + echo "Copying disk image to maintain sparseness..." 
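+        # Sketch of why `cp -c` is used below (macOS/APFS assumption): the -c flag asks
+        # for clonefile(2), which creates a copy-on-write clone sharing the source's
+        # extents, so sparse regions are kept instead of being materialized, e.g.:
+        #   cp -c reassembled_disk.img final_disk.img   # near-instant, no extra blocks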
+ final_sparse_file="$reassembly_dir/final_disk.img" + rm -f "$final_sparse_file" 2>/dev/null + + # On macOS, use cp with the clone flag to preserve sparseness + cp -c "$reassembled_file" "$final_sparse_file" + + # Use the sparse-optimized file for verification + echo "Using sparse-optimized copy for verification" + mv "$final_sparse_file" "$reassembled_file" + sync + elif command -v cp &> /dev/null && command -v file &> /dev/null; then + # For Linux systems + echo "Optimizing file sparseness..." + final_sparse_file="$reassembly_dir/final_disk.img" + rm -f "$final_sparse_file" 2>/dev/null + + # Use cp --sparse=always on Linux + cp --sparse=always "$reassembled_file" "$final_sparse_file" + + # Use the sparse-optimized file for verification + echo "Using sparse-optimized copy for verification" + mv "$final_sparse_file" "$reassembled_file" + sync + fi + + # Make sure to sync to disk + sync + + # Calculate digests for comparison + echo "Verifying reassembled file..." + original_digest=$(calculate_sha256 "$disk_img_orig") + reassembled_digest=$(calculate_sha256 "$reassembled_file") + + # Compare the original and reassembled file sizes + original_size=$(stat -f%z "$disk_img_orig") + reassembled_size=$(stat -f%z "$reassembled_file") + + echo "Results:" + echo " Original size: $(du -h "$disk_img_orig" | cut -f1) ($original_size bytes)" + echo " Reassembled size: $(du -h "$reassembled_file" | cut -f1) ($reassembled_size bytes)" + echo " Original digest: ${original_digest#sha256:}" + echo " Reassembled digest: ${reassembled_digest#sha256:}" + + # Check if the disk is sparse + original_apparent_size=$(du -h "$disk_img_orig" | cut -f1) + original_actual_size=$(du -sh "$disk_img_orig" | cut -f1) + reassembled_apparent_size=$(du -h "$reassembled_file" | cut -f1) + reassembled_actual_size=$(du -sh "$reassembled_file" | cut -f1) + + echo " Original: Apparent size: $original_apparent_size, Actual disk usage: $original_actual_size" + echo " Reassembled: Apparent size: $reassembled_apparent_size, Actual disk usage: $reassembled_actual_size" + + if [ "$original_digest" = "$reassembled_digest" ]; then + echo "✅ VERIFICATION SUCCESSFUL: Files are identical" else - echo "ERROR: No compressed files found in cache directory: $cache_dir" - echo "Contents of cache directory:" - find "$cache_dir" -type f | sort - exit 1 + echo "❌ VERIFICATION FAILED: Files differ" + if [ "$original_size" != "$reassembled_size" ]; then + echo " Size mismatch: Original $original_size bytes, Reassembled $reassembled_size bytes" + fi + + # Try to identify where they differ + echo "Attempting to identify differences..." + if command -v cmp &> /dev/null; then + cmp_output=$(cmp -l "$disk_img_orig" "$reassembled_file" 2>&1 | head -5) + if [[ "$cmp_output" == *"differ"* ]]; then + echo " First few differences:" + echo "$cmp_output" + fi + fi + + # Check if the virtual machine will still boot despite differences + echo "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" + echo " may be handled differently between the original and reassembled files." 
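+        # Worked example of the du/bc comparison that follows (hypothetical numbers):
+        # if the original image uses 10485760 KB and the reassembled one 5242880 KB,
+        #   (5242880 - 10485760) * 100 / 10485760 = -50.00
+        # which would trigger the "significantly less space" warning below.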
+ + # Calculate a percentage comparison of used blocks + # This helps determine if the sparse issues are severe or minor + original_used_kb=$(du -k "$disk_img_orig" | cut -f1) + reassembled_used_kb=$(du -k "$reassembled_file" | cut -f1) + + # Calculate percentage difference in used space + if [ "$original_used_kb" -ne 0 ]; then + diff_percentage=$(echo "scale=2; ($reassembled_used_kb - $original_used_kb) * 100 / $original_used_kb" | bc) + echo " Disk usage difference: $diff_percentage% ($reassembled_used_kb KB vs $original_used_kb KB)" + + # If reassembled is much smaller, this likely indicates sparse regions weren't preserved + if (( $(echo "$diff_percentage < -40" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." + echo " This indicates sparse regions weren't properly preserved and may affect VM functionality." + echo " The VM might boot but could be missing applications or data." + elif (( $(echo "$diff_percentage < -10" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." + echo " Some sparse regions may not be properly preserved but VM might still function correctly." + elif (( $(echo "$diff_percentage > 10" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses more space (>10% difference)." + echo " This is unusual and may indicate improper sparse file handling." + else + echo " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." + fi + fi + fi + + echo "Reassembled file is available at: $reassembled_file" + + # If verification failed and difference is significant, try a direct copy as fallback + if [ "$original_digest" != "$reassembled_digest" ] && [ -n "$diff_percentage" ] && (( $(echo "$diff_percentage < -20" | bc -l) )); then + echo + echo "===== ATTEMPTING RECOVERY ACTION =====" + echo "Since verification failed with significant disk usage difference," + echo "trying direct copy of disk image as a fallback method." + echo + + fallback_file="$reassembly_dir/fallback_disk.img" + echo "Creating fallback disk image at: $fallback_file" + + # Use rsync with sparse option if available + if command -v rsync &> /dev/null; then + echo "Using rsync with sparse option for direct copy..." + rsync -aS --progress "$disk_img_orig" "$fallback_file" + else + # Direct cp with sparse option if available + if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Using cp -c (clone) for direct copy..." + cp -c "$disk_img_orig" "$fallback_file" + else + echo "Using cp --sparse=always for direct copy..." + cp --sparse=always "$disk_img_orig" "$fallback_file" + fi + fi + + echo "Direct copy completed. You may want to try using this fallback disk image" + echo "instead if the reassembled one has issues: $fallback_file" fi fi - + # --- Push Logic --- + if [[ "$dry_run" = true ]]; then + echo "=== DRY RUN MODE ===" + echo "The following files would be pushed to the registry:" + for file_info in "${files[@]}"; do + file_path="${file_info%%:*}" + file_metadata="${file_info#*:}" + file_size=$(du -h "$file_path" | cut -f1) + echo " - $file_path ($file_size) with metadata: $file_metadata" + done + + if [[ -n "$image_versions" ]]; then + echo "Would push to the following versions:" + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + echo " - ghcr.io/$organization/$image_name:$version" + done + else + echo "No versions specified for dry run. Processing completed successfully." 
+ fi + + echo "All processing tasks completed. No actual push performed." + echo "Cache directory: $cache_dir" + exit 0 + fi + + # Regular push logic (non-dry-run) push_pids=() IFS=',' read -ra versions <<< "$image_versions" for version in "${versions[@]}"; do @@ -368,6 +636,25 @@ if [ -f "$disk_img_orig" ]; then else echo "Warning: $disk_img_orig not found." + + # If in dry run mode, just show what would happen + if [[ "$dry_run" = true ]]; then + echo "=== DRY RUN MODE ===" + if [ ${#files[@]} -gt 0 ]; then + echo "The following non-disk files would be pushed:" + for file_info in "${files[@]}"; do + file_path="${file_info%%:*}" + file_metadata="${file_info#*:}" + file_size=$(du -h "$file_path" | cut -f1) + echo " - $file_path ($file_size) with metadata: $file_metadata" + done + else + echo "No files found to push." + fi + echo "All processing tasks completed. No actual push performed." + exit 0 + fi + # Push only config/nvram if they exist if [ ${#files[@]} -gt 0 ]; then echo "Pushing non-disk files..." @@ -427,6 +714,11 @@ else fi fi +# Skip final status check in dry-run mode +if [[ "$dry_run" = true ]]; then + exit 0 +fi + # Determine final status based on the success check *before* potential cleanup echo # Add a newline for better readability if [ "$all_versions_pushed" = true ]; then diff --git a/libs/lume/src/Commands/Push.swift b/libs/lume/src/Commands/Push.swift new file mode 100644 index 00000000..df784b39 --- /dev/null +++ b/libs/lume/src/Commands/Push.swift @@ -0,0 +1,74 @@ +import ArgumentParser +import Foundation + +struct Push: AsyncParsableCommand { + static let configuration = CommandConfiguration( + abstract: "Push a macOS VM to GitHub Container Registry" + ) + + @Argument(help: "Name of the VM to push") + var name: String + + @Argument(help: "Image tag to push (format: name:tag)") + var image: String + + @Option(parsing: .upToNextOption, help: "Additional tags to push the same image to") + var additionalTags: [String] = [] + + @Option(help: "Github Container Registry to push to. Defaults to ghcr.io") + var registry: String = "ghcr.io" + + @Option(help: "Organization to push to. Defaults to trycua") + var organization: String = "trycua" + + @Option(name: .customLong("storage"), help: "VM storage location to use") + var storage: String? + + @Option(help: "Chunk size for large files in MB. Defaults to 512.") + var chunkSizeMb: Int = 512 + + @Flag(name: .long, help: "Enable verbose logging") + var verbose: Bool = false + + @Flag(name: .long, help: "Prepare files without uploading to registry") + var dryRun: Bool = false + + @Flag(name: .long, help: "In dry-run mode, also reassemble chunks to verify integrity") + var reassemble: Bool = true + + init() {} + + @MainActor + func run() async throws { + let controller = LumeController() + + // Parse primary image name and tag + let components = image.split(separator: ":") + guard components.count == 2, let primaryTag = components.last else { + throw ValidationError("Invalid primary image format. Expected format: name:tag") + } + let imageName = String(components.first!) 
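+        // Illustrative example (hypothetical values): an image argument of
+        // "macos-sequoia:latest" splits into imageName "macos-sequoia" and
+        // primaryTag "latest"; any --additional-tags values are merged with
+        // primaryTag into a unique set below.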
+
+        // Combine primary and additional tags, ensuring uniqueness
+        var allTags: Swift.Set<String> = []
+        allTags.insert(String(primaryTag))
+        allTags.formUnion(additionalTags)
+
+        guard !allTags.isEmpty else {
+            throw ValidationError("At least one tag must be provided.")
+        }
+
+        try await controller.pushImage(
+            name: name,
+            imageName: imageName, // Pass base image name
+            tags: Array(allTags), // Pass array of all unique tags
+            registry: registry,
+            organization: organization,
+            storage: storage,
+            chunkSizeMb: chunkSizeMb,
+            verbose: verbose,
+            dryRun: dryRun,
+            reassemble: reassemble
+        )
+    }
+}
\ No newline at end of file
diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
index ac7453ca..3ba7d543 100644
--- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
+++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
@@ -2,8 +2,56 @@ import ArgumentParser
 import Darwin
 import Foundation
 import Swift
+import CommonCrypto
+import Compression // Add this import
+
+// Extension to calculate SHA256 hash
+extension Data {
+    func sha256String() -> String {
+        let hash = self.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> [UInt8] in
+            var hash = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
+            CC_SHA256(bytes.baseAddress, CC_LONG(self.count), &hash)
+            return hash
+        }
+        return hash.map { String(format: "%02x", $0) }.joined()
+    }
+}
+
+// Push-related errors
+enum PushError: Error {
+    case uploadInitiationFailed
+    case blobUploadFailed
+    case manifestPushFailed
+    case authenticationFailed
+    case missingToken
+    case invalidURL
+    case lz4NotFound // Added error case
+}
+
+struct ChunkMetadata: Codable {
+    let uncompressedDigest: String
+    let uncompressedSize: UInt64
+    let compressedDigest: String
+    let compressedSize: Int
+}
 
 // Define struct to decode relevant parts of config.json
+struct OCIManifestLayer {
+    let mediaType: String
+    let size: Int
+    let digest: String
+    let uncompressedSize: UInt64?
+    let uncompressedContentDigest: String?
+
+    init(mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, uncompressedContentDigest: String? = nil) {
+        self.mediaType = mediaType
+        self.size = size
+        self.digest = digest
+        self.uncompressedSize = uncompressedSize
+        self.uncompressedContentDigest = uncompressedContentDigest
+    }
+}
+
 struct OCIConfig: Codable {
     struct Annotations: Codable {
         let uncompressedSize: String?
// Use optional String @@ -274,6 +322,43 @@ struct DownloadStats { } } +// Renamed struct +struct UploadStats { + let totalBytes: Int64 + let uploadedBytes: Int64 // Renamed + let elapsedTime: TimeInterval + let averageSpeed: Double + let peakSpeed: Double + + func formattedSummary() -> String { + let bytesStr = ByteCountFormatter.string(fromByteCount: uploadedBytes, countStyle: .file) + let avgSpeedStr = formatSpeed(averageSpeed) + let peakSpeedStr = formatSpeed(peakSpeed) + let timeStr = formatTime(elapsedTime) + return """ + Upload Statistics: + - Total uploaded: \(bytesStr) + - Elapsed time: \(timeStr) + - Average speed: \(avgSpeedStr) + - Peak speed: \(peakSpeedStr) + """ + } + private func formatSpeed(_ bytesPerSecond: Double) -> String { + let formatter = ByteCountFormatter() + formatter.countStyle = .file + let bytesStr = formatter.string(fromByteCount: Int64(bytesPerSecond)) + return "\(bytesStr)/s" + } + private func formatTime(_ seconds: TimeInterval) -> String { + let hours = Int(seconds) / 3600 + let minutes = (Int(seconds) % 3600) / 60 + let secs = Int(seconds) % 60 + if hours > 0 { return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) } + else if minutes > 0 { return String(format: "%d minutes, %d seconds", minutes, secs) } + else { return String(format: "%d seconds", secs) } + } +} + actor TaskCounter { private var count: Int = 0 @@ -285,12 +370,17 @@ actor TaskCounter { class ImageContainerRegistry: @unchecked Sendable { private let registry: String private let organization: String - private let progress = ProgressTracker() + private let downloadProgress = ProgressTracker() // Renamed for clarity + private let uploadProgress = UploadProgressTracker() // Added upload tracker private let cacheDirectory: URL private let downloadLock = NSLock() private var activeDownloads: [String] = [] private let cachingEnabled: Bool + // Constants for zero-skipping write logic + private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros + private static let zeroChunk = Data(count: holeGranularityBytes) + // Add the createProgressBar function here as a private method private func createProgressBar(progress: Double, width: Int = 30) -> String { let completedWidth = Int(progress * Double(width)) @@ -613,7 +703,7 @@ class ImageContainerRegistry: @unchecked Sendable { $0.mediaType != "application/vnd.oci.empty.v1+json" }.count let totalSize = manifest.layers.reduce(0) { $0 + Int64($1.size) } - await progress.setTotal(totalSize, files: totalFiles) + await downloadProgress.setTotal(totalSize, files: totalFiles) // Process layers with limited concurrency Logger.info("Processing Image layers") @@ -671,7 +761,7 @@ class ImageContainerRegistry: @unchecked Sendable { // Still need to account for progress group.addTask { [self] in await counter.increment() - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) await counter.decrement() return Int64(size) } @@ -686,7 +776,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: partURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) } else { // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { @@ -696,7 +786,7 @@ class ImageContainerRegistry: @unchecked Sendable { { try FileManager.default.copyItem( at: cachedLayer, to: partURL) - await 
progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) return Int64(size) } } @@ -711,7 +801,7 @@ class ImageContainerRegistry: @unchecked Sendable { token: token, to: partURL, maxRetries: 5, - progress: progress, + progress: downloadProgress, manifestId: manifestId ) @@ -758,7 +848,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) } else { // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { @@ -767,7 +857,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem( at: cachedLayer, to: outputURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) return Int64(size) } } @@ -782,7 +872,7 @@ class ImageContainerRegistry: @unchecked Sendable { token: token, to: outputURL, maxRetries: 5, - progress: progress, + progress: downloadProgress, manifestId: manifestId ) @@ -808,7 +898,7 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("") // New line after progress // Display download statistics - let stats = await progress.getDownloadStats() + let stats = await downloadProgress.getDownloadStats() Logger.info(stats.formattedSummary()) // Parse config.json to get uncompressed size *before* reassembly @@ -1866,17 +1956,52 @@ class ImageContainerRegistry: @unchecked Sendable { } private func getToken(repository: String) async throws -> String { - let url = URL(string: "https://\(self.registry)/token")! - .appending(queryItems: [ - URLQueryItem(name: "service", value: self.registry), - URLQueryItem(name: "scope", value: "repository:\(repository):pull"), - ]) + let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository + // Request both pull and push scope for uploads + let url = URL(string: "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)")! + + var request = URLRequest(url: url) + request.httpMethod = "GET" // Token endpoint uses GET + request.setValue("application/json", forHTTPHeaderField: "Accept") - let (data, _) = try await URLSession.shared.data(from: url) - let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] - guard let token = json?["token"] as? 
String else { - throw PullError.tokenFetchFailed + // *** Add Basic Authentication Header if credentials exist *** + let (username, password) = getCredentialsFromEnvironment() + if let username = username, let password = password, !username.isEmpty, !password.isEmpty { + let authString = "\(username):\(password)" + if let authData = authString.data(using: .utf8) { + let base64Auth = authData.base64EncodedString() + request.setValue("Basic \(base64Auth)", forHTTPHeaderField: "Authorization") + Logger.info("Adding Basic Authentication header to token request.") + } else { + Logger.error("Failed to encode credentials for Basic Auth.") + } + } else { + Logger.info("No credentials found in environment for token request.") + // Allow anonymous request for pull scope, but push scope likely requires auth } + // *** End Basic Auth addition *** + + let (data, response) = try await URLSession.shared.data(for: request) + + // Check response status code *before* parsing JSON + guard let httpResponse = response as? HTTPURLResponse else { + throw PushError.authenticationFailed // Or a more generic network error + } + + guard httpResponse.statusCode == 200 else { + // Log detailed error including status code and potentially response body + let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" + Logger.error("Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)") + // Throw specific error based on status if needed (e.g., 401 for unauthorized) + throw PushError.authenticationFailed + } + + let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] + guard let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] as? String else { + Logger.error("Token not found in registry response.") + throw PushError.missingToken + } + return token } @@ -2618,4 +2743,746 @@ class ImageContainerRegistry: @unchecked Sendable { return 0 } + + // New push method + public func push( + vmDirPath: String, + imageName: String, + tags: [String], + chunkSizeMb: Int = 512, + verbose: Bool = false, + dryRun: Bool = false, + reassemble: Bool = false + ) async throws { + Logger.info( + "Pushing VM to registry", + metadata: [ + "vm_path": vmDirPath, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", // Log all tags + "registry": registry, + "organization": organization, + "chunk_size": "\(chunkSizeMb)MB", + "dry_run": "\(dryRun)", + "reassemble": "\(reassemble)" + ]) + + // Remove tag parsing here, imageName is now passed directly + // let components = image.split(separator: ":") ... 
+ // let imageTag = String(tag) + + // Get authentication token only if not in dry-run mode + var token: String = "" + if !dryRun { + Logger.info("Getting registry authentication token") + token = try await getToken(repository: "\(self.organization)/\(imageName)") + } else { + Logger.info("Dry run mode: skipping authentication token request") + } + + // Create working directory inside the VM folder for caching/resuming + let workDir = URL(fileURLWithPath: vmDirPath).appendingPathComponent(".lume_push_cache") + try FileManager.default.createDirectory(at: workDir, withIntermediateDirectories: true) + Logger.info("Using push cache directory: \(workDir.path)") + + // Get VM files that need to be pushed using vmDirPath + let diskPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("disk.img") + let configPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("config.json") + let nvramPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("nvram.bin") + + var layers: [OCIManifestLayer] = [] + var uncompressedDiskSize: UInt64? = nil + + // Process config.json + let cachedConfigPath = workDir.appendingPathComponent("config.json") + var configDigest: String? = nil + var configSize: Int? = nil + + if FileManager.default.fileExists(atPath: cachedConfigPath.path) { + Logger.info("Using cached config.json") + do { + let configData = try Data(contentsOf: cachedConfigPath) + configDigest = "sha256:" + configData.sha256String() + configSize = configData.count + // Try to get uncompressed disk size from cached config + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { + uncompressedDiskSize = vmConfig.diskSize + Logger.info("Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") + } + } catch { + Logger.error("Failed to read cached config.json: \(error). Will re-process.") + // Force re-processing by leaving configDigest nil + } + } else if FileManager.default.fileExists(atPath: configPath.path) { + Logger.info("Processing config.json") + let configData = try Data(contentsOf: configPath) + configDigest = "sha256:" + configData.sha256String() + configSize = configData.count + try configData.write(to: cachedConfigPath) // Save to cache + // Try to get uncompressed disk size from original config + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { + uncompressedDiskSize = vmConfig.diskSize + Logger.info("Found disk size in config: \(uncompressedDiskSize ?? 0) bytes") + } + } + + if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded + if !dryRun { + // Upload only if not in dry-run mode and blob doesn't exist + if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + Logger.info("Uploading config.json blob") + let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload + digest = try await uploadBlobFromData( + repository: "\(self.organization)/\(imageName)", + data: configData, + token: token + ) + } else { + Logger.info("Config blob already exists on registry") + } + } + // Add config layer + layers.append(OCIManifestLayer( + mediaType: "application/vnd.oci.image.config.v1+json", + size: size, + digest: digest + )) + } + + // Process nvram.bin + let cachedNvramPath = workDir.appendingPathComponent("nvram.bin") + var nvramDigest: String? = nil + var nvramSize: Int? 
= nil
+
+        if FileManager.default.fileExists(atPath: cachedNvramPath.path) {
+            Logger.info("Using cached nvram.bin")
+            do {
+                let nvramData = try Data(contentsOf: cachedNvramPath)
+                nvramDigest = "sha256:" + nvramData.sha256String()
+                nvramSize = nvramData.count
+            } catch {
+                Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.")
+            }
+        } else if FileManager.default.fileExists(atPath: nvramPath.path) {
+            Logger.info("Processing nvram.bin")
+            let nvramData = try Data(contentsOf: nvramPath)
+            nvramDigest = "sha256:" + nvramData.sha256String()
+            nvramSize = nvramData.count
+            try nvramData.write(to: cachedNvramPath) // Save to cache
+        }
+
+        if var digest = nvramDigest, let size = nvramSize { // Use 'var'
+            if !dryRun {
+                // Upload only if not in dry-run mode and blob doesn't exist
+                if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) {
+                    Logger.info("Uploading nvram.bin blob")
+                    let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache
+                    digest = try await uploadBlobFromData(
+                        repository: "\(self.organization)/\(imageName)",
+                        data: nvramData,
+                        token: token
+                    )
+                } else {
+                    Logger.info("NVRAM blob already exists on registry")
+                }
+            }
+            // Add nvram layer
+            layers.append(OCIManifestLayer(
+                mediaType: "application/octet-stream",
+                size: size,
+                digest: digest
+            ))
+        }
+
+        // Process disk.img
+        if FileManager.default.fileExists(atPath: diskPath.path) {
+            let diskAttributes = try FileManager.default.attributesOfItem(atPath: diskPath.path)
+            let diskSize = diskAttributes[.size] as? UInt64 ?? 0
+            let actualDiskSize = uncompressedDiskSize ?? diskSize
+            Logger.info("Processing disk.img in chunks", metadata: ["disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB"])
+            let chunksDir = workDir.appendingPathComponent("disk.img.parts")
+            try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true)
+            let chunkSizeBytes = chunkSizeMb * 1024 * 1024
+            let totalChunks = Int((diskSize + UInt64(chunkSizeBytes) - 1) / UInt64(chunkSizeBytes))
+            Logger.info("Splitting disk into \(totalChunks) chunks")
+            let fileHandle = try FileHandle(forReadingFrom: diskPath)
+            defer { try? fileHandle.close() }
+            var pushedDiskLayers: [(index: Int, layer: OCIManifestLayer)] = []
+            var diskChunks: [(index: Int, path: URL, digest: String)] = []
+
+            try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { group in
+                let maxConcurrency = 4
+                for chunkIndex in 0..<totalChunks {
+                    if chunkIndex >= maxConcurrency { if let res = try await group.next() { pushedDiskLayers.append((res.0, res.1)); diskChunks.append((res.0, res.2, res.3)) } }
+                    group.addTask { [token, verbose, dryRun, organization, imageName] in
+                        let chunkIndex = chunkIndex
+                        let chunkPath = chunksDir.appendingPathComponent("chunk.\(chunkIndex)")
+                        let metadataPath = chunksDir.appendingPathComponent("chunk_metadata.\(chunkIndex).json")
+                        var layer: OCIManifestLayer? = nil
+                        var finalCompressedDigest: String?
= nil + if FileManager.default.fileExists(atPath: metadataPath.path), FileManager.default.fileExists(atPath: chunkPath.path) { + do { + let metadataData = try Data(contentsOf: metadataPath) + let metadata = try JSONDecoder().decode(ChunkMetadata.self, from: metadataData) + Logger.info("Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache") + finalCompressedDigest = metadata.compressedDigest + if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: metadata.compressedDigest, token: token)) { Logger.info("Uploading cached chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: metadata.compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry") } } + layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: metadata.compressedSize, digest: metadata.compressedDigest, uncompressedSize: metadata.uncompressedSize, uncompressedContentDigest: metadata.uncompressedDigest) + } catch { Logger.info("Failed to load cached metadata/chunk for index \(chunkIndex): \(error). Re-processing."); finalCompressedDigest = nil; layer = nil } + } + if layer == nil { + Logger.info("Processing chunk \(chunkIndex + 1)/\(totalChunks)") + let localFileHandle = try FileHandle(forReadingFrom: diskPath) + defer { try? localFileHandle.close() } + try localFileHandle.seek(toOffset: UInt64(chunkIndex * chunkSizeBytes)) + let chunkData = try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() + let uncompressedSize = UInt64(chunkData.count) + let uncompressedDigest = "sha256:" + chunkData.sha256String() + let compressedData = try (chunkData as NSData).compressed(using: .lz4) as Data + let compressedSize = compressedData.count + let compressedDigest = "sha256:" + compressedData.sha256String() + try compressedData.write(to: chunkPath) + let metadata = ChunkMetadata(uncompressedDigest: uncompressedDigest, uncompressedSize: uncompressedSize, compressedDigest: compressedDigest, compressedSize: compressedSize) + let metadataData = try JSONEncoder().encode(metadata) + try metadataData.write(to: metadataPath) + finalCompressedDigest = compressedDigest + if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: compressedDigest, token: token)) { Logger.info("Uploading processed chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)") } } + layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: compressedSize, digest: compressedDigest, uncompressedSize: uncompressedSize, uncompressedContentDigest: uncompressedDigest) + } + guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { throw PushError.blobUploadFailed } + if verbose { Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } + return (chunkIndex, finalLayer, chunkPath, finalDigest) + } + } + for try await (index, layer, path, digest) in group { pushedDiskLayers.append((index, layer)); diskChunks.append((index, path, digest)) } + } + layers.append(contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) + diskChunks.sort { $0.index < $1.index } + Logger.info("All disk chunks processed successfully") + + // --- Calculate Total Upload Size & Initialize Tracker --- + if 
!dryRun { + var totalUploadSizeBytes: Int64 = 0 + var totalUploadFiles: Int = 0 + // Add config size if it exists + if let size = configSize { + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 + } + // Add nvram size if it exists + if let size = nvramSize { + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 + } + // Add sizes of all compressed disk chunks + let allChunkSizes = diskChunks.compactMap { try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 ?? 0 } + totalUploadSizeBytes += allChunkSizes.reduce(0, +) + totalUploadFiles += totalChunks // Use totalChunks calculated earlier + + if totalUploadSizeBytes > 0 { + Logger.info("Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))") + await uploadProgress.setTotal(totalUploadSizeBytes, files: totalUploadFiles) + // Print initial progress bar + print("[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... ") + fflush(stdout) + } else { + Logger.info("No files marked for upload.") + } + } + // --- End Size Calculation & Init --- + + // Perform reassembly verification if requested in dry-run mode + if dryRun && reassemble { + Logger.info("=== REASSEMBLY MODE ===") + Logger.info("Reassembling chunks to verify integrity...") + let reassemblyDir = workDir.appendingPathComponent("reassembly") + try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) + let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") + Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") + if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { throw PushError.invalidURL } + let outputHandle = try FileHandle(forWritingTo: reassembledFile) + defer { try? outputHandle.close() } + try outputHandle.truncate(atOffset: actualDiskSize) + var currentOffset: UInt64 = 0 + for (index, cachedChunkPath, _) in diskChunks { + Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") + let decompressedBytesWritten = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset) + currentOffset += decompressedBytesWritten + } + Logger.info("Verifying reassembled file...") + let originalSize = diskSize + let originalDigest = calculateSHA256(filePath: diskPath.path) + let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) + let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 
0 + let reassembledDigest = calculateSHA256(filePath: reassembledFile.path) + let originalActualSize = getActualDiskUsage(path: diskPath.path) + let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path) + Logger.info("Results:") + Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)") + Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)") + Logger.info(" Original digest: \(originalDigest)") + Logger.info(" Reassembled digest: \(reassembledDigest)") + Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))") + Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))") + if originalDigest == reassembledDigest { Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") } else { Logger.info("❌ VERIFICATION FAILED: Files differ"); if originalSize != reassembledSize { Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") }; Logger.info("Attempting to identify differences..."); Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions"); Logger.info(" may be handled differently between the original and reassembled files."); if originalActualSize > 0 { let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0; Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%"); if diffPercentage < -40 { Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)."); Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") } else if diffPercentage < -10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)."); Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") } else if diffPercentage > 10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference)."); Logger.info(" This is unusual and may indicate improper sparse file handling.") } else { Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") } } } + Logger.info("Reassembled file is available at: \(reassembledFile.path)") + if originalDigest != reassembledDigest { Logger.info(""); Logger.info("===== ATTEMPTING RECOVERY ACTION ====="); Logger.info("Since verification failed, trying direct copy as a fallback method."); let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img"); Logger.info("Creating fallback disk image at: \(fallbackFile.path)"); let rsyncProcess = Process(); rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync"); rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path]; try rsyncProcess.run(); rsyncProcess.waitUntilExit(); if rsyncProcess.terminationStatus == 0 { Logger.info("Direct copy completed. 
You may want to try using this fallback disk image"); Logger.info("instead if the reassembled one has issues: \(fallbackFile.path)") } else { Logger.info("Direct copy failed. Attempting with cp -c command..."); let cpProcess = Process(); cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp"); cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path]; try cpProcess.run(); cpProcess.waitUntilExit(); if cpProcess.terminationStatus == 0 { Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") } else { Logger.info("All recovery attempts failed.") } } } + } + } + + // --- Manifest Creation & Push --- + let manifest = createManifest( + layers: layers, + configLayerIndex: layers.firstIndex(where: { $0.mediaType == "application/vnd.oci.image.config.v1+json" }), + uncompressedDiskSize: uncompressedDiskSize + ) + + // Push manifest only if not in dry-run mode + if !dryRun { + Logger.info("Pushing manifest(s)") // Updated log + // Serialize the manifest dictionary to Data first + let manifestData = try JSONSerialization.data(withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys]) + + // Loop through tags to push the same manifest data + for tag in tags { + Logger.info("Pushing manifest for tag: \(tag)") + try await pushManifest( + repository: "\(self.organization)/\(imageName)", + tag: tag, // Use the current tag from the loop + manifest: manifestData, // Pass the serialized Data + token: token // Token should be in scope here now + ) + } + } + + // Print final upload summary if not dry run + if !dryRun { + let stats = await uploadProgress.getUploadStats() + Logger.info("\n\(stats.formattedSummary())") // Add newline for separation + } + + // Clean up cache directory only on successful non-dry-run push + } + + private func createManifest(layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64?) 
-> [String: Any] { + var manifest: [String: Any] = [ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "layers": layers.map { layer in + var layerDict: [String: Any] = [ + "mediaType": layer.mediaType, + "size": layer.size, + "digest": layer.digest + ] + + if let uncompressedSize = layer.uncompressedSize { + var annotations: [String: String] = [:] + annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix + + if let digest = layer.uncompressedContentDigest { + annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix + } + + layerDict["annotations"] = annotations + } + + return layerDict + } + ] + + // Add config reference if available + if let configIndex = configLayerIndex { + let configLayer = layers[configIndex] + manifest["config"] = [ + "mediaType": configLayer.mediaType, + "size": configLayer.size, + "digest": configLayer.digest + ] + } + + // Add annotations + var annotations: [String: String] = [:] + annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix + + if let diskSize = uncompressedDiskSize { + annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix + } + + manifest["annotations"] = annotations + + return manifest + } + + private func uploadBlobFromData(repository: String, data: Data, token: String) async throws -> String { + // Calculate digest + let digest = "sha256:" + data.sha256String() + + // Check if blob already exists + if try await blobExists(repository: repository, digest: digest, token: token) { + Logger.info("Blob already exists: \(digest)") + return digest + } + + // Initiate upload + let uploadURL = try await startBlobUpload(repository: repository, token: token) + + // Upload blob + try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) + + // Report progress + await uploadProgress.addProgress(Int64(data.count)) + + return digest + } + + private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) async throws -> String { + // Check if blob already exists + if try await blobExists(repository: repository, digest: digest, token: token) { + Logger.info("Blob already exists: \(digest)") + return digest + } + + // Initiate upload + let uploadURL = try await startBlobUpload(repository: repository, token: token) + + // Load data from file + let data = try Data(contentsOf: path) + + // Upload blob + try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) + + // Report progress + await uploadProgress.addProgress(Int64(data.count)) + + return digest + } + + private func blobExists(repository: String, digest: String, token: String) async throws -> Bool { + let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/\(digest)")! + var request = URLRequest(url: url) + request.httpMethod = "HEAD" + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + let (_, response) = try await URLSession.shared.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + return httpResponse.statusCode == 200 + } + + return false + } + + private func startBlobUpload(repository: String, token: String) async throws -> URL { + let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/uploads/")! 
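+        // Flow sketch (per the OCI distribution spec as used here): POST to
+        // /v2/<name>/blobs/uploads/ opens an upload session and returns 202 with a
+        // Location header; the blob is then sent in one monolithic PUT to that
+        // location with ?digest=sha256:... appended (see uploadBlob below).
+        // Chunked PATCH uploads are not attempted in this implementation.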
+ var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST + + let (_, response) = try await URLSession.shared.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse, + httpResponse.statusCode == 202, + let locationString = httpResponse.value(forHTTPHeaderField: "Location") else { + // Log response details on failure + let responseBody = String(data: (try? await URLSession.shared.data(for: request).0) ?? Data(), encoding: .utf8) ?? "(No Body)" + Logger.error("Failed to initiate blob upload. Status: \( (response as? HTTPURLResponse)?.statusCode ?? 0 ). Headers: \( (response as? HTTPURLResponse)?.allHeaderFields ?? [:] ). Body: \(responseBody)") + throw PushError.uploadInitiationFailed + } + + // Construct the base URL for the registry + guard let baseRegistryURL = URL(string: "https://\(registry)") else { + Logger.error("Failed to create base registry URL from: \(registry)") + throw PushError.invalidURL + } + + // Create the final upload URL, resolving the location against the base URL + guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else { + Logger.error("Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)") + throw PushError.invalidURL + } + + Logger.info("Blob upload initiated. Upload URL: \(uploadURL.absoluteString)") + return uploadURL.absoluteURL // Ensure it's absolute + } + + private func uploadBlob(url: URL, data: Data, digest: String, token: String) async throws { + var components = URLComponents(url: url, resolvingAgainstBaseURL: true)! + + // Add digest parameter + var queryItems = components.queryItems ?? [] + queryItems.append(URLQueryItem(name: "digest", value: digest)) + components.queryItems = queryItems + + guard let uploadURL = components.url else { + throw PushError.invalidURL + } + + var request = URLRequest(url: uploadURL) + request.httpMethod = "PUT" + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type") + request.setValue("\(data.count)", forHTTPHeaderField: "Content-Length") + request.httpBody = data + + let (_, response) = try await URLSession.shared.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { + throw PushError.blobUploadFailed + } + } + + private func pushManifest(repository: String, tag: String, manifest: Data, token: String) async throws { + let url = URL(string: "https://\(registry)/v2/\(repository)/manifests/\(tag)")! + var request = URLRequest(url: url) + request.httpMethod = "PUT" + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + request.setValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type") + request.httpBody = manifest + + let (_, response) = try await URLSession.shared.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { + throw PushError.manifestPushFailed + } + } + + private func getCredentialsFromEnvironment() -> (String?, String?) { + let username = ProcessInfo.processInfo.environment["GITHUB_USERNAME"] ?? + ProcessInfo.processInfo.environment["GHCR_USERNAME"] + let password = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] ?? 
+ ProcessInfo.processInfo.environment["GHCR_TOKEN"] + return (username, password) + } + + // Add these helper methods for dry-run and reassemble implementation + + // NEW Helper function using Compression framework and sparse writing + private func decompressChunkAndWriteSparse(inputPath: String, outputHandle: FileHandle, startOffset: UInt64) throws -> UInt64 { + guard FileManager.default.fileExists(atPath: inputPath) else { + Logger.error("Compressed chunk not found at: \(inputPath)") + return 0 // Or throw an error + } + + let sourceData = try Data(contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) + var currentWriteOffset = startOffset + var totalDecompressedBytes: UInt64 = 0 + var sourceReadOffset = 0 // Keep track of how much compressed data we've provided + + // Use the initializer with the readingFrom closure + let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? in + let bytesAvailable = sourceData.count - sourceReadOffset + if bytesAvailable == 0 { + return nil // No more data + } + let bytesToRead = min(length, bytesAvailable) + let chunk = sourceData.subdata(in: sourceReadOffset ..< sourceReadOffset + bytesToRead) + sourceReadOffset += bytesToRead + return chunk + } + + // Process the decompressed output by reading from the filter + while let decompressedData = try filter.readData(ofLength: Self.holeGranularityBytes) { + if decompressedData.isEmpty { break } // End of stream + + // Check if the chunk is all zeros + if decompressedData.count == Self.holeGranularityBytes && decompressedData == Self.zeroChunk { + // It's a zero chunk, just advance the offset, don't write + currentWriteOffset += UInt64(decompressedData.count) + } else { + // Not a zero chunk (or a partial chunk at the end), write it + try outputHandle.seek(toOffset: currentWriteOffset) + try outputHandle.write(contentsOf: decompressedData) + currentWriteOffset += UInt64(decompressedData.count) + } + totalDecompressedBytes += UInt64(decompressedData.count) + } + + // No explicit finalize needed when initialized with source data + + return totalDecompressedBytes + } + + // Helper function to calculate SHA256 hash of a file + private func calculateSHA256(filePath: String) -> String { + guard FileManager.default.fileExists(atPath: filePath) else { + return "file-not-found" + } + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/shasum") + process.arguments = ["-a", "256", filePath] + + let outputPipe = Pipe() + process.standardOutput = outputPipe + + do { + try process.run() + process.waitUntilExit() + + if let data = try outputPipe.fileHandleForReading.readToEnd(), + let output = String(data: data, encoding: .utf8) { + return output.components(separatedBy: " ").first ?? 
"hash-calculation-failed" + } + } catch { + Logger.error("SHA256 calculation failed: \(error)") + } + + return "hash-calculation-failed" + } } + +actor UploadProgressTracker { + private var totalBytes: Int64 = 0 + private var uploadedBytes: Int64 = 0 // Renamed + private var progressLogger = ProgressLogger(threshold: 0.01) + private var totalFiles: Int = 0 // Keep track of total items + private var completedFiles: Int = 0 // Keep track of completed items + + // Upload speed tracking + private var startTime: Date = Date() + private var lastUpdateTime: Date = Date() + private var lastUpdateBytes: Int64 = 0 + private var speedSamples: [Double] = [] + private var peakSpeed: Double = 0 + private var totalElapsedTime: TimeInterval = 0 + + // Smoothing factor for speed calculation + private var speedSmoothing: Double = 0.3 + private var smoothedSpeed: Double = 0 + + func setTotal(_ total: Int64, files: Int) { + totalBytes = total + totalFiles = files + startTime = Date() + lastUpdateTime = startTime + uploadedBytes = 0 // Reset uploaded bytes + completedFiles = 0 // Reset completed files + smoothedSpeed = 0 + speedSamples = [] + peakSpeed = 0 + totalElapsedTime = 0 + } + + func addProgress(_ bytes: Int64) { + uploadedBytes += bytes + completedFiles += 1 // Increment completed files count + let now = Date() + let elapsed = now.timeIntervalSince(lastUpdateTime) + + // Show first progress update immediately, then throttle updates + let shouldUpdate = (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles) + + if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set + let currentSpeed = Double(uploadedBytes - lastUpdateBytes) / max(elapsed, 0.001) + speedSamples.append(currentSpeed) + + // Cap samples array + if speedSamples.count > 20 { + speedSamples.removeFirst(speedSamples.count - 20) + } + + peakSpeed = max(peakSpeed, currentSpeed) + + // Apply exponential smoothing + if smoothedSpeed == 0 { smoothedSpeed = currentSpeed } + else { smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed } + + let recentAvgSpeed = calculateAverageSpeed() + let totalElapsed = now.timeIntervalSince(startTime) + let overallAvgSpeed = totalElapsed > 0 ? Double(uploadedBytes) / totalElapsed : 0 + + let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero + logSpeedProgress( + current: progress, + currentSpeed: currentSpeed, + averageSpeed: recentAvgSpeed, + smoothedSpeed: smoothedSpeed, + overallSpeed: overallAvgSpeed, + peakSpeed: peakSpeed, + context: "Uploading Image" // Changed context + ) + + lastUpdateTime = now + lastUpdateBytes = uploadedBytes + totalElapsedTime = totalElapsed + } + } + + private func calculateAverageSpeed() -> Double { + guard !speedSamples.isEmpty else { return 0 } + var totalWeight = 0.0 + var weightedSum = 0.0 + let samples = speedSamples.suffix(min(8, speedSamples.count)) + for (index, speed) in samples.enumerated() { + let weight = Double(index + 1) + weightedSum += speed * weight + totalWeight += weight + } + return totalWeight > 0 ? weightedSum / totalWeight : 0 + } + + // Use the UploadStats struct + func getUploadStats() -> UploadStats { + let avgSpeed = totalElapsedTime > 0 ? 
Double(uploadedBytes) / totalElapsedTime : 0 + return UploadStats( + totalBytes: totalBytes, + uploadedBytes: uploadedBytes, // Renamed + elapsedTime: totalElapsedTime, + averageSpeed: avgSpeed, + peakSpeed: peakSpeed + ) + } + + private func logSpeedProgress( + current: Double, + currentSpeed: Double, + averageSpeed: Double, + smoothedSpeed: Double, + overallSpeed: Double, + peakSpeed: Double, + context: String + ) { + let progressPercent = Int(current * 100) + // let currentSpeedStr = formatByteSpeed(currentSpeed) // Removed unused + let avgSpeedStr = formatByteSpeed(averageSpeed) + // let peakSpeedStr = formatByteSpeed(peakSpeed) // Removed unused + let remainingBytes = totalBytes - uploadedBytes + let speedForEta = max(smoothedSpeed, averageSpeed * 0.8) + let etaSeconds = speedForEta > 0 ? Double(remainingBytes) / speedForEta : 0 + let etaStr = formatTimeRemaining(etaSeconds) + let progressBar = createProgressBar(progress: current) + let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count + + print( + "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output + terminator: "") + fflush(stdout) + } + + // Helper methods (createProgressBar, formatByteSpeed, formatTimeRemaining) remain the same + private func createProgressBar(progress: Double, width: Int = 30) -> String { + let completedWidth = Int(progress * Double(width)) + let remainingWidth = width - completedWidth + let completed = String(repeating: "█", count: completedWidth) + let remaining = String(repeating: "░", count: remainingWidth) + return "[\(completed)\(remaining)]" + } + private func formatByteSpeed(_ bytesPerSecond: Double) -> String { + let units = ["B/s", "KB/s", "MB/s", "GB/s"] + var speed = bytesPerSecond + var unitIndex = 0 + while speed > 1024 && unitIndex < units.count - 1 { speed /= 1024; unitIndex += 1 } + return String(format: "%.1f %@", speed, units[unitIndex]) + } + private func formatTimeRemaining(_ seconds: Double) -> String { + if seconds.isNaN || seconds.isInfinite || seconds <= 0 { return "calculating..." } + let hours = Int(seconds) / 3600 + let minutes = (Int(seconds) % 3600) / 60 + let secs = Int(seconds) % 60 + if hours > 0 { return String(format: "%d:%02d:%02d", hours, minutes, secs) } + else { return String(format: "%d:%02d", minutes, secs) } + } +} + diff --git a/libs/lume/src/LumeController.swift b/libs/lume/src/LumeController.swift index 4cb8253d..1329f8c5 100644 --- a/libs/lume/src/LumeController.swift +++ b/libs/lume/src/LumeController.swift @@ -452,6 +452,77 @@ final class LumeController { } } + @MainActor + public func pushImage( + name: String, + imageName: String, + tags: [String], + registry: String, + organization: String, + storage: String? = nil, + chunkSizeMb: Int = 512, + verbose: Bool = false, + dryRun: Bool = false, + reassemble: Bool = false + ) async throws { + do { + Logger.info( + "Pushing VM to registry", + metadata: [ + "name": name, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", + "registry": registry, + "organization": organization, + "location": storage ?? 
"default", + "chunk_size": "\(chunkSizeMb)MB", + "dry_run": "\(dryRun)", + "reassemble": "\(reassemble)" + ]) + + try validatePushParameters( + name: name, + imageName: imageName, + tags: tags, + registry: registry, + organization: organization + ) + + // Find the actual location of the VM + let actualLocation = try self.validateVMExists(name, storage: storage) + + // Get the VM directory + let vmDir = try home.getVMDirectory(name, storage: actualLocation) + + // Use ImageContainerRegistry to push the VM + let imageContainerRegistry = ImageContainerRegistry( + registry: registry, organization: organization) + + try await imageContainerRegistry.push( + vmDirPath: vmDir.dir.path, + imageName: imageName, + tags: tags, + chunkSizeMb: chunkSizeMb, + verbose: verbose, + dryRun: dryRun, + reassemble: reassemble + ) + + Logger.info( + "VM pushed successfully", + metadata: [ + "name": name, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", + "registry": registry, + "organization": organization, + ]) + } catch { + Logger.error("Failed to push VM", metadata: ["error": error.localizedDescription]) + throw error + } + } + @MainActor public func pruneImages() async throws { Logger.info("Pruning cached images") @@ -755,4 +826,31 @@ final class LumeController { break } } + + private func validatePushParameters( + name: String, + imageName: String, + tags: [String], + registry: String, + organization: String + ) throws { + guard !name.isEmpty else { + throw ValidationError("VM name cannot be empty") + } + guard !imageName.isEmpty else { + throw ValidationError("Image name cannot be empty") + } + guard !tags.isEmpty else { + throw ValidationError("At least one tag must be provided.") + } + guard !registry.isEmpty else { + throw ValidationError("Registry cannot be empty") + } + guard !organization.isEmpty else { + throw ValidationError("Organization cannot be empty") + } + + // Verify VM exists (this will throw if not found) + _ = try self.validateVMExists(name) + } } diff --git a/libs/lume/src/Server/Handlers.swift b/libs/lume/src/Server/Handlers.swift index aac16e80..c968359a 100644 --- a/libs/lume/src/Server/Handlers.swift +++ b/libs/lume/src/Server/Handlers.swift @@ -288,6 +288,54 @@ extension Server { } } + func handlePush(_ body: Data?) async throws -> HTTPResponse { + guard let body = body, + let request = try? 
JSONDecoder().decode(PushRequest.self, from: body) + else { + return HTTPResponse( + statusCode: .badRequest, + headers: ["Content-Type": "application/json"], + body: try JSONEncoder().encode(APIError(message: "Invalid request body")) + ) + } + + // Trigger push asynchronously, return Accepted immediately + Task.detached { @MainActor @Sendable in + do { + let vmController = LumeController() + try await vmController.pushImage( + name: request.name, + imageName: request.imageName, + tags: request.tags, + registry: request.registry, + organization: request.organization, + storage: request.storage, + chunkSizeMb: request.chunkSizeMb, + verbose: false, // Verbose typically handled by server logs + dryRun: false, // Default API behavior is likely non-dry-run + reassemble: false // Default API behavior is likely non-reassemble + ) + Logger.info("Background push completed successfully for image: \(request.imageName):\(request.tags.joined(separator: ","))") + } catch { + Logger.error( + "Background push failed for image: \(request.imageName):\(request.tags.joined(separator: ","))", + metadata: ["error": error.localizedDescription] + ) + } + } + + return HTTPResponse( + statusCode: .accepted, + headers: ["Content-Type": "application/json"], + body: try JSONEncoder().encode([ + "message": AnyEncodable("Push initiated in background"), + "name": AnyEncodable(request.name), + "imageName": AnyEncodable(request.imageName), + "tags": AnyEncodable(request.tags), + ]) + ) + } + func handleGetImages(_ request: HTTPRequest) async throws -> HTTPResponse { let pathAndQuery = request.path.split(separator: "?", maxSplits: 1) let queryParams = diff --git a/libs/lume/src/Server/Requests.swift b/libs/lume/src/Server/Requests.swift index 19291072..da0bf681 100644 --- a/libs/lume/src/Server/Requests.swift +++ b/libs/lume/src/Server/Requests.swift @@ -102,3 +102,31 @@ struct CloneRequest: Codable { let sourceLocation: String? let destLocation: String? } + +struct PushRequest: Codable { + let name: String // Name of the local VM + let imageName: String // Base name for the image in the registry + let tags: [String] // List of tags to push + var registry: String // Registry URL + var organization: String // Organization/user in the registry + let storage: String? // Optional VM storage location + var chunkSizeMb: Int // Chunk size + // dryRun and reassemble are less common for API, default to false? + // verbose is usually handled by server logging + + enum CodingKeys: String, CodingKey { + case name, imageName, tags, registry, organization, storage, chunkSizeMb + } + + // Provide default values for optional fields during decoding + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + name = try container.decode(String.self, forKey: .name) + imageName = try container.decode(String.self, forKey: .imageName) + tags = try container.decode([String].self, forKey: .tags) + registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io" + organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua" + storage = try container.decodeIfPresent(String.self, forKey: .storage) + chunkSizeMb = try container.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 
512 + } +} diff --git a/libs/lume/src/Server/Responses.swift b/libs/lume/src/Server/Responses.swift index e6d3bfe9..12a5b4c3 100644 --- a/libs/lume/src/Server/Responses.swift +++ b/libs/lume/src/Server/Responses.swift @@ -4,6 +4,19 @@ struct APIError: Codable { let message: String } +// Helper struct to encode mixed-type dictionaries +struct AnyEncodable: Encodable { + private let value: Encodable + + init(_ value: Encodable) { + self.value = value + } + + func encode(to encoder: Encoder) throws { + try value.encode(to: encoder) + } +} + extension HTTPResponse { static func json(_ value: T) throws -> HTTPResponse { let data = try JSONEncoder().encode(value) diff --git a/libs/lume/src/Server/Server.swift b/libs/lume/src/Server/Server.swift index 4ed671c5..71db4a75 100644 --- a/libs/lume/src/Server/Server.swift +++ b/libs/lume/src/Server/Server.swift @@ -261,6 +261,12 @@ final class Server { } return try await self.handleSetDefaultLocation(name) }), + Route( + method: "POST", path: "/vms/push", + handler: { [weak self] request in + guard let self else { throw HTTPError.internalError } + return try await self.handlePush(request.body) + }), ] } diff --git a/libs/lume/src/Utils/CommandRegistry.swift b/libs/lume/src/Utils/CommandRegistry.swift index a7e2a7bc..4d128971 100644 --- a/libs/lume/src/Utils/CommandRegistry.swift +++ b/libs/lume/src/Utils/CommandRegistry.swift @@ -5,6 +5,7 @@ enum CommandRegistry { [ Create.self, Pull.self, + Push.self, Images.self, Clone.self, Get.self, From b10d310e03aa1ba894da441c845eec53e0bfde4b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:04:40 -0700 Subject: [PATCH 04/28] Fix pull --- .../ImageContainerRegistry.swift | 482 +++++------------- 1 file changed, 140 insertions(+), 342 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 3ba7d543..8b668db7 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -102,6 +102,28 @@ struct ImageMetadata: Codable { let timestamp: Date } +// Actor to safely collect disk part information from concurrent tasks +actor DiskPartsCollector { + private var diskParts: [(Int, URL)] = [] + private var partCounter = 0 + + // Adds a part and returns its assigned sequential number + func addPart(url: URL) -> Int { + partCounter += 1 + let partNum = partCounter + diskParts.append((partNum, url)) + return partNum + } + + func getSortedParts() -> [(Int, URL)] { + return diskParts.sorted { $0.0 < $1.0 } + } + + func getTotalParts() -> Int { + return partCounter + } +} + actor ProgressTracker { private var totalBytes: Int64 = 0 private var downloadedBytes: Int64 = 0 @@ -716,8 +738,8 @@ class ImageContainerRegistry: @unchecked Sendable { "[░░░░░░░░░░░░░░░░░░░░] 0% | Initializing downloads... | ETA: calculating... 
") fflush(stdout) - var diskParts: [(Int, URL)] = [] - var totalParts = 0 + // Instantiate the collector + let diskPartsCollector = DiskPartsCollector() // Adaptive concurrency based on system capabilities let memoryConstrained = determineIfMemoryConstrained() @@ -742,85 +764,97 @@ class ImageContainerRegistry: @unchecked Sendable { await counter.decrement() } - if let partInfo = extractPartInfo(from: layer.mediaType) { - let (partNum, total) = partInfo - totalParts = total + // Check both media type and safely unwrap part info + if layer.mediaType == "application/octet-stream+lz4" { + let size = layer.size + // Declare cachedLayer and digest here let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest - let size = layer.size - // For memory-optimized mode - point directly to cache when possible - if memoryConstrained - && FileManager.default.fileExists(atPath: cachedLayer.path) + // For memory-constrained mode - point directly to cache when possible + if memoryConstrained // Line 777 + && FileManager.default.fileExists(atPath: cachedLayer.path) { - // Use the cached file directly - diskParts.append((partNum, cachedLayer)) + // Use the cached file *directly* without copying to temp + // Add the *cached* layer path to the collector + let partNum = await diskPartsCollector.addPart(url: cachedLayer) // Use the collector + Logger.info("Using cached layer directly for part #\(partNum): \(cachedLayer.lastPathComponent)") - // Still need to account for progress - group.addTask { [self] in - await counter.increment() - await downloadProgress.addProgress(Int64(size)) - await counter.decrement() - return Int64(size) - } - continue - } else { - let partURL = tempDownloadDir.appendingPathComponent( - "disk.img.part.\(partNum)") - diskParts.append((partNum, partURL)) - - group.addTask { [self] in - await counter.increment() + // Account for progress directly, no need for a separate task + await downloadProgress.addProgress(Int64(size)) + + // No task was added, so no need to increment/decrement counter here + + continue // Skip the download task group logic below + } else { + // Not memory constrained OR file not cached + // Add a task to handle copy/download and adding to collector + group.addTask { [self] in + await counter.increment() // Increment counter for the task + let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: partURL) - await downloadProgress.addProgress(Int64(size)) - } else { - // Check if this layer is already being downloaded and we're not skipping cache - if isDownloading(digest) { - try await waitForExistingDownload( - digest, cachedLayer: cachedLayer) - if FileManager.default.fileExists(atPath: cachedLayer.path) - { - try FileManager.default.copyItem( - at: cachedLayer, to: partURL) - await downloadProgress.addProgress(Int64(size)) - return Int64(size) - } - } + // If cached, copy to temp and use temp path for reassembly later + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) // Update progress after copy + finalPath = tempPartURL + } else { + // If not cached, download to temp path + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path + + // Check if this layer is already being downloaded + if 
isDownloading(digest) { + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + // If it finished downloading while waiting, copy from cache to temp + if FileManager.default.fileExists(atPath: cachedLayer.path) { + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) // Update progress + finalPath = tempPartURL + } else { + // If still not available after waiting (should be rare), proceed to download + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, + mediaType: layer.mediaType, // Use correct mediaType + token: token, + to: tempPartURL, + maxRetries: 5, + progress: downloadProgress, // Progress updated inside downloadLayer + manifestId: manifestId + ) + // downloadLayer handles caching and markDownloadComplete + finalPath = tempPartURL + } + } else { + // Start new download + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, + mediaType: layer.mediaType, // Use correct mediaType + token: token, + to: tempPartURL, + maxRetries: 5, + progress: downloadProgress, // Progress updated inside downloadLayer + manifestId: manifestId + ) + // downloadLayer handles caching and markDownloadComplete + finalPath = tempPartURL + } + } + + // Add the final determined path (temp path) to the collector + let partNum = await diskPartsCollector.addPart(url: finalPath) + Logger.info("Assigned part #\(partNum) for path: \(finalPath.lastPathComponent)") - // Start new download - markDownloadStarted(digest) - - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, - token: token, - to: partURL, - maxRetries: 5, - progress: downloadProgress, - manifestId: manifestId - ) - - // Cache the downloaded layer if caching is enabled - if cachingEnabled { - if FileManager.default.fileExists(atPath: cachedLayer.path) - { - try FileManager.default.removeItem(at: cachedLayer) - } - try FileManager.default.copyItem( - at: partURL, to: cachedLayer) - } - markDownloadComplete(digest) - } - - await counter.decrement() - return Int64(size) - } - continue + await counter.decrement() // Decrement counter + return Int64(size) + } + continue // Ensure we move to the next layer after adding task } } else { let mediaType = layer.mediaType @@ -894,7 +928,14 @@ class ImageContainerRegistry: @unchecked Sendable { // Wait for remaining tasks for try await _ in group {} - } + } // End TaskGroup + + // --- Safely retrieve parts AFTER TaskGroup --- + let diskParts = await diskPartsCollector.getSortedParts() + let totalParts = await diskPartsCollector.getTotalParts() + Logger.info("Finished processing layers. 
Found \(totalParts) disk parts.") + // --- End retrieving parts --- + Logger.info("") // New line after progress // Display download statistics @@ -1464,23 +1505,22 @@ class ImageContainerRegistry: @unchecked Sendable { { Logger.info("Copying from cache...") - var diskPartSources: [(Int, URL)] = [] - var totalParts = 0 + // Instantiate collector + let diskPartsCollector = DiskPartsCollector() // First identify disk parts and non-disk files for layer in manifest.layers { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: layer.digest) - if let partInfo = extractPartInfo(from: layer.mediaType) { - let (partNum, total) = partInfo - totalParts = total - // Just store the reference to source instead of copying - diskPartSources.append((partNum, cachedLayer)) - } else { + // Check if it's a disk chunk layer based on media type + if layer.mediaType == "application/octet-stream+lz4" { + // It's a disk chunk - Add to collector + _ = await diskPartsCollector.addPart(url: cachedLayer) // Ignore return value + } + else { + // Handle non-disk layers (config, nvram) let fileName: String switch layer.mediaType { - case "application/vnd.oci.image.layer.v1.tar", "application/octet-stream+gzip": - fileName = "disk.img" case "application/vnd.oci.image.config.v1+json": fileName = "config.json" case "application/octet-stream": @@ -1496,8 +1536,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } + // --- Safely retrieve parts AFTER loop --- + let diskPartSources = await diskPartsCollector.getSortedParts() + let totalParts = await diskPartsCollector.getTotalParts() + Logger.info("Found \(totalParts) disk parts in cache.") + // --- End retrieving parts --- + // Reassemble disk parts if needed - if !diskPartSources.isEmpty { + if !diskPartSources.isEmpty { // Use the retrieved array // Get the uncompressed size from cached config let configDigest = manifest.config?.digest let cachedConfigPath = @@ -1588,277 +1634,29 @@ class ImageContainerRegistry: @unchecked Sendable { for partNum in 1...totalParts { // Find the original layer info for this part number guard - let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }), + // Find layer by index approximated during collection, not media type parts let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) else { throw PullError.missingPart(partNum) } - let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( - "Processing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent)" + "Decompressing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." ) - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - // Seek to the correct offset in the output sparse file - try outputHandle.seek(toOffset: currentOffset) - - if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum) with media type: \(layerMediaType)") - - // Handle Apple Archive format - let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) - let tempOutputPath = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - - // Check input file size before decompression - let inputFileSize = - (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" - ) - - // Create a process that decompresses to a temporary file - let process = Process() - process.executableURL = URL(fileURLWithPath: toolPath) - process.arguments = [ - "extract", "-i", sourceURL.path, "-o", tempOutputPath.path, - ] - - // Add error output capture - let errorPipe = Pipe() - process.standardError = errorPipe - - Logger.info( - "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" - ) - try process.run() - process.waitUntilExit() - - // Check error output if any - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !errorData.isEmpty, - let errorString = String(data: errorData, encoding: .utf8) - { - Logger.error("Decompression error output: \(errorString)") - } - - if process.terminationStatus != 0 { - Logger.error( - "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" - ) - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) - let progressBar = createProgressBar(progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Check if the output file exists and has content - let outputExists = FileManager.default.fileExists(atPath: tempOutputPath.path) - let outputFileSize = - outputExists - ? ((try? FileManager.default.attributesOfItem(atPath: tempOutputPath.path)[ - .size] as? UInt64) ?? 0) : 0 - Logger.info( - "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" - ) - - // If decompression produced an empty file, fall back to direct copy - if outputFileSize == 0 { - Logger.info( - "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" - ) - try? 
FileManager.default.removeItem(at: tempOutputPath) - - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) - let progressBar = createProgressBar(progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Read the decompressed file and write to our output - let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) - defer { - try? tempInputHandle.close() - try? FileManager.default.removeItem(at: tempOutputPath) - } - - // Read decompressed data in chunks and write to sparse file - var partDecompressedSize: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { // Help manage memory with large files - try! tempInputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } // End of stream - - try outputHandle.write(contentsOf: data) - partDecompressedSize += UInt64(data.count) - chunkCount += 1 - - // Update progress based on decompressed size written - let totalProgress = - Double(currentOffset + partDecompressedSize) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling") - } - - Logger.info( - "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" - ) - currentOffset += partDecompressedSize // Advance offset by decompressed size - } else { - // No decompression command available, try direct copy - Logger.info( - "Copying part \(partNum) directly..." - ) - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - // Get part size - let partSize = - (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" - ) - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - } - - // Ensure data is written before processing next part (optional but safer) - try outputHandle.synchronize() + // Use the correct sparse decompression function + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + + try outputHandle.synchronize() // Optional: Synchronize after each chunk } // Finalize progress, close handle (done by defer) reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - Logger.info("") // Newline // Ensure output handle is closed before post-processing try outputHandle.close() From b9f2a73941420a36bd753b2351c0d257d035b5d6 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:33:02 -0700 Subject: [PATCH 05/28] Add sequential ordering --- .../ImageContainerRegistry.swift | 484 ++++++++++-------- libs/lume/src/Errors/Errors.swift | 3 + 2 files changed, 262 insertions(+), 225 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8b668db7..3c90b40b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -26,6 +26,18 @@ enum PushError: Error { case missingToken case invalidURL case lz4NotFound // Added error case + case invalidMediaType // Added during part refactoring + case missingUncompressedSizeAnnotation // Added for sparse file handling + case fileCreationFailed(String) // Added for sparse file handling + case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling + case missingPart(Int) // Added for sparse file handling + case layerDownloadFailed(String) // Added for download retries + case manifestFetchFailed // Added for manifest fetching +} + +// Define a specific error type for when no underlying error exists +struct NoSpecificUnderlyingError: Error, CustomStringConvertible { + var description: String { "No specific underlying error was provided." 
} } struct ChunkMetadata: Codable { @@ -104,21 +116,25 @@ struct ImageMetadata: Codable { // Actor to safely collect disk part information from concurrent tasks actor DiskPartsCollector { + // Store tuples of (sequentialPartNum, url) private var diskParts: [(Int, URL)] = [] - private var partCounter = 0 + // Restore internal counter + private var partCounter = 0 // Adds a part and returns its assigned sequential number func addPart(url: URL) -> Int { - partCounter += 1 + partCounter += 1 // Use counter logic let partNum = partCounter - diskParts.append((partNum, url)) - return partNum + diskParts.append((partNum, url)) // Store sequential number + return partNum // Return assigned sequential number } + // Sort by the sequential part number (index 0 of tuple) func getSortedParts() -> [(Int, URL)] { return diskParts.sorted { $0.0 < $1.0 } } + // Restore getTotalParts func getTotalParts() -> Int { return partCounter } @@ -752,6 +768,9 @@ class ImageContainerRegistry: @unchecked Sendable { ) let counter = TaskCounter() + // Remove totalDiskParts + // var totalDiskParts: Int? = nil + var lz4LayerCount = 0 // Count lz4 layers found try await withThrowingTaskGroup(of: Int64.self) { group in for layer in manifest.layers { @@ -764,176 +783,151 @@ class ImageContainerRegistry: @unchecked Sendable { await counter.decrement() } - // Check both media type and safely unwrap part info + // Identify disk parts by media type if layer.mediaType == "application/octet-stream+lz4" { - let size = layer.size - - // Declare cachedLayer and digest here + // --- Handle LZ4 Disk Part Layer --- + lz4LayerCount += 1 // Increment count + let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging + let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest + let size = layer.size - // For memory-constrained mode - point directly to cache when possible - if memoryConstrained // Line 777 - && FileManager.default.fileExists(atPath: cachedLayer.path) - { - // Use the cached file *directly* without copying to temp - // Add the *cached* layer path to the collector - let partNum = await diskPartsCollector.addPart(url: cachedLayer) // Use the collector - Logger.info("Using cached layer directly for part #\(partNum): \(cachedLayer.lastPathComponent)") - - // Account for progress directly, no need for a separate task - await downloadProgress.addProgress(Int64(size)) - - // No task was added, so no need to increment/decrement counter here - - continue // Skip the download task group logic below - } else { - // Not memory constrained OR file not cached - // Add a task to handle copy/download and adding to collector - group.addTask { [self] in - await counter.increment() // Increment counter for the task - - let finalPath: URL + if memoryConstrained && FileManager.default.fileExists(atPath: cachedLayer.path) { + // Add to collector, get sequential number assigned by collector + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + // Log using the sequential number from collector for clarity if needed, or the lz4LayerCount + Logger.info("Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)") + await downloadProgress.addProgress(Int64(size)) + continue + } else { + // Download/Copy Path (Task Group) + group.addTask { [self] in + await counter.increment() + let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - // If cached, copy 
to temp and use temp path for reassembly later - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) - await downloadProgress.addProgress(Int64(size)) // Update progress after copy - finalPath = tempPartURL - } else { - // If not cached, download to temp path - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path - - // Check if this layer is already being downloaded - if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) - // If it finished downloading while waiting, copy from cache to temp - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) - await downloadProgress.addProgress(Int64(size)) // Update progress - finalPath = tempPartURL - } else { - // If still not available after waiting (should be rare), proceed to download - markDownloadStarted(digest) - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, // Use correct mediaType - token: token, - to: tempPartURL, - maxRetries: 5, - progress: downloadProgress, // Progress updated inside downloadLayer - manifestId: manifestId - ) - // downloadLayer handles caching and markDownloadComplete - finalPath = tempPartURL - } - } else { - // Start new download - markDownloadStarted(digest) - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, // Use correct mediaType - token: token, - to: tempPartURL, - maxRetries: 5, - progress: downloadProgress, // Progress updated inside downloadLayer - manifestId: manifestId - ) - // downloadLayer handles caching and markDownloadComplete - finalPath = tempPartURL - } - } - - // Add the final determined path (temp path) to the collector - let partNum = await diskPartsCollector.addPart(url: finalPath) - Logger.info("Assigned part #\(partNum) for path: \(finalPath.lastPathComponent)") - - await counter.decrement() // Decrement counter - return Int64(size) - } - continue // Ensure we move to the next layer after adding task + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) + finalPath = tempPartURL + } else { + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + if isDownloading(digest) { + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + if FileManager.default.fileExists(atPath: cachedLayer.path) { + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) + finalPath = tempPartURL + } else { + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, mediaType: layer.mediaType, token: token, + to: tempPartURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId + ) + finalPath = tempPartURL + } + } else { + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, mediaType: layer.mediaType, token: token, + to: tempPartURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId + ) + finalPath 
= tempPartURL + } + } + // Add to collector, get sequential number assigned by collector + let collectorPartNum = await diskPartsCollector.addPart(url: finalPath) + // Log using the sequential number from collector + Logger.info("Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)") + await counter.decrement() + return Int64(size) + } } } else { + // --- Handle Non-Disk-Part Layer --- let mediaType = layer.mediaType let digest = layer.digest let size = layer.size + // Determine output path based on media type let outputURL: URL switch mediaType { case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - outputURL = tempDownloadDir.appendingPathComponent("disk.img") + "application/octet-stream+gzip": // Might be compressed disk.img single file? + outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") - case "application/octet-stream": - outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") + case "application/octet-stream": // Could be nvram or uncompressed single disk.img + // Heuristic: If a config.json already exists or is expected, assume this is nvram. + // This might need refinement if single disk images use octet-stream. + if manifest.config != nil { + outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") + } else { + // Assume it's a single-file disk image if no config layer is present + outputURL = tempDownloadDir.appendingPathComponent("disk.img") + } default: - continue + Logger.info("Skipping unsupported layer media type: \(mediaType)") + continue // Skip to the next layer } + // Add task to download/copy the non-disk-part layer group.addTask { [self] in await counter.increment() - - let cachedLayer = getCachedLayerPath( - manifestId: manifestId, digest: digest) + let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) } else { - // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { - try await waitForExistingDownload( - digest, cachedLayer: cachedLayer) + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem( - at: cachedLayer, to: outputURL) + try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) + await counter.decrement() // Decrement before returning return Int64(size) } } - // Start new download markDownloadStarted(digest) - try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: mediaType, - token: token, - to: outputURL, - maxRetries: 5, - progress: downloadProgress, - manifestId: manifestId + digest: digest, mediaType: mediaType, token: token, + to: outputURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId ) - - // Cache the downloaded layer if caching is enabled - if cachingEnabled { - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.removeItem(at: cachedLayer) - } - try FileManager.default.copyItem(at: outputURL, to: cachedLayer) - } - markDownloadComplete(digest) + // Note: downloadLayer handles caching and 
marking download complete } - await counter.decrement() return Int64(size) } } - } + } // End for layer in manifest.layers // Wait for remaining tasks for try await _ in group {} } // End TaskGroup // --- Safely retrieve parts AFTER TaskGroup --- - let diskParts = await diskPartsCollector.getSortedParts() - let totalParts = await diskPartsCollector.getTotalParts() - Logger.info("Finished processing layers. Found \(totalParts) disk parts.") + let diskParts = await diskPartsCollector.getSortedParts() // Already sorted by logicalPartNum + // Check if totalDiskParts was set (meaning at least one lz4 layer was processed) + // Get total parts from the collector + let totalPartsFromCollector = await diskPartsCollector.getTotalParts() + // Change guard to if for logging only, as the later if condition handles the logic + if totalPartsFromCollector == 0 { + // If totalParts is 0, it means no layers matched the lz4 format. + Logger.info("No lz4 disk part layers found. Assuming single-part image or non-lz4 parts.") + // Reassembly logic below will be skipped if diskParts is empty. + // Explicitly set totalParts to 0 to prevent entering the reassembly block if diskParts might somehow be non-empty but totalParts was 0 + // This ensures consistency if the collector logic changes. + } + Logger.info("Finished processing layers. Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") // --- End retrieving parts --- Logger.info("") // New line after progress @@ -974,8 +968,9 @@ class ImageContainerRegistry: @unchecked Sendable { } // Handle disk parts if present - if !diskParts.isEmpty { - Logger.info("Reassembling disk image using sparse file technique...") + if !diskParts.isEmpty && totalPartsFromCollector > 0 { + // Use totalPartsFromCollector here + Logger.info("Reassembling \(totalPartsFromCollector) disk image parts using sparse file technique...") let outputURL = tempVMDir.appendingPathComponent("disk.img") // Wrap setup in do-catch for better error reporting @@ -1008,8 +1003,9 @@ class ImageContainerRegistry: @unchecked Sendable { } // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) + // Filter based on the correct media type now let expectedCompressedTotalSize = UInt64( - manifest.layers.filter { extractPartInfo(from: $0.mediaType) != nil }.reduce(0) + manifest.layers.filter { $0.mediaType == "application/octet-stream+lz4" }.reduce(0) { $0 + $1.size } ) Logger.info( @@ -1067,23 +1063,39 @@ class ImageContainerRegistry: @unchecked Sendable { var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - for partNum in 1...totalParts { + // Iterate using the reliable totalParts count from media type + // Use totalPartsFromCollector for the loop range + for partNum in 1...totalPartsFromCollector { // Find the original layer info for this part number - guard - let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }), - let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) - else { - throw PullError.missingPart(partNum) + // Find the part URL from our collected parts using the logical partNum + guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { + // This error should now be less likely, but good to keep + Logger.error("Missing required part number \(partNum) in collected parts during 
reassembly.") + throw PullError.missingPart(partNum) } - let layerMediaType = layer.mediaType // Extract mediaType here + let partURL = partInfo.1 // Get the URL from the tuple + + // We no longer need to find the original manifest layer here, + // as all parts collected by the collector should be the lz4 type. + // Remove the block that used extractPartInfo: + /* + guard let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }) else { + // Should not happen if totalParts was derived correctly + Logger.error("Could not find manifest layer for logical part number \(partNum).") + throw PullError.missingPart(partNum) // Or a different error + } + let layerMediaType = layer.mediaType + */ + // Assume the media type for decompression purposes + let layerMediaType = "application/octet-stream+lz4" Logger.info( - "Processing part \(partNum) of \(totalParts): \(partURL.lastPathComponent)") + "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") let inputHandle = try FileHandle(forReadingFrom: partURL) defer { @@ -1504,31 +1516,46 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") + + // Define output URL and expected size variable scope here + let outputURL = destination.appendingPathComponent("disk.img") + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() + // Remove totalDiskParts + // var totalDiskParts: Int? = nil + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: layer.digest) - // Check if it's a disk chunk layer based on media type + // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - // It's a disk chunk - Add to collector - _ = await diskPartsCollector.addPart(url: cachedLayer) // Ignore return value - } + lz4LayerCount += 1 // Increment count + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") + } else { - // Handle non-disk layers (config, nvram) + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": fileName = "config.json" case "application/octet-stream": - fileName = "nvram.bin" + // Assume nvram if config layer exists, otherwise assume single disk image + fileName = manifest.config != nil ? 
"nvram.bin" : "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } - // Only non-disk files are copied + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, to: destination.appendingPathComponent(fileName) @@ -1537,111 +1564,113 @@ class ImageContainerRegistry: @unchecked Sendable { } // --- Safely retrieve parts AFTER loop --- - let diskPartSources = await diskPartsCollector.getSortedParts() - let totalParts = await diskPartsCollector.getTotalParts() - Logger.info("Found \(totalParts) disk parts in cache.") + let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number + let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector + + // Remove old guard check + /* + guard let totalParts = totalDiskParts else { + Logger.info("No cached layers with valid part information found. Assuming single-part image or non-lz4 parts.") + } + */ + Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") // --- End retrieving parts --- // Reassemble disk parts if needed - if !diskPartSources.isEmpty { // Use the retrieved array - // Get the uncompressed size from cached config - let configDigest = manifest.config?.digest - let cachedConfigPath = - configDigest != nil - ? getCachedLayerPath(manifestId: manifestId, digest: configDigest!) : nil - let uncompressedSize = cachedConfigPath.flatMap { - getUncompressedSizeFromConfig(configPath: $0) - } + // Use the count from the collector + if !diskPartSources.isEmpty { + // Use totalParts from collector directly + Logger.info("Reassembling \(totalParts) disk image parts using sparse file technique...") + + // Get uncompressed size from cached config file (needs to be copied first) + let configURL = destination.appendingPathComponent("config.json") + // Parse config.json to get uncompressed size *before* reassembly + let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) - // Try to get disk size from VM config if OCI annotation not found + // Now also try to get disk size from VM config if OCI annotation not found var vmConfigDiskSize: UInt64? = nil - if uncompressedSize == nil { - // Find config.json in the copied files - let vmConfigPath = destination.appendingPathComponent("config.json") - if FileManager.default.fileExists(atPath: vmConfigPath.path) { - do { - let configData = try Data(contentsOf: vmConfigPath) - let decoder = JSONDecoder() - if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { - vmConfigDiskSize = vmConfig.diskSize - if let size = vmConfigDiskSize { - Logger.info( - "Found diskSize from cached VM config.json: \(size) bytes") - } + if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { + do { + let configData = try Data(contentsOf: configURL) + let decoder = JSONDecoder() + if let vmConfig = try? 
decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info("Found diskSize from VM config.json: \(size) bytes") } - } catch { - Logger.error("Failed to parse cached VM config.json for diskSize: \(error)") } + } catch { + Logger.error("Failed to parse VM config.json for diskSize: \(error)") } } - // Force explicit use - if uncompressedSize != nil { - Logger.info( - "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + // Determine the size to use for the sparse file + // Use: annotation size > VM config diskSize > fallback (error) + if let size = uncompressedSize { + Logger.info("Using uncompressed size from annotation: \(size) bytes") + expectedTotalSize = size + } else if let size = vmConfigDiskSize { + Logger.info("Using diskSize from VM config: \(size) bytes") + expectedTotalSize = size + } else { + // If neither is found in cache scenario, throw error as we cannot determine the size + Logger.error( + "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." + + " Cannot reassemble." ) - } else if vmConfigDiskSize != nil { - Logger.info( - "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + throw PullError.missingUncompressedSizeAnnotation } - Logger.info( - "Reassembling disk image from cached parts using sparse file technique..." - ) - let outputURL = destination.appendingPathComponent("disk.img") + // Now that expectedTotalSize is guaranteed to be non-nil, proceed with setup + guard let sizeForTruncate = expectedTotalSize else { + // This should not happen due to the checks above, but safety first + let nilError: Error? = nil + // Use nil-coalescing to provide a default error, appeasing the compiler + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) + } - // Wrap setup in do-catch for better error reporting + // Wrap file handle setup and sparse file creation within this block let outputHandle: FileHandle do { - // 1. Ensure parent directory exists - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - - // 2. Explicitly create the file first, removing old one if needed + // Ensure parent directory exists + try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + // Explicitly create the file first, removing old one if needed if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) } guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { throw PullError.fileCreationFailed(outputURL.path) } - - // 3. 
Now open the handle for writing + // Open handle for writing outputHandle = try FileHandle(forWritingTo: outputURL) - + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: sizeForTruncate) + Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") } catch { - // Catch errors during directory/file creation or handle opening - Logger.error( - "Failed during setup for disk image reassembly: \(error.localizedDescription)", - metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) } - // Calculate expected total size from the cached files - let expectedTotalSize: UInt64 = diskPartSources.reduce(UInt64(0)) { - (acc: UInt64, element) -> UInt64 in - let fileSize = - (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] - as? UInt64 ?? 0) ?? 0 - return acc + fileSize - } - Logger.info( - "Expected download size from cache: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be lower)" - ) + // Ensure handle is closed when exiting this scope + defer { try? outputHandle.close() } + + // ... (Get uncompressed size etc.) ... var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file + var currentOffset: UInt64 = 0 - for partNum in 1...totalParts { - // Find the original layer info for this part number - guard - // Find layer by index approximated during collection, not media type parts - let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) - else { - throw PullError.missingPart(partNum) + // Iterate from 1 up to the total number of parts found by the collector + for collectorPartNum in 1...totalParts { + // Find the source URL from our collected parts using the sequential collectorPartNum + guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { + Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + throw PullError.missingPart(collectorPartNum) } + let sourceURL = sourceInfo.1 // Get URL from tuple + // Log using the sequential collector part number Logger.info( - "Decompressing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." + "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
) // Use the correct sparse decompression function @@ -1659,7 +1688,8 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") // Ensure output handle is closed before post-processing - try outputHandle.close() + // No need for explicit close here, defer handles it + // try outputHandle.close() // Verify final size let finalSize = @@ -1669,9 +1699,10 @@ class ImageContainerRegistry: @unchecked Sendable { "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) - if finalSize != expectedTotalSize { + // Use the calculated sizeForTruncate for comparison + if finalSize != sizeForTruncate { Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(expectedTotalSize) bytes), but this doesn't affect functionality" + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" ) } @@ -1912,8 +1943,10 @@ class ImageContainerRegistry: @unchecked Sendable { throw lastError ?? PullError.layerDownloadFailed(digest) } + // Function removed as it's not applicable to the observed manifest format + /* private func extractPartInfo(from mediaType: String) -> (partNum: Int, total: Int)? { - let pattern = #"part\.number=(\d+);part\.total=(\d+)"# + let pattern = #"part\\.number=(\\d+);part\\.total=(\\d+)"# guard let regex = try? NSRegularExpression(pattern: pattern), let match = regex.firstMatch( in: mediaType, @@ -1928,6 +1961,7 @@ class ImageContainerRegistry: @unchecked Sendable { } return (partNum, total) } + */ private func listRepositories() async throws -> [String] { var request = URLRequest( diff --git a/libs/lume/src/Errors/Errors.swift b/libs/lume/src/Errors/Errors.swift index b6568c10..c769d10d 100644 --- a/libs/lume/src/Errors/Errors.swift +++ b/libs/lume/src/Errors/Errors.swift @@ -58,6 +58,7 @@ enum PullError: Error, LocalizedError { case fileCreationFailed(String) case reassemblySetupFailed(path: String, underlyingError: Error) case missingUncompressedSizeAnnotation + case invalidMediaType var errorDescription: String? { switch self { @@ -81,6 +82,8 @@ enum PullError: Error, LocalizedError { return "Failed to set up for reassembly at path: \(path). Underlying error: \(underlyingError.localizedDescription)" case .missingUncompressedSizeAnnotation: return "Could not find the required uncompressed disk size annotation in the image config.json." + case .invalidMediaType: + return "Invalid media type" } } } From f491d5a3c996225e7e1f6dc83f0131a39d9def7f Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:41:02 -0700 Subject: [PATCH 06/28] Add logs --- .../lume/src/ContainerRegistry/ImageContainerRegistry.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 3c90b40b..cd57352e 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -930,6 +930,9 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Finished processing layers. 
Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") // --- End retrieving parts --- + // Add detailed logging for debugging + Logger.info("Disk part numbers collected and sorted: \(diskParts.map { $0.0 })") + Logger.info("") // New line after progress // Display download statistics @@ -1068,9 +1071,12 @@ class ImageContainerRegistry: @unchecked Sendable { for partNum in 1...totalPartsFromCollector { // Find the original layer info for this part number // Find the part URL from our collected parts using the logical partNum + Logger.info("Reassembly loop: Looking for partNum \(partNum) in diskParts") // Log loop iteration guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { // This error should now be less likely, but good to keep Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") + // Add current state log on error + Logger.error("Current disk part numbers available: \(diskParts.map { $0.0 })") throw PullError.missingPart(partNum) } let partURL = partInfo.1 // Get the URL from the tuple From ad78a85879e134945c116ff07aa15748384f751b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:56:56 -0700 Subject: [PATCH 07/28] Remove legacy decompress --- .../ImageContainerRegistry.swift | 280 ++---------------- 1 file changed, 25 insertions(+), 255 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index cd57352e..4e82927b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1098,7 +1098,7 @@ class ImageContainerRegistry: @unchecked Sendable { let layerMediaType = layer.mediaType */ // Assume the media type for decompression purposes - let layerMediaType = "application/octet-stream+lz4" + // Remove unused variable: let layerMediaType = "application/octet-stream+lz4" Logger.info( "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") @@ -1115,254 +1115,29 @@ class ImageContainerRegistry: @unchecked Sendable { // Seek to the correct offset in the output sparse file try outputHandle.seek(toOffset: currentOffset) + // Always attempt decompression using decompressChunkAndWriteSparse for lz4 parts + Logger.info( + "Decompressing part \(partNum) using decompressChunkAndWriteSparse") + + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), // Use sizeForTruncate + context: "Reassembling") + + // Remove the old block that checked getDecompressionCommand and did direct copy + /* if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info( - "Decompressing part \(partNum) with media type: \(layerMediaType)") - - // Handle Apple Archive format - let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) - let tempOutputPath = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - - // Check input file size before decompression - let inputFileSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" - ) - - // Create a process that decompresses to a temporary file - let process = Process() - process.executableURL = URL(fileURLWithPath: toolPath) - process.arguments = [ - "extract", "-i", partURL.path, "-o", tempOutputPath.path, - ] - - // Add error output capture - let errorPipe = Pipe() - process.standardError = errorPipe - - Logger.info( - "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" - ) - try process.run() - process.waitUntilExit() - - // Check error output if any - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !errorData.isEmpty, - let errorString = String(data: errorData, encoding: .utf8) - { - Logger.error("Decompression error output: \(errorString)") - } - - if process.terminationStatus != 0 { - Logger.error( - "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" - ) - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - let progressBar = createProgressBar( - progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Check if the output file exists and has content - let outputExists = FileManager.default.fileExists( - atPath: tempOutputPath.path) - let outputFileSize = - outputExists - ? ((try? FileManager.default.attributesOfItem( - atPath: tempOutputPath.path)[ - .size] as? UInt64) ?? 0) : 0 - Logger.info( - "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" - ) - - // If decompression produced an empty file, fall back to direct copy - if outputFileSize == 0 { - Logger.info( - "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" - ) - try? 
FileManager.default.removeItem(at: tempOutputPath) - - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - let progressBar = createProgressBar( - progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Read the decompressed file and write to our output - let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) - defer { - try? tempInputHandle.close() - try? FileManager.default.removeItem(at: tempOutputPath) - } - - // Read decompressed data in chunks and write to sparse file - var partDecompressedSize: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { // Help manage memory with large files - try! tempInputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } // End of stream - - try outputHandle.write(contentsOf: data) - partDecompressedSize += UInt64(data.count) - chunkCount += 1 - - // Update progress based on decompressed size written - let totalProgress = - Double(currentOffset + partDecompressedSize) - / Double(expectedCompressedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling") - } - - Logger.info( - "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" - ) - currentOffset += partDecompressedSize // Advance offset by decompressed size + // ... [removed decompression logic using external tool] ... } else { // No decompression command available, try direct copy - Logger.info( - "Copying part \(partNum) directly..." - ) - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - // Get part size - let partSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" - ) - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten + // ... [removed direct copy logic] ... } + */ // Ensure data is written before processing next part (optional but safer) try outputHandle.synchronize() @@ -1389,11 +1164,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Decompress the assembled disk image if it's in LZFSE compressed format - Logger.info( - "Checking if disk image is LZFSE compressed and decompressing if needed...") - decompressLZFSEImage(inputPath: outputURL.path) - // Create a properly formatted disk image Logger.info("Converting assembled data to proper disk image format...") @@ -1679,13 +1449,17 @@ class ImageContainerRegistry: @unchecked Sendable { "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." ) - // Use the correct sparse decompression function + // Always use the correct sparse decompression function let decompressedBytesWritten = try decompressChunkAndWriteSparse( inputPath: sourceURL.path, outputHandle: outputHandle, startOffset: currentOffset ) currentOffset += decompressedBytesWritten + // Update progress (using sizeForTruncate which should be available) + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") try outputHandle.synchronize() // Optional: Synchronize after each chunk } @@ -1712,10 +1486,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Decompress the assembled disk image if it's in LZFSE compressed format - Logger.info("Checking if disk image is LZFSE compressed and decompressing if needed...") - decompressLZFSEImage(inputPath: outputURL.path) - // Create a properly formatted disk image Logger.info("Converting assembled data to proper disk image format...") From 5134f719034ffa94efcf5d4659bf78a64455a822 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 20 Apr 2025 00:06:40 -0700 Subject: [PATCH 08/28] Remove hdutil --- .../ImageContainerRegistry.swift | 138 ------------------ 1 file changed, 138 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 4e82927b..db9597a5 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1164,76 +1164,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Create a properly formatted disk image - Logger.info("Converting assembled data to proper disk image format...") - - // Get actual disk usage of the assembled file - let assembledUsage = getActualDiskUsage(path: 
outputURL.path) - let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer - let requiredSpace = assembledUsage + bufferBytes - - // Check available disk space in the destination directory - let fileManager = FileManager.default - let availableSpace = - try? fileManager.attributesOfFileSystem( - forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] - as? UInt64 - - if let available = availableSpace, available < requiredSpace { - Logger.error( - "Insufficient disk space to convert disk image format. Skipping conversion.", - metadata: [ - "available": ByteCountFormatter.string( - fromByteCount: Int64(available), countStyle: .file), - "required": ByteCountFormatter.string( - fromByteCount: Int64(requiredSpace), countStyle: .file), - ] - ) - } else { - // Prioritize SPARSE format for better sparse file handling - Logger.info("Attempting conversion to SPARSE format...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") - process.arguments = [ - "convert", - outputURL.path, // Source: our assembled file - "-format", "SPARSE", // Format: SPARSE (best for sparse images) - "-o", outputURL.path, // Output: overwrite with converted image - ] - - let errorPipe = Pipe() - process.standardError = errorPipe - process.standardOutput = errorPipe - - try process.run() - process.waitUntilExit() - - // Check for errors - let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !outputData.isEmpty, - let outputString = String(data: outputData, encoding: .utf8) - { - Logger.info("hdiutil output: \(outputString)") - } - - if process.terminationStatus == 0 { - // Find the potentially renamed formatted file - let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL - // If the output path is different, remove the original and move the new one - if formattedFile.path != outputURL.path { - try? FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: formattedFile, to: outputURL) - } - Logger.info("Successfully converted disk image to proper format (SPARSE)") - } else { - Logger.error( - "Failed to convert disk image to SPARSE format. VM might not start properly." - ) - // If SPARSE failed, maybe try UDRW as a last resort? - // For now, we'll just log the error. - } - } - Logger.info("Disk image reassembly completed") } else { // Copy single disk image if it exists @@ -1486,74 +1416,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Create a properly formatted disk image - Logger.info("Converting assembled data to proper disk image format...") - - // Get actual disk usage of the assembled file - let assembledUsage = getActualDiskUsage(path: outputURL.path) - let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer - let requiredSpace = assembledUsage + bufferBytes - - // Check available disk space in the destination directory - let fileManager = FileManager.default - let availableSpace = - try? fileManager.attributesOfFileSystem( - forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] as? UInt64 - - if let available = availableSpace, available < requiredSpace { - Logger.error( - "Insufficient disk space to convert disk image format. 
Skipping conversion.", - metadata: [ - "available": ByteCountFormatter.string( - fromByteCount: Int64(available), countStyle: .file), - "required": ByteCountFormatter.string( - fromByteCount: Int64(requiredSpace), countStyle: .file), - ] - ) - } else { - // Prioritize SPARSE format for better sparse file handling - Logger.info("Attempting conversion to SPARSE format...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") - process.arguments = [ - "convert", - outputURL.path, // Source: our assembled file - "-format", "SPARSE", // Format: SPARSE (best for sparse images) - "-o", outputURL.path, // Output: overwrite with converted image - ] - - let errorPipe = Pipe() - process.standardError = errorPipe - process.standardOutput = errorPipe - - try process.run() - process.waitUntilExit() - - // Check for errors - let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !outputData.isEmpty, let outputString = String(data: outputData, encoding: .utf8) - { - Logger.info("hdiutil output: \(outputString)") - } - - if process.terminationStatus == 0 { - // Find the potentially renamed formatted file - let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL - // If the output path is different, remove the original and move the new one - if formattedFile.path != outputURL.path { - try? FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: formattedFile, to: outputURL) - } - Logger.info("Successfully converted disk image to proper format (SPARSE)") - } else { - Logger.error( - "Failed to convert disk image to SPARSE format. VM might not start properly." - ) - // If SPARSE failed, maybe try UDRW as a last resort? - // For now, we'll just log the error. - } - } - Logger.info("Disk image reassembly completed") } From 3d00091d2bcdb449e7d2441552510b2bec073101 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 20 Apr 2025 23:01:26 -0700 Subject: [PATCH 09/28] Fix reassembly --- .../ImageContainerRegistry.swift | 303 ++++++++++++++---- 1 file changed, 242 insertions(+), 61 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index db9597a5..7af60d6d 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1069,9 +1069,7 @@ class ImageContainerRegistry: @unchecked Sendable { // Iterate using the reliable totalParts count from media type // Use totalPartsFromCollector for the loop range for partNum in 1...totalPartsFromCollector { - // Find the original layer info for this part number // Find the part URL from our collected parts using the logical partNum - Logger.info("Reassembly loop: Looking for partNum \(partNum) in diskParts") // Log loop iteration guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { // This error should now be less likely, but good to keep Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") @@ -1081,65 +1079,45 @@ class ImageContainerRegistry: @unchecked Sendable { } let partURL = partInfo.1 // Get the URL from the tuple - // We no longer need to find the original manifest layer here, - // as all parts collected by the collector should be the lz4 type. 
- // Remove the block that used extractPartInfo: - /* - guard let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }) else { - // Should not happen if totalParts was derived correctly - Logger.error("Could not find manifest layer for logical part number \(partNum).") - throw PullError.missingPart(partNum) // Or a different error - } - let layerMediaType = layer.mediaType - */ - // Assume the media type for decompression purposes - // Remove unused variable: let layerMediaType = "application/octet-stream+lz4" - Logger.info( "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { - try? inputHandle.close() - // Clean up temp downloaded part if not from cache - if !partURL.path.contains(cacheDirectory.path) { - try? FileManager.default.removeItem(at: partURL) - } - } - // Seek to the correct offset in the output sparse file try outputHandle.seek(toOffset: currentOffset) - // Always attempt decompression using decompressChunkAndWriteSparse for lz4 parts - Logger.info( - "Decompressing part \(partNum) using decompressChunkAndWriteSparse") + // Check if this chunk might be all zeros (sparse data) by sampling the compressed data + // Skip this check for now as it's an optimization we can add later if needed + let isLikelySparse = false - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), // Use sizeForTruncate - context: "Reassembling") - - // Remove the old block that checked getDecompressionCommand and did direct copy - /* - if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - // ... [removed decompression logic using external tool] ... + // Always attempt decompression using decompressChunkAndWriteSparse for LZ4 parts + if isLikelySparse { + // For sparse chunks, we don't need to write anything - just advance the offset + // We determine the uncompressed size from the chunk metadata or estimation + + // For now, we'll still decompress to ensure correct behavior, and optimize later + Logger.info("Chunk appears to be sparse, but decompressing for reliability") + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten } else { - // No decompression command available, try direct copy - // ... [removed direct copy logic] ... 
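// Note on the call that follows: decompressChunkAndWriteSparse itself is not shown in this
// diff. Judging only from how it is used here (it takes the path of a compressed part, a
// handle to the pre-truncated sparse output file, and a start offset, and returns the number
// of decompressed bytes written), the underlying idea can be sketched roughly as below. The
// helper name, the 64 KB block size, and the assumption that the chunk has already been
// decompressed into memory are illustrative only, not the project's actual API.
import Foundation

func writeSparseBlocks(
    _ data: Data, to handle: FileHandle, at startOffset: UInt64, blockSize: Int = 64 * 1024
) throws -> UInt64 {
    var offset = startOffset
    var index = data.startIndex
    while index < data.endIndex {
        let end = data.index(index, offsetBy: blockSize, limitedBy: data.endIndex) ?? data.endIndex
        let block = data[index..<end]
        if block.contains(where: { $0 != 0 }) {
            // Non-zero block: seek to its position and materialize it on disk.
            try handle.seek(toOffset: offset)
            try handle.write(contentsOf: block)
        }
        // All-zero blocks are skipped entirely, so those regions stay holes in the
        // sparse file created earlier with truncate(atOffset:).
        offset += UInt64(block.count)
        index = end
    }
    // The caller advances its running offset by this return value, as the patch does
    // with `currentOffset += decompressedBytesWritten`.
    return offset - startOffset
}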
+ Logger.info("Decompressing part \(partNum)") + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten } - */ + + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling" + ) - // Ensure data is written before processing next part (optional but safer) + // Ensure data is written before processing next part try outputHandle.synchronize() } @@ -1147,17 +1125,54 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline - // Ensure output handle is closed before post-processing - try outputHandle.close() - + // Optimize sparseness after completing reassembly + try outputHandle.close() // Close handle to ensure all data is flushed + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) ?? 0 Logger.info( - "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + "Final disk image size: \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) + // Optimize sparseness if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization: \(error.localizedDescription)") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } + if finalSize != sizeForTruncate { Logger.info( "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" @@ -2468,26 +2483,117 @@ class ImageContainerRegistry: @unchecked Sendable { let reassemblyDir = workDir.appendingPathComponent("reassembly") try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") + + // Pre-allocate a sparse file with the correct size Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") - if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } - guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { throw PushError.invalidURL } + if FileManager.default.fileExists(atPath: reassembledFile.path) { + try FileManager.default.removeItem(at: reassembledFile) + } + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { + throw PushError.fileCreationFailed(reassembledFile.path) + } + let outputHandle = try FileHandle(forWritingTo: reassembledFile) defer { try? outputHandle.close() } + + // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: actualDiskSize) + + // Add test patterns at start and end to verify writability + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: actualDiskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + Logger.info("Test patterns written to sparse file. 
File is ready for writing.") + + // Track reassembly progress + var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 - for (index, cachedChunkPath, _) in diskChunks { + + // Process each chunk in order + for (index, cachedChunkPath, _) in diskChunks.sorted(by: { $0.index < $1.index }) { Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") - let decompressedBytesWritten = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset) + + // Always seek to the correct position + try outputHandle.seek(toOffset: currentOffset) + + // Decompress and write the chunk + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: cachedChunkPath.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(actualDiskSize), + context: "Reassembling" + ) + + // Ensure data is written before processing next part + try outputHandle.synchronize() } + + // Finalize progress + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline + + // Close handle before post-processing + try outputHandle.close() + + // Optimize sparseness if on macOS + let optimizedFile = reassemblyDir.appendingPathComponent("optimized_disk.img") + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", reassembledFile.path, optimizedFile.path] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get sizes of original and optimized files + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: reassembledFile.path) + let optimizedUsage = getActualDiskUsage(path: optimizedFile.path) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace original with optimized version + try FileManager.default.removeItem(at: reassembledFile) + try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile) + Logger.info("Using sparse-optimized file for verification") + } else { + Logger.info("Sparse optimization failed, using original file for verification") + try? FileManager.default.removeItem(at: optimizedFile) + } + } catch { + Logger.info("Error during sparse optimization: \(error.localizedDescription)") + try? FileManager.default.removeItem(at: optimizedFile) + } + } + + // Verification step Logger.info("Verifying reassembled file...") let originalSize = diskSize let originalDigest = calculateSHA256(filePath: diskPath.path) let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 
0 let reassembledDigest = calculateSHA256(filePath: reassembledFile.path) + + // Check actual disk usage let originalActualSize = getActualDiskUsage(path: diskPath.path) let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path) + + // Report results Logger.info("Results:") Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)") Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)") @@ -2495,9 +2601,84 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info(" Reassembled digest: \(reassembledDigest)") Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))") Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))") - if originalDigest == reassembledDigest { Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") } else { Logger.info("❌ VERIFICATION FAILED: Files differ"); if originalSize != reassembledSize { Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") }; Logger.info("Attempting to identify differences..."); Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions"); Logger.info(" may be handled differently between the original and reassembled files."); if originalActualSize > 0 { let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0; Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%"); if diffPercentage < -40 { Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)."); Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") } else if diffPercentage < -10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)."); Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") } else if diffPercentage > 10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference)."); Logger.info(" This is unusual and may indicate improper sparse file handling.") } else { Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") } } } + + // Determine if verification was successful + if originalDigest == reassembledDigest { + Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") + } else { + Logger.info("❌ VERIFICATION FAILED: Files differ") + + if originalSize != reassembledSize { + Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") + } + + // Check sparse file characteristics + Logger.info("Attempting to identify differences...") + Logger.info("NOTE: This might be a sparse file issue. 
The content may be identical, but sparse regions") + Logger.info(" may be handled differently between the original and reassembled files.") + + if originalActualSize > 0 { + let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0 + Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%") + + if diffPercentage < -40 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).") + Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") + } else if diffPercentage < -10 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).") + Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") + } else if diffPercentage > 10 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") + Logger.info(" This is unusual and may indicate improper sparse file handling.") + } else { + Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") + } + } + + // Offer recovery option + if originalDigest != reassembledDigest { + Logger.info("") + Logger.info("===== ATTEMPTING RECOVERY ACTION =====") + Logger.info("Since verification failed, trying direct copy as a fallback method.") + + let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img") + Logger.info("Creating fallback disk image at: \(fallbackFile.path)") + + // Try rsync first + let rsyncProcess = Process() + rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync") + rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path] + + do { + try rsyncProcess.run() + rsyncProcess.waitUntilExit() + + if rsyncProcess.terminationStatus == 0 { + Logger.info("Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)") + } else { + // Try cp -c as fallback + Logger.info("Rsync failed. Attempting with cp -c command...") + let cpProcess = Process() + cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp") + cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path] + + try cpProcess.run() + cpProcess.waitUntilExit() + + if cpProcess.terminationStatus == 0 { + Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") + } else { + Logger.info("All recovery attempts failed.") + } + } + } catch { + Logger.info("Error during recovery attempts: \(error.localizedDescription)") + Logger.info("All recovery attempts failed.") + } + } + } + Logger.info("Reassembled file is available at: \(reassembledFile.path)") - if originalDigest != reassembledDigest { Logger.info(""); Logger.info("===== ATTEMPTING RECOVERY ACTION ====="); Logger.info("Since verification failed, trying direct copy as a fallback method."); let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img"); Logger.info("Creating fallback disk image at: \(fallbackFile.path)"); let rsyncProcess = Process(); rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync"); rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path]; try rsyncProcess.run(); rsyncProcess.waitUntilExit(); if rsyncProcess.terminationStatus == 0 { Logger.info("Direct copy completed. You may want to try using this fallback disk image"); Logger.info("instead if the reassembled one has issues: \(fallbackFile.path)") } else { Logger.info("Direct copy failed. 
Attempting with cp -c command..."); let cpProcess = Process(); cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp"); cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path]; try cpProcess.run(); cpProcess.waitUntilExit(); if cpProcess.terminationStatus == 0 { Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") } else { Logger.info("All recovery attempts failed.") } } } } } From a87861a85c36f37e9ebafff2cf3513ad81d9251c Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 13:32:30 -0700 Subject: [PATCH 10/28] Fix first pull --- .../ImageContainerRegistry.swift | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 7af60d6d..8d60cdf4 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1432,6 +1432,43 @@ class ImageContainerRegistry: @unchecked Sendable { } Logger.info("Disk image reassembly completed") + + // Optimize sparseness for cached reassembly if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for cached reassembly...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced cached reassembly with optimized sparse version") + } else { + Logger.info("Sparse optimization failed for cache, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } } Logger.info("Cache copy complete") From c43a870794e16a91becd429765ee0f5d728ca4d7 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 14:44:38 -0700 Subject: [PATCH 11/28] Fix first pull --- .../ImageContainerRegistry.swift | 79 +++++++++++++++++-- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8d60cdf4..c42324ec 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1224,6 +1224,64 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) + // Apply proper ownership and permissions to ensure VM can start + Logger.info("Setting proper file permissions and ownership...") + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["-R", "u+rw", vmDir.dir.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Ensure disk image has proper permissions + let diskImgPath = URL(fileURLWithPath: vmDir.dir.path).appendingPathComponent("disk.img").path + if FileManager.default.fileExists(atPath: diskImgPath) { + let diskChmodProcess = Process() + diskChmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + diskChmodProcess.arguments = ["0644", diskImgPath] + try diskChmodProcess.run() + diskChmodProcess.waitUntilExit() + + Logger.info("Applied file permissions to disk image") + + // Ensure disk image is properly synchronized to disk + Logger.info("Ensuring disk image is properly synchronized to disk...") + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/diskutil") + syncProcess.arguments = ["apfs", "resetFusionStats"] // This forces disk cache flush + try? syncProcess.run() + syncProcess.waitUntilExit() + + // Alternative sync method if needed + let syncProcess2 = Process() + syncProcess2.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess2.run() + syncProcess2.waitUntilExit() + + Logger.info("Disk image sync complete") + + // Verify the disk image is readable + Logger.info("Verifying disk image integrity...") + let fileHandle = try? FileHandle(forReadingFrom: URL(fileURLWithPath: diskImgPath)) + if let handle = fileHandle { + // Try to read the first 512 bytes (boot sector) + if let data = try? handle.read(upToCount: 512), data.count == 512 { + Logger.info("Disk image verification: Successfully read first 512 bytes") + + // Check for boot signature (0x55AA at the end of the boot sector) + if data.count >= 512 && data[510] == 0x55 && data[511] == 0xAA { + Logger.info("Disk image verification: Boot signature valid (0x55AA)") + } else { + Logger.info("Disk image verification: No valid boot signature found") + } + } else { + Logger.error("Disk image verification: Failed to read first 512 bytes") + } + try? 
handle.close() + } else { + Logger.error("Disk image verification: Failed to open file for reading") + } + } + Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1406,16 +1464,27 @@ class ImageContainerRegistry: @unchecked Sendable { current: Double(currentOffset) / Double(sizeForTruncate), context: "Reassembling Cache") - try outputHandle.synchronize() // Optional: Synchronize after each chunk + try outputHandle.synchronize() // Explicitly synchronize after each chunk } // Finalize progress, close handle (done by defer) reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - // Ensure output handle is closed before post-processing - // No need for explicit close here, defer handles it - // try outputHandle.close() - + // Add test patterns at the beginning and end of the file + Logger.info("Writing test patterns to sparse file to verify integrity...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Ensure handle is properly synchronized before closing + try outputHandle.synchronize() + + // Close handle explicitly instead of relying on defer + try outputHandle.close() + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] From 5972ebd4257038d1643a304180911da8247fcf12 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 14:56:44 -0700 Subject: [PATCH 12/28] Fix first pull --- .../ImageContainerRegistry.swift | 225 +++++++++++++----- 1 file changed, 167 insertions(+), 58 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index c42324ec..84959104 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1203,6 +1203,11 @@ class ImageContainerRegistry: @unchecked Sendable { } } + // Simulate cache pull behavior if this is a first pull + if !cachingEnabled || !validateCache(manifest: manifest, manifestId: manifestId) { + try simulateCachePull(tempVMDir: tempVMDir) + } + // Only move to final location once everything is complete if FileManager.default.fileExists(atPath: vmDir.dir.path) { try FileManager.default.removeItem(at: URL(fileURLWithPath: vmDir.dir.path)) @@ -1224,64 +1229,6 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) - // Apply proper ownership and permissions to ensure VM can start - Logger.info("Setting proper file permissions and ownership...") - let chmodProcess = Process() - chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - chmodProcess.arguments = ["-R", "u+rw", vmDir.dir.path] - try chmodProcess.run() - chmodProcess.waitUntilExit() - - // Ensure disk image has proper permissions - let diskImgPath = URL(fileURLWithPath: vmDir.dir.path).appendingPathComponent("disk.img").path - if FileManager.default.fileExists(atPath: diskImgPath) { - let diskChmodProcess = Process() - diskChmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - 
diskChmodProcess.arguments = ["0644", diskImgPath] - try diskChmodProcess.run() - diskChmodProcess.waitUntilExit() - - Logger.info("Applied file permissions to disk image") - - // Ensure disk image is properly synchronized to disk - Logger.info("Ensuring disk image is properly synchronized to disk...") - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/diskutil") - syncProcess.arguments = ["apfs", "resetFusionStats"] // This forces disk cache flush - try? syncProcess.run() - syncProcess.waitUntilExit() - - // Alternative sync method if needed - let syncProcess2 = Process() - syncProcess2.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess2.run() - syncProcess2.waitUntilExit() - - Logger.info("Disk image sync complete") - - // Verify the disk image is readable - Logger.info("Verifying disk image integrity...") - let fileHandle = try? FileHandle(forReadingFrom: URL(fileURLWithPath: diskImgPath)) - if let handle = fileHandle { - // Try to read the first 512 bytes (boot sector) - if let data = try? handle.read(upToCount: 512), data.count == 512 { - Logger.info("Disk image verification: Successfully read first 512 bytes") - - // Check for boot signature (0x55AA at the end of the boot sector) - if data.count >= 512 && data[510] == 0x55 && data[511] == 0xAA { - Logger.info("Disk image verification: Boot signature valid (0x55AA)") - } else { - Logger.info("Disk image verification: No valid boot signature found") - } - } else { - Logger.error("Disk image verification: Failed to read first 512 bytes") - } - try? handle.close() - } else { - Logger.error("Disk image verification: Failed to open file for reading") - } - } - Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1543,6 +1490,168 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Cache copy complete") } + // Function to simulate cache pull behavior for freshly downloaded images + private func simulateCachePull(tempVMDir: URL) throws { + Logger.info("Simulating cache pull behavior for freshly downloaded image...") + + // 1. Find disk.img in tempVMDir + let diskImgPath = tempVMDir.appendingPathComponent("disk.img") + guard FileManager.default.fileExists(atPath: diskImgPath.path) else { + Logger.info("No disk.img found to simulate cache pull behavior") + return + } + + // 2. Create a temporary directory for the simulation + let simCacheDir = FileManager.default.temporaryDirectory.appendingPathComponent( + "lume_simcache_\(UUID().uuidString)") + try FileManager.default.createDirectory(at: simCacheDir, withIntermediateDirectories: true) + defer { + try? FileManager.default.removeItem(at: simCacheDir) + } + + // 3. Copy the disk.img to the simulation directory + let cachedDiskPath = simCacheDir.appendingPathComponent("cached_disk.img") + try FileManager.default.copyItem(at: diskImgPath, to: cachedDiskPath) + + // 4. Delete original disk.img (will be replaced by the simulated cache pull) + try FileManager.default.removeItem(at: diskImgPath) + + // 5. Get disk size which will be needed for the sparse file + var diskSize: UInt64 = 0 + if let attributes = try? FileManager.default.attributesOfItem(atPath: cachedDiskPath.path), + let size = attributes[.size] as? 
UInt64 { + diskSize = size + } else { + // If size can't be determined, read config.json + let configPath = tempVMDir.appendingPathComponent("config.json") + if let configDiskSize = getUncompressedSizeFromConfig(configPath: configPath) { + diskSize = configDiskSize + } else { + // Try to get from VM config + if FileManager.default.fileExists(atPath: configPath.path) { + do { + let configData = try Data(contentsOf: configPath) + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData), + let size = vmConfig.diskSize { + diskSize = size + } + } catch { + Logger.error("Failed to read config for disk size: \(error)") + } + } + } + } + + // Fallback if no size could be determined + if diskSize == 0 { + diskSize = 10 * 1024 * 1024 * 1024 // 10GB default + Logger.error("Could not determine disk size, using default: \(diskSize) bytes") + } + + // 6. Create the sparse file with proper size + guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { + throw PullError.fileCreationFailed(diskImgPath.path) + } + + let outputHandle = try FileHandle(forWritingTo: diskImgPath) + defer { try? outputHandle.close() } + + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: diskSize) + Logger.info("Sparse file initialized for simulated cache pull with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 7. Add test patterns at beginning and end + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // 8. Copy data from the cached file + let sourceHandle = try FileHandle(forReadingFrom: cachedDiskPath) + defer { try? sourceHandle.close() } + + // Copy in 50MB chunks to maintain sparse files + let chunkSize = 50 * 1024 * 1024 + var currentOffset: UInt64 = 0 + var progressLogger = ProgressLogger(threshold: 0.05) + + while currentOffset < diskSize { + try sourceHandle.seek(toOffset: currentOffset) + if let chunkData = try sourceHandle.read(upToCount: chunkSize) { + if chunkData.isEmpty { break } + + try outputHandle.seek(toOffset: currentOffset) + try outputHandle.write(contentsOf: chunkData) + currentOffset += UInt64(chunkData.count) + + progressLogger.logProgress( + current: Double(currentOffset) / Double(diskSize), + context: "Simulating Cache Pull" + ) + } else { + break + } + } + + try outputHandle.synchronize() + try outputHandle.close() // Close explicitly before optimizing + + // 9. Optimize the sparse file with cp -c (same as in copyFromCache) + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for simulated cache pull...") + let optimizedPath = diskImgPath.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", diskImgPath.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 + let originalUsage = getActualDiskUsage(path: diskImgPath.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for simulated cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: diskImgPath) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + Logger.info("Replaced with optimized sparse version for simulated cache") + } else { + Logger.info("Sparse optimization failed for simulated cache, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for simulated cache: \(error.localizedDescription)") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // 10. Ensure disk image is properly synchronized to disk + Logger.info("Ensuring disk image is properly synchronized for simulated cache...") + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess.run() + syncProcess.waitUntilExit() + + // Set proper permissions on the disk image + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", diskImgPath.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + Logger.info("Simulated cache pull completed successfully") + } + private func getToken(repository: String) async throws -> String { let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository // Request both pull and push scope for uploads From 134afc59ee745cf2ee2de1c8427bf476add0e64a Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:17:49 -0700 Subject: [PATCH 13/28] Fix first pull --- .../ImageContainerRegistry.swift | 132 ++++++------------ 1 file changed, 42 insertions(+), 90 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 84959104..25d4c34f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,66 +1501,32 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Create a temporary directory for the simulation - let simCacheDir = FileManager.default.temporaryDirectory.appendingPathComponent( - "lume_simcache_\(UUID().uuidString)") - try FileManager.default.createDirectory(at: simCacheDir, withIntermediateDirectories: true) - defer { - try? FileManager.default.removeItem(at: simCacheDir) + // Get the file size + let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) + guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { + Logger.error("Could not determine disk.img size for simulation") + return } - // 3. Copy the disk.img to the simulation directory - let cachedDiskPath = simCacheDir.appendingPathComponent("cached_disk.img") - try FileManager.default.copyItem(at: diskImgPath, to: cachedDiskPath) + // 2. 
Rename the original file to .original + let originalPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: originalPath) - // 4. Delete original disk.img (will be replaced by the simulated cache pull) - try FileManager.default.removeItem(at: diskImgPath) - - // 5. Get disk size which will be needed for the sparse file - var diskSize: UInt64 = 0 - if let attributes = try? FileManager.default.attributesOfItem(atPath: cachedDiskPath.path), - let size = attributes[.size] as? UInt64 { - diskSize = size - } else { - // If size can't be determined, read config.json - let configPath = tempVMDir.appendingPathComponent("config.json") - if let configDiskSize = getUncompressedSizeFromConfig(configPath: configPath) { - diskSize = configDiskSize - } else { - // Try to get from VM config - if FileManager.default.fileExists(atPath: configPath.path) { - do { - let configData = try Data(contentsOf: configPath) - if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData), - let size = vmConfig.diskSize { - diskSize = size - } - } catch { - Logger.error("Failed to read config for disk size: \(error)") - } - } - } - } - - // Fallback if no size could be determined - if diskSize == 0 { - diskSize = 10 * 1024 * 1024 * 1024 // 10GB default - Logger.error("Could not determine disk size, using default: \(diskSize) bytes") - } - - // 6. Create the sparse file with proper size + // 3. Create a new empty file with the same name guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { + // If creation fails, restore the original + try? FileManager.default.moveItem(at: originalPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } + // 4. Open a file handle for writing to the new file let outputHandle = try FileHandle(forWritingTo: diskImgPath) - defer { try? outputHandle.close() } - // Set the file size (creates sparse file) + // 5. Set the total size (creates a sparse file) try outputHandle.truncate(atOffset: diskSize) - Logger.info("Sparse file initialized for simulated cache pull with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + Logger.info("Created sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 7. Add test patterns at beginning and end + // 6. Add test patterns at beginning and end (same as in copyFromCache) let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) @@ -1568,37 +1534,22 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - // 8. Copy data from the cached file - let sourceHandle = try FileHandle(forReadingFrom: cachedDiskPath) - defer { try? 
sourceHandle.close() } + Logger.info("Test patterns written, starting decompression simulation...") - // Copy in 50MB chunks to maintain sparse files - let chunkSize = 50 * 1024 * 1024 - var currentOffset: UInt64 = 0 - var progressLogger = ProgressLogger(threshold: 0.05) - - while currentOffset < diskSize { - try sourceHandle.seek(toOffset: currentOffset) - if let chunkData = try sourceHandle.read(upToCount: chunkSize) { - if chunkData.isEmpty { break } - - try outputHandle.seek(toOffset: currentOffset) - try outputHandle.write(contentsOf: chunkData) - currentOffset += UInt64(chunkData.count) - - progressLogger.logProgress( - current: Double(currentOffset) / Double(diskSize), - context: "Simulating Cache Pull" - ) - } else { - break - } - } + // 7. Use decompressChunkAndWriteSparse - the EXACT same function used by copyFromCache + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: originalPath.path, + outputHandle: outputHandle, + startOffset: 0 + ) + // 8. Make sure the file handle is properly synchronized before closing try outputHandle.synchronize() - try outputHandle.close() // Close explicitly before optimizing + try outputHandle.close() - // 9. Optimize the sparse file with cp -c (same as in copyFromCache) + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) using the same method as cache pull") + + // 9. Use the same sparse file optimization as copyFromCache if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation for simulated cache pull...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1612,43 +1563,44 @@ class ImageContainerRegistry: @unchecked Sendable { process.waitUntilExit() if process.terminationStatus == 0 { - // Get size of optimized file let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: diskImgPath.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) Logger.info( - "Sparse optimization results for simulated cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - // Replace the original with the optimized version + // Replace original with optimized try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) - Logger.info("Replaced with optimized sparse version for simulated cache") + Logger.info("Replaced with optimized sparse version") } else { - Logger.info("Sparse optimization failed for simulated cache, using original file") + Logger.info("Sparse optimization failed, using original file") try? 
FileManager.default.removeItem(atPath: optimizedPath) } } catch { - Logger.info("Error during sparse optimization for simulated cache: \(error.localizedDescription)") + Logger.info("Error during sparse optimization: \(error.localizedDescription)") try? FileManager.default.removeItem(atPath: optimizedPath) } } - // 10. Ensure disk image is properly synchronized to disk - Logger.info("Ensuring disk image is properly synchronized for simulated cache...") - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess.run() - syncProcess.waitUntilExit() - - // Set proper permissions on the disk image + // 10. Set permissions and do final sync let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() + // Final sync + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess.run() + syncProcess.waitUntilExit() + + // 11. Clean up + try? FileManager.default.removeItem(at: originalPath) + Logger.info("Simulated cache pull completed successfully") } From e017a7c641d2b0b8802930f5015bf153446db3a0 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:34:17 -0700 Subject: [PATCH 14/28] Fix first pull --- .../ImageContainerRegistry.swift | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 25d4c34f..057e6bed 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,32 +1501,32 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // Get the file size + // 2. Get disk size and other attributes let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 2. Rename the original file to .original - let originalPath = tempVMDir.appendingPathComponent("disk.img.original") - try FileManager.default.moveItem(at: diskImgPath, to: originalPath) + // 3. Create backup of original + let backupPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - // 3. Create a new empty file with the same name + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 4. Create empty sparse file guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original - try? FileManager.default.moveItem(at: originalPath, to: diskImgPath) + try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 4. Open a file handle for writing to the new file + // 5. Open the file and truncate to desired size (creates sparse file) let outputHandle = try FileHandle(forWritingTo: diskImgPath) - - // 5. Set the total size (creates a sparse file) try outputHandle.truncate(atOffset: diskSize) - Logger.info("Created sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 6. 
Add test patterns at beginning and end (same as in copyFromCache) + // 6. Add test patterns at beginning and end exactly as in copyFromCache + Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) @@ -1534,24 +1534,24 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - Logger.info("Test patterns written, starting decompression simulation...") + // 7. Now decompress the original disk image exactly as we would with cache parts + Logger.info("Processing disk image using the same mechanism as cache pull...") - // 7. Use decompressChunkAndWriteSparse - the EXACT same function used by copyFromCache let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: originalPath.path, + inputPath: backupPath.path, outputHandle: outputHandle, startOffset: 0 ) - // 8. Make sure the file handle is properly synchronized before closing + Logger.info("Processed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + + // 8. Ensure all data is written to disk try outputHandle.synchronize() try outputHandle.close() - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) using the same method as cache pull") - - // 9. Use the same sparse file optimization as copyFromCache + // 9. Run sparse file optimization with cp -c exactly as in the cache pull process if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation for simulated cache pull...") + Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" let process = Process() @@ -1571,7 +1571,6 @@ class ImageContainerRegistry: @unchecked Sendable { "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - // Replace original with optimized try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") @@ -1585,23 +1584,23 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Set permissions and do final sync + // 10. Ensure file has correct permissions let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - // Final sync + // 11. Final sync to ensure all data is on disk let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess.run() + try syncProcess.run() syncProcess.waitUntilExit() - // 11. Clean up - try? FileManager.default.removeItem(at: originalPath) + // 12. 
Clean up the backup file + try FileManager.default.removeItem(at: backupPath) - Logger.info("Simulated cache pull completed successfully") + Logger.info("Simulation of cache pull behavior completed") } private func getToken(repository: String) async throws -> String { From a3895d424dda04e26ec5e938acc934e7636d9ac3 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:47:42 -0700 Subject: [PATCH 15/28] Fix first pull --- .../ImageContainerRegistry.swift | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 057e6bed..7022858b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,55 +1501,53 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Get disk size and other attributes + // 2. Get file size and other attributes let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 3. Create backup of original + // 3. Rename the original file to backup let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 4. Create empty sparse file + // 4. Create a new empty file guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Open the file and truncate to desired size (creates sparse file) + // 5. Open the file handle and set size (creates sparse file) let outputHandle = try FileHandle(forWritingTo: diskImgPath) try outputHandle.truncate(atOffset: diskSize) - // 6. Add test patterns at beginning and end exactly as in copyFromCache + // 6. Add test patterns at beginning and end Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // 7. Now decompress the original disk image exactly as we would with cache parts - Logger.info("Processing disk image using the same mechanism as cache pull...") + // 7. Decompress the original disk image at offset 0 + Logger.info("Decompressing original disk image at offset 0...") let bytesWritten = try decompressChunkAndWriteSparse( inputPath: backupPath.path, outputHandle: outputHandle, startOffset: 0 ) - Logger.info("Processed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") // 8. Ensure all data is written to disk try outputHandle.synchronize() try outputHandle.close() - // 9. Run sparse file optimization with cp -c exactly as in the cache pull process + // 9. 
Optimize sparse representation with cp -c if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1584,7 +1582,7 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Ensure file has correct permissions + // 10. Set permissions to match cache hit (0644) let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] @@ -1600,7 +1598,7 @@ class ImageContainerRegistry: @unchecked Sendable { // 12. Clean up the backup file try FileManager.default.removeItem(at: backupPath) - Logger.info("Simulation of cache pull behavior completed") + Logger.info("Cache pull simulation completed successfully") } private func getToken(repository: String) async throws -> String { From 62f90afb2c795a8acf7348247b4ae617ab6c5d68 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:01:33 -0700 Subject: [PATCH 16/28] Fix first pull --- .../ImageContainerRegistry.swift | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 7022858b..dc1b32ef 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,40 +1501,39 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Get file size and other attributes + // 2. Get file attributes and size let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 3. Rename the original file to backup + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 3. Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - - // 4. Create a new empty file + // 4. Create empty file and prepare for sparse file creation guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Open the file handle and set size (creates sparse file) + // 5. Set up file handle and create sparse file let outputHandle = try FileHandle(forWritingTo: diskImgPath) try outputHandle.truncate(atOffset: diskSize) - // 6. Add test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") + // 6. Write test patterns at beginning and end let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) - // 7. Decompress the original disk image at offset 0 - Logger.info("Decompressing original disk image at offset 0...") + // 7. 
Decompress the original data at offset 0 + Logger.info("Decompressing original disk image with same mechanism as cache pull...") let bytesWritten = try decompressChunkAndWriteSparse( inputPath: backupPath.path, outputHandle: outputHandle, @@ -1547,7 +1546,7 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.synchronize() try outputHandle.close() - // 9. Optimize sparse representation with cp -c + // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1582,14 +1581,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Set permissions to match cache hit (0644) + // 10. Explicitly set permissions to match cache hit (0644) let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - // 11. Final sync to ensure all data is on disk + // 11. Final sync to ensure all data is flushed to disk let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try syncProcess.run() From c321c8affc28588fbb5833ef54e3058e4cacc277 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:20:10 -0700 Subject: [PATCH 17/28] Fix first pull --- .../ContainerRegistry/ImageContainerRegistry.swift | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index dc1b32ef..63dde180 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1526,11 +1526,13 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.truncate(atOffset: diskSize) // 6. Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() // 7. Decompress the original data at offset 0 Logger.info("Decompressing original disk image with same mechanism as cache pull...") @@ -1542,8 +1544,10 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - // 8. Ensure all data is written to disk + // 8. Ensure all data is written to disk with an explicit sync try outputHandle.synchronize() + + // Very important: close the handle before optimization try outputHandle.close() // 9. 
Optimize sparse file with cp -c (exactly matching cache pull process) @@ -1568,6 +1572,13 @@ class ImageContainerRegistry: @unchecked Sendable { "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) + // Before replacing the file, make sure to synchronize the filesystem + let syncBeforeReplace = Process() + syncBeforeReplace.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncBeforeReplace.run() + syncBeforeReplace.waitUntilExit() + + // Now replace the original with the optimized version try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") From ae7a8f526ec2f2a5d8856c7d9ffeeb12e0149968 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:35:56 -0700 Subject: [PATCH 18/28] Fix first pull --- .../ImageContainerRegistry.swift | 67 +++++++++++-------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 63dde180..5db9b411 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1521,40 +1521,53 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Set up file handle and create sparse file - let outputHandle = try FileHandle(forWritingTo: diskImgPath) - try outputHandle.truncate(atOffset: diskSize) + // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly + try autoreleasepool { + // 5. Set up file handle and create sparse file + let outputHandle = try FileHandle(forWritingTo: diskImgPath) + try outputHandle.truncate(atOffset: diskSize) + + // 6. Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // 7. Decompress the original data at offset 0 + Logger.info("Decompressing original disk image with same mechanism as cache pull...") + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: backupPath.path, + outputHandle: outputHandle, + startOffset: 0 + ) + + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + + // 8. Ensure all data is written to disk with an explicit sync + try outputHandle.synchronize() + + // Very important: explicitly close the handle here inside the autorelease pool + try outputHandle.close() + Logger.info("File handle explicitly closed after decompression and synchronization") + } - // 6. Write test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! 
- try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // 7. Decompress the original data at offset 0 - Logger.info("Decompressing original disk image with same mechanism as cache pull...") - let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: backupPath.path, - outputHandle: outputHandle, - startOffset: 0 - ) - - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - - // 8. Ensure all data is written to disk with an explicit sync - try outputHandle.synchronize() - - // Very important: close the handle before optimization - try outputHandle.close() + // Wait a moment for file system operations to complete + Thread.sleep(forTimeInterval: 0.5) // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" + // Run a sync before optimization + let syncBeforeOptimize = Process() + syncBeforeOptimize.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncBeforeOptimize.run() + syncBeforeOptimize.waitUntilExit() + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", diskImgPath.path, optimizedPath] From 661556f3d110d110871bb5193c95acd1117db76e Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:52:08 -0700 Subject: [PATCH 19/28] Fix first pull --- .../ImageContainerRegistry.swift | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 5db9b411..9d748aab 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1521,6 +1521,12 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.fileCreationFailed(diskImgPath.path) } + // Run an initial filesystem sync + let initialSyncProcess = Process() + initialSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try initialSyncProcess.run() + initialSyncProcess.waitUntilExit() + // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly try autoreleasepool { // 5. Set up file handle and create sparse file @@ -1534,6 +1540,8 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) + + // Make sure test patterns are synced to disk first try outputHandle.synchronize() // 7. Decompress the original data at offset 0 @@ -1546,16 +1554,30 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - // 8. Ensure all data is written to disk with an explicit sync + // 8. 
Ensure all data is written to disk with multiple explicit syncs try outputHandle.synchronize() + // Force an fsync using lower-level API for the file descriptor + let fd = outputHandle.fileDescriptor + if fd >= 0 { + fsync(fd) + Logger.info("Performed low-level fsync on file descriptor") + } + // Very important: explicitly close the handle here inside the autorelease pool try outputHandle.close() Logger.info("File handle explicitly closed after decompression and synchronization") } - // Wait a moment for file system operations to complete - Thread.sleep(forTimeInterval: 0.5) + // Perform an explicit filesystem sync after closing the file handle + let postCloseSyncProcess = Process() + postCloseSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try postCloseSyncProcess.run() + postCloseSyncProcess.waitUntilExit() + + // Wait longer to ensure all filesystem operations are complete + Logger.info("Waiting for filesystem operations to complete...") + Thread.sleep(forTimeInterval: 1.0) // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { @@ -1595,6 +1617,12 @@ class ImageContainerRegistry: @unchecked Sendable { try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") + + // Additional sync after replacement + let syncAfterReplace = Process() + syncAfterReplace.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncAfterReplace.run() + syncAfterReplace.waitUntilExit() } else { Logger.info("Sparse optimization failed, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) @@ -1618,6 +1646,15 @@ class ImageContainerRegistry: @unchecked Sendable { try syncProcess.run() syncProcess.waitUntilExit() + // One more filesystem sync for good measure + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + + // Wait a moment for final filesystem operations + Thread.sleep(forTimeInterval: 0.5) + // 12. 
Clean up the backup file try FileManager.default.removeItem(at: backupPath) From b47201f1ee90a7ad6e185e04fa1cb08d0d3bec1b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 17:44:59 -0700 Subject: [PATCH 20/28] Fix --- .../ImageContainerRegistry.swift | 573 +++++++++--------- 1 file changed, 271 insertions(+), 302 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9d748aab..d075f445 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1238,6 +1238,137 @@ class ImageContainerRegistry: @unchecked Sendable { ) } + // Shared function to handle disk image creation - can be used by both cache hit and cache miss paths + private func createDiskImageFromSource( + sourceURL: URL, // Source data to decompress + destinationURL: URL, // Where to create the disk image + diskSize: UInt64 // Total size for the sparse file + ) throws { + Logger.info("Creating sparse disk image...") + + // Create empty destination file + if FileManager.default.fileExists(atPath: destinationURL.path) { + try FileManager.default.removeItem(at: destinationURL) + } + guard FileManager.default.createFile(atPath: destinationURL.path, contents: nil) else { + throw PullError.fileCreationFailed(destinationURL.path) + } + + // Create sparse file + let outputHandle = try FileHandle(forWritingTo: destinationURL) + try outputHandle.truncate(atOffset: diskSize) + + // Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Decompress the source data at offset 0 + Logger.info("Decompressing source data...") + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: 0 + ) + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data") + + // Ensure data is written and close handle + try outputHandle.synchronize() + try outputHandle.close() + + // Run sync to flush filesystem + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncProcess.run() + syncProcess.waitUntilExit() + + // Optimize with cp -c + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = destinationURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", destinationURL.path, optimizedPath] + + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get optimization results + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 + let originalUsage = getActualDiskUsage(path: destinationURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace original with optimized + try FileManager.default.removeItem(at: destinationURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: destinationURL) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // Set permissions to 0644 + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", destinationURL.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Final sync + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + } + + // Function to simulate cache pull behavior for freshly downloaded images + private func simulateCachePull(tempVMDir: URL) throws { + Logger.info("Simulating cache pull behavior for freshly downloaded image...") + + // Find disk.img in tempVMDir + let diskImgPath = tempVMDir.appendingPathComponent("disk.img") + guard FileManager.default.fileExists(atPath: diskImgPath.path) else { + Logger.info("No disk.img found to simulate cache pull behavior") + return + } + + // Get file attributes and size + let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) + guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { + Logger.error("Could not determine disk.img size for simulation") + return + } + + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // Create backup of original file + let backupPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: backupPath) + + // Use shared function to create the disk image + try createDiskImageFromSource( + sourceURL: backupPath, + destinationURL: diskImgPath, + diskSize: diskSize + ) + + // Clean up backup + try FileManager.default.removeItem(at: backupPath) + + Logger.info("Cache pull simulation completed successfully") + } + private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) async throws { @@ -1249,8 +1380,6 @@ class ImageContainerRegistry: @unchecked Sendable { // Instantiate collector let diskPartsCollector = DiskPartsCollector() - // Remove totalDiskParts - // var totalDiskParts: Int? = nil var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files @@ -1293,12 +1422,6 @@ class ImageContainerRegistry: @unchecked Sendable { let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector - // Remove old guard check - /* - guard let totalParts = totalDiskParts else { - Logger.info("No cached layers with valid part information found. 
Assuming single-part image or non-lz4 parts.") - } - */ Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") // --- End retrieving parts --- @@ -1355,312 +1478,158 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) } - // Wrap file handle setup and sparse file creation within this block - let outputHandle: FileHandle - do { - // Ensure parent directory exists - try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - // Explicitly create the file first, removing old one if needed - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) - } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { - throw PullError.fileCreationFailed(outputURL.path) - } - // Open handle for writing - outputHandle = try FileHandle(forWritingTo: outputURL) - // Set the file size (creates sparse file) - try outputHandle.truncate(atOffset: sizeForTruncate) - Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") - } catch { - Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) - } - - // Ensure handle is closed when exiting this scope - defer { try? outputHandle.close() } - - // ... (Get uncompressed size etc.) ... - - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 - - // Iterate from 1 up to the total number of parts found by the collector - for collectorPartNum in 1...totalParts { - // Find the source URL from our collected parts using the sequential collectorPartNum - guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { - Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") - throw PullError.missingPart(collectorPartNum) - } - let sourceURL = sourceInfo.1 // Get URL from tuple - - // Log using the sequential collector part number - Logger.info( - "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
+ // If we have just one disk part, use the shared function + if totalParts == 1 { + // Single part - use shared function + let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) + try createDiskImageFromSource( + sourceURL: sourceURL, + destinationURL: outputURL, + diskSize: sizeForTruncate ) - - // Always use the correct sparse decompression function - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: sourceURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - // Update progress (using sizeForTruncate which should be available) - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling Cache") - - try outputHandle.synchronize() // Explicitly synchronize after each chunk - } - - // Finalize progress, close handle (done by defer) - reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - - // Add test patterns at the beginning and end of the file - Logger.info("Writing test patterns to sparse file to verify integrity...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // Ensure handle is properly synchronized before closing - try outputHandle.synchronize() - - // Close handle explicitly instead of relying on defer - try outputHandle.close() - - // Verify final size - let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" - ) - - // Use the calculated sizeForTruncate for comparison - if finalSize != sizeForTruncate { - Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" - ) - } - - Logger.info("Disk image reassembly completed") - - // Optimize sparseness for cached reassembly if on macOS - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation for cached reassembly...") - let optimizedPath = outputURL.path + ".optimized" - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", outputURL.path, optimizedPath] - + } else { + // Multiple parts - we need to reassemble + // Wrap file handle setup and sparse file creation within this block + let outputHandle: FileHandle do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 - let originalUsage = getActualDiskUsage(path: outputURL.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Replace the original with the optimized version + // Ensure parent directory exists + try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + // Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) - Logger.info("Replaced cached reassembly with optimized sparse version") - } else { - Logger.info("Sparse optimization failed for cache, using original file") + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + throw PullError.fileCreationFailed(outputURL.path) + } + // Open handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: sizeForTruncate) + Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") + } catch { + Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + } + + // Ensure handle is closed when exiting this scope + defer { try? outputHandle.close() } + + var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) + var currentOffset: UInt64 = 0 + + // Iterate from 1 up to the total number of parts found by the collector + for collectorPartNum in 1...totalParts { + // Find the source URL from our collected parts using the sequential collectorPartNum + guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { + Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + throw PullError.missingPart(collectorPartNum) + } + let sourceURL = sourceInfo.1 // Get URL from tuple + + // Log using the sequential collector part number + Logger.info( + "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
+ ) + + // Always use the correct sparse decompression function + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + // Update progress (using sizeForTruncate which should be available) + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") + + try outputHandle.synchronize() // Explicitly synchronize after each chunk + } + + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + + // Add test patterns at the beginning and end of the file + Logger.info("Writing test patterns to sparse file to verify integrity...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Ensure handle is properly synchronized before closing + try outputHandle.synchronize() + + // Close handle explicitly instead of relying on defer + try outputHandle.close() + + // Verify final size + let finalSize = + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + ) + + // Use the calculated sizeForTruncate for comparison + if finalSize != sizeForTruncate { + Logger.info( + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" + ) + } + + Logger.info("Disk image reassembly completed") + + // Optimize sparseness for cached reassembly if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for cached reassembly...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced cached reassembly with optimized sparse version") + } else { + Logger.info("Sparse optimization failed for cache, using original file") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") try? FileManager.default.removeItem(atPath: optimizedPath) } - } catch { - Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") - try? FileManager.default.removeItem(atPath: optimizedPath) } + + // Set permissions to ensure consistency + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", outputURL.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() } } Logger.info("Cache copy complete") } - // Function to simulate cache pull behavior for freshly downloaded images - private func simulateCachePull(tempVMDir: URL) throws { - Logger.info("Simulating cache pull behavior for freshly downloaded image...") - - // 1. Find disk.img in tempVMDir - let diskImgPath = tempVMDir.appendingPathComponent("disk.img") - guard FileManager.default.fileExists(atPath: diskImgPath.path) else { - Logger.info("No disk.img found to simulate cache pull behavior") - return - } - - // 2. Get file attributes and size - let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) - guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { - Logger.error("Could not determine disk.img size for simulation") - return - } - - Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - - // 3. Create backup of original file - let backupPath = tempVMDir.appendingPathComponent("disk.img.original") - try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - - // 4. Create empty file and prepare for sparse file creation - guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { - // If creation fails, restore the original - try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) - throw PullError.fileCreationFailed(diskImgPath.path) - } - - // Run an initial filesystem sync - let initialSyncProcess = Process() - initialSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try initialSyncProcess.run() - initialSyncProcess.waitUntilExit() - - // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly - try autoreleasepool { - // 5. Set up file handle and create sparse file - let outputHandle = try FileHandle(forWritingTo: diskImgPath) - try outputHandle.truncate(atOffset: diskSize) - - // 6. Write test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - - // Make sure test patterns are synced to disk first - try outputHandle.synchronize() - - // 7. Decompress the original data at offset 0 - Logger.info("Decompressing original disk image with same mechanism as cache pull...") - let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: backupPath.path, - outputHandle: outputHandle, - startOffset: 0 - ) - - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - - // 8. 
Ensure all data is written to disk with multiple explicit syncs - try outputHandle.synchronize() - - // Force an fsync using lower-level API for the file descriptor - let fd = outputHandle.fileDescriptor - if fd >= 0 { - fsync(fd) - Logger.info("Performed low-level fsync on file descriptor") - } - - // Very important: explicitly close the handle here inside the autorelease pool - try outputHandle.close() - Logger.info("File handle explicitly closed after decompression and synchronization") - } - - // Perform an explicit filesystem sync after closing the file handle - let postCloseSyncProcess = Process() - postCloseSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try postCloseSyncProcess.run() - postCloseSyncProcess.waitUntilExit() - - // Wait longer to ensure all filesystem operations are complete - Logger.info("Waiting for filesystem operations to complete...") - Thread.sleep(forTimeInterval: 1.0) - - // 9. Optimize sparse file with cp -c (exactly matching cache pull process) - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation...") - let optimizedPath = diskImgPath.path + ".optimized" - - // Run a sync before optimization - let syncBeforeOptimize = Process() - syncBeforeOptimize.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncBeforeOptimize.run() - syncBeforeOptimize.waitUntilExit() - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", diskImgPath.path, optimizedPath] - - do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 - let originalUsage = getActualDiskUsage(path: diskImgPath.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Before replacing the file, make sure to synchronize the filesystem - let syncBeforeReplace = Process() - syncBeforeReplace.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncBeforeReplace.run() - syncBeforeReplace.waitUntilExit() - - // Now replace the original with the optimized version - try FileManager.default.removeItem(at: diskImgPath) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) - Logger.info("Replaced with optimized sparse version") - - // Additional sync after replacement - let syncAfterReplace = Process() - syncAfterReplace.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncAfterReplace.run() - syncAfterReplace.waitUntilExit() - } else { - Logger.info("Sparse optimization failed, using original file") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } - - // 10. Explicitly set permissions to match cache hit (0644) - let chmodProcess = Process() - chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - chmodProcess.arguments = ["0644", diskImgPath.path] - try chmodProcess.run() - chmodProcess.waitUntilExit() - - // 11. 
Final sync to ensure all data is flushed to disk - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncProcess.run() - syncProcess.waitUntilExit() - - // One more filesystem sync for good measure - let finalSyncProcess = Process() - finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try finalSyncProcess.run() - finalSyncProcess.waitUntilExit() - - // Wait a moment for final filesystem operations - Thread.sleep(forTimeInterval: 0.5) - - // 12. Clean up the backup file - try FileManager.default.removeItem(at: backupPath) - - Logger.info("Cache pull simulation completed successfully") - } - private func getToken(repository: String) async throws -> String { let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository // Request both pull and push scope for uploads From eca23e0333b9a5ef60c6d58834401bb8ccc877f5 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 18:48:07 -0700 Subject: [PATCH 21/28] Handle disk partition --- .../ImageContainerRegistry.swift | 126 ++++++++++++++++-- 1 file changed, 118 insertions(+), 8 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index d075f445..0cd61573 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1350,22 +1350,132 @@ class ImageContainerRegistry: @unchecked Sendable { return } - Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + Logger.info("Creating disk image clone with partition table preserved...") // Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - // Use shared function to create the disk image - try createDiskImageFromSource( - sourceURL: backupPath, - destinationURL: diskImgPath, - diskSize: diskSize - ) + // We'll use macOS's built-in disk cloning capabilities to preserve partition information + // First, create an empty sparse file with the target size + guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { + // If creation fails, restore the original + try? 
FileManager.default.moveItem(at: backupPath, to: diskImgPath) + throw PullError.fileCreationFailed(diskImgPath.path) + } - // Clean up backup + // Use dd to clone the disk with partition table preserved + Logger.info("Cloning disk with partition table using dd...") + let ddProcess = Process() + ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") + ddProcess.arguments = [ + "if=\(backupPath.path)", + "of=\(diskImgPath.path)", + "bs=4m", // Use a large block size for efficiency + "conv=sparse" // Ensure sparse file creation + ] + + // Capture and log output/errors + let outputPipe = Pipe() + let errorPipe = Pipe() + ddProcess.standardOutput = outputPipe + ddProcess.standardError = errorPipe + + try ddProcess.run() + ddProcess.waitUntilExit() + + // Log command output/errors + let outputData = outputPipe.fileHandleForReading.readDataToEndOfFile() + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + + if let errorOutput = String(data: errorData, encoding: .utf8), !errorOutput.isEmpty { + Logger.info("dd command output: \(errorOutput)") + } + + if ddProcess.terminationStatus != 0 { + Logger.error("dd command failed with status \(ddProcess.terminationStatus)") + // If dd fails, try to restore the original + if FileManager.default.fileExists(atPath: diskImgPath.path) { + try? FileManager.default.removeItem(at: diskImgPath) + } + try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) + throw PullError.fileCreationFailed("dd command failed") + } + + // Sync filesystem to ensure all changes are written + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncProcess.run() + syncProcess.waitUntilExit() + + // Optimize with cp -c to ensure best sparse file representation + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = diskImgPath.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", diskImgPath.path, optimizedPath] + + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: diskImgPath.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace with optimized version + try FileManager.default.removeItem(at: diskImgPath) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? 
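// For illustration only: the dd/cp/sync/hdiutil invocations in this patch all follow the
// same Process + Pipe pattern. A small helper like this (hypothetical, not part of the
// patch) captures exit status, stdout and stderr in one place.
import Foundation

struct CommandResult {
    let status: Int32
    let stdout: String
    let stderr: String
}

func runCommand(_ executable: String, _ arguments: [String]) throws -> CommandResult {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: executable)
    process.arguments = arguments

    let outPipe = Pipe()
    let errPipe = Pipe()
    process.standardOutput = outPipe
    process.standardError = errPipe

    try process.run()
    process.waitUntilExit()

    let out = String(data: outPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? ""
    let err = String(data: errPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? ""
    return CommandResult(status: process.terminationStatus, stdout: out, stderr: err)
}

// Example (paths are placeholders):
// let result = try runCommand("/bin/dd", ["if=\(backup)", "of=\(target)", "bs=4m", "conv=sparse"])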
FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // Set permissions to 0644 + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", diskImgPath.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Final sync + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + + // Clean up backup file try FileManager.default.removeItem(at: backupPath) + Logger.info("Verifying final disk image partition information...") + // Use hdiutil to verify partition information (output only for debugging) + let verifyProcess = Process() + verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + verifyProcess.arguments = ["imageinfo", diskImgPath.path] + + let verifyOutputPipe = Pipe() + verifyProcess.standardOutput = verifyOutputPipe + + try verifyProcess.run() + verifyProcess.waitUntilExit() + + let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() + if let verifyOutput = String(data: verifyOutputData, encoding: .utf8), verifyProcess.terminationStatus == 0 { + // Extract just the partition scheme information for logging + if let partitionSchemeRange = verifyOutput.range(of: "partition-scheme: .*", options: .regularExpression) { + let partitionScheme = verifyOutput[partitionSchemeRange] + Logger.info("Disk image partition scheme: \(partitionScheme)") + } + } + Logger.info("Cache pull simulation completed successfully") } From 6427ae676d02c9b0542563179fdc764340042c55 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 19:25:22 -0700 Subject: [PATCH 22/28] Handle disk partition --- .../ImageContainerRegistry.swift | 253 ++++++++++++++---- 1 file changed, 196 insertions(+), 57 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 0cd61573..9ae6f65f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1350,65 +1350,209 @@ class ImageContainerRegistry: @unchecked Sendable { return } - Logger.info("Creating disk image clone with partition table preserved...") + Logger.info("Creating true disk image clone with partition table preserved...") // Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - // We'll use macOS's built-in disk cloning capabilities to preserve partition information - // First, create an empty sparse file with the target size - guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { - // If creation fails, restore the original - try? 
FileManager.default.moveItem(at: backupPath, to: diskImgPath) - throw PullError.fileCreationFailed(diskImgPath.path) - } + // Let's first check if the original image has a partition table + Logger.info("Checking if source image has a partition table...") + let checkProcess = Process() + checkProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + checkProcess.arguments = ["imageinfo", backupPath.path] - // Use dd to clone the disk with partition table preserved - Logger.info("Cloning disk with partition table using dd...") - let ddProcess = Process() - ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") - ddProcess.arguments = [ - "if=\(backupPath.path)", - "of=\(diskImgPath.path)", - "bs=4m", // Use a large block size for efficiency - "conv=sparse" // Ensure sparse file creation - ] + let checkPipe = Pipe() + checkProcess.standardOutput = checkPipe - // Capture and log output/errors - let outputPipe = Pipe() - let errorPipe = Pipe() - ddProcess.standardOutput = outputPipe - ddProcess.standardError = errorPipe + try checkProcess.run() + checkProcess.waitUntilExit() - try ddProcess.run() - ddProcess.waitUntilExit() + let checkData = checkPipe.fileHandleForReading.readDataToEndOfFile() + let checkOutput = String(data: checkData, encoding: .utf8) ?? "" + Logger.info("Source image info: \(checkOutput)") - // Log command output/errors - let outputData = outputPipe.fileHandleForReading.readDataToEndOfFile() - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + // Try different methods in sequence until one works + var success = false - if let errorOutput = String(data: errorData, encoding: .utf8), !errorOutput.isEmpty { - Logger.info("dd command output: \(errorOutput)") - } - - if ddProcess.terminationStatus != 0 { - Logger.error("dd command failed with status \(ddProcess.terminationStatus)") - // If dd fails, try to restore the original - if FileManager.default.fileExists(atPath: diskImgPath.path) { - try? FileManager.default.removeItem(at: diskImgPath) + // Method 1: Use hdiutil convert to convert the image while preserving all data + if !success { + Logger.info("Trying hdiutil convert...") + let tempPath = tempVMDir.appendingPathComponent("disk.img.temp") + + let convertProcess = Process() + convertProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + convertProcess.arguments = [ + "convert", + backupPath.path, + "-format", "UDRO", // Read-only first to preserve partition table + "-o", tempPath.path + ] + + let convertOutPipe = Pipe() + let convertErrPipe = Pipe() + convertProcess.standardOutput = convertOutPipe + convertProcess.standardError = convertErrPipe + + do { + try convertProcess.run() + convertProcess.waitUntilExit() + + let errData = convertErrPipe.fileHandleForReading.readDataToEndOfFile() + let errOutput = String(data: errData, encoding: .utf8) ?? "" + + if convertProcess.terminationStatus == 0 { + Logger.info("hdiutil convert succeeded. 
Converting to writable format...") + // Now convert to writable format + let convertBackProcess = Process() + convertBackProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + convertBackProcess.arguments = [ + "convert", + tempPath.path, + "-format", "UDRW", // Read-write format + "-o", diskImgPath.path + ] + + try convertBackProcess.run() + convertBackProcess.waitUntilExit() + + if convertBackProcess.terminationStatus == 0 { + Logger.info("Successfully converted to writable format with partition table") + success = true + } else { + Logger.error("hdiutil convert to writable format failed") + } + + // Clean up temporary image + try? FileManager.default.removeItem(at: tempPath) + } else { + Logger.error("hdiutil convert failed: \(errOutput)") + } + } catch { + Logger.error("Error executing hdiutil convert: \(error)") } - try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) - throw PullError.fileCreationFailed("dd command failed") } - // Sync filesystem to ensure all changes are written - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncProcess.run() - syncProcess.waitUntilExit() + // Method 2: Try direct raw copy method + if !success { + Logger.info("Trying direct raw copy with dd...") + + // Create empty file first + FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) + + let ddProcess = Process() + ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") + ddProcess.arguments = [ + "if=\(backupPath.path)", + "of=\(diskImgPath.path)", + "bs=1m", // Large block size + "count=81920" // Ensure we copy everything (80GB+ should be sufficient) + ] + + let ddErrPipe = Pipe() + ddProcess.standardError = ddErrPipe + + do { + try ddProcess.run() + ddProcess.waitUntilExit() + + let errData = ddErrPipe.fileHandleForReading.readDataToEndOfFile() + let errOutput = String(data: errData, encoding: .utf8) ?? "" + + if ddProcess.terminationStatus == 0 { + Logger.info("Raw dd copy completed: \(errOutput)") + success = true + } else { + Logger.error("Raw dd copy failed: \(errOutput)") + } + } catch { + Logger.error("Error executing dd: \(error)") + } + } - // Optimize with cp -c to ensure best sparse file representation + // Method 3: Use a more complex approach with disk mounting + if !success { + Logger.info("Trying advanced disk attach/detach approach...") + + // Mount the source disk image + let attachProcess = Process() + attachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + attachProcess.arguments = ["attach", backupPath.path, "-nomount"] + + let attachPipe = Pipe() + attachProcess.standardOutput = attachPipe + + try attachProcess.run() + attachProcess.waitUntilExit() + + let attachData = attachPipe.fileHandleForReading.readDataToEndOfFile() + let attachOutput = String(data: attachData, encoding: .utf8) ?? "" + + // Extract the disk device from output (/dev/diskN) + var diskDevice: String? 
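// For illustration only: the "try methods in sequence until one works" flow used above
// (hdiutil convert, then dd, then asr, then a plain copy) can be expressed as an ordered
// list of candidate strategies. The names here are hypothetical simplifications, not the
// patch's actual control flow.
import Foundation

func cloneDisk(source: URL, destination: URL) -> Bool {
    // Each candidate returns true on success; later candidates run only if earlier ones fail.
    let strategies: [(String, () -> Bool)] = [
        ("hdiutil convert", { /* convert UDRO then UDRW, preserving the partition table */ false }),
        ("raw dd copy",     { /* dd if=... of=... bs=1m */ false }),
        ("asr restore",     { /* attach source, asr restore --erase, detach */ false }),
    ]
    for (name, attempt) in strategies {
        if attempt() { return true }
        print("\(name) failed, falling back to next method")
    }
    // Last resort: plain file copy so the caller still gets a usable image.
    return (try? FileManager.default.copyItem(at: source, to: destination)) != nil
}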
= nil + if let diskMatch = attachOutput.range(of: "/dev/disk[0-9]+", options: .regularExpression) { + diskDevice = String(attachOutput[diskMatch]) + } + + if let device = diskDevice { + Logger.info("Source disk attached at \(device)") + + // Create a bootable disk image clone + let createProcess = Process() + createProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/asr") + createProcess.arguments = [ + "restore", + "--source", device, + "--target", diskImgPath.path, + "--erase", + "--noprompt" + ] + + let createPipe = Pipe() + createProcess.standardOutput = createPipe + + do { + try createProcess.run() + createProcess.waitUntilExit() + + let createOutput = String(data: createPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? "" + Logger.info("asr output: \(createOutput)") + + if createProcess.terminationStatus == 0 { + Logger.info("Successfully created bootable disk image clone!") + success = true + } else { + Logger.error("Failed to create bootable disk image clone") + } + } catch { + Logger.error("Error executing asr: \(error)") + } + + // Always detach the disk when done + let detachProcess = Process() + detachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + detachProcess.arguments = ["detach", device] + try? detachProcess.run() + detachProcess.waitUntilExit() + } else { + Logger.error("Failed to extract disk device from hdiutil attach output") + } + } + + // Fallback: If none of the methods worked, revert to our previous method just to ensure we have a usable image + if !success { + Logger.info("All specialized methods failed. Reverting to basic copy...") + + // If the disk image file exists (from a failed attempt), remove it + if FileManager.default.fileExists(atPath: diskImgPath.path) { + try FileManager.default.removeItem(at: diskImgPath) + } + + // Attempt a basic file copy which will at least give us something to work with + try FileManager.default.copyItem(at: backupPath, to: diskImgPath) + } + + // Optimize sparseness if possible if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1452,11 +1596,8 @@ class ImageContainerRegistry: @unchecked Sendable { try finalSyncProcess.run() finalSyncProcess.waitUntilExit() - // Clean up backup file - try FileManager.default.removeItem(at: backupPath) - + // Verify the final disk image Logger.info("Verifying final disk image partition information...") - // Use hdiutil to verify partition information (output only for debugging) let verifyProcess = Process() verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") verifyProcess.arguments = ["imageinfo", diskImgPath.path] @@ -1468,15 +1609,13 @@ class ImageContainerRegistry: @unchecked Sendable { verifyProcess.waitUntilExit() let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() - if let verifyOutput = String(data: verifyOutputData, encoding: .utf8), verifyProcess.terminationStatus == 0 { - // Extract just the partition scheme information for logging - if let partitionSchemeRange = verifyOutput.range(of: "partition-scheme: .*", options: .regularExpression) { - let partitionScheme = verifyOutput[partitionSchemeRange] - Logger.info("Disk image partition scheme: \(partitionScheme)") - } - } + let verifyOutput = String(data: verifyOutputData, encoding: .utf8) ?? 
"" + Logger.info("Final disk image verification:\n\(verifyOutput)") - Logger.info("Cache pull simulation completed successfully") + // Clean up backup file + try FileManager.default.removeItem(at: backupPath) + + Logger.info("Cache pull simulation completed successfully with partition table preservation") } private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) From 5b0ff2f2187870da5fbe5e2a923e5ba1a121dfb1 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 19:52:28 -0700 Subject: [PATCH 23/28] Fix first pull --- .../ImageContainerRegistry.swift | 1505 ++++++++--------- 1 file changed, 752 insertions(+), 753 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9ae6f65f..ddf5ea1f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1,9 +1,9 @@ import ArgumentParser +import CommonCrypto +import Compression // Add this import import Darwin import Foundation import Swift -import CommonCrypto -import Compression // Add this import // Extension to calculate SHA256 hash extension Data { @@ -25,14 +25,14 @@ enum PushError: Error { case authenticationFailed case missingToken case invalidURL - case lz4NotFound // Added error case - case invalidMediaType // Added during part refactoring - case missingUncompressedSizeAnnotation // Added for sparse file handling - case fileCreationFailed(String) // Added for sparse file handling - case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling - case missingPart(Int) // Added for sparse file handling - case layerDownloadFailed(String) // Added for download retries - case manifestFetchFailed // Added for manifest fetching + case lz4NotFound // Added error case + case invalidMediaType // Added during part refactoring + case missingUncompressedSizeAnnotation // Added for sparse file handling + case fileCreationFailed(String) // Added for sparse file handling + case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling + case missingPart(Int) // Added for sparse file handling + case layerDownloadFailed(String) // Added for download retries + case manifestFetchFailed // Added for manifest fetching } // Define a specific error type for when no underlying error exists @@ -54,8 +54,11 @@ struct OCIManifestLayer { let digest: String let uncompressedSize: UInt64? let uncompressedContentDigest: String? - - init(mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, uncompressedContentDigest: String? = nil) { + + init( + mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, + uncompressedContentDigest: String? 
= nil + ) { self.mediaType = mediaType self.size = size self.digest = digest @@ -119,21 +122,21 @@ actor DiskPartsCollector { // Store tuples of (sequentialPartNum, url) private var diskParts: [(Int, URL)] = [] // Restore internal counter - private var partCounter = 0 + private var partCounter = 0 // Adds a part and returns its assigned sequential number func addPart(url: URL) -> Int { - partCounter += 1 // Use counter logic - let partNum = partCounter - diskParts.append((partNum, url)) // Store sequential number - return partNum // Return assigned sequential number + partCounter += 1 // Use counter logic + let partNum = partCounter + diskParts.append((partNum, url)) // Store sequential number + return partNum // Return assigned sequential number } // Sort by the sequential part number (index 0 of tuple) func getSortedParts() -> [(Int, URL)] { return diskParts.sorted { $0.0 < $1.0 } } - + // Restore getTotalParts func getTotalParts() -> Int { return partCounter @@ -363,7 +366,7 @@ struct DownloadStats { // Renamed struct struct UploadStats { let totalBytes: Int64 - let uploadedBytes: Int64 // Renamed + let uploadedBytes: Int64 // Renamed let elapsedTime: TimeInterval let averageSpeed: Double let peakSpeed: Double @@ -391,9 +394,13 @@ struct UploadStats { let hours = Int(seconds) / 3600 let minutes = (Int(seconds) % 3600) / 60 let secs = Int(seconds) % 60 - if hours > 0 { return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) } - else if minutes > 0 { return String(format: "%d minutes, %d seconds", minutes, secs) } - else { return String(format: "%d seconds", secs) } + if hours > 0 { + return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) + } else if minutes > 0 { + return String(format: "%d minutes, %d seconds", minutes, secs) + } else { + return String(format: "%d seconds", secs) + } } } @@ -408,15 +415,15 @@ actor TaskCounter { class ImageContainerRegistry: @unchecked Sendable { private let registry: String private let organization: String - private let downloadProgress = ProgressTracker() // Renamed for clarity - private let uploadProgress = UploadProgressTracker() // Added upload tracker + private let downloadProgress = ProgressTracker() // Renamed for clarity + private let uploadProgress = UploadProgressTracker() // Added upload tracker private let cacheDirectory: URL private let downloadLock = NSLock() private var activeDownloads: [String] = [] private let cachingEnabled: Bool // Constants for zero-skipping write logic - private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros + private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros private static let zeroChunk = Data(count: holeGranularityBytes) // Add the createProgressBar function here as a private method @@ -768,9 +775,7 @@ class ImageContainerRegistry: @unchecked Sendable { ) let counter = TaskCounter() - // Remove totalDiskParts - // var totalDiskParts: Int? 
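// For illustration only: a rough sketch of how DiskPartsCollector is meant to be used from
// a throwing task group. Concurrent download tasks call addPart(url:) and the actor
// serialises access to its internal array, so no extra locking is needed. The helper name
// and shape are assumptions, not code from the patch.
import Foundation

func collectParts(_ urls: [URL]) async throws -> [(Int, URL)] {
    let collector = DiskPartsCollector()
    try await withThrowingTaskGroup(of: Void.self) { group in
        for url in urls {
            group.addTask {
                // Download would happen here; the collector assigns the sequential number.
                _ = await collector.addPart(url: url)
            }
        }
        try await group.waitForAll()
    }
    return await collector.getSortedParts()
}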
= nil - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found try await withThrowingTaskGroup(of: Int64.self) { group in for layer in manifest.layers { @@ -785,45 +790,57 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts by media type if layer.mediaType == "application/octet-stream+lz4" { - // --- Handle LZ4 Disk Part Layer --- - lz4LayerCount += 1 // Increment count - let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging - + // --- Handle LZ4 Disk Part Layer --- + lz4LayerCount += 1 // Increment count + let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging + let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest let size = layer.size - if memoryConstrained && FileManager.default.fileExists(atPath: cachedLayer.path) { + if memoryConstrained + && FileManager.default.fileExists(atPath: cachedLayer.path) + { // Add to collector, get sequential number assigned by collector - let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + let collectorPartNum = await diskPartsCollector.addPart( + url: cachedLayer) // Log using the sequential number from collector for clarity if needed, or the lz4LayerCount - Logger.info("Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)") + Logger.info( + "Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)" + ) await downloadProgress.addProgress(Int64(size)) - continue + continue } else { // Download/Copy Path (Task Group) group.addTask { [self] in await counter.increment() let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + let tempPartURL = tempDownloadDir.appendingPathComponent( + "disk.img.part.\(UUID().uuidString)") + try FileManager.default.copyItem( + at: cachedLayer, to: tempPartURL) await downloadProgress.addProgress(Int64(size)) finalPath = tempPartURL } else { - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + let tempPartURL = tempDownloadDir.appendingPathComponent( + "disk.img.part.\(UUID().uuidString)") if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + try await waitForExistingDownload( + digest, cachedLayer: cachedLayer) + if FileManager.default.fileExists(atPath: cachedLayer.path) + { + try FileManager.default.copyItem( + at: cachedLayer, to: tempPartURL) await downloadProgress.addProgress(Int64(size)) finalPath = tempPartURL } else { markDownloadStarted(digest) try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, mediaType: layer.mediaType, token: token, + digest: digest, mediaType: layer.mediaType, + token: token, to: tempPartURL, maxRetries: 5, progress: downloadProgress, manifestId: manifestId ) @@ -833,7 +850,8 @@ class ImageContainerRegistry: @unchecked Sendable { markDownloadStarted(digest) try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, mediaType: layer.mediaType, 
token: token, + digest: digest, mediaType: layer.mediaType, + token: token, to: tempPartURL, maxRetries: 5, progress: downloadProgress, manifestId: manifestId ) @@ -841,15 +859,18 @@ class ImageContainerRegistry: @unchecked Sendable { } } // Add to collector, get sequential number assigned by collector - let collectorPartNum = await diskPartsCollector.addPart(url: finalPath) + let collectorPartNum = await diskPartsCollector.addPart( + url: finalPath) // Log using the sequential number from collector - Logger.info("Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)") + Logger.info( + "Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)" + ) await counter.decrement() return Int64(size) } } } else { - // --- Handle Non-Disk-Part Layer --- + // --- Handle Non-Disk-Part Layer --- let mediaType = layer.mediaType let digest = layer.digest let size = layer.size @@ -858,39 +879,42 @@ class ImageContainerRegistry: @unchecked Sendable { let outputURL: URL switch mediaType { case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": // Might be compressed disk.img single file? - outputURL = tempDownloadDir.appendingPathComponent("disk.img") + "application/octet-stream+gzip": // Might be compressed disk.img single file? + outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") - case "application/octet-stream": // Could be nvram or uncompressed single disk.img - // Heuristic: If a config.json already exists or is expected, assume this is nvram. - // This might need refinement if single disk images use octet-stream. - if manifest.config != nil { + case "application/octet-stream": // Could be nvram or uncompressed single disk.img + // Heuristic: If a config.json already exists or is expected, assume this is nvram. + // This might need refinement if single disk images use octet-stream. 
+ if manifest.config != nil { outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") - } else { + } else { // Assume it's a single-file disk image if no config layer is present outputURL = tempDownloadDir.appendingPathComponent("disk.img") - } + } default: - Logger.info("Skipping unsupported layer media type: \(mediaType)") - continue // Skip to the next layer + Logger.info("Skipping unsupported layer media type: \(mediaType)") + continue // Skip to the next layer } // Add task to download/copy the non-disk-part layer group.addTask { [self] in await counter.increment() - let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) + let cachedLayer = getCachedLayerPath( + manifestId: manifestId, digest: digest) if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) } else { if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + try await waitForExistingDownload( + digest, cachedLayer: cachedLayer) if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: outputURL) + try FileManager.default.copyItem( + at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) - await counter.decrement() // Decrement before returning + await counter.decrement() // Decrement before returning return Int64(size) } } @@ -908,304 +932,62 @@ class ImageContainerRegistry: @unchecked Sendable { return Int64(size) } } - } // End for layer in manifest.layers + } // End for layer in manifest.layers // Wait for remaining tasks for try await _ in group {} - } // End TaskGroup - - // --- Safely retrieve parts AFTER TaskGroup --- - let diskParts = await diskPartsCollector.getSortedParts() // Already sorted by logicalPartNum - // Check if totalDiskParts was set (meaning at least one lz4 layer was processed) - // Get total parts from the collector - let totalPartsFromCollector = await diskPartsCollector.getTotalParts() - // Change guard to if for logging only, as the later if condition handles the logic - if totalPartsFromCollector == 0 { - // If totalParts is 0, it means no layers matched the lz4 format. - Logger.info("No lz4 disk part layers found. Assuming single-part image or non-lz4 parts.") - // Reassembly logic below will be skipped if diskParts is empty. - // Explicitly set totalParts to 0 to prevent entering the reassembly block if diskParts might somehow be non-empty but totalParts was 0 - // This ensures consistency if the collector logic changes. - } - Logger.info("Finished processing layers. Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") - // --- End retrieving parts --- - - // Add detailed logging for debugging - Logger.info("Disk part numbers collected and sorted: \(diskParts.map { $0.0 })") - - Logger.info("") // New line after progress + } // End TaskGroup // Display download statistics let stats = await downloadProgress.getDownloadStats() + Logger.info("") // New line after progress Logger.info(stats.formattedSummary()) - // Parse config.json to get uncompressed size *before* reassembly - let configURL = tempDownloadDir.appendingPathComponent("config.json") - let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) - - // Now also try to get disk size from VM config if OCI annotation not found - var vmConfigDiskSize: UInt64? 
= nil - if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { - do { - let configData = try Data(contentsOf: configURL) - let decoder = JSONDecoder() - if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { - vmConfigDiskSize = vmConfig.diskSize - if let size = vmConfigDiskSize { - Logger.info("Found diskSize from VM config.json: \(size) bytes") - } - } - } catch { - Logger.error("Failed to parse VM config.json for diskSize: \(error)") - } - } - - // Force explicit use - if uncompressedSize != nil { - Logger.info( - "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" - ) - } else if vmConfigDiskSize != nil { - Logger.info( - "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") - } - - // Handle disk parts if present - if !diskParts.isEmpty && totalPartsFromCollector > 0 { - // Use totalPartsFromCollector here - Logger.info("Reassembling \(totalPartsFromCollector) disk image parts using sparse file technique...") - let outputURL = tempVMDir.appendingPathComponent("disk.img") - - // Wrap setup in do-catch for better error reporting - let outputHandle: FileHandle - do { - // 1. Ensure parent directory exists - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true - ) - - // 2. Explicitly create the file first, removing old one if needed - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) - } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) - else { - throw PullError.fileCreationFailed(outputURL.path) - } - - // 3. Now open the handle for writing - outputHandle = try FileHandle(forWritingTo: outputURL) - - } catch { - // Catch errors during directory/file creation or handle opening - Logger.error( - "Failed during setup for disk image reassembly: \(error.localizedDescription)", - metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed( - path: outputURL.path, underlyingError: error) - } - - // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) - // Filter based on the correct media type now - let expectedCompressedTotalSize = UInt64( - manifest.layers.filter { $0.mediaType == "application/octet-stream+lz4" }.reduce(0) - { $0 + $1.size } - ) - Logger.info( - "Total compressed parts size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedCompressedTotalSize), countStyle: .file))" - ) - - // Calculate fallback size (sum of compressed parts) - let _: UInt64 = diskParts.reduce(UInt64(0)) { - (acc: UInt64, element) -> UInt64 in - let fileSize = - (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] - as? UInt64 ?? 0) ?? 0 - return acc + fileSize - } - - // Use: annotation size > VM config diskSize > fallback size - let sizeForTruncate: UInt64 - if let size = uncompressedSize { - Logger.info("Using uncompressed size from annotation: \(size) bytes") - sizeForTruncate = size - } else if let size = vmConfigDiskSize { - Logger.info("Using diskSize from VM config: \(size) bytes") - sizeForTruncate = size - } else { - Logger.error( - "Missing both uncompressed size annotation and VM config diskSize for multi-part image." - ) - throw PullError.missingUncompressedSizeAnnotation - } - - defer { try? 
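// For illustration only: the size used for the sparse truncate is resolved with a simple
// priority chain. This sketch restates that order (layer annotation, then VM config
// diskSize, then error) with hypothetical parameter names.
func resolveDiskSize(annotationSize: UInt64?, vmConfigDiskSize: UInt64?) throws -> UInt64 {
    if let size = annotationSize {
        return size  // uncompressed size recorded in the OCI layer annotation
    }
    if let size = vmConfigDiskSize {
        return size  // diskSize field from the VM's config.json
    }
    throw PullError.missingUncompressedSizeAnnotation
}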
outputHandle.close() } - - // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: sizeForTruncate) - - // Verify the sparse file was created with the correct size - let initialSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Sparse file initialized with size: \(ByteCountFormatter.string(fromByteCount: Int64(initialSize), countStyle: .file))" - ) - - // Add a simple test pattern at the beginning and end of the file to verify it's writable - try outputHandle.seek(toOffset: 0) - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.write(contentsOf: testPattern) - - try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - Logger.info("Test patterns written to sparse file. File is ready for writing.") - - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - - // Iterate using the reliable totalParts count from media type - // Use totalPartsFromCollector for the loop range - for partNum in 1...totalPartsFromCollector { - // Find the part URL from our collected parts using the logical partNum - guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { - // This error should now be less likely, but good to keep - Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") - // Add current state log on error - Logger.error("Current disk part numbers available: \(diskParts.map { $0.0 })") - throw PullError.missingPart(partNum) - } - let partURL = partInfo.1 // Get the URL from the tuple - - Logger.info( - "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") - - // Seek to the correct offset in the output sparse file - try outputHandle.seek(toOffset: currentOffset) - - // Check if this chunk might be all zeros (sparse data) by sampling the compressed data - // Skip this check for now as it's an optimization we can add later if needed - let isLikelySparse = false - - // Always attempt decompression using decompressChunkAndWriteSparse for LZ4 parts - if isLikelySparse { - // For sparse chunks, we don't need to write anything - just advance the offset - // We determine the uncompressed size from the chunk metadata or estimation - - // For now, we'll still decompress to ensure correct behavior, and optimize later - Logger.info("Chunk appears to be sparse, but decompressing for reliability") - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - } else { - Logger.info("Decompressing part \(partNum)") - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - } - - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling" - ) - - // Ensure data is written before processing next part - try outputHandle.synchronize() - } - - // Finalize progress, close handle (done by defer) - reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - Logger.info("") // Newline - - // Optimize sparseness after completing 
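// For illustration only: the zero-skipping idea behind holeGranularityBytes / zeroChunk and
// decompressChunkAndWriteSparse (whose body is not shown here). Decompressed data is walked
// in 4 MB blocks; all-zero blocks are skipped by seeking, which leaves holes in the sparse
// file instead of writing zeros. Treat this as a sketch of the technique, not the exact code.
import Foundation

func writeSparse(_ data: Data, to handle: FileHandle, startingAt offset: UInt64) throws -> UInt64 {
    let blockSize = 4 * 1024 * 1024
    let zeroBlock = Data(count: blockSize)
    var cursor = 0
    var position = offset
    while cursor < data.count {
        let end = min(cursor + blockSize, data.count)
        let block = data.subdata(in: cursor..<end)
        if block.count == blockSize && block == zeroBlock {
            // All zeros: skip ahead; the filesystem keeps this range as a hole.
            position += UInt64(block.count)
        } else {
            try handle.seek(toOffset: position)
            try handle.write(contentsOf: block)
            position += UInt64(block.count)
        }
        cursor = end
    }
    return position - offset
}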
reassembly - try outputHandle.close() // Close handle to ensure all data is flushed - - // Verify final size - let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Final disk image size: \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" - ) - - // Optimize sparseness if on macOS - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation...") - let optimizedPath = outputURL.path + ".optimized" - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", outputURL.path, optimizedPath] - - do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 - let originalUsage = getActualDiskUsage(path: outputURL.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Replace the original with the optimized version - try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) - Logger.info("Replaced with optimized sparse version") - } else { - Logger.info("Sparse optimization failed, using original file") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") - try? 
FileManager.default.removeItem(atPath: optimizedPath) - } - } - - if finalSize != sizeForTruncate { - Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" - ) - } - - Logger.info("Disk image reassembly completed") + // Now that we've downloaded everything to the cache, use copyFromCache to create final VM files + if cachingEnabled { + Logger.info("Using copyFromCache method to properly preserve partition tables") + try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } else { - // Copy single disk image if it exists + // If caching is disabled, just copy files directly to tempVMDir + Logger.info("Caching disabled - copying downloaded files directly to VM directory") + + // Copy non-disk files first + for file in ["config.json", "nvram.bin"] { + let sourceURL = tempDownloadDir.appendingPathComponent(file) + if FileManager.default.fileExists(atPath: sourceURL.path) { + try FileManager.default.copyItem( + at: sourceURL, + to: tempVMDir.appendingPathComponent(file) + ) + } + } + + // For the disk image, we have two cases - either a single file or parts let diskURL = tempDownloadDir.appendingPathComponent("disk.img") if FileManager.default.fileExists(atPath: diskURL.path) { + // Single file disk image try FileManager.default.copyItem( at: diskURL, to: tempVMDir.appendingPathComponent("disk.img") ) + Logger.info("Copied single disk.img file to VM directory") + } else { + // Multiple parts case - use the partitioned disk.img from reassembly + let diskParts = await diskPartsCollector.getSortedParts() + if !diskParts.isEmpty { + Logger.info("Using most recently assembled disk image for VM") + let assembledDiskURL = tempVMDir.appendingPathComponent("disk.img") + if FileManager.default.fileExists(atPath: assembledDiskURL.path) { + Logger.info("Assembled disk.img already exists in VM directory") + } else { + Logger.error( + "Could not find assembled disk image - VM may not boot properly") + } + } else { + Logger.error("No disk image found - VM may not boot properly") + } } } - - // Copy config and nvram files if they exist - for file in ["config.json", "nvram.bin"] { - let sourceURL = tempDownloadDir.appendingPathComponent(file) - if FileManager.default.fileExists(atPath: sourceURL.path) { - try FileManager.default.copyItem( - at: sourceURL, - to: tempVMDir.appendingPathComponent(file) - ) - } - } - } - - // Simulate cache pull behavior if this is a first pull - if !cachingEnabled || !validateCache(manifest: manifest, manifestId: manifestId) { - try simulateCachePull(tempVMDir: tempVMDir) } // Only move to final location once everything is complete @@ -1242,10 +1024,10 @@ class ImageContainerRegistry: @unchecked Sendable { private func createDiskImageFromSource( sourceURL: URL, // Source data to decompress destinationURL: URL, // Where to create the disk image - diskSize: UInt64 // Total size for the sparse file + diskSize: UInt64 // Total size for the sparse file ) throws { Logger.info("Creating sparse disk image...") - + // Create empty destination file if FileManager.default.fileExists(atPath: destinationURL.path) { try FileManager.default.removeItem(at: destinationURL) @@ -1253,11 +1035,11 @@ class ImageContainerRegistry: @unchecked Sendable { guard FileManager.default.createFile(atPath: destinationURL.path, contents: nil) else { throw PullError.fileCreationFailed(destinationURL.path) } - + // Create sparse file let outputHandle = try FileHandle(forWritingTo: 
destinationURL) try outputHandle.truncate(atOffset: diskSize) - + // Write test patterns at beginning and end Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! @@ -1266,7 +1048,7 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + // Decompress the source data at offset 0 Logger.info("Decompressing source data...") let bytesWritten = try decompressChunkAndWriteSparse( @@ -1274,57 +1056,62 @@ class ImageContainerRegistry: @unchecked Sendable { outputHandle: outputHandle, startOffset: 0 ) - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data") - + Logger.info( + "Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data" + ) + // Ensure data is written and close handle try outputHandle.synchronize() try outputHandle.close() - + // Run sync to flush filesystem let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try syncProcess.run() syncProcess.waitUntilExit() - + // Optimize with cp -c if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = destinationURL.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", destinationURL.path, optimizedPath] - + try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get optimization results - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] + as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: destinationURL.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace original with optimized try FileManager.default.removeItem(at: destinationURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: destinationURL) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: destinationURL) Logger.info("Replaced with optimized sparse version") } else { Logger.info("Sparse optimization failed, using original file") try? 
FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to 0644 let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", destinationURL.path] try chmodProcess.run() chmodProcess.waitUntilExit() - + // Final sync let finalSyncProcess = Process() finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") @@ -1335,94 +1122,95 @@ class ImageContainerRegistry: @unchecked Sendable { // Function to simulate cache pull behavior for freshly downloaded images private func simulateCachePull(tempVMDir: URL) throws { Logger.info("Simulating cache pull behavior for freshly downloaded image...") - + // Find disk.img in tempVMDir let diskImgPath = tempVMDir.appendingPathComponent("disk.img") guard FileManager.default.fileExists(atPath: diskImgPath.path) else { Logger.info("No disk.img found to simulate cache pull behavior") return } - + // Get file attributes and size let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - + Logger.info("Creating true disk image clone with partition table preserved...") - + // Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - + // Let's first check if the original image has a partition table Logger.info("Checking if source image has a partition table...") let checkProcess = Process() checkProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") checkProcess.arguments = ["imageinfo", backupPath.path] - + let checkPipe = Pipe() checkProcess.standardOutput = checkPipe - + try checkProcess.run() checkProcess.waitUntilExit() - + let checkData = checkPipe.fileHandleForReading.readDataToEndOfFile() let checkOutput = String(data: checkData, encoding: .utf8) ?? "" Logger.info("Source image info: \(checkOutput)") - + // Try different methods in sequence until one works var success = false - + // Method 1: Use hdiutil convert to convert the image while preserving all data if !success { Logger.info("Trying hdiutil convert...") let tempPath = tempVMDir.appendingPathComponent("disk.img.temp") - + let convertProcess = Process() convertProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") convertProcess.arguments = [ - "convert", - backupPath.path, - "-format", "UDRO", // Read-only first to preserve partition table - "-o", tempPath.path + "convert", + backupPath.path, + "-format", "UDRO", // Read-only first to preserve partition table + "-o", tempPath.path, ] - + let convertOutPipe = Pipe() let convertErrPipe = Pipe() convertProcess.standardOutput = convertOutPipe convertProcess.standardError = convertErrPipe - + do { try convertProcess.run() convertProcess.waitUntilExit() - + let errData = convertErrPipe.fileHandleForReading.readDataToEndOfFile() let errOutput = String(data: errData, encoding: .utf8) ?? "" - + if convertProcess.terminationStatus == 0 { Logger.info("hdiutil convert succeeded. 
Converting to writable format...") // Now convert to writable format let convertBackProcess = Process() convertBackProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") convertBackProcess.arguments = [ - "convert", - tempPath.path, - "-format", "UDRW", // Read-write format - "-o", diskImgPath.path + "convert", + tempPath.path, + "-format", "UDRW", // Read-write format + "-o", diskImgPath.path, ] - + try convertBackProcess.run() convertBackProcess.waitUntilExit() - + if convertBackProcess.terminationStatus == 0 { - Logger.info("Successfully converted to writable format with partition table") + Logger.info( + "Successfully converted to writable format with partition table") success = true } else { Logger.error("hdiutil convert to writable format failed") } - + // Clean up temporary image try? FileManager.default.removeItem(at: tempPath) } else { @@ -1432,33 +1220,33 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Error executing hdiutil convert: \(error)") } } - + // Method 2: Try direct raw copy method if !success { Logger.info("Trying direct raw copy with dd...") - + // Create empty file first FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) - + let ddProcess = Process() ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") ddProcess.arguments = [ "if=\(backupPath.path)", "of=\(diskImgPath.path)", - "bs=1m", // Large block size - "count=81920" // Ensure we copy everything (80GB+ should be sufficient) + "bs=1m", // Large block size + "count=81920", // Ensure we copy everything (80GB+ should be sufficient) ] - + let ddErrPipe = Pipe() ddProcess.standardError = ddErrPipe - + do { try ddProcess.run() ddProcess.waitUntilExit() - + let errData = ddErrPipe.fileHandleForReading.readDataToEndOfFile() let errOutput = String(data: errData, encoding: .utf8) ?? "" - + if ddProcess.terminationStatus == 0 { Logger.info("Raw dd copy completed: \(errOutput)") success = true @@ -1469,34 +1257,36 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Error executing dd: \(error)") } } - + // Method 3: Use a more complex approach with disk mounting if !success { Logger.info("Trying advanced disk attach/detach approach...") - + // Mount the source disk image let attachProcess = Process() attachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") attachProcess.arguments = ["attach", backupPath.path, "-nomount"] - + let attachPipe = Pipe() attachProcess.standardOutput = attachPipe - + try attachProcess.run() attachProcess.waitUntilExit() - + let attachData = attachPipe.fileHandleForReading.readDataToEndOfFile() let attachOutput = String(data: attachData, encoding: .utf8) ?? "" - + // Extract the disk device from output (/dev/diskN) var diskDevice: String? 
= nil - if let diskMatch = attachOutput.range(of: "/dev/disk[0-9]+", options: .regularExpression) { + if let diskMatch = attachOutput.range( + of: "/dev/disk[0-9]+", options: .regularExpression) + { diskDevice = String(attachOutput[diskMatch]) } - + if let device = diskDevice { Logger.info("Source disk attached at \(device)") - + // Create a bootable disk image clone let createProcess = Process() createProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/asr") @@ -1505,19 +1295,22 @@ class ImageContainerRegistry: @unchecked Sendable { "--source", device, "--target", diskImgPath.path, "--erase", - "--noprompt" + "--noprompt", ] - + let createPipe = Pipe() createProcess.standardOutput = createPipe - + do { try createProcess.run() createProcess.waitUntilExit() - - let createOutput = String(data: createPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? "" + + let createOutput = + String( + data: createPipe.fileHandleForReading.readDataToEndOfFile(), + encoding: .utf8) ?? "" Logger.info("asr output: \(createOutput)") - + if createProcess.terminationStatus == 0 { Logger.info("Successfully created bootable disk image clone!") success = true @@ -1527,7 +1320,7 @@ class ImageContainerRegistry: @unchecked Sendable { } catch { Logger.error("Error executing asr: \(error)") } - + // Always detach the disk when done let detachProcess = Process() detachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") @@ -1538,98 +1331,102 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Failed to extract disk device from hdiutil attach output") } } - + // Fallback: If none of the methods worked, revert to our previous method just to ensure we have a usable image if !success { Logger.info("All specialized methods failed. Reverting to basic copy...") - + // If the disk image file exists (from a failed attempt), remove it if FileManager.default.fileExists(atPath: diskImgPath.path) { try FileManager.default.removeItem(at: diskImgPath) } - + // Attempt a basic file copy which will at least give us something to work with try FileManager.default.copyItem(at: backupPath, to: diskImgPath) } - + // Optimize sparseness if possible if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", diskImgPath.path, optimizedPath] - + try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] + as? UInt64) ?? 
0 let originalUsage = getActualDiskUsage(path: diskImgPath.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace with optimized version try FileManager.default.removeItem(at: diskImgPath) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") } else { Logger.info("Sparse optimization failed, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to 0644 let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - + // Final sync let finalSyncProcess = Process() finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try finalSyncProcess.run() finalSyncProcess.waitUntilExit() - + // Verify the final disk image Logger.info("Verifying final disk image partition information...") let verifyProcess = Process() verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") verifyProcess.arguments = ["imageinfo", diskImgPath.path] - + let verifyOutputPipe = Pipe() verifyProcess.standardOutput = verifyOutputPipe - + try verifyProcess.run() verifyProcess.waitUntilExit() - + let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() let verifyOutput = String(data: verifyOutputData, encoding: .utf8) ?? "" Logger.info("Final disk image verification:\n\(verifyOutput)") - + // Clean up backup file try FileManager.default.removeItem(at: backupPath) - - Logger.info("Cache pull simulation completed successfully with partition table preservation") + + Logger.info( + "Cache pull simulation completed successfully with partition table preservation") } private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1637,13 +1434,14 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - // Add to collector. It will assign the sequential part number. 
- let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") - } - else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info( + "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" + ) + } else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1651,12 +1449,12 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? "nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } // Copy the non-disk file directly from cache to destination @@ -1667,19 +1465,20 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // --- Safely retrieve parts AFTER loop --- - let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number - let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector + // --- Safely retrieve parts AFTER loop --- + let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number + let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") - // --- End retrieving parts --- + // --- End retrieving parts --- // Reassemble disk parts if needed // Use the count from the collector if !diskPartSources.isEmpty { // Use totalParts from collector directly - Logger.info("Reassembling \(totalParts) disk image parts using sparse file technique...") - + Logger.info( + "Reassembling \(totalParts) disk image parts using sparse file technique...") + // Get uncompressed size from cached config file (needs to be copied first) let configURL = destination.appendingPathComponent("config.json") // Parse config.json to get uncompressed size *before* reassembly @@ -1713,24 +1512,25 @@ class ImageContainerRegistry: @unchecked Sendable { } else { // If neither is found in cache scenario, throw error as we cannot determine the size Logger.error( - "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." - + " Cannot reassemble." + "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." + + " Cannot reassemble." 
) throw PullError.missingUncompressedSizeAnnotation } // Now that expectedTotalSize is guaranteed to be non-nil, proceed with setup guard let sizeForTruncate = expectedTotalSize else { - // This should not happen due to the checks above, but safety first - let nilError: Error? = nil - // Use nil-coalescing to provide a default error, appeasing the compiler - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) + // This should not happen due to the checks above, but safety first + let nilError: Error? = nil + // Use nil-coalescing to provide a default error, appeasing the compiler + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) } // If we have just one disk part, use the shared function if totalParts == 1 { // Single part - use shared function - let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) + let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) try createDiskImageFromSource( sourceURL: sourceURL, destinationURL: outputURL, @@ -1742,22 +1542,30 @@ class ImageContainerRegistry: @unchecked Sendable { let outputHandle: FileHandle do { // Ensure parent directory exists - try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true + ) // Explicitly create the file first, removing old one if needed if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) + else { throw PullError.fileCreationFailed(outputURL.path) } // Open handle for writing outputHandle = try FileHandle(forWritingTo: outputURL) // Set the file size (creates sparse file) try outputHandle.truncate(atOffset: sizeForTruncate) - Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") + Logger.info( + "Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))" + ) } catch { - Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + Logger.error( + "Failed during setup for cached disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: error) } // Ensure handle is closed when exiting this scope @@ -1769,11 +1577,15 @@ class ImageContainerRegistry: @unchecked Sendable { // Iterate from 1 up to the total number of parts found by the collector for collectorPartNum in 1...totalParts { // Find the source URL from our collected parts using the sequential collectorPartNum - guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { - Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + guard + let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) + else { + 
Logger.error( + "Missing required cached part number \(collectorPartNum) in collected parts during reassembly." + ) throw PullError.missingPart(collectorPartNum) } - let sourceURL = sourceInfo.1 // Get URL from tuple + let sourceURL = sourceInfo.1 // Get URL from tuple // Log using the sequential collector part number Logger.info( @@ -1789,10 +1601,10 @@ class ImageContainerRegistry: @unchecked Sendable { currentOffset += decompressedBytesWritten // Update progress (using sizeForTruncate which should be available) reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling Cache") - - try outputHandle.synchronize() // Explicitly synchronize after each chunk + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") + + try outputHandle.synchronize() // Explicitly synchronize after each chunk } // Finalize progress, close handle (done by defer) @@ -1806,13 +1618,13 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + // Ensure handle is properly synchronized before closing try outputHandle.synchronize() - + // Close handle explicitly instead of relying on defer try outputHandle.close() - + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] @@ -1829,44 +1641,49 @@ class ImageContainerRegistry: @unchecked Sendable { } Logger.info("Disk image reassembly completed") - + // Optimize sparseness for cached reassembly if on macOS if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation for cached reassembly...") let optimizedPath = outputURL.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", outputURL.path, optimizedPath] - + do { try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[ + .size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: outputURL.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace the original with the optimized version try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: outputURL) Logger.info("Replaced cached reassembly with optimized sparse version") } else { Logger.info("Sparse optimization failed for cache, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) } } catch { - Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") + Logger.info( + "Error during sparse optimization for cache: \(error.localizedDescription)" + ) try? 
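
// Illustrative sketch (not part of the patch): how the "/bin/cp -c" sparse optimization
// step above can be exercised and measured in isolation. Assumes macOS/APFS; the
// function and parameter names here are hypothetical.
import Foundation

enum CloneError: Error { case cpFailed(Int32) }

func cloneAndMeasure(sourcePath: String, clonePath: String) throws -> (apparent: Int, allocated: Int) {
    // APFS clonefile copy: "-c" asks cp to clone the file, which preserves sparse
    // extents instead of materializing zero-filled blocks.
    let cp = Process()
    cp.executableURL = URL(fileURLWithPath: "/bin/cp")
    cp.arguments = ["-c", sourcePath, clonePath]
    try cp.run()
    cp.waitUntilExit()
    guard cp.terminationStatus == 0 else { throw CloneError.cpFailed(cp.terminationStatus) }

    // Apparent (logical) size vs. blocks actually allocated on disk; sparse files
    // show a large gap between the two, which is what the log lines above report.
    let values = try URL(fileURLWithPath: clonePath)
        .resourceValues(forKeys: [.fileSizeKey, .totalFileAllocatedSizeKey])
    return (values.fileSize ?? 0, values.totalFileAllocatedSize ?? 0)
}
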
FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to ensure consistency let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") @@ -1880,12 +1697,16 @@ class ImageContainerRegistry: @unchecked Sendable { } private func getToken(repository: String) async throws -> String { - let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository + let encodedRepo = + repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository // Request both pull and push scope for uploads - let url = URL(string: "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)")! - + let url = URL( + string: + "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)" + )! + var request = URLRequest(url: url) - request.httpMethod = "GET" // Token endpoint uses GET + request.httpMethod = "GET" // Token endpoint uses GET request.setValue("application/json", forHTTPHeaderField: "Accept") // *** Add Basic Authentication Header if credentials exist *** @@ -1906,26 +1727,31 @@ class ImageContainerRegistry: @unchecked Sendable { // *** End Basic Auth addition *** let (data, response) = try await URLSession.shared.data(for: request) - + // Check response status code *before* parsing JSON guard let httpResponse = response as? HTTPURLResponse else { - throw PushError.authenticationFailed // Or a more generic network error + throw PushError.authenticationFailed // Or a more generic network error } - + guard httpResponse.statusCode == 200 else { // Log detailed error including status code and potentially response body let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" - Logger.error("Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)") + Logger.error( + "Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)" + ) // Throw specific error based on status if needed (e.g., 401 for unauthorized) - throw PushError.authenticationFailed + throw PushError.authenticationFailed } - + let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] - guard let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] as? String else { + guard + let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] + as? String + else { Logger.error("Token not found in registry response.") throw PushError.missingToken } - + return token } @@ -2673,9 +2499,9 @@ class ImageContainerRegistry: @unchecked Sendable { // New push method public func push( - vmDirPath: String, - imageName: String, - tags: [String], + vmDirPath: String, + imageName: String, + tags: [String], chunkSizeMb: Int = 512, verbose: Bool = false, dryRun: Bool = false, @@ -2686,18 +2512,18 @@ class ImageContainerRegistry: @unchecked Sendable { metadata: [ "vm_path": vmDirPath, "imageName": imageName, - "tags": "\(tags.joined(separator: ", "))", // Log all tags + "tags": "\(tags.joined(separator: ", "))", // Log all tags "registry": registry, "organization": organization, "chunk_size": "\(chunkSizeMb)MB", "dry_run": "\(dryRun)", - "reassemble": "\(reassemble)" + "reassemble": "\(reassemble)", ]) - + // Remove tag parsing here, imageName is now passed directly // let components = image.split(separator: ":") ... 
// let imageTag = String(tag) - + // Get authentication token only if not in dry-run mode var token: String = "" if !dryRun { @@ -2706,17 +2532,17 @@ class ImageContainerRegistry: @unchecked Sendable { } else { Logger.info("Dry run mode: skipping authentication token request") } - + // Create working directory inside the VM folder for caching/resuming let workDir = URL(fileURLWithPath: vmDirPath).appendingPathComponent(".lume_push_cache") try FileManager.default.createDirectory(at: workDir, withIntermediateDirectories: true) Logger.info("Using push cache directory: \(workDir.path)") - + // Get VM files that need to be pushed using vmDirPath let diskPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("disk.img") let configPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("config.json") let nvramPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("nvram.bin") - + var layers: [OCIManifestLayer] = [] var uncompressedDiskSize: UInt64? = nil @@ -2724,7 +2550,7 @@ class ImageContainerRegistry: @unchecked Sendable { let cachedConfigPath = workDir.appendingPathComponent("config.json") var configDigest: String? = nil var configSize: Int? = nil - + if FileManager.default.fileExists(atPath: cachedConfigPath.path) { Logger.info("Using cached config.json") do { @@ -2734,7 +2560,8 @@ class ImageContainerRegistry: @unchecked Sendable { // Try to get uncompressed disk size from cached config if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { uncompressedDiskSize = vmConfig.diskSize - Logger.info("Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") + Logger.info( + "Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") } } catch { Logger.error("Failed to read cached config.json: \(error). Will re-process.") @@ -2745,20 +2572,22 @@ class ImageContainerRegistry: @unchecked Sendable { let configData = try Data(contentsOf: configPath) configDigest = "sha256:" + configData.sha256String() configSize = configData.count - try configData.write(to: cachedConfigPath) // Save to cache + try configData.write(to: cachedConfigPath) // Save to cache // Try to get uncompressed disk size from original config if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { uncompressedDiskSize = vmConfig.diskSize Logger.info("Found disk size in config: \(uncompressedDiskSize ?? 
0) bytes") } } - - if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded - if !dryRun { + + if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded + if !dryRun { // Upload only if not in dry-run mode and blob doesn't exist - if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + if !(try await blobExists( + repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) + { Logger.info("Uploading config.json blob") - let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload + let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload digest = try await uploadBlobFromData( repository: "\(self.organization)/\(imageName)", data: configData, @@ -2769,13 +2598,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } // Add config layer - layers.append(OCIManifestLayer( - mediaType: "application/vnd.oci.image.config.v1+json", - size: size, - digest: digest - )) + layers.append( + OCIManifestLayer( + mediaType: "application/vnd.oci.image.config.v1+json", + size: size, + digest: digest + )) } - + // Process nvram.bin let cachedNvramPath = workDir.appendingPathComponent("nvram.bin") var nvramDigest: String? = nil @@ -2788,47 +2618,56 @@ class ImageContainerRegistry: @unchecked Sendable { nvramDigest = "sha256:" + nvramData.sha256String() nvramSize = nvramData.count } catch { - Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.") + Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.") } } else if FileManager.default.fileExists(atPath: nvramPath.path) { Logger.info("Processing nvram.bin") let nvramData = try Data(contentsOf: nvramPath) nvramDigest = "sha256:" + nvramData.sha256String() nvramSize = nvramData.count - try nvramData.write(to: cachedNvramPath) // Save to cache + try nvramData.write(to: cachedNvramPath) // Save to cache } - - if var digest = nvramDigest, let size = nvramSize { // Use 'var' - if !dryRun { - // Upload only if not in dry-run mode and blob doesn't exist - if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + + if var digest = nvramDigest, let size = nvramSize { // Use 'var' + if !dryRun { + // Upload only if not in dry-run mode and blob doesn't exist + if !(try await blobExists( + repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) + { Logger.info("Uploading nvram.bin blob") - let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache + let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache digest = try await uploadBlobFromData( repository: "\(self.organization)/\(imageName)", data: nvramData, token: token ) } else { - Logger.info("NVRAM blob already exists on registry") + Logger.info("NVRAM blob already exists on registry") } } // Add nvram layer - layers.append(OCIManifestLayer( - mediaType: "application/octet-stream", - size: size, - digest: digest - )) + layers.append( + OCIManifestLayer( + mediaType: "application/octet-stream", + size: size, + digest: digest + )) } - + // Process disk.img if FileManager.default.fileExists(atPath: diskPath.path) { let diskAttributes = try FileManager.default.attributesOfItem(atPath: diskPath.path) let diskSize = diskAttributes[.size] as? UInt64 ?? 0 let actualDiskSize = uncompressedDiskSize ?? 
diskSize - Logger.info("Processing disk.img in chunks", metadata: ["disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB"]) + Logger.info( + "Processing disk.img in chunks", + metadata: [ + "disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", + "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB", + ]) let chunksDir = workDir.appendingPathComponent("disk.img.parts") - try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: chunksDir, withIntermediateDirectories: true) let chunkSizeBytes = chunkSizeMb * 1024 * 1024 let totalChunks = Int((diskSize + UInt64(chunkSizeBytes) - 1) / UInt64(chunkSizeBytes)) Logger.info("Splitting disk into \(totalChunks) chunks") @@ -2836,58 +2675,125 @@ class ImageContainerRegistry: @unchecked Sendable { defer { try? fileHandle.close() } var pushedDiskLayers: [(index: Int, layer: OCIManifestLayer)] = [] var diskChunks: [(index: Int, path: URL, digest: String)] = [] - - try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { group in + + try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { + group in let maxConcurrency = 4 for chunkIndex in 0..= maxConcurrency { if let res = try await group.next() { pushedDiskLayers.append((res.0, res.1)); diskChunks.append((res.0, res.2, res.3)) } } + if chunkIndex >= maxConcurrency { + if let res = try await group.next() { + pushedDiskLayers.append((res.0, res.1)) + diskChunks.append((res.0, res.2, res.3)) + } + } group.addTask { [token, verbose, dryRun, organization, imageName] in let chunkIndex = chunkIndex let chunkPath = chunksDir.appendingPathComponent("chunk.\(chunkIndex)") - let metadataPath = chunksDir.appendingPathComponent("chunk_metadata.\(chunkIndex).json") + let metadataPath = chunksDir.appendingPathComponent( + "chunk_metadata.\(chunkIndex).json") var layer: OCIManifestLayer? = nil var finalCompressedDigest: String? = nil - if FileManager.default.fileExists(atPath: metadataPath.path), FileManager.default.fileExists(atPath: chunkPath.path) { + if FileManager.default.fileExists(atPath: metadataPath.path), + FileManager.default.fileExists(atPath: chunkPath.path) + { do { let metadataData = try Data(contentsOf: metadataPath) - let metadata = try JSONDecoder().decode(ChunkMetadata.self, from: metadataData) - Logger.info("Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache") + let metadata = try JSONDecoder().decode( + ChunkMetadata.self, from: metadataData) + Logger.info( + "Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache") finalCompressedDigest = metadata.compressedDigest - if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: metadata.compressedDigest, token: token)) { Logger.info("Uploading cached chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: metadata.compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry") } } - layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: metadata.compressedSize, digest: metadata.compressedDigest, uncompressedSize: metadata.uncompressedSize, uncompressedContentDigest: metadata.uncompressedDigest) - } catch { Logger.info("Failed to load cached metadata/chunk for index \(chunkIndex): \(error). 
Re-processing."); finalCompressedDigest = nil; layer = nil } + if !dryRun { + if !(try await self.blobExists( + repository: "\(organization)/\(imageName)", + digest: metadata.compressedDigest, token: token)) + { + Logger.info("Uploading cached chunk \(chunkIndex + 1) blob") + _ = try await self.uploadBlobFromPath( + repository: "\(organization)/\(imageName)", + path: chunkPath, digest: metadata.compressedDigest, + token: token) + } else { + Logger.info( + "Chunk \(chunkIndex + 1) blob already exists on registry" + ) + } + } + layer = OCIManifestLayer( + mediaType: "application/octet-stream+lz4", + size: metadata.compressedSize, + digest: metadata.compressedDigest, + uncompressedSize: metadata.uncompressedSize, + uncompressedContentDigest: metadata.uncompressedDigest) + } catch { + Logger.info( + "Failed to load cached metadata/chunk for index \(chunkIndex): \(error). Re-processing." + ) + finalCompressedDigest = nil + layer = nil + } } if layer == nil { Logger.info("Processing chunk \(chunkIndex + 1)/\(totalChunks)") let localFileHandle = try FileHandle(forReadingFrom: diskPath) defer { try? localFileHandle.close() } try localFileHandle.seek(toOffset: UInt64(chunkIndex * chunkSizeBytes)) - let chunkData = try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() + let chunkData = + try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() let uncompressedSize = UInt64(chunkData.count) let uncompressedDigest = "sha256:" + chunkData.sha256String() - let compressedData = try (chunkData as NSData).compressed(using: .lz4) as Data + let compressedData = + try (chunkData as NSData).compressed(using: .lz4) as Data let compressedSize = compressedData.count let compressedDigest = "sha256:" + compressedData.sha256String() try compressedData.write(to: chunkPath) - let metadata = ChunkMetadata(uncompressedDigest: uncompressedDigest, uncompressedSize: uncompressedSize, compressedDigest: compressedDigest, compressedSize: compressedSize) + let metadata = ChunkMetadata( + uncompressedDigest: uncompressedDigest, + uncompressedSize: uncompressedSize, + compressedDigest: compressedDigest, compressedSize: compressedSize) let metadataData = try JSONEncoder().encode(metadata) try metadataData.write(to: metadataPath) finalCompressedDigest = compressedDigest - if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: compressedDigest, token: token)) { Logger.info("Uploading processed chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)") } } - layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: compressedSize, digest: compressedDigest, uncompressedSize: uncompressedSize, uncompressedContentDigest: uncompressedDigest) + if !dryRun { + if !(try await self.blobExists( + repository: "\(organization)/\(imageName)", + digest: compressedDigest, token: token)) + { + Logger.info("Uploading processed chunk \(chunkIndex + 1) blob") + _ = try await self.uploadBlobFromPath( + repository: "\(organization)/\(imageName)", path: chunkPath, + digest: compressedDigest, token: token) + } else { + Logger.info( + "Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)" + ) + } + } + layer = OCIManifestLayer( + mediaType: "application/octet-stream+lz4", size: compressedSize, + digest: compressedDigest, uncompressedSize: uncompressedSize, + 
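
// Illustrative sketch (not from the patch): the per-chunk compress-and-digest step used
// for the "application/octet-stream+lz4" layers above, in isolation. CryptoKit's SHA256
// stands in for the project's sha256String() helper; ChunkInfo is a hypothetical
// stand-in for ChunkMetadata.
import Foundation
import CryptoKit

struct ChunkInfo: Codable {
    let uncompressedDigest: String
    let uncompressedSize: UInt64
    let compressedDigest: String
    let compressedSize: Int
}

func digest(_ data: Data) -> String {
    "sha256:" + SHA256.hash(data: data).map { String(format: "%02x", $0) }.joined()
}

func compressChunk(_ chunk: Data) throws -> (compressed: Data, info: ChunkInfo) {
    // LZ4 keeps decompression cheap on pull; recording the uncompressed digest lets the
    // puller verify reassembled bytes without re-reading the whole disk image.
    let compressed = try (chunk as NSData).compressed(using: .lz4) as Data
    let info = ChunkInfo(
        uncompressedDigest: digest(chunk),
        uncompressedSize: UInt64(chunk.count),
        compressedDigest: digest(compressed),
        compressedSize: compressed.count)
    return (compressed, info)
}
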
uncompressedContentDigest: uncompressedDigest) + } + guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { + throw PushError.blobUploadFailed + } + if verbose { + Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } - guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { throw PushError.blobUploadFailed } - if verbose { Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } return (chunkIndex, finalLayer, chunkPath, finalDigest) } } - for try await (index, layer, path, digest) in group { pushedDiskLayers.append((index, layer)); diskChunks.append((index, path, digest)) } + for try await (index, layer, path, digest) in group { + pushedDiskLayers.append((index, layer)) + diskChunks.append((index, path, digest)) + } } - layers.append(contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) + layers.append( + contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) diskChunks.sort { $0.index < $1.index } Logger.info("All disk chunks processed successfully") - // --- Calculate Total Upload Size & Initialize Tracker --- + // --- Calculate Total Upload Size & Initialize Tracker --- if !dryRun { var totalUploadSizeBytes: Int64 = 0 var totalUploadFiles: Int = 0 @@ -2898,49 +2804,60 @@ class ImageContainerRegistry: @unchecked Sendable { } // Add nvram size if it exists if let size = nvramSize { - totalUploadSizeBytes += Int64(size) - totalUploadFiles += 1 + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 } // Add sizes of all compressed disk chunks - let allChunkSizes = diskChunks.compactMap { try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 ?? 0 } + let allChunkSizes = diskChunks.compactMap { + try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 + ?? 0 + } totalUploadSizeBytes += allChunkSizes.reduce(0, +) - totalUploadFiles += totalChunks // Use totalChunks calculated earlier - + totalUploadFiles += totalChunks // Use totalChunks calculated earlier + if totalUploadSizeBytes > 0 { - Logger.info("Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))") + Logger.info( + "Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))" + ) await uploadProgress.setTotal(totalUploadSizeBytes, files: totalUploadFiles) // Print initial progress bar - print("[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... ") - fflush(stdout) - } else { - Logger.info("No files marked for upload.") - } + print( + "[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... 
" + ) + fflush(stdout) + } else { + Logger.info("No files marked for upload.") + } } - // --- End Size Calculation & Init --- + // --- End Size Calculation & Init --- // Perform reassembly verification if requested in dry-run mode if dryRun && reassemble { Logger.info("=== REASSEMBLY MODE ===") Logger.info("Reassembling chunks to verify integrity...") let reassemblyDir = workDir.appendingPathComponent("reassembly") - try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: reassemblyDir, withIntermediateDirectories: true) let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") - + // Pre-allocate a sparse file with the correct size - Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") + Logger.info( + "Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))..." + ) if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } - guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) + else { throw PushError.fileCreationFailed(reassembledFile.path) } - + let outputHandle = try FileHandle(forWritingTo: reassembledFile) defer { try? outputHandle.close() } - + // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: actualDiskSize) - + // Add test patterns at start and end to verify writability let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) @@ -2948,217 +2865,266 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: actualDiskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + Logger.info("Test patterns written to sparse file. File is ready for writing.") - + // Track reassembly progress var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 - + // Process each chunk in order for (index, cachedChunkPath, _) in diskChunks.sorted(by: { $0.index < $1.index }) { - Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") - + Logger.info( + "Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)..." 
+ ) + // Always seek to the correct position try outputHandle.seek(toOffset: currentOffset) - + // Decompress and write the chunk let decompressedBytesWritten = try decompressChunkAndWriteSparse( inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset ) - + currentOffset += decompressedBytesWritten reassemblyProgressLogger.logProgress( current: Double(currentOffset) / Double(actualDiskSize), context: "Reassembling" ) - + // Ensure data is written before processing next part try outputHandle.synchronize() } - + // Finalize progress reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline - + // Close handle before post-processing try outputHandle.close() - + // Optimize sparseness if on macOS let optimizedFile = reassemblyDir.appendingPathComponent("optimized_disk.img") if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", reassembledFile.path, optimizedFile.path] - + do { try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get sizes of original and optimized files - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem( + atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: reassembledFile.path) let optimizedUsage = getActualDiskUsage(path: optimizedFile.path) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace original with optimized version try FileManager.default.removeItem(at: reassembledFile) try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile) Logger.info("Using sparse-optimized file for verification") } else { - Logger.info("Sparse optimization failed, using original file for verification") + Logger.info( + "Sparse optimization failed, using original file for verification") try? FileManager.default.removeItem(at: optimizedFile) } } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") + Logger.info( + "Error during sparse optimization: \(error.localizedDescription)") try? FileManager.default.removeItem(at: optimizedFile) } } - + // Verification step Logger.info("Verifying reassembled file...") let originalSize = diskSize let originalDigest = calculateSHA256(filePath: diskPath.path) - let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) + let reassembledAttributes = try FileManager.default.attributesOfItem( + atPath: reassembledFile.path) let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 
0 let reassembledDigest = calculateSHA256(filePath: reassembledFile.path) - + // Check actual disk usage let originalActualSize = getActualDiskUsage(path: diskPath.path) let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path) - + // Report results Logger.info("Results:") - Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)") - Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)") + Logger.info( + " Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)" + ) + Logger.info( + " Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)" + ) Logger.info(" Original digest: \(originalDigest)") Logger.info(" Reassembled digest: \(reassembledDigest)") - Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))") - Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))") - + Logger.info( + " Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))" + ) + Logger.info( + " Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))" + ) + // Determine if verification was successful if originalDigest == reassembledDigest { Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") } else { Logger.info("❌ VERIFICATION FAILED: Files differ") - + if originalSize != reassembledSize { - Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") + Logger.info( + " Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes" + ) } - + // Check sparse file characteristics Logger.info("Attempting to identify differences...") - Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions") - Logger.info(" may be handled differently between the original and reassembled files.") - + Logger.info( + "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" + ) + Logger.info( + " may be handled differently between the original and reassembled files." 
+ ) + if originalActualSize > 0 { - let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0 - Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%") - + let diffPercentage = + ((Double(reassembledActualSize) - Double(originalActualSize)) + / Double(originalActualSize)) * 100.0 + Logger.info( + " Disk usage difference: \(String(format: "%.2f", diffPercentage))%") + if diffPercentage < -40 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).") - Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." + ) + Logger.info( + " This indicates sparse regions weren't properly preserved and may affect VM functionality." + ) } else if diffPercentage < -10 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).") - Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." + ) + Logger.info( + " Some sparse regions may not be properly preserved but VM might still function correctly." + ) } else if diffPercentage > 10 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") - Logger.info(" This is unusual and may indicate improper sparse file handling.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") + Logger.info( + " This is unusual and may indicate improper sparse file handling.") } else { - Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") + Logger.info( + " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." + ) } } - + // Offer recovery option if originalDigest != reassembledDigest { Logger.info("") Logger.info("===== ATTEMPTING RECOVERY ACTION =====") - Logger.info("Since verification failed, trying direct copy as a fallback method.") - + Logger.info( + "Since verification failed, trying direct copy as a fallback method.") + let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img") Logger.info("Creating fallback disk image at: \(fallbackFile.path)") - + // Try rsync first let rsyncProcess = Process() rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync") - rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path] - + rsyncProcess.arguments = [ + "-aS", "--progress", diskPath.path, fallbackFile.path, + ] + do { try rsyncProcess.run() rsyncProcess.waitUntilExit() - + if rsyncProcess.terminationStatus == 0 { - Logger.info("Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)") + Logger.info( + "Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)" + ) } else { // Try cp -c as fallback Logger.info("Rsync failed. Attempting with cp -c command...") let cpProcess = Process() cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp") cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path] - + try cpProcess.run() cpProcess.waitUntilExit() - + if cpProcess.terminationStatus == 0 { - Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") + Logger.info( + "Direct copy completed with cp -c. 
Fallback image available at: \(fallbackFile.path)" + ) } else { Logger.info("All recovery attempts failed.") } } } catch { - Logger.info("Error during recovery attempts: \(error.localizedDescription)") + Logger.info( + "Error during recovery attempts: \(error.localizedDescription)") Logger.info("All recovery attempts failed.") } } } - + Logger.info("Reassembled file is available at: \(reassembledFile.path)") } } - // --- Manifest Creation & Push --- + // --- Manifest Creation & Push --- let manifest = createManifest( layers: layers, - configLayerIndex: layers.firstIndex(where: { $0.mediaType == "application/vnd.oci.image.config.v1+json" }), + configLayerIndex: layers.firstIndex(where: { + $0.mediaType == "application/vnd.oci.image.config.v1+json" + }), uncompressedDiskSize: uncompressedDiskSize ) // Push manifest only if not in dry-run mode if !dryRun { - Logger.info("Pushing manifest(s)") // Updated log + Logger.info("Pushing manifest(s)") // Updated log // Serialize the manifest dictionary to Data first - let manifestData = try JSONSerialization.data(withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys]) + let manifestData = try JSONSerialization.data( + withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys]) // Loop through tags to push the same manifest data for tag in tags { - Logger.info("Pushing manifest for tag: \(tag)") - try await pushManifest( - repository: "\(self.organization)/\(imageName)", - tag: tag, // Use the current tag from the loop - manifest: manifestData, // Pass the serialized Data - token: token // Token should be in scope here now - ) + Logger.info("Pushing manifest for tag: \(tag)") + try await pushManifest( + repository: "\(self.organization)/\(imageName)", + tag: tag, // Use the current tag from the loop + manifest: manifestData, // Pass the serialized Data + token: token // Token should be in scope here now + ) } } // Print final upload summary if not dry run if !dryRun { let stats = await uploadProgress.getUploadStats() - Logger.info("\n\(stats.formattedSummary())") // Add newline for separation + Logger.info("\n\(stats.formattedSummary())") // Add newline for separation } // Clean up cache directory only on successful non-dry-run push } - - private func createManifest(layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64?) -> [String: Any] { + + private func createManifest( + layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64? 
+ ) -> [String: Any] { var manifest: [String: Any] = [ "schemaVersion": 2, "mediaType": "application/vnd.oci.image.manifest.v1+json", @@ -3166,221 +3132,244 @@ class ImageContainerRegistry: @unchecked Sendable { var layerDict: [String: Any] = [ "mediaType": layer.mediaType, "size": layer.size, - "digest": layer.digest + "digest": layer.digest, ] - + if let uncompressedSize = layer.uncompressedSize { var annotations: [String: String] = [:] - annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix - + annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix + if let digest = layer.uncompressedContentDigest { - annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix + annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix } - + layerDict["annotations"] = annotations } - + return layerDict - } + }, ] - + // Add config reference if available if let configIndex = configLayerIndex { let configLayer = layers[configIndex] manifest["config"] = [ "mediaType": configLayer.mediaType, "size": configLayer.size, - "digest": configLayer.digest + "digest": configLayer.digest, ] } - + // Add annotations var annotations: [String: String] = [:] - annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix - + annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix + if let diskSize = uncompressedDiskSize { - annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix + annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix } - + manifest["annotations"] = annotations - + return manifest } - - private func uploadBlobFromData(repository: String, data: Data, token: String) async throws -> String { + + private func uploadBlobFromData(repository: String, data: Data, token: String) async throws + -> String + { // Calculate digest let digest = "sha256:" + data.sha256String() - + // Check if blob already exists if try await blobExists(repository: repository, digest: digest, token: token) { Logger.info("Blob already exists: \(digest)") return digest } - + // Initiate upload let uploadURL = try await startBlobUpload(repository: repository, token: token) - + // Upload blob try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) - + // Report progress await uploadProgress.addProgress(Int64(data.count)) - + return digest } - - private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) async throws -> String { + + private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) + async throws -> String + { // Check if blob already exists if try await blobExists(repository: repository, digest: digest, token: token) { Logger.info("Blob already exists: \(digest)") return digest } - + // Initiate upload let uploadURL = try await startBlobUpload(repository: repository, token: token) - + // Load data from file let data = try Data(contentsOf: path) - + // Upload blob try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) - + // Report progress await uploadProgress.addProgress(Int64(data.count)) - + return digest } - - private func blobExists(repository: String, digest: String, token: String) async throws -> Bool { + + private func blobExists(repository: String, digest: String, token: String) async throws -> Bool + { let url = URL(string: 
"https://\(registry)/v2/\(repository)/blobs/\(digest)")! var request = URLRequest(url: url) request.httpMethod = "HEAD" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - + let (_, response) = try await URLSession.shared.data(for: request) - + if let httpResponse = response as? HTTPURLResponse { return httpResponse.statusCode == 200 } - + return false } - + private func startBlobUpload(repository: String, token: String) async throws -> URL { let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/uploads/")! var request = URLRequest(url: url) request.httpMethod = "POST" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST - + request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST + let (_, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - httpResponse.statusCode == 202, - let locationString = httpResponse.value(forHTTPHeaderField: "Location") else { + + guard let httpResponse = response as? HTTPURLResponse, + httpResponse.statusCode == 202, + let locationString = httpResponse.value(forHTTPHeaderField: "Location") + else { // Log response details on failure - let responseBody = String(data: (try? await URLSession.shared.data(for: request).0) ?? Data(), encoding: .utf8) ?? "(No Body)" - Logger.error("Failed to initiate blob upload. Status: \( (response as? HTTPURLResponse)?.statusCode ?? 0 ). Headers: \( (response as? HTTPURLResponse)?.allHeaderFields ?? [:] ). Body: \(responseBody)") + let responseBody = + String( + data: (try? await URLSession.shared.data(for: request).0) ?? Data(), + encoding: .utf8) ?? "(No Body)" + Logger.error( + "Failed to initiate blob upload. Status: \( (response as? HTTPURLResponse)?.statusCode ?? 0 ). Headers: \( (response as? HTTPURLResponse)?.allHeaderFields ?? [:] ). Body: \(responseBody)" + ) throw PushError.uploadInitiationFailed } - + // Construct the base URL for the registry guard let baseRegistryURL = URL(string: "https://\(registry)") else { Logger.error("Failed to create base registry URL from: \(registry)") - throw PushError.invalidURL - } - - // Create the final upload URL, resolving the location against the base URL - guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else { - Logger.error("Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)") throw PushError.invalidURL } - + + // Create the final upload URL, resolving the location against the base URL + guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else { + Logger.error( + "Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)" + ) + throw PushError.invalidURL + } + Logger.info("Blob upload initiated. Upload URL: \(uploadURL.absoluteString)") - return uploadURL.absoluteURL // Ensure it's absolute + return uploadURL.absoluteURL // Ensure it's absolute } - + private func uploadBlob(url: URL, data: Data, digest: String, token: String) async throws { var components = URLComponents(url: url, resolvingAgainstBaseURL: true)! - + // Add digest parameter var queryItems = components.queryItems ?? 
[] queryItems.append(URLQueryItem(name: "digest", value: digest)) components.queryItems = queryItems - + guard let uploadURL = components.url else { throw PushError.invalidURL } - + var request = URLRequest(url: uploadURL) request.httpMethod = "PUT" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type") request.setValue("\(data.count)", forHTTPHeaderField: "Content-Length") request.httpBody = data - + let (_, response) = try await URLSession.shared.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { throw PushError.blobUploadFailed } } - - private func pushManifest(repository: String, tag: String, manifest: Data, token: String) async throws { + + private func pushManifest(repository: String, tag: String, manifest: Data, token: String) + async throws + { let url = URL(string: "https://\(registry)/v2/\(repository)/manifests/\(tag)")! var request = URLRequest(url: url) request.httpMethod = "PUT" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - request.setValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type") + request.setValue( + "application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type") request.httpBody = manifest - + let (_, response) = try await URLSession.shared.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { throw PushError.manifestPushFailed } } - + private func getCredentialsFromEnvironment() -> (String?, String?) { - let username = ProcessInfo.processInfo.environment["GITHUB_USERNAME"] ?? - ProcessInfo.processInfo.environment["GHCR_USERNAME"] - let password = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] ?? - ProcessInfo.processInfo.environment["GHCR_TOKEN"] + let username = + ProcessInfo.processInfo.environment["GITHUB_USERNAME"] + ?? ProcessInfo.processInfo.environment["GHCR_USERNAME"] + let password = + ProcessInfo.processInfo.environment["GITHUB_TOKEN"] + ?? ProcessInfo.processInfo.environment["GHCR_TOKEN"] return (username, password) } // Add these helper methods for dry-run and reassemble implementation - + // NEW Helper function using Compression framework and sparse writing - private func decompressChunkAndWriteSparse(inputPath: String, outputHandle: FileHandle, startOffset: UInt64) throws -> UInt64 { + private func decompressChunkAndWriteSparse( + inputPath: String, outputHandle: FileHandle, startOffset: UInt64 + ) throws -> UInt64 { guard FileManager.default.fileExists(atPath: inputPath) else { Logger.error("Compressed chunk not found at: \(inputPath)") - return 0 // Or throw an error + return 0 // Or throw an error } - let sourceData = try Data(contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) + let sourceData = try Data( + contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) var currentWriteOffset = startOffset var totalDecompressedBytes: UInt64 = 0 - var sourceReadOffset = 0 // Keep track of how much compressed data we've provided + var sourceReadOffset = 0 // Keep track of how much compressed data we've provided // Use the initializer with the readingFrom closure let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? 
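
// Illustrative sketch (not from the patch): streaming LZ4 decompression with the
// Compression framework's InputFilter, the same mechanism decompressChunkAndWriteSparse
// relies on. The filter pulls compressed input on demand through the trailing closure;
// decompressed output is drained in fixed-size reads. Names are hypothetical.
import Foundation
import Compression

func decompressLZ4(_ compressed: Data, onBlock: (Data) throws -> Void) throws -> UInt64 {
    var readOffset = 0
    var total: UInt64 = 0
    let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? in
        let remaining = compressed.count - readOffset
        guard remaining > 0 else { return nil }          // nil signals end of input
        let take = min(length, remaining)
        let slice = compressed.subdata(in: readOffset..<readOffset + take)
        readOffset += take
        return slice
    }
    // Read decompressed data until the filter reports end of stream.
    while let block = try filter.readData(ofLength: 64 * 1024) {
        try onBlock(block)
        total += UInt64(block.count)
    }
    return total
}
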
in let bytesAvailable = sourceData.count - sourceReadOffset if bytesAvailable == 0 { - return nil // No more data + return nil // No more data } let bytesToRead = min(length, bytesAvailable) - let chunk = sourceData.subdata(in: sourceReadOffset ..< sourceReadOffset + bytesToRead) + let chunk = sourceData.subdata(in: sourceReadOffset..= 0.5) || (completedFiles == totalFiles) + let shouldUpdate = + (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles) - if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set + if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set let currentSpeed = Double(uploadedBytes - lastUpdateBytes) / max(elapsed, 0.001) speedSamples.append(currentSpeed) @@ -3479,14 +3470,17 @@ actor UploadProgressTracker { peakSpeed = max(peakSpeed, currentSpeed) // Apply exponential smoothing - if smoothedSpeed == 0 { smoothedSpeed = currentSpeed } - else { smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed } + if smoothedSpeed == 0 { + smoothedSpeed = currentSpeed + } else { + smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed + } let recentAvgSpeed = calculateAverageSpeed() let totalElapsed = now.timeIntervalSince(startTime) let overallAvgSpeed = totalElapsed > 0 ? Double(uploadedBytes) / totalElapsed : 0 - let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero + let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero logSpeedProgress( current: progress, currentSpeed: currentSpeed, @@ -3494,7 +3488,7 @@ actor UploadProgressTracker { smoothedSpeed: smoothedSpeed, overallSpeed: overallAvgSpeed, peakSpeed: peakSpeed, - context: "Uploading Image" // Changed context + context: "Uploading Image" // Changed context ) lastUpdateTime = now @@ -3521,7 +3515,7 @@ actor UploadProgressTracker { let avgSpeed = totalElapsedTime > 0 ? Double(uploadedBytes) / totalElapsedTime : 0 return UploadStats( totalBytes: totalBytes, - uploadedBytes: uploadedBytes, // Renamed + uploadedBytes: uploadedBytes, // Renamed elapsedTime: totalElapsedTime, averageSpeed: avgSpeed, peakSpeed: peakSpeed @@ -3546,10 +3540,10 @@ actor UploadProgressTracker { let etaSeconds = speedForEta > 0 ? 
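
// Illustrative sketch (not from the patch): the exponential smoothing and ETA math used
// by the upload progress tracker above, in isolation. `alpha` mirrors the tracker's
// speedSmoothing factor; the names are hypothetical.
import Foundation

struct SpeedEstimator {
    var smoothed: Double = 0      // bytes per second after smoothing
    let alpha: Double = 0.3       // weight given to the newest sample

    mutating func add(sample bytesPerSecond: Double) {
        // First sample seeds the estimate; afterwards blend new samples in gradually
        // so the displayed speed and ETA do not jump around between chunks.
        smoothed = smoothed == 0
            ? bytesPerSecond
            : alpha * bytesPerSecond + (1 - alpha) * smoothed
    }

    func eta(remainingBytes: Int64) -> TimeInterval {
        smoothed > 0 ? Double(remainingBytes) / smoothed : 0
    }
}
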
Double(remainingBytes) / speedForEta : 0 let etaStr = formatTimeRemaining(etaSeconds) let progressBar = createProgressBar(progress: current) - let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count + let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count print( - "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output + "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output terminator: "") fflush(stdout) } @@ -3566,7 +3560,10 @@ actor UploadProgressTracker { let units = ["B/s", "KB/s", "MB/s", "GB/s"] var speed = bytesPerSecond var unitIndex = 0 - while speed > 1024 && unitIndex < units.count - 1 { speed /= 1024; unitIndex += 1 } + while speed > 1024 && unitIndex < units.count - 1 { + speed /= 1024 + unitIndex += 1 + } return String(format: "%.1f %@", speed, units[unitIndex]) } private func formatTimeRemaining(_ seconds: Double) -> String { @@ -3574,8 +3571,10 @@ actor UploadProgressTracker { let hours = Int(seconds) / 3600 let minutes = (Int(seconds) % 3600) / 60 let secs = Int(seconds) % 60 - if hours > 0 { return String(format: "%d:%02d:%02d", hours, minutes, secs) } - else { return String(format: "%d:%02d", minutes, secs) } + if hours > 0 { + return String(format: "%d:%02d:%02d", hours, minutes, secs) + } else { + return String(format: "%d:%02d", minutes, secs) + } } } - From b77e58244c22575caaca3cd8735e8fdf394699d2 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 20:09:46 -0700 Subject: [PATCH 24/28] Fix first pull --- .../ImageContainerRegistry.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index ddf5ea1f..d51d92b3 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1011,6 +1011,12 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) + // If caching is disabled, clean up the cache entry + if !cachingEnabled { + Logger.info("Caching disabled - cleaning up temporary cache entry") + try? 
cleanupCacheEntry(manifestId: manifestId) + } + Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1020,6 +1026,16 @@ class ImageContainerRegistry: @unchecked Sendable { ) } + // Helper function to clean up a specific cache entry + private func cleanupCacheEntry(manifestId: String) throws { + let cacheDir = getImageCacheDirectory(manifestId: manifestId) + + if FileManager.default.fileExists(atPath: cacheDir.path) { + Logger.info("Removing cache entry for manifest ID: \(manifestId)") + try FileManager.default.removeItem(at: cacheDir) + } + } + // Shared function to handle disk image creation - can be used by both cache hit and cache miss paths private func createDiskImageFromSource( sourceURL: URL, // Source data to decompress From e2aff16432465464e6f8fec9e297ca0fe733344b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 21:29:42 -0700 Subject: [PATCH 25/28] Fix first pull --- .../ImageContainerRegistry.swift | 43 ++----------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index d51d92b3..8f59eff0 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -948,45 +948,10 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Using copyFromCache method to properly preserve partition tables") try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } else { - // If caching is disabled, just copy files directly to tempVMDir - Logger.info("Caching disabled - copying downloaded files directly to VM directory") - - // Copy non-disk files first - for file in ["config.json", "nvram.bin"] { - let sourceURL = tempDownloadDir.appendingPathComponent(file) - if FileManager.default.fileExists(atPath: sourceURL.path) { - try FileManager.default.copyItem( - at: sourceURL, - to: tempVMDir.appendingPathComponent(file) - ) - } - } - - // For the disk image, we have two cases - either a single file or parts - let diskURL = tempDownloadDir.appendingPathComponent("disk.img") - if FileManager.default.fileExists(atPath: diskURL.path) { - // Single file disk image - try FileManager.default.copyItem( - at: diskURL, - to: tempVMDir.appendingPathComponent("disk.img") - ) - Logger.info("Copied single disk.img file to VM directory") - } else { - // Multiple parts case - use the partitioned disk.img from reassembly - let diskParts = await diskPartsCollector.getSortedParts() - if !diskParts.isEmpty { - Logger.info("Using most recently assembled disk image for VM") - let assembledDiskURL = tempVMDir.appendingPathComponent("disk.img") - if FileManager.default.fileExists(atPath: assembledDiskURL.path) { - Logger.info("Assembled disk.img already exists in VM directory") - } else { - Logger.error( - "Could not find assembled disk image - VM may not boot properly") - } - } else { - Logger.error("No disk image found - VM may not boot properly") - } - } + // Even if caching is disabled, we need to use copyFromCache to assemble the disk image + // correctly with partition tables, then we'll clean up the cache afterward + Logger.info("Caching disabled - using temporary cache to assemble VM files") + try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } } From 
1ebf14f30467576053f9275337e0cdcb5c96681b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 21:42:14 -0700 Subject: [PATCH 26/28] Fix first pull --- .../ImageContainerRegistry.swift | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8f59eff0..731a706a 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1400,14 +1400,14 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1415,14 +1415,21 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - // Add to collector. It will assign the sequential part number. - let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info( - "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" - ) - } else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + + // When caching is disabled, the file might not exist with the cache path name + // Check if the file exists before trying to use it + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") + } + else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1430,14 +1437,21 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? 
"nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } + + // When caching is disabled, the file might not exist with the cache path name + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, @@ -1815,9 +1829,16 @@ class ImageContainerRegistry: @unchecked Sendable { try FileManager.default.moveItem(at: tempURL, to: url) progress.addProgress(Int64(httpResponse.expectedContentLength)) - // Cache the downloaded layer if caching is enabled - if cachingEnabled, let manifestId = manifestId { + // Always save a copy to the cache directory for use by copyFromCache, + // even if caching is disabled + if let manifestId = manifestId { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) + // Make sure cache directory exists + try FileManager.default.createDirectory( + at: cachedLayer.deletingLastPathComponent(), + withIntermediateDirectories: true + ) + if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.removeItem(at: cachedLayer) } From 2efbcb4f8f8f25b98c2a819591e94e3049a05211 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 00:29:46 -0700 Subject: [PATCH 27/28] Remove push, pull old scripts --- libs/lume/scripts/ghcr/pull-ghcr.sh | 205 -------- libs/lume/scripts/ghcr/push-ghcr.sh | 745 ---------------------------- 2 files changed, 950 deletions(-) delete mode 100755 libs/lume/scripts/ghcr/pull-ghcr.sh delete mode 100755 libs/lume/scripts/ghcr/push-ghcr.sh diff --git a/libs/lume/scripts/ghcr/pull-ghcr.sh b/libs/lume/scripts/ghcr/pull-ghcr.sh deleted file mode 100755 index 8b10fae1..00000000 --- a/libs/lume/scripts/ghcr/pull-ghcr.sh +++ /dev/null @@ -1,205 +0,0 @@ -#!/bin/bash - -# Exit immediately if a command exits with a non-zero status -set -e - -# Default parameters -organization="" -image_name="" -image_version="" -target_folder_path="" - -# Parse the command line arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --organization) - organization="$2" - shift 2 - ;; - --image-name) - image_name="$2" - shift 2 - ;; - --image-version) - image_version="$2" - shift 2 - ;; - --target-folder-path) - target_folder_path="$2" - shift 2 - ;; - --help) - echo "Usage: $0 [options]" - echo "Options:" - echo " --organization : GitHub organization (required)" - echo " --image-name : Name of the image to pull (required)" - echo " --image-version : Version of the image to pull (required)" - echo " --target-folder-path : Path where to extract the files (required)" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Ensure required arguments -if [[ -z "$organization" || -z "$image_name" || -z "$image_version" || -z "$target_folder_path" ]]; then - echo "Error: Missing required arguments. Use --help for usage." 
- exit 1 -fi - -# Check and install required tools -for tool in "jq" "pv" "parallel"; do - if ! command -v "$tool" &> /dev/null; then - echo "$tool is not installed. Installing using Homebrew..." - if ! command -v brew &> /dev/null; then - echo "Homebrew is not installed. Please install Homebrew first: https://brew.sh/" - exit 1 - fi - brew install "$tool" - fi -done - -# Create target folder if it doesn't exist -mkdir -p "$target_folder_path" - -# Create a temporary directory for processing files -work_dir=$(mktemp -d) -echo "Working directory: $work_dir" -trap 'rm -rf "$work_dir"' EXIT - -# Registry details -REGISTRY="ghcr.io" -REPOSITORY="$organization/$image_name" -TAG="$image_version" - -# Get anonymous token -echo "Getting authentication token..." -curl -s "https://$REGISTRY/token?service=ghcr.io&scope=repository:$REPOSITORY:pull" -o "$work_dir/token.json" -TOKEN=$(cat "$work_dir/token.json" | jq -r ".token") - -if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then - echo "Failed to obtain token" - exit 1 -fi - -echo "Token obtained successfully" - -# Fetch manifest -echo "Fetching manifest..." -MANIFEST_RESPONSE=$(curl -s \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: application/vnd.oci.image.manifest.v1+json" \ - "https://$REGISTRY/v2/$REPOSITORY/manifests/$TAG") - -echo "Processing manifest..." - -# Create a directory for all files -cd "$work_dir" - -# Create a download function for parallel execution -download_layer() { - local media_type="$1" - local digest="$2" - local output_file="$3" - - echo "Downloading $output_file..." - curl -s -L \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: $media_type" \ - "https://$REGISTRY/v2/$REPOSITORY/blobs/$digest" | \ - pv > "$output_file" -} -export -f download_layer -export TOKEN REGISTRY REPOSITORY - -# Process layers and create download jobs -echo "$MANIFEST_RESPONSE" | jq -c '.layers[]' | while read -r layer; do - media_type=$(echo "$layer" | jq -r '.mediaType') - digest=$(echo "$layer" | jq -r '.digest') - - # Skip empty layers - if [[ "$media_type" == "application/vnd.oci.empty.v1+json" ]]; then - continue - fi - - # Extract part information if present - if [[ $media_type =~ part\.number=([0-9]+)\;part\.total=([0-9]+) ]]; then - part_num="${BASH_REMATCH[1]}" - total_parts="${BASH_REMATCH[2]}" - echo "Found part $part_num of $total_parts" - output_file="disk.img.part.$part_num" - else - case "$media_type" in - "application/vnd.oci.image.layer.v1.tar") - output_file="disk.img" - ;; - "application/vnd.oci.image.config.v1+json") - output_file="config.json" - ;; - "application/octet-stream") - output_file="nvram.bin" - ;; - *) - echo "Unknown media type: $media_type" - continue - ;; - esac - fi - - # Add to download queue - echo "$media_type"$'\t'"$digest"$'\t'"$output_file" >> download_queue.txt -done - -# Download all files in parallel -echo "Downloading files in parallel..." -parallel --colsep $'\t' -a download_queue.txt download_layer {1} {2} {3} - -# Check if we have disk parts to reassemble -if ls disk.img.part.* 1> /dev/null 2>&1; then - echo "Found disk parts, reassembling..." - - # Get total parts from the first part's filename - first_part=$(ls disk.img.part.* | head -n 1) - total_parts=$(echo "$MANIFEST_RESPONSE" | jq -r '.layers[] | select(.mediaType | contains("part.total")) | .mediaType' | grep -o 'part\.total=[0-9]*' | cut -d= -f2 | head -n 1) - - echo "Total parts to reassemble: $total_parts" - - # Concatenate parts in order - echo "Reassembling disk image..." 
- { - for i in $(seq 1 "$total_parts"); do - part_file="disk.img.part.$i" - if [ -f "$part_file" ]; then - cat "$part_file" - else - echo "Error: Missing part $i" - exit 1 - fi - done - } | pv > "$target_folder_path/disk.img" - - echo "Disk image reassembled successfully" -else - # If no parts found, just copy disk.img if it exists - if [ -f disk.img ]; then - echo "Copying disk image..." - pv disk.img > "$target_folder_path/disk.img" - fi -fi - -# Copy config.json if it exists -if [ -f config.json ]; then - echo "Copying config.json..." - cp config.json "$target_folder_path/" -fi - -# Copy nvram.bin if it exists -if [ -f nvram.bin ]; then - echo "Copying nvram.bin..." - cp nvram.bin "$target_folder_path/" -fi - -echo "Download complete: Files extracted to $target_folder_path" \ No newline at end of file diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh deleted file mode 100755 index d279be66..00000000 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ /dev/null @@ -1,745 +0,0 @@ -#!/bin/bash - -# Exit immediately if a command exits with a non-zero status -set -e - -# Default parameters -organization="" -folder_path="" -image_name="" -image_versions="" -chunk_size="512M" # Default chunk size for splitting large files -dry_run=true # Default: actually push to registry -reassemble=true # Default: don't reassemble in dry-run mode -# Define the OCI media type for the compressed disk layer -oci_layer_media_type="application/octet-stream+lz4" # LZ4 compression format - -# Parse the command line arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --organization) - organization="$2" - shift 2 - ;; - --folder-path) - folder_path="$2" - shift 2 - ;; - --image-name) - image_name="$2" - shift 2 - ;; - --image-versions) - image_versions="$2" - shift 2 - ;; - --chunk-size) - chunk_size="$2" - shift 2 - ;; - --dry-run) - dry_run=true - shift 1 - ;; - --reassemble) - reassemble=true - dry_run=true # Reassemble implies dry-run - shift 1 - ;; - --help) - echo "Usage: $0 [options]" - echo "Options:" - echo " --organization : GitHub organization (required if not using token)" - echo " --folder-path : Path to the folder to upload (required)" - echo " --image-name : Name of the image to publish (required)" - echo " --image-versions : Comma separated list of versions of the image to publish (required)" - echo " --chunk-size : Size of chunks for large files (e.g., 512M, default: 512M)" - echo " --dry-run : Prepare files but don't upload to registry" - echo " --reassemble : In dry-run mode, also reassemble chunks to verify integrity" - echo "Note: The script will automatically resume from the last attempt if available" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Ensure required arguments -if [[ -z "$folder_path" ]]; then - echo "Error: Missing required folder-path argument. Use --help for usage." - exit 1 -fi - -# Only check organization and other push parameters if not in dry-run mode -if [[ "$dry_run" = false ]]; then - if [[ -z "$organization" || -z "$image_name" || -z "$image_versions" ]]; then - echo "Error: Missing required arguments for push. Use --help for usage." - exit 1 - fi - - # Check if the GITHUB_TOKEN variable is set - if [[ -z "$GITHUB_TOKEN" ]]; then - echo "Error: GITHUB_TOKEN is not set." - exit 1 - fi -fi - -# Ensure the folder exists -if [[ ! -d "$folder_path" ]]; then - echo "Error: Folder $folder_path does not exist." - exit 1 -fi - -# Check and install required tools -for tool in "oras" "split" "pv" "jq" "lz4"; do - if ! 
command -v "$tool" &> /dev/null; then - echo "$tool is not installed. Installing using Homebrew..." - if ! command -v brew &> /dev/null; then - echo "Homebrew is not installed. Please install Homebrew first: https://brew.sh/" - exit 1 - fi - brew install "$tool" - fi -done - -echo "LZ4 detected - will use for efficient compression and decompression" -compressed_ext=".lz4" - -# Authenticate with GitHub Container Registry if not in dry-run mode -if [[ "$dry_run" = false ]]; then - echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin -fi - -# Use the source folder path as the working directory and get its absolute path -work_dir=$(cd "$folder_path" && pwd) -echo "Working directory: $work_dir" - -# Function to find the most recent cache directory -find_latest_cache() { - local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1) - if [ -n "$latest_cache" ]; then - echo "$latest_cache" - else - echo "" - fi -} - -# Function to check if a cache directory is valid for resuming -is_valid_cache() { - local cache_dir="$1" - # Check if it contains the necessary files - [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ - [ -f "$cache_dir/disk.img.lz4" ] || ls "$cache_dir"/disk.img.part.* 1>/dev/null 2>&1 -} - -# Always try to find and use an existing cache -existing_cache=$(find_latest_cache) -if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then - cache_dir="$existing_cache" - - # Check if the cache contains old compressed format - if [ -f "$cache_dir/disk.img.gz" ] || [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.*.part.* 1>/dev/null 2>&1; then - echo "Error: Found legacy compressed format in cache. This script uses improved LZ4 format." - echo "Please delete the cache directory and start fresh: $cache_dir" - exit 1 - fi - - echo "Resuming from existing cache: $cache_dir" -else - echo "No valid cache found. Starting fresh." - cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)" - mkdir -p "$cache_dir" -fi - -echo "Using cache directory: $cache_dir" - -# Display space information -echo "=== DISK SPACE INFORMATION ===" -df -h "$cache_dir" | head -1 -df -h "$cache_dir" | grep -v "Filesystem" -echo - -# Change to the cache directory -cd "$cache_dir" -files=() # Initialize files array here - -# Function to check if a version was already pushed -version_pushed() { - local version="$1" - local version_file="$cache_dir/.pushed_$version" - [ -f "$version_file" ] -} - -# Function to mark a version as pushed -mark_version_pushed() { - local version="$1" - touch "$cache_dir/.pushed_$version" -} - -# Function to calculate sha256 hash -calculate_sha256() { - local file="$1" - if command -v shasum &> /dev/null; then - shasum -a 256 "$file" | awk '{print "sha256:" $1}' - else - echo "sha256:$(openssl dgst -sha256 -binary "$file" | xxd -p | tr -d '\n')" - fi -} - -# Copy config.json if it exists and not already in cache -config_json_source="$folder_path/config.json" -config_json_dest="$cache_dir/config.json" -if [ -f "$config_json_source" ]; then - if [ ! -f "$config_json_dest" ]; then - echo "Copying config.json..." - # Copy config.json as is - we'll add annotations later - cp "$config_json_source" "$config_json_dest" - fi -fi -if [ -f "$config_json_dest" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") -fi - -# Copy nvram.bin if it exists and not already in cache -if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then - echo "Copying nvram.bin..." 
- cp "$folder_path/nvram.bin" nvram.bin -fi -if [ -f "$cache_dir/nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") -fi - -# Process disk.img if it exists -disk_img_orig="$folder_path/disk.img" -original_disk_size="" -if [ -f "$disk_img_orig" ]; then - # Get original size *before* compression - original_disk_size=$(stat -f%z "$disk_img_orig") - - # Get real (non-sparse) size - real_size=$(du -k "$disk_img_orig" | cut -f1) - real_size_bytes=$((real_size * 1024)) - sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc) - echo "Disk image: $disk_img_orig" - echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))" - echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" - echo " Sparseness ratio: ${sparseness_ratio}:1" - - # If we have config.json, update it with the uncompressed disk size annotation - if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then - echo "Adding uncompressed disk size annotation: $original_disk_size bytes" - jq --arg size "$original_disk_size" '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_dest" > "$config_json_dest.tmp" - mv "$config_json_dest.tmp" "$config_json_dest" - fi - - # Create a temporary directory for disk processing - tmp_dir="$cache_dir/tmp_processing" - mkdir -p "$tmp_dir" - - # Split the disk image into chunks first (before compression) - split_parts_dir="$tmp_dir/split_parts" - mkdir -p "$split_parts_dir" - - # Check if we already have split parts - if [ -z "$(ls -A "$split_parts_dir" 2>/dev/null)" ]; then - echo "Splitting disk image into chunks of $chunk_size..." - cd "$split_parts_dir" - pv "$disk_img_orig" | split -b "$chunk_size" - "chunk." - cd "$cache_dir" - else - echo "Using existing split chunks from previous run" - fi - - # Process each chunk (compress, calculate digest, etc.) - compressed_parts_dir="$tmp_dir/compressed_parts" - mkdir -p "$compressed_parts_dir" - - # Store layer information in an array - layers=() - part_num=0 - total_parts=$(ls "$split_parts_dir"/chunk.* | wc -l) - - for chunk_file in "$split_parts_dir"/chunk.*; do - part_basename=$(basename "$chunk_file") - part_num=$((part_num + 1)) - compressed_file="$compressed_parts_dir/${part_basename}${compressed_ext}" - - if [ ! 
-f "$compressed_file" ]; then - echo "Compressing chunk $part_num of $total_parts: $part_basename" - - # Calculate uncompressed content digest before compression - uncompressed_digest=$(calculate_sha256 "$chunk_file") - - # Get uncompressed size - uncompressed_size=$(stat -f%z "$chunk_file") - - # Compress the chunk with LZ4 - lz4 -9 "$chunk_file" "$compressed_file" - - # Get compressed size - compressed_size=$(stat -f%z "$compressed_file") - - echo "Chunk $part_num: Original size: $(du -h "$chunk_file" | cut -f1), Compressed: $(du -h "$compressed_file" | cut -f1)" - else - echo "Using existing compressed chunk $part_num of $total_parts" - - # Need to calculate these values for existing files - uncompressed_digest=$(calculate_sha256 "$chunk_file") - uncompressed_size=$(stat -f%z "$chunk_file") - compressed_size=$(stat -f%z "$compressed_file") - fi - - # Store layer information - layer_info="$compressed_file:${oci_layer_media_type};uncompressed_size=$uncompressed_size;uncompressed_digest=$uncompressed_digest;part.number=$part_num;part.total=$total_parts" - layers+=("$layer_info") - done - - # Generate the files array for ORAS push - for layer_info in "${layers[@]}"; do - files+=("$layer_info") - done - - # --- Reassembly in dry-run mode --- - if [[ "$reassemble" = true ]]; then - echo "=== REASSEMBLY MODE ===" - echo "Reassembling chunks to verify integrity..." - - # Create a directory for reassembly - reassembly_dir="$cache_dir/reassembly" - mkdir -p "$reassembly_dir" - - # Prepare the reassembled file - create a properly sized sparse file first - reassembled_file="$reassembly_dir/reassembled_disk.img" - if [ -f "$reassembled_file" ]; then - echo "Removing previous reassembled file..." - rm -f "$reassembled_file" - fi - - # Get the original disk size from config annotation or directly from image - if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then - config_size=$(jq -r '.annotations."com.trycua.lume.disk.uncompressed_size" // empty' "$config_json_dest") - if [ -n "$config_size" ]; then - original_disk_size_bytes=$config_size - echo "Using uncompressed size from config: $original_disk_size_bytes bytes" - fi - fi - - # Create a sparse file of the exact original size - echo "Pre-allocating sparse file of $(du -h "$disk_img_orig" | cut -f1)..." 
- dd if=/dev/zero of="$reassembled_file" bs=1 count=0 seek=$original_disk_size - - # Make sure filesystem recognizes this as a sparse file - if [[ "$OSTYPE" == "darwin"* ]]; then - # On macOS, we can use a better sparse file creation method if mkfile is available - if command -v mkfile &> /dev/null; then - rm -f "$reassembled_file" - mkfile -n ${original_disk_size}b "$reassembled_file" - echo "Created sparse file using mkfile" - fi - else - # On Linux systems, ensure sparseness with truncate if available - if command -v truncate &> /dev/null; then - rm -f "$reassembled_file" - truncate -s $original_disk_size "$reassembled_file" - echo "Created sparse file using truncate" - fi - fi - - # Create an offset tracker to keep track of where each chunk should go - current_offset=0 - - # Decompress each chunk and write it at the correct offset - for ((i=1; i<=total_parts; i++)); do - # Find the chunk file for part number i - chunk_pattern="" - chunk_uncompressed_size="" - - for layer_info in "${layers[@]}"; do - if [[ "$layer_info" == *";part.number=$i;"* ]]; then - chunk_pattern="${layer_info%%:*}" - # Extract the uncompressed size from metadata - if [[ "$layer_info" =~ uncompressed_size=([0-9]+) ]]; then - chunk_uncompressed_size="${BASH_REMATCH[1]}" - fi - break - fi - done - - if [ -z "$chunk_pattern" ]; then - echo "Error: Could not find chunk for part $i" - exit 1 - fi - - echo "Processing part $i/$total_parts: $(basename "$chunk_pattern") at offset $current_offset..." - - # Create temp decompressed file - temp_decompressed="$reassembly_dir/temp_part_$i" - lz4 -d -f "$chunk_pattern" "$temp_decompressed" || { - echo "Error decompressing part $i" - exit 1 - } - - # Check if this chunk is all zeros (sparse data) - # Only check the first 1MB for efficiency - is_likely_sparse=false - if command -v hexdump &> /dev/null; then - # Use hexdump to check a sample of the file for non-zero content - sparse_check=$(hexdump -n 1048576 -v "$temp_decompressed" | grep -v "0000 0000 0000 0000 0000 0000 0000 0000" | head -n 1) - if [ -z "$sparse_check" ]; then - echo "Chunk appears to be all zeros (sparse data)" - is_likely_sparse=true - fi - fi - - # Use dd to write the chunk at the correct offset with sparse file handling - if [ "$is_likely_sparse" = true ]; then - # For sparse chunks, we don't need to write anything - leave as zeros - echo "Skipping write for all-zero chunk (preserving sparseness)" - elif [[ "$OSTYPE" == "darwin"* ]]; then - # macOS dd doesn't support conv=sparse, use standard approach - dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { - echo "Error writing part $i at offset $current_offset" - exit 1 - } - else - # On Linux, use conv=sparse to preserve sparseness during the write - dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=sparse,notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { - echo "Error writing part $i at offset $current_offset" - exit 1 - } - fi - - # Clean up the temporary file - rm -f "$temp_decompressed" - - # Update the offset for the next chunk - current_offset=$((current_offset + chunk_uncompressed_size)) - done - - # After all chunks are processed, ensure sparseness is preserved - if command -v cp &> /dev/null && [[ "$OSTYPE" == "darwin"* ]]; then - echo "Copying disk image to maintain sparseness..." 
- final_sparse_file="$reassembly_dir/final_disk.img" - rm -f "$final_sparse_file" 2>/dev/null - - # On macOS, use cp with the clone flag to preserve sparseness - cp -c "$reassembled_file" "$final_sparse_file" - - # Use the sparse-optimized file for verification - echo "Using sparse-optimized copy for verification" - mv "$final_sparse_file" "$reassembled_file" - sync - elif command -v cp &> /dev/null && command -v file &> /dev/null; then - # For Linux systems - echo "Optimizing file sparseness..." - final_sparse_file="$reassembly_dir/final_disk.img" - rm -f "$final_sparse_file" 2>/dev/null - - # Use cp --sparse=always on Linux - cp --sparse=always "$reassembled_file" "$final_sparse_file" - - # Use the sparse-optimized file for verification - echo "Using sparse-optimized copy for verification" - mv "$final_sparse_file" "$reassembled_file" - sync - fi - - # Make sure to sync to disk - sync - - # Calculate digests for comparison - echo "Verifying reassembled file..." - original_digest=$(calculate_sha256 "$disk_img_orig") - reassembled_digest=$(calculate_sha256 "$reassembled_file") - - # Compare the original and reassembled file sizes - original_size=$(stat -f%z "$disk_img_orig") - reassembled_size=$(stat -f%z "$reassembled_file") - - echo "Results:" - echo " Original size: $(du -h "$disk_img_orig" | cut -f1) ($original_size bytes)" - echo " Reassembled size: $(du -h "$reassembled_file" | cut -f1) ($reassembled_size bytes)" - echo " Original digest: ${original_digest#sha256:}" - echo " Reassembled digest: ${reassembled_digest#sha256:}" - - # Check if the disk is sparse - original_apparent_size=$(du -h "$disk_img_orig" | cut -f1) - original_actual_size=$(du -sh "$disk_img_orig" | cut -f1) - reassembled_apparent_size=$(du -h "$reassembled_file" | cut -f1) - reassembled_actual_size=$(du -sh "$reassembled_file" | cut -f1) - - echo " Original: Apparent size: $original_apparent_size, Actual disk usage: $original_actual_size" - echo " Reassembled: Apparent size: $reassembled_apparent_size, Actual disk usage: $reassembled_actual_size" - - if [ "$original_digest" = "$reassembled_digest" ]; then - echo "✅ VERIFICATION SUCCESSFUL: Files are identical" - else - echo "❌ VERIFICATION FAILED: Files differ" - if [ "$original_size" != "$reassembled_size" ]; then - echo " Size mismatch: Original $original_size bytes, Reassembled $reassembled_size bytes" - fi - - # Try to identify where they differ - echo "Attempting to identify differences..." - if command -v cmp &> /dev/null; then - cmp_output=$(cmp -l "$disk_img_orig" "$reassembled_file" 2>&1 | head -5) - if [[ "$cmp_output" == *"differ"* ]]; then - echo " First few differences:" - echo "$cmp_output" - fi - fi - - # Check if the virtual machine will still boot despite differences - echo "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" - echo " may be handled differently between the original and reassembled files." 
- - # Calculate a percentage comparison of used blocks - # This helps determine if the sparse issues are severe or minor - original_used_kb=$(du -k "$disk_img_orig" | cut -f1) - reassembled_used_kb=$(du -k "$reassembled_file" | cut -f1) - - # Calculate percentage difference in used space - if [ "$original_used_kb" -ne 0 ]; then - diff_percentage=$(echo "scale=2; ($reassembled_used_kb - $original_used_kb) * 100 / $original_used_kb" | bc) - echo " Disk usage difference: $diff_percentage% ($reassembled_used_kb KB vs $original_used_kb KB)" - - # If reassembled is much smaller, this likely indicates sparse regions weren't preserved - if (( $(echo "$diff_percentage < -40" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." - echo " This indicates sparse regions weren't properly preserved and may affect VM functionality." - echo " The VM might boot but could be missing applications or data." - elif (( $(echo "$diff_percentage < -10" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." - echo " Some sparse regions may not be properly preserved but VM might still function correctly." - elif (( $(echo "$diff_percentage > 10" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses more space (>10% difference)." - echo " This is unusual and may indicate improper sparse file handling." - else - echo " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." - fi - fi - fi - - echo "Reassembled file is available at: $reassembled_file" - - # If verification failed and difference is significant, try a direct copy as fallback - if [ "$original_digest" != "$reassembled_digest" ] && [ -n "$diff_percentage" ] && (( $(echo "$diff_percentage < -20" | bc -l) )); then - echo - echo "===== ATTEMPTING RECOVERY ACTION =====" - echo "Since verification failed with significant disk usage difference," - echo "trying direct copy of disk image as a fallback method." - echo - - fallback_file="$reassembly_dir/fallback_disk.img" - echo "Creating fallback disk image at: $fallback_file" - - # Use rsync with sparse option if available - if command -v rsync &> /dev/null; then - echo "Using rsync with sparse option for direct copy..." - rsync -aS --progress "$disk_img_orig" "$fallback_file" - else - # Direct cp with sparse option if available - if [[ "$OSTYPE" == "darwin"* ]]; then - echo "Using cp -c (clone) for direct copy..." - cp -c "$disk_img_orig" "$fallback_file" - else - echo "Using cp --sparse=always for direct copy..." - cp --sparse=always "$disk_img_orig" "$fallback_file" - fi - fi - - echo "Direct copy completed. You may want to try using this fallback disk image" - echo "instead if the reassembled one has issues: $fallback_file" - fi - fi - - # --- Push Logic --- - if [[ "$dry_run" = true ]]; then - echo "=== DRY RUN MODE ===" - echo "The following files would be pushed to the registry:" - for file_info in "${files[@]}"; do - file_path="${file_info%%:*}" - file_metadata="${file_info#*:}" - file_size=$(du -h "$file_path" | cut -f1) - echo " - $file_path ($file_size) with metadata: $file_metadata" - done - - if [[ -n "$image_versions" ]]; then - echo "Would push to the following versions:" - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo " - ghcr.io/$organization/$image_name:$version" - done - else - echo "No versions specified for dry run. Processing completed successfully." 
- fi - - echo "All processing tasks completed. No actual push performed." - echo "Cache directory: $cache_dir" - exit 0 - fi - - # Regular push logic (non-dry-run) - push_pids=() - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - - # Skip if version was already pushed - if version_pushed "$version"; then - echo "Version $version was already pushed, skipping..." - continue - fi - - echo "Pushing version $version..." - ( - # Use process substitution to feed file list safely if it gets long - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - mark_version_pushed "$version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done - - # --- Cleanup only if all versions were pushed successfully --- - all_versions_pushed=true - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - if ! version_pushed "$version"; then - all_versions_pushed=false - break - fi - done - - if [ "$all_versions_pushed" = true ]; then - echo "All versions pushed successfully, cleaning up cache directory..." - cd "$work_dir" - rm -rf "$cache_dir" - else - echo "Some versions failed to push. Cache directory preserved at: $cache_dir" - echo "Run again to resume from this point" - fi - -else - echo "Warning: $disk_img_orig not found." - - # If in dry run mode, just show what would happen - if [[ "$dry_run" = true ]]; then - echo "=== DRY RUN MODE ===" - if [ ${#files[@]} -gt 0 ]; then - echo "The following non-disk files would be pushed:" - for file_info in "${files[@]}"; do - file_path="${file_info%%:*}" - file_metadata="${file_info#*:}" - file_size=$(du -h "$file_path" | cut -f1) - echo " - $file_path ($file_size) with metadata: $file_metadata" - done - else - echo "No files found to push." - fi - echo "All processing tasks completed. No actual push performed." - exit 0 - fi - - # Push only config/nvram if they exist - if [ ${#files[@]} -gt 0 ]; then - echo "Pushing non-disk files..." - push_pids=() - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - - # Skip if version was already pushed - if version_pushed "$version"; then - echo "Version $version was already pushed, skipping..." - continue - fi - - echo "Pushing version $version (config/nvram only)..." - ( - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - mark_version_pushed "$version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done - - # --- Cleanup only if all versions were pushed successfully --- - all_versions_pushed=true - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - if ! version_pushed "$version"; then - all_versions_pushed=false - break - fi - done - - if [ "$all_versions_pushed" = true ]; then - echo "All non-disk versions pushed successfully, cleaning up cache directory..." 
- cd "$work_dir" - rm -rf "$cache_dir" - else - echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir" - echo "Run again to resume from this point" - fi - else - echo "No files found to push." - cd "$work_dir" - rm -rf "$cache_dir" - exit 1 - fi -fi - -# Skip final status check in dry-run mode -if [[ "$dry_run" = true ]]; then - exit 0 -fi - -# Determine final status based on the success check *before* potential cleanup -echo # Add a newline for better readability -if [ "$all_versions_pushed" = true ]; then - echo "All versions pushed successfully:" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo " Upload complete: ghcr.io/$organization/$image_name:$version" - done -else - echo "Final upload status:" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - # Check the marker file only if the overall process failed (cache preserved) - if version_pushed "$version"; then - echo " Upload complete: ghcr.io/$organization/$image_name:$version" - else - echo " Upload failed: ghcr.io/$organization/$image_name:$version" - fi - done - # Exit with error code if any version failed - exit 1 -fi From 6092a51844a980344896c458ae17667e5c81903a Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 13:09:03 -0700 Subject: [PATCH 28/28] Fix auth for public images --- .../ImageContainerRegistry.swift | 143 +++++++++--------- 1 file changed, 75 insertions(+), 68 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 731a706a..ee4375f0 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -677,7 +677,8 @@ class ImageContainerRegistry: @unchecked Sendable { // Get anonymous token Logger.info("Getting registry authentication token") - let token = try await getToken(repository: "\(self.organization)/\(imageName)") + let token = try await getToken( + repository: "\(self.organization)/\(imageName)", scopes: ["pull"]) // Fetch manifest Logger.info("Fetching Image manifest") @@ -1400,14 +1401,14 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1415,21 +1416,22 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - - // When caching is disabled, the file might not exist with the cache path name - // Check if the file exists before trying to use it - if !FileManager.default.fileExists(atPath: cachedLayer.path) { - Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") - continue - } - - // Add to collector. It will assign the sequential part number. 
- let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") - } - else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + + // When caching is disabled, the file might not exist with the cache path name + // Check if the file exists before trying to use it + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info( + "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" + ) + } else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1437,21 +1439,22 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? "nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } - + // When caching is disabled, the file might not exist with the cache path name if !FileManager.default.fileExists(atPath: cachedLayer.path) { - Logger.info("Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") + Logger.info( + "Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") continue } - + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, @@ -1691,51 +1694,45 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Cache copy complete") } - private func getToken(repository: String) async throws -> String { - let encodedRepo = - repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository - // Request both pull and push scope for uploads + private func getToken(repository: String, scopes: [String] = ["pull", "push"]) async throws + -> String + { + let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)! + + // Build scope string from scopes array + let scopeString = scopes.joined(separator: ",") + let url = URL( string: - "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)" + "https://\(self.registry)/token?scope=repository:\(encodedRepo):\(scopeString)&service=\(self.registry)" )! 
var request = URLRequest(url: url) - request.httpMethod = "GET" // Token endpoint uses GET - request.setValue("application/json", forHTTPHeaderField: "Accept") + request.httpMethod = "GET" - // *** Add Basic Authentication Header if credentials exist *** - let (username, password) = getCredentialsFromEnvironment() - if let username = username, let password = password, !username.isEmpty, !password.isEmpty { - let authString = "\(username):\(password)" - if let authData = authString.data(using: .utf8) { - let base64Auth = authData.base64EncodedString() - request.setValue("Basic \(base64Auth)", forHTTPHeaderField: "Authorization") - Logger.info("Adding Basic Authentication header to token request.") - } else { - Logger.error("Failed to encode credentials for Basic Auth.") + let session = URLSession.shared + let (data, response) = try await session.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + if httpResponse.statusCode != 200 { + // If we get 403 and we're requesting both pull and push, retry with just pull + if httpResponse.statusCode == 403 && scopes.contains("push") + && scopes.contains("pull") + { + return try await getToken(repository: repository, scopes: ["pull"]) + } + + // For pull scope only, if authentication fails, assume this is a public image + // and continue without a token (empty string) + if scopes == ["pull"] { + Logger.info( + "Authentication failed for pull scope, assuming public image and continuing without token" + ) + return "" + } + + throw PushError.authenticationFailed } - } else { - Logger.info("No credentials found in environment for token request.") - // Allow anonymous request for pull scope, but push scope likely requires auth - } - // *** End Basic Auth addition *** - - let (data, response) = try await URLSession.shared.data(for: request) - - // Check response status code *before* parsing JSON - guard let httpResponse = response as? HTTPURLResponse else { - throw PushError.authenticationFailed // Or a more generic network error - } - - guard httpResponse.statusCode == 200 else { - // Log detailed error including status code and potentially response body - let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" - Logger.error( - "Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)" - ) - // Throw specific error based on status if needed (e.g., 401 for unauthorized) - throw PushError.authenticationFailed } let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] @@ -1755,7 +1752,12 @@ class ImageContainerRegistry: @unchecked Sendable { ) { var request = URLRequest( url: URL(string: "https://\(self.registry)/v2/\(repository)/manifests/\(tag)")!) - request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + // Only add Authorization header if token is not empty + if !token.isEmpty { + request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + request.addValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Accept") let (data, response) = try await URLSession.shared.data(for: request) @@ -1808,7 +1810,12 @@ class ImageContainerRegistry: @unchecked Sendable { do { var request = URLRequest( url: URL(string: "https://\(self.registry)/v2/\(repository)/blobs/\(digest)")!) 
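For reference, the token fallback introduced in this patch (request a token for "pull,push", drop back to "pull" on a 403, and finally proceed anonymously for public images) can be sketched in isolation roughly as follows. The names fetchRegistryToken and RegistryAuthError are placeholders for illustration, not symbols from ImageContainerRegistry.swift, and the flow only approximates what the diff implements.

import Foundation

enum RegistryAuthError: Error {
    case badResponse
    case unauthorized
}

/// Illustrative sketch: request a bearer token from an OCI registry's token
/// endpoint, retrying with a narrower scope and finally falling back to
/// anonymous access (an empty token) for public repositories.
func fetchRegistryToken(
    registry: String,
    repository: String,
    scopes: [String] = ["pull", "push"]
) async throws -> String {
    let encodedRepo =
        repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository
    let scopeString = scopes.joined(separator: ",")
    let url = URL(
        string:
            "https://\(registry)/token?scope=repository:\(encodedRepo):\(scopeString)&service=\(registry)"
    )!

    let (data, response) = try await URLSession.shared.data(from: url)
    guard let http = response as? HTTPURLResponse else { throw RegistryAuthError.badResponse }

    if http.statusCode != 200 {
        // Narrow the scope once: push may be denied while pull is still allowed.
        if http.statusCode == 403 && scopes.contains("push") {
            return try await fetchRegistryToken(
                registry: registry, repository: repository, scopes: ["pull"])
        }
        // For a pull-only request, treat the failure as "public image, no token needed".
        if scopes == ["pull"] { return "" }
        throw RegistryAuthError.unauthorized
    }

    let json = try JSONSerialization.jsonObject(with: data) as? [String: Any]
    guard let token = json?["token"] as? String else { throw RegistryAuthError.badResponse }
    return token
}

A caller would then attach an "Authorization: Bearer" header only when the returned token is non-empty, which is the same conditional-header pattern the surrounding hunks apply to the manifest and blob requests.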
- request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + // Only add Authorization header if token is not empty + if !token.isEmpty { + request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + request.addValue(mediaType, forHTTPHeaderField: "Accept") request.timeoutInterval = 60 @@ -1838,7 +1845,7 @@ class ImageContainerRegistry: @unchecked Sendable { at: cachedLayer.deletingLastPathComponent(), withIntermediateDirectories: true ) - + if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.removeItem(at: cachedLayer) }
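The two ideas in the hunk above (send the Authorization header only when a token is present, and keep a copy of every downloaded layer in the cache directory so copyFromCache can find it even when caching is disabled) can be sketched together as a small standalone helper. downloadAndCacheBlob and BlobDownloadError are hypothetical names, and the real method also streams to a temporary file, reports progress, and handles retries, all of which are omitted here.

import Foundation

enum BlobDownloadError: Error {
    case requestFailed
}

/// Illustrative sketch: fetch a blob with an optional bearer token and keep a
/// copy at the cache path so later assembly steps can reuse it.
func downloadAndCacheBlob(
    registry: String, repository: String, digest: String,
    mediaType: String, token: String, cachedLayer: URL
) async throws -> Data {
    var request = URLRequest(
        url: URL(string: "https://\(registry)/v2/\(repository)/blobs/\(digest)")!)
    // Public images are pulled anonymously, so only add the header when a token exists.
    if !token.isEmpty {
        request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
    }
    request.addValue(mediaType, forHTTPHeaderField: "Accept")

    let (data, response) = try await URLSession.shared.data(for: request)
    guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
        throw BlobDownloadError.requestFailed
    }

    // Ensure the per-manifest cache directory exists, then replace any stale copy.
    let fm = FileManager.default
    try fm.createDirectory(
        at: cachedLayer.deletingLastPathComponent(), withIntermediateDirectories: true)
    if fm.fileExists(atPath: cachedLayer.path) {
        try fm.removeItem(at: cachedLayer)
    }
    try data.write(to: cachedLayer)
    return data
}

Unlike this sketch, the patched implementation downloads to a temporary URL and moves the file into place rather than holding the whole blob in memory.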