From 0ba1b970531ded1030e00b7bbcc99c4e5f7d9bf0 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 14 Apr 2025 00:25:25 -0700 Subject: [PATCH 01/43] Fix broken storage tests --- libs/lume/tests/Mocks/MockVM.swift | 24 ++- .../lume/tests/VM/VMDetailsPrinterTests.swift | 139 ++++++++++-------- libs/lume/tests/VMTests.swift | 67 +++++---- 3 files changed, 132 insertions(+), 98 deletions(-) diff --git a/libs/lume/tests/Mocks/MockVM.swift b/libs/lume/tests/Mocks/MockVM.swift index 907252da..ea21fb6d 100644 --- a/libs/lume/tests/Mocks/MockVM.swift +++ b/libs/lume/tests/Mocks/MockVM.swift @@ -1,15 +1,18 @@ import Foundation + @testable import lume @MainActor class MockVM: VM { private var mockIsRunning = false - + override func getOSType() -> String { return "mock-os" } - - override func setup(ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String) async throws { + + override func setup( + ipswPath: String, cpuCount: Int, memorySize: UInt64, diskSize: UInt64, display: String + ) async throws { // Mock setup implementation vmDirContext.config.setCpuCount(cpuCount) vmDirContext.config.setMemorySize(memorySize) @@ -17,12 +20,19 @@ class MockVM: VM { vmDirContext.config.setMacAddress("00:11:22:33:44:55") try vmDirContext.saveConfig() } - - override func run(noDisplay: Bool, sharedDirectories: [SharedDirectory], mount: Path?, vncPort: Int = 0, recoveryMode: Bool = false) async throws { + + override func run( + noDisplay: Bool, sharedDirectories: [SharedDirectory], mount: Path?, vncPort: Int = 0, + recoveryMode: Bool = false, usbMassStoragePaths: [Path]? = nil + ) async throws { mockIsRunning = true - try await super.run(noDisplay: noDisplay, sharedDirectories: sharedDirectories, mount: mount, vncPort: vncPort, recoveryMode: recoveryMode) + try await super.run( + noDisplay: noDisplay, sharedDirectories: sharedDirectories, mount: mount, + vncPort: vncPort, recoveryMode: recoveryMode, + usbMassStoragePaths: usbMassStoragePaths + ) } - + override func stop() async throws { mockIsRunning = false try await super.stop() diff --git a/libs/lume/tests/VM/VMDetailsPrinterTests.swift b/libs/lume/tests/VM/VMDetailsPrinterTests.swift index 4c8b864e..42de5f9f 100644 --- a/libs/lume/tests/VM/VMDetailsPrinterTests.swift +++ b/libs/lume/tests/VM/VMDetailsPrinterTests.swift @@ -1,70 +1,89 @@ -import Testing import Foundation +import Testing + @testable import lume struct VMDetailsPrinterTests { - + @Test func printStatus_whenJSON() throws { - // Given - let vms: [VMDetails] = [VMDetails(name: "name", - os: "os", - cpuCount: 2, - memorySize: 1024, - diskSize: .init(allocated: 24, total: 30), - display: "1024x768", - status: "status", - vncUrl: "vncUrl", - ipAddress: "0.0.0.0")] - let jsonEncoder = JSONEncoder() - jsonEncoder.outputFormatting = .prettyPrinted - let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)! - - // When - var printedStatus: String? - try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 }) + // Given + let vms: [VMDetails] = [ + VMDetails( + name: "name", + os: "os", + cpuCount: 2, + memorySize: 1024, + diskSize: .init(allocated: 24, total: 30), + display: "1024x768", + status: "status", + vncUrl: "vncUrl", + ipAddress: "0.0.0.0", + locationName: "mockLocation") + ] + let jsonEncoder = JSONEncoder() + jsonEncoder.outputFormatting = .prettyPrinted + let expectedOutput = try String(data: jsonEncoder.encode(vms), encoding: .utf8)! 
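+            // JSONEncoder does not guarantee key ordering, so the assertions
+            // below decode both strings back into [VMDetails] and compare
+            // field-by-field rather than comparing the raw JSON text.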
- // Then - // Decode both JSONs and compare the actual data structures - let jsonDecoder = JSONDecoder() - let printedVMs = try jsonDecoder.decode([VMDetails].self, from: printedStatus!.data(using: .utf8)!) - let expectedVMs = try jsonDecoder.decode([VMDetails].self, from: expectedOutput.data(using: .utf8)!) - - #expect(printedVMs.count == expectedVMs.count) - for (printed, expected) in zip(printedVMs, expectedVMs) { - #expect(printed.name == expected.name) - #expect(printed.os == expected.os) - #expect(printed.cpuCount == expected.cpuCount) - #expect(printed.memorySize == expected.memorySize) - #expect(printed.diskSize.allocated == expected.diskSize.allocated) - #expect(printed.diskSize.total == expected.diskSize.total) - #expect(printed.status == expected.status) - #expect(printed.vncUrl == expected.vncUrl) - #expect(printed.ipAddress == expected.ipAddress) - } + // When + var printedStatus: String? + try VMDetailsPrinter.printStatus(vms, format: .json, print: { printedStatus = $0 }) + + // Then + // Decode both JSONs and compare the actual data structures + let jsonDecoder = JSONDecoder() + let printedVMs = try jsonDecoder.decode( + [VMDetails].self, from: printedStatus!.data(using: .utf8)!) + let expectedVMs = try jsonDecoder.decode( + [VMDetails].self, from: expectedOutput.data(using: .utf8)!) + + #expect(printedVMs.count == expectedVMs.count) + for (printed, expected) in zip(printedVMs, expectedVMs) { + #expect(printed.name == expected.name) + #expect(printed.os == expected.os) + #expect(printed.cpuCount == expected.cpuCount) + #expect(printed.memorySize == expected.memorySize) + #expect(printed.diskSize.allocated == expected.diskSize.allocated) + #expect(printed.diskSize.total == expected.diskSize.total) + #expect(printed.status == expected.status) + #expect(printed.vncUrl == expected.vncUrl) + #expect(printed.ipAddress == expected.ipAddress) } - - @Test func printStatus_whenNotJSON() throws { - // Given - let vms: [VMDetails] = [VMDetails(name: "name", - os: "os", - cpuCount: 2, - memorySize: 1024, - diskSize: .init(allocated: 24, total: 30), - display: "1024x768", - status: "status", - vncUrl: "vncUrl", - ipAddress: "0.0.0.0")] - - // When - var printedLines: [String] = [] - try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) }) + } - // Then - #expect(printedLines.count == 2) - - let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace) - #expect(headerParts == ["name", "os", "cpu", "memory", "disk", "display", "status", "ip", "vnc"]) + @Test func printStatus_whenNotJSON() throws { + // Given + let vms: [VMDetails] = [ + VMDetails( + name: "name", + os: "os", + cpuCount: 2, + memorySize: 1024, + diskSize: .init(allocated: 24, total: 30), + display: "1024x768", + status: "status", + vncUrl: "vncUrl", + ipAddress: "0.0.0.0", + locationName: "mockLocation") + ] - #expect(printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == ["name", "os", "2", "0.00G", "24.0B/30.0B", "1024x768", "status", "0.0.0.0", "vncUrl"]) - } + // When + var printedLines: [String] = [] + try VMDetailsPrinter.printStatus(vms, format: .text, print: { printedLines.append($0) }) + + // Then + #expect(printedLines.count == 2) + + let headerParts = printedLines[0].split(whereSeparator: \.isWhitespace) + #expect( + headerParts == [ + "name", "os", "cpu", "memory", "disk", "display", "status", "storage", "ip", "vnc", + ]) + + #expect( + printedLines[1].split(whereSeparator: \.isWhitespace).map(String.init) == [ + "name", "os", "2", "0.00G", 
"24.0B/30.0B", "1024x768", "status", "mockLocation", + "0.0.0.0", + "vncUrl", + ]) + } } diff --git a/libs/lume/tests/VMTests.swift b/libs/lume/tests/VMTests.swift index 09a56e06..e7e31287 100644 --- a/libs/lume/tests/VMTests.swift +++ b/libs/lume/tests/VMTests.swift @@ -1,10 +1,11 @@ import Foundation import Testing + @testable import lume class MockProcessRunner: ProcessRunner { var runCalls: [(executable: String, arguments: [String])] = [] - + func run(executable: String, arguments: [String]) throws { runCalls.append((executable, arguments)) } @@ -12,17 +13,17 @@ class MockProcessRunner: ProcessRunner { private func setupVMDirectory(_ tempDir: URL) throws -> VMDirectory { let vmDir = VMDirectory(Path(tempDir.path)) - + // Create disk image file let diskPath = vmDir.diskPath - let diskData = Data(repeating: 0, count: 1024 * 1024) // 1MB mock disk + let diskData = Data(repeating: 0, count: 1024 * 1024) // 1MB mock disk try diskData.write(to: diskPath.url) - + // Create nvram file let nvramPath = vmDir.nvramPath - let nvramData = Data(repeating: 0, count: 1024) // 1KB mock nvram + let nvramData = Data(repeating: 0, count: 1024) // 1KB mock nvram try nvramData.write(to: nvramPath.url) - + // Create initial config file var config = try VMConfig( os: "mock-os", @@ -33,11 +34,11 @@ private func setupVMDirectory(_ tempDir: URL) throws -> VMDirectory { ) config.setMacAddress("00:11:22:33:44:55") try vmDir.saveConfig(config) - + // Create .initialized file to mark VM as initialized let initializedPath = vmDir.dir.file(".initialized") try Data().write(to: initializedPath.url) - + return vmDir } @@ -53,16 +54,16 @@ func testVMInitialization() async throws { diskSize: 1024, display: "1024x768" ) - config.setMacAddress("00:11:22:33:44:55") // Set MAC address to avoid nil + config.setMacAddress("00:11:22:33:44:55") // Set MAC address to avoid nil let home = Home(fileManager: FileManager.default) - let context = VMDirContext(dir: vmDir, config: config, home: home) - + let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil) + let vm = MockVM( vmDirContext: context, virtualizationServiceFactory: { _ in MockVMVirtualizationService() }, vncServiceFactory: { MockVNCService(vmDirectory: $0) } ) - + // Test initial state let details = vm.details #expect(details.name == vmDir.name) @@ -85,22 +86,24 @@ func testVMRunAndStop() async throws { ) config.setMacAddress("00:11:22:33:44:55") let home = Home(fileManager: FileManager.default) - let context = VMDirContext(dir: vmDir, config: config, home: home) - + let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil) + let vm = MockVM( vmDirContext: context, virtualizationServiceFactory: { _ in MockVMVirtualizationService() }, vncServiceFactory: { MockVNCService(vmDirectory: $0) } ) - + // Test running VM let runTask = Task { - try await vm.run(noDisplay: false, sharedDirectories: [], mount: nil, vncPort: 0, recoveryMode: false) + try await vm.run( + noDisplay: false, sharedDirectories: [], mount: nil as Path?, vncPort: 0, + recoveryMode: false) } - + // Give the VM time to start try await Task.sleep(nanoseconds: UInt64(1e9)) - + // Test stopping VM try await vm.stop() runTask.cancel() @@ -120,22 +123,22 @@ func testVMConfigurationUpdates() async throws { ) config.setMacAddress("00:11:22:33:44:55") let home = Home(fileManager: FileManager.default) - let context = VMDirContext(dir: vmDir, config: config, home: home) - + let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil) + let vm = MockVM( 
vmDirContext: context, virtualizationServiceFactory: { _ in MockVMVirtualizationService() }, vncServiceFactory: { MockVNCService(vmDirectory: $0) } ) - + // Test CPU count update try vm.setCpuCount(4) #expect(vm.vmDirContext.config.cpuCount == 4) - + // Test memory size update try vm.setMemorySize(4096) #expect(vm.vmDirContext.config.memorySize == 4096) - + // Test MAC address update try vm.setMacAddress("00:11:22:33:44:66") #expect(vm.vmDirContext.config.macAddress == "00:11:22:33:44:66") @@ -155,16 +158,16 @@ func testVMSetup() async throws { ) config.setMacAddress("00:11:22:33:44:55") let home = Home(fileManager: FileManager.default) - let context = VMDirContext(dir: vmDir, config: config, home: home) - + let context = VMDirContext(dir: vmDir, config: config, home: home, storage: nil) + let vm = MockVM( vmDirContext: context, virtualizationServiceFactory: { _ in MockVMVirtualizationService() }, vncServiceFactory: { MockVNCService(vmDirectory: $0) } ) - - let expectedDiskSize: UInt64 = 64 * 1024 * 1024 * 1024 // 64 GB - + + let expectedDiskSize: UInt64 = 64 * 1024 * 1024 * 1024 // 64 GB + try await vm.setup( ipswPath: "/path/to/mock.ipsw", cpuCount: 2, @@ -172,11 +175,13 @@ func testVMSetup() async throws { diskSize: expectedDiskSize, display: "1024x768" ) - + #expect(vm.vmDirContext.config.cpuCount == 2) #expect(vm.vmDirContext.config.memorySize == 2048) let actualDiskSize = vm.vmDirContext.config.diskSize ?? 0 - #expect(actualDiskSize == expectedDiskSize, "Expected disk size \(expectedDiskSize), but got \(actualDiskSize)") + #expect( + actualDiskSize == expectedDiskSize, + "Expected disk size \(expectedDiskSize), but got \(actualDiskSize)") #expect(vm.vmDirContext.config.macAddress == "00:11:22:33:44:55") } @@ -184,4 +189,4 @@ private func createTempDirectory() throws -> URL { let tempDir = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) return tempDir -} \ No newline at end of file +} From 18f92c6a85f19ff3fd5763c488fd849d4a366461 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 15 Apr 2025 10:02:13 -0700 Subject: [PATCH 02/43] Use sparse files --- libs/lume/scripts/ghcr/push-ghcr.sh | 182 ++++++----- .../ImageContainerRegistry.swift | 296 +++++++++++------- 2 files changed, 278 insertions(+), 200 deletions(-) diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index c204f97b..fbe4ab7e 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -83,19 +83,19 @@ done # Authenticate with GitHub Container Registry echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin -# Create a temporary directory for processing files -work_dir=$(mktemp -d) -echo "Working directory: $work_dir" -trap 'rm -rf "$work_dir"' EXIT +# Use the source folder path as the working directory and get its absolute path +work_dir=$(cd "$folder_path" && pwd) +echo "Working directory (persistent cache): $work_dir" -# Create a directory for all files -mkdir -p "$work_dir/files" -cd "$work_dir/files" +# Change to the working directory +cd "$work_dir" +files=() # Initialize files array here # Copy config.json if it exists if [ -f "$folder_path/config.json" ]; then echo "Copying config.json..." 
cp "$folder_path/config.json" config.json + files+=("config.json:application/vnd.oci.image.config.v1+json") fi # Copy nvram.bin if it exists @@ -103,106 +103,104 @@ nvram_bin="$folder_path/nvram.bin" if [ -f "$nvram_bin" ]; then echo "Copying nvram.bin..." cp "$nvram_bin" nvram.bin + files+=("nvram.bin:application/octet-stream") fi -# Process disk.img if it exists and needs splitting -disk_img="$folder_path/disk.img" -if [ -f "$disk_img" ]; then - file_size=$(stat -f%z "$disk_img") - if [ $file_size -gt 524288000 ]; then # 500MB in bytes - echo "Splitting large file: disk.img" - echo "Original disk.img size: $(du -h "$disk_img" | cut -f1)" - - # Copy and split the file with progress monitoring - echo "Copying disk image..." - pv "$disk_img" > disk.img - - echo "Splitting file..." - split -b "$chunk_size" disk.img disk.img.part. - rm disk.img +# Process disk.img if it exists +disk_img_orig="disk.img" # Already in work_dir +if [ -f "$disk_img_orig" ]; then + # --- Compression Step --- + echo "Compressing $disk_img_orig..." + compressed_ext=".gz" + compressor="gzip" + compress_opts="-k -f" + compressed_disk_img="disk.img${compressed_ext}" + pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img" + compressed_size=$(stat -f%z "$compressed_disk_img") + echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" + # --- End Compression Step --- - # Get original file size for verification - original_size=$(stat -f%z "$disk_img") - echo "Original disk.img size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" + # Check if splitting is needed based on *compressed* size + if [ $compressed_size -gt 524288000 ]; then # 500MB threshold + echo "Splitting compressed file: $compressed_disk_img" + split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part." + # Keep the compressed file and parts in work_dir - # Verify split parts total size - total_size=0 - total_parts=$(ls disk.img.part.* | wc -l | tr -d ' ') + # --- Adjust part processing --- + parts_files=() + total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ') part_num=0 - - # Create array for files and their annotations - files=() - for part in disk.img.part.*; do - part_size=$(stat -f%z "$part") - total_size=$((total_size + part_size)) + for part in "$compressed_disk_img.part."*; do part_num=$((part_num + 1)) - echo "Part $part: $(awk -v size=$part_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - files+=("$part:application/vnd.oci.image.layer.v1.tar;part.number=$part_num;part.total=$total_parts") + # *** IMPORTANT: Use the *compressed* OCI media type with part info *** + parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts") + echo "Part $part: $(du -h "$part" | cut -f1)" done + # Combine non-disk files with disk parts + files+=("${parts_files[@]}") + # --- End Adjust part processing --- - echo "Total size of parts: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - - # Verify total size matches original - if [ $total_size -ne $original_size ]; then - echo "ERROR: Size mismatch!" 
- echo "Original file size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - echo "Sum of parts size: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')" - echo "Difference: $(awk -v orig=$original_size -v total=$total_size 'BEGIN {printf "%.2f GB", (orig-total)/1024/1024/1024}')" - exit 1 - fi - - # Add remaining files - if [ -f "config.json" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") - fi - - if [ -f "nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") - fi - - # Push versions in parallel - push_pids=() - for version in $image_versions; do - ( - echo "Pushing version $version..." - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done else - # Push disk.img directly if it's small enough - echo "Copying disk image..." - pv "$disk_img" > disk.img - - # Push all files together - echo "Pushing all files..." - files=("disk.img:application/vnd.oci.image.layer.v1.tar") - - if [ -f "config.json" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") - fi - - if [ -f "nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") - fi + # Add the single compressed file to the list + # *** IMPORTANT: Use the *compressed* OCI media type *** + files+=("$compressed_disk_img:${oci_layer_media_type}") + fi - for version in $image_versions; do - # Push all files in one command + # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) --- + push_pids=() + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + + echo "Pushing version $version..." + ( + # Use process substitution to feed file list safely if it gets long oras push --disable-path-validation \ "ghcr.io/$organization/$image_name:$version" \ "${files[@]}" - done + echo "Completed push for version $version" + ) & + push_pids+=($!) + done + + # Wait for all pushes to complete + for pid in "${push_pids[@]}"; do + wait "$pid" + done + + # --- Cleanup compressed files after successful push --- + echo "Push successful, cleaning up compressed artifacts..." + # Check if parts exist first + parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null) + if [ -n "$parts_exist" ]; then + echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img" + rm -f "$compressed_disk_img.part."* + # Also remove the original compressed file that was split + rm -f "$compressed_disk_img" + elif [ -f "$compressed_disk_img" ]; then + echo "Removing compressed file: $compressed_disk_img" + rm -f "$compressed_disk_img" + fi + # --- End Push Logic --- + +else + echo "Warning: $disk_img_orig not found." + # Push only config/nvram if they exist + if [ ${#files[@]} -gt 0 ]; then + # (Add push logic here too if you want to push even without disk.img) + echo "Pushing non-disk files..." + # ... (similar push loop as above) ... + else + echo "No files found to push." 
+ exit 1 fi fi -for version in $image_versions; do +for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi echo "Upload complete: ghcr.io/$organization/$image_name:$version" done diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index cd0f8fb8..9ef32355 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -809,94 +809,118 @@ class ImageContainerRegistry: @unchecked Sendable { ) // Create sparse file of the required size - FileManager.default.createFile(atPath: outputURL.path, contents: nil) let outputHandle = try FileHandle(forWritingTo: outputURL) + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: expectedTotalSize) var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var processedSize: UInt64 = 0 + var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - // Process each part in order for partNum in 1...totalParts { - guard let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) else { + // Find the original layer info for this part number + guard + let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }), + let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) + else { throw PullError.missingPart(partNum) } + let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( "Processing part \(partNum) of \(totalParts): \(partURL.lastPathComponent)") - // Get part file size - let partAttributes = try FileManager.default.attributesOfItem( - atPath: partURL.path) - let partSize = partAttributes[.size] as? UInt64 ?? 0 - - // Calculate the offset in the final file (parts are sequential) - let partOffset = processedSize - - // Open input file let inputHandle = try FileHandle(forReadingFrom: partURL) defer { try? inputHandle.close() - // Don't delete the part file if it's from cache + // Clean up temp downloaded part if not from cache if !partURL.path.contains(cacheDirectory.path) { try? FileManager.default.removeItem(at: partURL) } } - // Seek to the appropriate offset in output file - try outputHandle.seek(toOffset: partOffset) + // Seek to the correct offset in the output sparse file + try outputHandle.seek(toOffset: currentOffset) - // Copy data in chunks to avoid memory issues - let chunkSize: UInt64 = - determineIfMemoryConstrained() ? 256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB) - var bytesWritten: UInt64 = 0 + if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType + Logger.info("Decompressing part \(partNum)...") + let process = Process() + let pipe = Pipe() + process.executableURL = URL(fileURLWithPath: "/bin/sh") + process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection + process.standardOutput = pipe // Capture decompressed output - while bytesWritten < partSize { - // Use Foundation's autoreleasepool for proper memory management - Foundation.autoreleasepool { - let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten) - if let chunk = try? inputHandle.read(upToCount: Int(readSize)) { - if !chunk.isEmpty { - try? 
outputHandle.write(contentsOf: chunk) - bytesWritten += UInt64(chunk.count) + try process.run() - // Update progress less frequently to reduce overhead - if bytesWritten % (chunkSize * 4) == 0 - || bytesWritten == partSize - { - let totalProgress = - Double(processedSize + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling disk image") - } - } + let reader = pipe.fileHandleForReading + var partDecompressedSize: UInt64 = 0 + + // Read decompressed data in chunks and write to sparse file + while true { + let data = autoreleasepool { // Help manage memory with large files + reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks } + if data.isEmpty { break } // End of stream - // Add a small delay every few MB to allow memory cleanup - if bytesWritten % (chunkSize * 16) == 0 && bytesWritten > 0 { - // Use Thread.sleep for now, but ideally this would use a non-blocking approach - // that is appropriate for the context (sync/async) - Thread.sleep(forTimeInterval: 0.01) - } + try outputHandle.write(contentsOf: data) + partDecompressedSize += UInt64(data.count) + + // Update progress based on decompressed size written + let totalProgress = + Double(currentOffset + partDecompressedSize) + / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling/Decompressing") } + process.waitUntilExit() + if process.terminationStatus != 0 { + throw PullError.decompressionFailed("Part \(partNum)") + } + currentOffset += partDecompressedSize // Advance offset by decompressed size + + } else { + // --- Handle non-compressed parts (if any, or the single file case) --- + // This part is similar to your original copy logic, writing directly + // from inputHandle to outputHandle at currentOffset + Logger.info("Copying non-compressed part \(partNum)...") + let partSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 + while bytesWritten < partSize { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") + } + currentOffset += bytesWritten + // --- End non-compressed handling --- } - // Update processed size - processedSize += partSize + // Ensure data is written before processing next part (optional but safer) + try outputHandle.synchronize() } - // Finalize progress - reassemblyProgressLogger.logProgress( - current: 1.0, context: "Reassembling disk image") - Logger.info("") // Newline after progress - - // Close the output file - try outputHandle.synchronize() - try outputHandle.close() + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline // Verify final size let finalSize = @@ -1031,86 +1055,112 @@ class ImageContainerRegistry: @unchecked Sendable { ) // Create sparse file of the required size - FileManager.default.createFile(atPath: outputURL.path, contents: nil) let outputHandle = try FileHandle(forWritingTo: outputURL) + defer { try? 
outputHandle.close() } // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: expectedTotalSize) var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var processedSize: UInt64 = 0 + var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - // Process each part in order for partNum in 1...totalParts { - guard let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) else { + // Find the original layer info for this part number + guard + let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }), + let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) + else { throw PullError.missingPart(partNum) } + let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( "Processing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent)" ) - // Get part file size - let partAttributes = try FileManager.default.attributesOfItem( - atPath: sourceURL.path) - let partSize = partAttributes[.size] as? UInt64 ?? 0 - - // Calculate the offset in the final file (parts are sequential) - let partOffset = processedSize - - // Open input file let inputHandle = try FileHandle(forReadingFrom: sourceURL) defer { try? inputHandle.close() } - // Seek to the appropriate offset in output file - try outputHandle.seek(toOffset: partOffset) + // Seek to the correct offset in the output sparse file + try outputHandle.seek(toOffset: currentOffset) - // Copy data in chunks to avoid memory issues - let chunkSize: UInt64 = determineIfMemoryConstrained() ? 256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB) - var bytesWritten: UInt64 = 0 + if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType + Logger.info("Decompressing part \(partNum)...") + let process = Process() + let pipe = Pipe() + process.executableURL = URL(fileURLWithPath: "/bin/sh") + process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection + process.standardOutput = pipe // Capture decompressed output - while bytesWritten < partSize { - // Use Foundation's autoreleasepool for proper memory management - Foundation.autoreleasepool { - let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten) - if let chunk = try? inputHandle.read(upToCount: Int(readSize)) { - if !chunk.isEmpty { - try? 
outputHandle.write(contentsOf: chunk) - bytesWritten += UInt64(chunk.count) + try process.run() - // Update progress less frequently to reduce overhead - if bytesWritten % (chunkSize * 4) == 0 || bytesWritten == partSize { - let totalProgress = - Double(processedSize + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling disk image from cache") - } - } + let reader = pipe.fileHandleForReading + var partDecompressedSize: UInt64 = 0 + + // Read decompressed data in chunks and write to sparse file + while true { + let data = autoreleasepool { // Help manage memory with large files + reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks } + if data.isEmpty { break } // End of stream - // Add a small delay every few MB to allow memory cleanup - if bytesWritten % (chunkSize * 16) == 0 && bytesWritten > 0 { - // Use Thread.sleep for now, but ideally this would use a non-blocking approach - // that is appropriate for the context (sync/async) - Thread.sleep(forTimeInterval: 0.01) - } + try outputHandle.write(contentsOf: data) + partDecompressedSize += UInt64(data.count) + + // Update progress based on decompressed size written + let totalProgress = + Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") } + process.waitUntilExit() + if process.terminationStatus != 0 { + throw PullError.decompressionFailed("Part \(partNum)") + } + currentOffset += partDecompressedSize // Advance offset by decompressed size + + } else { + // --- Handle non-compressed parts (if any, or the single file case) --- + // This part is similar to your original copy logic, writing directly + // from inputHandle to outputHandle at currentOffset + Logger.info("Copying non-compressed part \(partNum)...") + let partSize = + (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] + as? UInt64) ?? 0 + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 + while bytesWritten < partSize { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Reassembling") + } + currentOffset += bytesWritten + // --- End non-compressed handling --- } - // Update processed size - processedSize += partSize + // Ensure data is written before processing next part (optional but safer) + try outputHandle.synchronize() } - // Finalize progress - reassemblyProgressLogger.logProgress( - current: 1.0, context: "Reassembling disk image from cache") - Logger.info("") // Newline after progress - - // Close the output file - try outputHandle.synchronize() - try outputHandle.close() + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline // Verify final size let finalSize = @@ -1646,4 +1696,34 @@ class ImageContainerRegistry: @unchecked Sendable { return nil } + + // Add helper to check media type and get decompress command + private func getDecompressionCommand(for mediaType: String) -> String? 
{ + if mediaType.hasSuffix("+gzip") { + return "/usr/bin/gunzip -c" // -c writes to stdout + } else if mediaType.hasSuffix("+zstd") { + // Check if zstd exists, otherwise handle error? + // Assuming brew install zstd -> /opt/homebrew/bin/zstd or /usr/local/bin/zstd + let zstdPath = findExecutablePath(named: "zstd") ?? "/usr/local/bin/zstd" + return "\(zstdPath) -dc" // -d decompress, -c stdout + } + return nil // Not compressed or unknown compression + } + + // Helper to find executables (optional, or hardcode paths) + private func findExecutablePath(named executableName: String) -> String? { + let pathEnv = + ProcessInfo.processInfo.environment["PATH"] + ?? "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin" + let paths = pathEnv.split(separator: ":") + for path in paths { + let executablePath = URL(fileURLWithPath: String(path)).appendingPathComponent( + executableName + ).path + if FileManager.default.isExecutableFile(atPath: executablePath) { + return executablePath + } + } + return nil + } } From b686b3ec3aa687fc62185c4de5ec55ae9bcf3642 Mon Sep 17 00:00:00 2001 From: trospix Date: Fri, 18 Apr 2025 17:52:03 +0100 Subject: [PATCH 03/43] docs: Update FAQ section with new information about VM IP bug and EasyOCR Certificate --- docs/FAQ.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/FAQ.md b/docs/FAQ.md index a342f6c0..913e665c 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -29,6 +29,74 @@ No, macOS uses sparse files, which only allocate space as needed. For example, V lume delete ``` +### How do I fix EasyOCR `[SSL: CERTIFICATE_VERIFY_FAILED]` errors? + +**Symptom:** +When running an agent that uses OCR (e.g., with `AgentLoop.OMNI`), you might encounter an error during the first run or initialization phase that includes: +``` +ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000) +``` + +**Cause:** +This usually happens when EasyOCR attempts to download its language models over HTTPS. Python's SSL module cannot verify the server's certificate because it can't locate the necessary root Certificate Authority (CA) certificates in your environment's trust store. + +**Solution:** +You need to explicitly tell Python where to find a trusted CA bundle. The `certifi` package provides one. Before running your Python agent script, set the following environment variables in the *same terminal session*: +```bash +# Ensure certifi is installed: pip show certifi +export SSL_CERT_FILE=$(python -m certifi) +export REQUESTS_CA_BUNDLE=$(python -m certifi) + +# Now run your Python script that uses the agent... +# python your_agent_script.py +``` +This directs Python to use the CA bundle provided by `certifi` for SSL verification. + +### How do I troubleshoot the agent failing to get the VM IP address or getting stuck on "VM status changed to: stopped"? + +**Symptom:** +When running your agent script (e.g., using `Computer().run(...)`), the script might hang during the VM startup phase, logging messages like: +* `Waiting for VM to be ready...` +* `VM status changed to: stopped (after 0.0s)` +* `Still waiting for VM IP address... (elapsed: XX.Xs)` +* Eventually, it might time out, or you might notice the VM window never appears or closes quickly. + +**Cause:** +This is typically due to known instability issues with the `lume serve` background daemon process, as documented in the main `README.md`: +1. 
**`lume serve` Crash:** The `lume serve` process might terminate unexpectedly shortly after launch or when the script tries to interact with it. If it's not running, the script cannot get VM status updates or the IP address. +2. **Incorrect Status Reporting:** Even if `lume serve` is running, its API sometimes incorrectly reports the VM status as `stopped` immediately after startup is initiated. While the underlying `Computer` library tries to poll and wait for the correct `running` status, this initial incorrect report can cause delays or failures if the status doesn't update correctly within the timeout or if `lume serve` crashes during the polling. + +**Troubleshooting Steps:** +1. **Check `lume serve`:** Is the `lume serve` process still running in its terminal? Did it print any errors or exit? If it's not running, stop your agent script (`Ctrl+C`) and proceed to step 2. +2. **Force Cleanup:** Before *every* run, perform a rigorous cleanup to ensure no old `lume` processes or VM states interfere. Open a **new terminal** and run: + ```bash + # Stop any running Lume VM gracefully first (replace if needed) + lume stop macos-sequoia-cua_latest + + # Force kill lume serve and related processes + pkill -f "lume serve" + pkill -9 -f "lume" + pkill -9 -f "VzVirtualMachine" # Kills underlying VM process + + # Optional: Verify they are gone + # ps aux | grep -E 'lume|VzVirtualMachine' | grep -v grep + ``` +3. **Restart Sequence:** + * **Terminal 1:** Start `lume serve` cleanly: + ```bash + lume serve + ``` + *(Watch this terminal to ensure it stays running).* + * **Terminal 2:** Run your agent script (including the `export SSL_CERT_FILE...` commands if needed for OCR): + ```bash + export SSL_CERT_FILE=$(python -m certifi) # If using OCR + export REQUESTS_CA_BUNDLE=$(python -m certifi) # If using OCR + python your_agent_script.py + ``` +4. **Retry:** Due to the intermittent nature of the Lume issues, sometimes simply repeating steps 2 and 3 allows the run to succeed if the timing avoids the status reporting bug or the `lume serve` crash. + +**Note:** Improving the stability of `lume serve` is an ongoing development area. + ### How do I troubleshoot Computer not connecting to lume daemon? If you're experiencing connection issues between Computer and the lume daemon, it could be because the port 3000 (used by lume) is already in use by an orphaned process. You can diagnose this issue with: From f2ef1a9b57b75f413280b66f5c48c76bd0417453 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Fri, 18 Apr 2025 20:18:02 +0000 Subject: [PATCH 04/43] docs: update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fabc5127..eb56101f 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica Ethan Gutierrez
💻
      Ricter Zheng 💻
      Rahul Karajgikar 💻
+     trospix
💻 From 7519b6af8b5129571e71eccaafb286a827e481f1 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Fri, 18 Apr 2025 20:18:03 +0000 Subject: [PATCH 05/43] docs: update .all-contributorsrc --- .all-contributorsrc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index 6e5d6a63..fd1e1c62 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -106,6 +106,15 @@ "contributions": [ "code" ] + }, + { + "login": "trospix", + "name": "trospix", + "avatar_url": "https://avatars.githubusercontent.com/u/81363696?v=4", + "profile": "https://github.com/trospix", + "contributions": [ + "code" + ] } ] } From 353f3cf45df13275d76b0b12fcabdccad622f040 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 09:59:32 +0200 Subject: [PATCH 06/43] Sparse file optimizations --- libs/lume/scripts/ghcr/push-ghcr.sh | 389 +++++- .../ImageContainerRegistry.swift | 1238 ++++++++++++++--- libs/lume/src/Errors/Errors.swift | 68 +- 3 files changed, 1445 insertions(+), 250 deletions(-) diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh index fbe4ab7e..33874122 100755 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ b/libs/lume/scripts/ghcr/push-ghcr.sh @@ -9,6 +9,8 @@ folder_path="" image_name="" image_versions="" chunk_size="500M" # Default chunk size for splitting large files +# Define the OCI media type for the compressed disk layer +oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format # Parse the command line arguments while [[ $# -gt 0 ]]; do @@ -41,6 +43,7 @@ while [[ $# -gt 0 ]]; do echo " --image-name : Name of the image to publish (required)" echo " --image-versions : Comma separated list of versions of the image to publish (required)" echo " --chunk-size : Size of chunks for large files (e.g., 500M, default: 500M)" + echo "Note: The script will automatically resume from the last attempt if available" exit 0 ;; *) @@ -69,7 +72,7 @@ if [[ ! -d "$folder_path" ]]; then fi # Check and install required tools -for tool in "oras" "split" "pv" "gzip"; do +for tool in "oras" "split" "pv" "jq"; do if ! command -v "$tool" &> /dev/null; then echo "$tool is not installed. Installing using Homebrew..." if ! command -v brew &> /dev/null; then @@ -80,80 +83,252 @@ for tool in "oras" "split" "pv" "gzip"; do fi done +# Check if Apple Archive is available +if ! 
command -v compression_tool &> /dev/null; then + echo "Error: Apple Archive (compression_tool) is required but not found" + echo "This script requires macOS with Apple Archive support" + exit 1 +fi + +echo "Apple Archive detected - will use for optimal sparse file handling" +compressed_ext=".aa" + # Authenticate with GitHub Container Registry echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin # Use the source folder path as the working directory and get its absolute path work_dir=$(cd "$folder_path" && pwd) -echo "Working directory (persistent cache): $work_dir" +echo "Working directory: $work_dir" -# Change to the working directory -cd "$work_dir" +# Function to find the most recent cache directory +find_latest_cache() { + local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1) + if [ -n "$latest_cache" ]; then + echo "$latest_cache" + else + echo "" + fi +} + +# Function to check if a cache directory is valid for resuming +is_valid_cache() { + local cache_dir="$1" + # Check if it contains the necessary files + [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ + [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1 +} + +# Always try to find and use an existing cache +existing_cache=$(find_latest_cache) +if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then + cache_dir="$existing_cache" + + # Check if the cache contains old gzip format + if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then + echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format." + echo "Please delete the cache directory and start fresh: $cache_dir" + exit 1 + fi + + echo "Resuming from existing cache: $cache_dir" +else + echo "No valid cache found. Starting fresh." + cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)" + mkdir -p "$cache_dir" +fi + +echo "Using cache directory: $cache_dir" + +# Display space information +echo "=== DISK SPACE INFORMATION ===" +df -h "$cache_dir" | head -1 +df -h "$cache_dir" | grep -v "Filesystem" +echo + +# Change to the cache directory +cd "$cache_dir" files=() # Initialize files array here -# Copy config.json if it exists -if [ -f "$folder_path/config.json" ]; then - echo "Copying config.json..." - cp "$folder_path/config.json" config.json +# Function to check if a version was already pushed +version_pushed() { + local version="$1" + local version_file="$cache_dir/.pushed_$version" + [ -f "$version_file" ] +} + +# Function to mark a version as pushed +mark_version_pushed() { + local version="$1" + touch "$cache_dir/.pushed_$version" +} + +# Copy config.json if it exists and not already in cache +config_json_source="$folder_path/config.json" +config_json_dest="$cache_dir/config.json" +if [ -f "$config_json_source" ]; then + if [ ! -f "$config_json_dest" ]; then + echo "Copying config.json..." 
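+        # Compute the uncompressed disk size up front so the annotation step
+        # below can see it; $original_disk_size is otherwise only assigned later,
+        # in the disk.img section, and would still be empty at this point.
+        if [ -f "$folder_path/disk.img" ]; then
+            original_disk_size=$(stat -f%z "$folder_path/disk.img")
+        fi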
+ # Add the uncompressed disk size annotation if disk.img exists and jq is available + if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then + echo "Adding uncompressed disk size annotation: $original_disk_size bytes" + jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \ + (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails + else + cp "$config_json_source" "$config_json_dest" + fi + fi +fi +if [ -f "$config_json_dest" ]; then files+=("config.json:application/vnd.oci.image.config.v1+json") fi -# Copy nvram.bin if it exists -nvram_bin="$folder_path/nvram.bin" -if [ -f "$nvram_bin" ]; then +# Copy nvram.bin if it exists and not already in cache +if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then echo "Copying nvram.bin..." - cp "$nvram_bin" nvram.bin + cp "$folder_path/nvram.bin" nvram.bin +fi +if [ -f "$cache_dir/nvram.bin" ]; then files+=("nvram.bin:application/octet-stream") fi # Process disk.img if it exists -disk_img_orig="disk.img" # Already in work_dir +disk_img_orig="$folder_path/disk.img" +original_disk_size="" if [ -f "$disk_img_orig" ]; then - # --- Compression Step --- - echo "Compressing $disk_img_orig..." - compressed_ext=".gz" - compressor="gzip" - compress_opts="-k -f" + # Get original size *before* compression + original_disk_size=$(stat -f%z "$disk_img_orig") + + # Get real (non-sparse) size + real_size=$(du -k "$disk_img_orig" | cut -f1) + real_size_bytes=$((real_size * 1024)) + sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc) + echo "Disk image: $disk_img_orig" + echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))" + echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" + echo " Sparseness ratio: ${sparseness_ratio}:1" + + # Check if we already have compressed files in the cache compressed_disk_img="disk.img${compressed_ext}" - pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img" - compressed_size=$(stat -f%z "$compressed_disk_img") - echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" - # --- End Compression Step --- - - # Check if splitting is needed based on *compressed* size - if [ $compressed_size -gt 524288000 ]; then # 500MB threshold - echo "Splitting compressed file: $compressed_disk_img" - split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part." 
- # Keep the compressed file and parts in work_dir - - # --- Adjust part processing --- - parts_files=() - total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ') - part_num=0 - for part in "$compressed_disk_img.part."*; do - part_num=$((part_num + 1)) - # *** IMPORTANT: Use the *compressed* OCI media type with part info *** - parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts") - echo "Part $part: $(du -h "$part" | cut -f1)" - done - # Combine non-disk files with disk parts - files+=("${parts_files[@]}") - # --- End Adjust part processing --- - - else - # Add the single compressed file to the list - # *** IMPORTANT: Use the *compressed* OCI media type *** - files+=("$compressed_disk_img:${oci_layer_media_type}") + already_compressed=false + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + already_compressed=true + echo "Using existing compressed file from cache: $compressed_disk_img" + elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then + already_compressed=true + echo "Using existing compressed parts from cache" fi - # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) --- + # Only compress if not already compressed in cache + if [ "$already_compressed" = false ]; then + # Check for free disk space before compression + avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}') + avail_space_bytes=$((avail_space * 1024)) + # Assume compressed size is roughly 30% of real size as a safe estimate + estimated_compressed=$((real_size_bytes * 30 / 100)) + + if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then + echo "WARNING: Possibly insufficient disk space for compression!" + echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB" + read -p "Continue anyway? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Exiting. Free up some space and try again." + exit 1 + fi + fi + + # --- Compression Step --- + echo "Compressing $disk_img_orig with Apple Archive..." + + # Apple Archive compression + echo "Starting compression with Apple Archive (showing output file growth)..." + compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse & + COMP_PID=$! + + sleep 1 # Give compression a moment to start + + # Display progress based on output file growth + while kill -0 $COMP_PID 2>/dev/null; do + if [ -f "$compressed_disk_img" ]; then + current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0) + percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc) + echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r" + else + echo -ne "Preparing compression...\r" + fi + sleep 2 + done + + wait $COMP_PID + echo -e "\nCompression complete!" + + compressed_size=$(stat -f%z "$compressed_disk_img") + echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" + echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%" + # --- End Compression Step --- + + # Check if splitting is needed based on *compressed* size + if [ $compressed_size -gt 524288000 ]; then # 500MB threshold + echo "Splitting compressed file into chunks of $chunk_size..." + pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part." + rm -f "$compressed_disk_img" # Remove the unsplit compressed file + # Verify that parts were created + echo "Verifying split parts..." 
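+            # Sanity-check sketch (an assumption, not part of the flow above): the
+            # summed part sizes should equal $compressed_size before pushing,
+            # mirroring the verification the pre-compression script performed:
+            #   total=0
+            #   for p in "$compressed_disk_img.part."*; do total=$((total + $(stat -f%z "$p"))); done
+            #   [ "$total" -eq "$compressed_size" ] || echo "WARNING: split size mismatch"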
+ ls -la "$cache_dir"/disk.img${compressed_ext}.part.* + fi + else + echo "Using existing compressed/split files from cache" + fi + + # --- Adjust part processing --- + echo "Looking for compressed files in $cache_dir..." + + # List all files in the cache directory for debugging + ls -la "$cache_dir" + + if [ -f "$cache_dir/$compressed_disk_img" ]; then + echo "Found single compressed file: $compressed_disk_img" + # Add the single compressed file to the list + files+=("$compressed_disk_img:${oci_layer_media_type}") + else + # Look for split parts + part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo "")) + if [ ${#part_files[@]} -gt 0 ]; then + echo "Found ${#part_files[@]} split parts" + parts_files=() + part_num=0 + + for part in "${part_files[@]}"; do + part_num=$((part_num + 1)) + part_basename=$(basename "$part") + parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}") + echo "Part $part_num: $(du -h "$part" | cut -f1)" + done + + files+=("${parts_files[@]}") + else + echo "ERROR: No compressed files found in cache directory: $cache_dir" + echo "Contents of cache directory:" + find "$cache_dir" -type f | sort + exit 1 + fi + fi + + # --- Push Logic --- push_pids=() IFS=',' read -ra versions <<< "$image_versions" for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting + # Trim whitespace if any from version splitting version=$(echo "$version" | xargs) if [[ -z "$version" ]]; then continue; fi + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + echo "Pushing version $version..." ( # Use process substitution to feed file list safely if it gets long @@ -161,6 +336,7 @@ if [ -f "$disk_img_orig" ]; then "ghcr.io/$organization/$image_name:$version" \ "${files[@]}" echo "Completed push for version $version" + mark_version_pushed "$version" ) & push_pids+=($!) done @@ -170,37 +346,108 @@ if [ -f "$disk_img_orig" ]; then wait "$pid" done - # --- Cleanup compressed files after successful push --- - echo "Push successful, cleaning up compressed artifacts..." - # Check if parts exist first - parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null) - if [ -n "$parts_exist" ]; then - echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img" - rm -f "$compressed_disk_img.part."* - # Also remove the original compressed file that was split - rm -f "$compressed_disk_img" - elif [ -f "$compressed_disk_img" ]; then - echo "Removing compressed file: $compressed_disk_img" - rm -f "$compressed_disk_img" + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" fi - # --- End Push Logic --- else echo "Warning: $disk_img_orig not found." # Push only config/nvram if they exist if [ ${#files[@]} -gt 0 ]; then - # (Add push logic here too if you want to push even without disk.img) - echo "Pushing non-disk files..." - # ... (similar push loop as above) ... 
+ echo "Pushing non-disk files..." + push_pids=() + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + # Trim whitespace if any from version splitting + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + + # Skip if version was already pushed + if version_pushed "$version"; then + echo "Version $version was already pushed, skipping..." + continue + fi + + echo "Pushing version $version (config/nvram only)..." + ( + oras push --disable-path-validation \ + "ghcr.io/$organization/$image_name:$version" \ + "${files[@]}" + echo "Completed push for version $version" + mark_version_pushed "$version" + ) & + push_pids+=($!) + done + + # Wait for all pushes to complete + for pid in "${push_pids[@]}"; do + wait "$pid" + done + + # --- Cleanup only if all versions were pushed successfully --- + all_versions_pushed=true + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + if ! version_pushed "$version"; then + all_versions_pushed=false + break + fi + done + + if [ "$all_versions_pushed" = true ]; then + echo "All non-disk versions pushed successfully, cleaning up cache directory..." + cd "$work_dir" + rm -rf "$cache_dir" + else + echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir" + echo "Run again to resume from this point" + fi else echo "No files found to push." + cd "$work_dir" + rm -rf "$cache_dir" exit 1 fi fi -for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo "Upload complete: ghcr.io/$organization/$image_name:$version" -done +# Determine final status based on the success check *before* potential cleanup +echo # Add a newline for better readability +if [ "$all_versions_pushed" = true ]; then + echo "All versions pushed successfully:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + done +else + echo "Final upload status:" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + # Check the marker file only if the overall process failed (cache preserved) + if version_pushed "$version"; then + echo " Upload complete: ghcr.io/$organization/$image_name:$version" + else + echo " Upload failed: ghcr.io/$organization/$image_name:$version" + fi + done + # Exit with error code if any version failed + exit 1 +fi diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9ef32355..ac7453ca 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -3,6 +3,18 @@ import Darwin import Foundation import Swift +// Define struct to decode relevant parts of config.json +struct OCIConfig: Codable { + struct Annotations: Codable { + let uncompressedSize: String? // Use optional String + + enum CodingKeys: String, CodingKey { + case uncompressedSize = "com.trycua.lume.disk.uncompressed_size" + } + } + let annotations: Annotations? 
// Optional annotations +} + struct Layer: Codable, Equatable { let mediaType: String let digest: String @@ -178,7 +190,7 @@ actor ProgressTracker { fflush(stdout) } - private func createProgressBar(progress: Double, width: Int = 20) -> String { + private func createProgressBar(progress: Double, width: Int = 30) -> String { let completedWidth = Int(progress * Double(width)) let remainingWidth = width - completedWidth @@ -279,6 +291,17 @@ class ImageContainerRegistry: @unchecked Sendable { private var activeDownloads: [String] = [] private let cachingEnabled: Bool + // Add the createProgressBar function here as a private method + private func createProgressBar(progress: Double, width: Int = 30) -> String { + let completedWidth = Int(progress * Double(width)) + let remainingWidth = width - completedWidth + + let completed = String(repeating: "█", count: completedWidth) + let remaining = String(repeating: "░", count: remainingWidth) + + return "[\(completed)\(remaining)]" + } + init(registry: String, organization: String) { self.registry = registry self.organization = organization @@ -716,7 +739,8 @@ class ImageContainerRegistry: @unchecked Sendable { let outputURL: URL switch mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") @@ -787,33 +811,127 @@ class ImageContainerRegistry: @unchecked Sendable { let stats = await progress.getDownloadStats() Logger.info(stats.formattedSummary()) + // Parse config.json to get uncompressed size *before* reassembly + let configURL = tempDownloadDir.appendingPathComponent("config.json") + let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) + + // Now also try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { + do { + let configData = try Data(contentsOf: configURL) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info("Found diskSize from VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse VM config.json for diskSize: \(error)") + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + // Handle disk parts if present if !diskParts.isEmpty { Logger.info("Reassembling disk image using sparse file technique...") let outputURL = tempVMDir.appendingPathComponent("disk.img") - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true + ) + + // 2. 
Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) + else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: error) } - // Calculate expected size from the manifest layers - let expectedTotalSize = UInt64( + // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) + let expectedCompressedTotalSize = UInt64( manifest.layers.filter { extractPartInfo(from: $0.mediaType) != nil }.reduce(0) { $0 + $1.size } ) Logger.info( - "Expected download size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be significantly lower)" + "Total compressed parts size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedCompressedTotalSize), countStyle: .file))" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) + // Calculate fallback size (sum of compressed parts) + let _: UInt64 = diskParts.reduce(UInt64(0)) { + (acc: UInt64, element) -> UInt64 in + let fileSize = + (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] + as? UInt64 ?? 0) ?? 0 + return acc + fileSize + } + + // Use: annotation size > VM config diskSize > fallback size + let sizeForTruncate: UInt64 + if let size = uncompressedSize { + Logger.info("Using uncompressed size from annotation: \(size) bytes") + sizeForTruncate = size + } else if let size = vmConfigDiskSize { + Logger.info("Using diskSize from VM config: \(size) bytes") + sizeForTruncate = size + } else { + Logger.error( + "Missing both uncompressed size annotation and VM config diskSize for multi-part image." + ) + throw PullError.missingUncompressedSizeAnnotation + } + defer { try? outputHandle.close() } // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) + try outputHandle.truncate(atOffset: sizeForTruncate) + + // Verify the sparse file was created with the correct size + let initialSize = + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Sparse file initialized with size: \(ByteCountFormatter.string(fromByteCount: Int64(initialSize), countStyle: .file))" + ) + + // Add a simple test pattern at the beginning and end of the file to verify it's writable + try outputHandle.seek(toOffset: 0) + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.write(contentsOf: testPattern) + + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + Logger.info("Test patterns written to sparse file. 
File is ready for writing.") var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -849,69 +967,252 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info( + "Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", partURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists( + atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem( + atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + let progressBar = createProgressBar( + progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = Double(currentOffset + partDecompressedSize) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling/Decompressing") - } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size - - } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") - let partSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 0 - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? 
Data() - } - if data.isEmpty { break } - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + / Double(expectedCompressedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } + + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size + } else { + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: partURL) + defer { try? inputHandle.close() } + + // Get part size + let partSize = + (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) + / Double(expectedCompressedTotalSize) + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -922,23 +1223,99 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) ?? 
0 Logger.info( - "Final disk image size (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) - Logger.info( - "Note: Actual disk usage will be much lower due to macOS sparse file system") - if finalSize != expectedTotalSize { + if finalSize != sizeForTruncate { Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(expectedTotalSize) bytes), but this doesn't affect functionality" + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" ) } - Logger.info("Disk image reassembled successfully using sparse file technique") + // Decompress the assembled disk image if it's in LZFSE compressed format + Logger.info( + "Checking if disk image is LZFSE compressed and decompressing if needed...") + decompressLZFSEImage(inputPath: outputURL.path) + + // Create a properly formatted disk image + Logger.info("Converting assembled data to proper disk image format...") + + // Get actual disk usage of the assembled file + let assembledUsage = getActualDiskUsage(path: outputURL.path) + let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer + let requiredSpace = assembledUsage + bufferBytes + + // Check available disk space in the destination directory + let fileManager = FileManager.default + let availableSpace = + try? fileManager.attributesOfFileSystem( + forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] + as? UInt64 + + if let available = availableSpace, available < requiredSpace { + Logger.error( + "Insufficient disk space to convert disk image format. Skipping conversion.", + metadata: [ + "available": ByteCountFormatter.string( + fromByteCount: Int64(available), countStyle: .file), + "required": ByteCountFormatter.string( + fromByteCount: Int64(requiredSpace), countStyle: .file), + ] + ) + } else { + // Prioritize SPARSE format for better sparse file handling + Logger.info("Attempting conversion to SPARSE format...") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + process.arguments = [ + "convert", + outputURL.path, // Source: our assembled file + "-format", "SPARSE", // Format: SPARSE (best for sparse images) + "-o", outputURL.path, // Output: overwrite with converted image + ] + + let errorPipe = Pipe() + process.standardError = errorPipe + process.standardOutput = errorPipe + + try process.run() + process.waitUntilExit() + + // Check for errors + let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !outputData.isEmpty, + let outputString = String(data: outputData, encoding: .utf8) + { + Logger.info("hdiutil output: \(outputString)") + } + + if process.terminationStatus == 0 { + // Find the potentially renamed formatted file + let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL + // If the output path is different, remove the original and move the new one + if formattedFile.path != outputURL.path { + try? FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: formattedFile, to: outputURL) + } + Logger.info("Successfully converted disk image to proper format (SPARSE)") + } else { + Logger.error( + "Failed to convert disk image to SPARSE format. VM might not start properly." 
+ ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } else { // Copy single disk image if it exists let diskURL = tempDownloadDir.appendingPathComponent("disk.img") @@ -996,9 +1373,9 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") + var diskPartSources: [(Int, URL)] = [] var totalParts = 0 - var expectedTotalSize: UInt64 = 0 // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1009,11 +1386,10 @@ class ImageContainerRegistry: @unchecked Sendable { totalParts = total // Just store the reference to source instead of copying diskPartSources.append((partNum, cachedLayer)) - expectedTotalSize += UInt64(layer.size) } else { let fileName: String switch layer.mediaType { - case "application/vnd.oci.image.layer.v1.tar": + case "application/vnd.oci.image.layer.v1.tar", "application/octet-stream+gzip": fileName = "disk.img" case "application/vnd.oci.image.config.v1+json": fileName = "config.json" @@ -1032,14 +1408,76 @@ class ImageContainerRegistry: @unchecked Sendable { // Reassemble disk parts if needed if !diskPartSources.isEmpty { + // Get the uncompressed size from cached config + let configDigest = manifest.config?.digest + let cachedConfigPath = + configDigest != nil + ? getCachedLayerPath(manifestId: manifestId, digest: configDigest!) : nil + let uncompressedSize = cachedConfigPath.flatMap { + getUncompressedSizeFromConfig(configPath: $0) + } + + // Try to get disk size from VM config if OCI annotation not found + var vmConfigDiskSize: UInt64? = nil + if uncompressedSize == nil { + // Find config.json in the copied files + let vmConfigPath = destination.appendingPathComponent("config.json") + if FileManager.default.fileExists(atPath: vmConfigPath.path) { + do { + let configData = try Data(contentsOf: vmConfigPath) + let decoder = JSONDecoder() + if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info( + "Found diskSize from cached VM config.json: \(size) bytes") + } + } + } catch { + Logger.error("Failed to parse cached VM config.json for diskSize: \(error)") + } + } + } + + // Force explicit use + if uncompressedSize != nil { + Logger.info( + "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + ) + } else if vmConfigDiskSize != nil { + Logger.info( + "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + } + Logger.info( "Reassembling disk image from cached parts using sparse file technique..." ) let outputURL = destination.appendingPathComponent("disk.img") - // Ensure the output file exists but is empty - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) + // Wrap setup in do-catch for better error reporting + let outputHandle: FileHandle + do { + // 1. Ensure parent directory exists + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + + // 2. Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { + try FileManager.default.removeItem(at: outputURL) + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + throw PullError.fileCreationFailed(outputURL.path) + } + + // 3. 
Now open the handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + + } catch { + // Catch errors during directory/file creation or handle opening + Logger.error( + "Failed during setup for disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) } // Calculate expected total size from the cached files @@ -1054,13 +1492,6 @@ class ImageContainerRegistry: @unchecked Sendable { "Expected download size from cache: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be lower)" ) - // Create sparse file of the required size - let outputHandle = try FileHandle(forWritingTo: outputURL) - defer { try? outputHandle.close() } - - // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: expectedTotalSize) - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file @@ -1090,68 +1521,245 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: currentOffset) if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum)...") + Logger.info("Decompressing part \(partNum) with media type: \(layerMediaType)") + + // Handle Apple Archive format + let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) + let tempOutputPath = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + + // Check input file size before decompression + let inputFileSize = + (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" + ) + + // Create a process that decompresses to a temporary file let process = Process() - let pipe = Pipe() - process.executableURL = URL(fileURLWithPath: "/bin/sh") - process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection - process.standardOutput = pipe // Capture decompressed output + process.executableURL = URL(fileURLWithPath: toolPath) + process.arguments = [ + "extract", "-i", sourceURL.path, "-o", tempOutputPath.path, + ] + // Add error output capture + let errorPipe = Pipe() + process.standardError = errorPipe + + Logger.info( + "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" + ) try process.run() + process.waitUntilExit() - let reader = pipe.fileHandleForReading - var partDecompressedSize: UInt64 = 0 + // Check error output if any + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !errorData.isEmpty, + let errorString = String(data: errorData, encoding: .utf8) + { + Logger.error("Decompression error output: \(errorString)") + } + + if process.terminationStatus != 0 { + Logger.error( + "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" + ) + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? 
inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Check if the output file exists and has content + let outputExists = FileManager.default.fileExists(atPath: tempOutputPath.path) + let outputFileSize = + outputExists + ? ((try? FileManager.default.attributesOfItem(atPath: tempOutputPath.path)[ + .size] as? UInt64) ?? 0) : 0 + Logger.info( + "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" + ) + + // If decompression produced an empty file, fall back to direct copy + if outputFileSize == 0 { + Logger.info( + "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" + ) + try? FileManager.default.removeItem(at: tempOutputPath) + + // Fall back to direct copying (uncompressed) + Logger.info("Copying part \(partNum) directly without decompression...") + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + var bytesWritten: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { + let data = autoreleasepool { + try! inputHandle.read(upToCount: chunkSize) ?? 
Data() + } + if data.isEmpty { break } + + try outputHandle.write(contentsOf: data) + bytesWritten += UInt64(data.count) + chunkCount += 1 + + // Update progress + let totalProgress = + Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + let progressBar = createProgressBar(progress: totalProgress, width: 30) + let progressPercent = Int(totalProgress * 100) + let currentSpeed = + ByteCountFormatter.string( + fromByteCount: Int64(Double(bytesWritten) / 0.5), + countStyle: .file) + "/s" + + print( + "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", + terminator: "") + fflush(stdout) + + // Also log to the progress logger for consistency + reassemblyProgressLogger.logProgress( + current: totalProgress, + context: "Direct copying") + } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) + currentOffset += bytesWritten + continue + } + + // Read the decompressed file and write to our output + let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) + defer { + try? tempInputHandle.close() + try? FileManager.default.removeItem(at: tempOutputPath) + } // Read decompressed data in chunks and write to sparse file + var partDecompressedSize: UInt64 = 0 + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + while true { let data = autoreleasepool { // Help manage memory with large files - reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks + try! tempInputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } // End of stream try outputHandle.write(contentsOf: data) partDecompressedSize += UInt64(data.count) + chunkCount += 1 // Update progress based on decompressed size written let totalProgress = - Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize) + Double(currentOffset + partDecompressedSize) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, context: "Reassembling") } - process.waitUntilExit() - if process.terminationStatus != 0 { - throw PullError.decompressionFailed("Part \(partNum)") - } - currentOffset += partDecompressedSize // Advance offset by decompressed size + Logger.info( + "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" + ) + currentOffset += partDecompressedSize // Advance offset by decompressed size } else { - // --- Handle non-compressed parts (if any, or the single file case) --- - // This part is similar to your original copy logic, writing directly - // from inputHandle to outputHandle at currentOffset - Logger.info("Copying non-compressed part \(partNum)...") + // No decompression command available, try direct copy + Logger.info( + "Copying part \(partNum) directly..." + ) + try outputHandle.seek(toOffset: currentOffset) + + let inputHandle = try FileHandle(forReadingFrom: sourceURL) + defer { try? inputHandle.close() } + + // Get part size let partSize = (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] as? UInt64) ?? 
0 + Logger.info( + "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" + ) + var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 - while bytesWritten < partSize { + let chunkSize = 1024 * 1024 // 1MB chunks + var chunkCount = 0 + + while true { let data = autoreleasepool { try! inputHandle.read(upToCount: chunkSize) ?? Data() } if data.isEmpty { break } + try outputHandle.write(contentsOf: data) bytesWritten += UInt64(data.count) + chunkCount += 1 // Update progress let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) + Double(currentOffset + bytesWritten) + / Double(expectedTotalSize) reassemblyProgressLogger.logProgress( current: totalProgress, - context: "Reassembling") + context: "Direct copying") } + + Logger.info( + "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" + ) currentOffset += bytesWritten - // --- End non-compressed handling --- } // Ensure data is written before processing next part (optional but safer) @@ -1162,10 +1770,13 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline + // Ensure output handle is closed before post-processing + try outputHandle.close() + // Verify final size let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) - ?? 0 + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 Logger.info( "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) @@ -1176,8 +1787,79 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - Logger.info( - "Disk image reassembled successfully from cache using sparse file technique") + // Decompress the assembled disk image if it's in LZFSE compressed format + Logger.info("Checking if disk image is LZFSE compressed and decompressing if needed...") + decompressLZFSEImage(inputPath: outputURL.path) + + // Create a properly formatted disk image + Logger.info("Converting assembled data to proper disk image format...") + + // Get actual disk usage of the assembled file + let assembledUsage = getActualDiskUsage(path: outputURL.path) + let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer + let requiredSpace = assembledUsage + bufferBytes + + // Check available disk space in the destination directory + let fileManager = FileManager.default + let availableSpace = + try? fileManager.attributesOfFileSystem( + forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] as? UInt64 + + if let available = availableSpace, available < requiredSpace { + Logger.error( + "Insufficient disk space to convert disk image format. 
Skipping conversion.", + metadata: [ + "available": ByteCountFormatter.string( + fromByteCount: Int64(available), countStyle: .file), + "required": ByteCountFormatter.string( + fromByteCount: Int64(requiredSpace), countStyle: .file), + ] + ) + } else { + // Prioritize SPARSE format for better sparse file handling + Logger.info("Attempting conversion to SPARSE format...") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + process.arguments = [ + "convert", + outputURL.path, // Source: our assembled file + "-format", "SPARSE", // Format: SPARSE (best for sparse images) + "-o", outputURL.path, // Output: overwrite with converted image + ] + + let errorPipe = Pipe() + process.standardError = errorPipe + process.standardOutput = errorPipe + + try process.run() + process.waitUntilExit() + + // Check for errors + let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() + if !outputData.isEmpty, let outputString = String(data: outputData, encoding: .utf8) + { + Logger.info("hdiutil output: \(outputString)") + } + + if process.terminationStatus == 0 { + // Find the potentially renamed formatted file + let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL + // If the output path is different, remove the original and move the new one + if formattedFile.path != outputURL.path { + try? FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: formattedFile, to: outputURL) + } + Logger.info("Successfully converted disk image to proper format (SPARSE)") + } else { + Logger.error( + "Failed to convert disk image to SPARSE format. VM might not start properly." + ) + // If SPARSE failed, maybe try UDRW as a last resort? + // For now, we'll just log the error. + } + } + + Logger.info("Disk image reassembly completed") } Logger.info("Cache copy complete") @@ -1307,70 +1989,6 @@ class ImageContainerRegistry: @unchecked Sendable { throw lastError ?? PullError.layerDownloadFailed(digest) } - private func decompressGzipFile(at source: URL, to destination: URL) throws { - Logger.info("Decompressing \(source.lastPathComponent)...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/gunzip") - process.arguments = ["-c"] - - let inputPipe = Pipe() - let outputPipe = Pipe() - process.standardInput = inputPipe - process.standardOutput = outputPipe - - try process.run() - - // Read and pipe the gzipped file in chunks to avoid memory issues - let inputHandle = try FileHandle(forReadingFrom: source) - let outputHandle = try FileHandle(forWritingTo: destination) - defer { - try? inputHandle.close() - try? 
outputHandle.close() - } - - // Create the output file - FileManager.default.createFile(atPath: destination.path, contents: nil) - - // Process with optimal chunk size - let chunkSize = getOptimalChunkSize() - while let chunk = try inputHandle.read(upToCount: chunkSize) { - try autoreleasepool { - try inputPipe.fileHandleForWriting.write(contentsOf: chunk) - - // Read and write output in chunks as well - while let decompressedChunk = try outputPipe.fileHandleForReading.read( - upToCount: chunkSize) - { - try outputHandle.write(contentsOf: decompressedChunk) - } - } - } - - try inputPipe.fileHandleForWriting.close() - - // Read any remaining output - while let decompressedChunk = try outputPipe.fileHandleForReading.read(upToCount: chunkSize) - { - try autoreleasepool { - try outputHandle.write(contentsOf: decompressedChunk) - } - } - - process.waitUntilExit() - - if process.terminationStatus != 0 { - throw PullError.decompressionFailed(source.lastPathComponent) - } - - // Verify the decompressed size - let decompressedSize = - try FileManager.default.attributesOfItem(atPath: destination.path)[.size] as? UInt64 - ?? 0 - Logger.info( - "Decompressed size: \(ByteCountFormatter.string(fromByteCount: Int64(decompressedSize), countStyle: .file))" - ) - } - private func extractPartInfo(from mediaType: String) -> (partNum: Int, total: Int)? { let pattern = #"part\.number=(\d+);part\.total=(\d+)"# guard let regex = try? NSRegularExpression(pattern: pattern), @@ -1699,19 +2317,47 @@ class ImageContainerRegistry: @unchecked Sendable { // Add helper to check media type and get decompress command private func getDecompressionCommand(for mediaType: String) -> String? { - if mediaType.hasSuffix("+gzip") { - return "/usr/bin/gunzip -c" // -c writes to stdout - } else if mediaType.hasSuffix("+zstd") { - // Check if zstd exists, otherwise handle error? - // Assuming brew install zstd -> /opt/homebrew/bin/zstd or /usr/local/bin/zstd - let zstdPath = findExecutablePath(named: "zstd") ?? 
"/usr/local/bin/zstd" - return "\(zstdPath) -dc" // -d decompress, -c stdout + // Determine appropriate decompression command based on layer media type + Logger.info("Determining decompression command for media type: \(mediaType)") + + // For the specific format that appears in our GHCR repository, skip decompression attempts + // These files are labeled +lzfse but aren't actually in Apple Archive format + if mediaType.contains("+lzfse;part.number=") { + Logger.info("Detected LZFSE part file, using direct copy instead of decompression") + return nil + } + + // Check for LZFSE or Apple Archive format anywhere in the media type string + // The format may include part information like: application/octet-stream+lzfse;part.number=1;part.total=38 + if mediaType.contains("+lzfse") || mediaType.contains("+aa") { + // Apple Archive format requires special handling + if let aaPath = findExecutablePath(for: "aa") { + Logger.info("Found Apple Archive tool at: \(aaPath)") + return "apple_archive:\(aaPath)" + } else { + Logger.error( + "Apple Archive tool (aa) not found in PATH, falling back to default path") + + // Check if the default path exists + let defaultPath = "/usr/bin/aa" + if FileManager.default.isExecutableFile(atPath: defaultPath) { + Logger.info("Default Apple Archive tool exists at: \(defaultPath)") + } else { + Logger.error("Default Apple Archive tool not found at: \(defaultPath)") + } + + return "apple_archive:/usr/bin/aa" + } + } else { + Logger.info( + "Unsupported media type: \(mediaType) - only Apple Archive (+lzfse/+aa) is supported" + ) + return nil } - return nil // Not compressed or unknown compression } // Helper to find executables (optional, or hardcode paths) - private func findExecutablePath(named executableName: String) -> String? { + private func findExecutablePath(for executableName: String) -> String? { let pathEnv = ProcessInfo.processInfo.environment["PATH"] ?? "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin" @@ -1726,4 +2372,250 @@ class ImageContainerRegistry: @unchecked Sendable { } return nil } + + // Helper function to extract uncompressed disk size from config.json + private func getUncompressedSizeFromConfig(configPath: URL) -> UInt64? { + guard FileManager.default.fileExists(atPath: configPath.path) else { + Logger.info("Config file not found: \(configPath.path)") + return nil + } + + do { + let configData = try Data(contentsOf: configPath) + let decoder = JSONDecoder() + let ociConfig = try decoder.decode(OCIConfig.self, from: configData) + + if let sizeString = ociConfig.annotations?.uncompressedSize, + let size = UInt64(sizeString) + { + Logger.info("Found uncompressed disk size annotation: \(size) bytes") + return size + } else { + Logger.info("No uncompressed disk size annotation found in config.json") + return nil + } + } catch { + Logger.error("Failed to parse config.json for uncompressed size: \(error)") + return nil + } + } + + // Helper function to find formatted file with potential extensions + private func findFormattedFile(tempFormatted: URL) -> URL? 
{ + // Check for the exact path first + if FileManager.default.fileExists(atPath: tempFormatted.path) { + return tempFormatted + } + + // Check with .dmg extension + let dmgPath = tempFormatted.path + ".dmg" + if FileManager.default.fileExists(atPath: dmgPath) { + return URL(fileURLWithPath: dmgPath) + } + + // Check with .sparseimage extension + let sparsePath = tempFormatted.path + ".sparseimage" + if FileManager.default.fileExists(atPath: sparsePath) { + return URL(fileURLWithPath: sparsePath) + } + + // Try to find any file with the same basename + do { + let files = try FileManager.default.contentsOfDirectory( + at: tempFormatted.deletingLastPathComponent(), + includingPropertiesForKeys: nil) + if let matchingFile = files.first(where: { + $0.lastPathComponent.starts(with: tempFormatted.lastPathComponent) + }) { + return matchingFile + } + } catch { + Logger.error("Failed to list directory contents: \(error)") + } + + return nil + } + + // Helper function to decompress LZFSE compressed disk image + @discardableResult + private func decompressLZFSEImage(inputPath: String, outputPath: String? = nil) -> Bool { + Logger.info("Attempting to decompress LZFSE compressed disk image using sparse pipe...") + + let finalOutputPath = outputPath ?? inputPath // If outputPath is nil, we'll overwrite input + let tempFinalPath = finalOutputPath + ".ddsparse.tmp" // Temporary name during dd operation + + // Ensure the temporary file doesn't exist from a previous failed run + try? FileManager.default.removeItem(atPath: tempFinalPath) + + // Process 1: compression_tool + let process1 = Process() + process1.executableURL = URL(fileURLWithPath: "/usr/bin/compression_tool") + process1.arguments = [ + "-decode", + "-i", inputPath, + "-o", "/dev/stdout", // Write to standard output + ] + + // Process 2: dd + let process2 = Process() + process2.executableURL = URL(fileURLWithPath: "/bin/dd") + process2.arguments = [ + "if=/dev/stdin", // Read from standard input + "of=\(tempFinalPath)", // Write to the temporary final path + "conv=sparse", // Use sparse conversion + "bs=1m", // Use a reasonable block size (e.g., 1MB) + ] + + // Create pipes + let pipe = Pipe() // Connects process1 stdout to process2 stdin + let errorPipe1 = Pipe() + let errorPipe2 = Pipe() + + process1.standardOutput = pipe + process1.standardError = errorPipe1 + + process2.standardInput = pipe + process2.standardError = errorPipe2 + + do { + Logger.info("Starting decompression pipe: compression_tool | dd conv=sparse...") + // Start processes + try process1.run() + try process2.run() + + // Close the write end of the pipe for process2 to prevent hanging + // This might not be strictly necessary if process1 exits cleanly, but safer. + // Note: Accessing fileHandleForWriting after run can be tricky. + // We rely on process1 exiting to signal EOF to process2. 
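For reference, the two `Process` objects wired together above amount to a single shell pipeline. A rough equivalent, useful when reproducing the behavior by hand (file names are illustrative), with the same arguments the Swift code passes:

```bash
# Rough shell equivalent of the decompression pipe built above.
# conv=sparse makes dd seek over all-zero blocks instead of writing them,
# so the reassembled image stays sparse on APFS. Paths are illustrative.
/usr/bin/compression_tool -decode -i disk.img -o /dev/stdout \
    | /bin/dd if=/dev/stdin of=disk.img.ddsparse.tmp conv=sparse bs=1m

# Apparent size vs. blocks actually allocated (cf. getActualDiskUsage
# further down, which parses `du -k` output the same way):
ls -lh disk.img.ddsparse.tmp
du -h disk.img.ddsparse.tmp
```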
+ + process1.waitUntilExit() + process2.waitUntilExit() // Wait for dd to finish processing the stream + + // --- Check for errors --- + let errorData1 = errorPipe1.fileHandleForReading.readDataToEndOfFile() + if !errorData1.isEmpty, + let errorString = String(data: errorData1, encoding: .utf8)?.trimmingCharacters( + in: .whitespacesAndNewlines), !errorString.isEmpty + { + Logger.error("compression_tool stderr: \(errorString)") + } + let errorData2 = errorPipe2.fileHandleForReading.readDataToEndOfFile() + if !errorData2.isEmpty, + let errorString = String(data: errorData2, encoding: .utf8)?.trimmingCharacters( + in: .whitespacesAndNewlines), !errorString.isEmpty + { + // dd often reports blocks in/out to stderr, filter that if needed, but log for now + Logger.info("dd stderr: \(errorString)") + } + + // Check termination statuses + let status1 = process1.terminationStatus + let status2 = process2.terminationStatus + + if status1 != 0 || status2 != 0 { + Logger.error( + "Pipe command failed. compression_tool status: \(status1), dd status: \(status2)" + ) + try? FileManager.default.removeItem(atPath: tempFinalPath) // Clean up failed attempt + return false + } + + // --- Validation --- + if FileManager.default.fileExists(atPath: tempFinalPath) { + let fileSize = + (try? FileManager.default.attributesOfItem(atPath: tempFinalPath)[.size] + as? UInt64) ?? 0 + let actualUsage = getActualDiskUsage(path: tempFinalPath) + Logger.info( + "Piped decompression successful - Allocated: \(ByteCountFormatter.string(fromByteCount: Int64(fileSize), countStyle: .file)), Actual Usage: \(ByteCountFormatter.string(fromByteCount: Int64(actualUsage), countStyle: .file))" + ) + + // Basic header validation + var isValid = false + if let fileHandle = FileHandle(forReadingAtPath: tempFinalPath) { + if let data = try? fileHandle.read(upToCount: 512), data.count >= 512, + data[510] == 0x55 && data[511] == 0xAA + { + isValid = true + } + // Ensure handle is closed regardless of validation outcome + try? fileHandle.close() + } else { + Logger.error( + "Validation Error: Could not open decompressed file handle for reading.") + } + + if isValid { + Logger.info("Decompressed file appears to be a valid disk image.") + + // Move the final file into place + // If outputPath was nil, we need to replace the original inputPath + if outputPath == nil { + // Backup original only if it's different from the temp path + if inputPath != tempFinalPath { + try? FileManager.default.copyItem( + at: URL(fileURLWithPath: inputPath), + to: URL(fileURLWithPath: inputPath + ".compressed.bak")) + try? FileManager.default.removeItem(at: URL(fileURLWithPath: inputPath)) + } + try FileManager.default.moveItem( + at: URL(fileURLWithPath: tempFinalPath), + to: URL(fileURLWithPath: inputPath)) + Logger.info("Replaced original file with sparsely decompressed version.") + } else { + // If outputPath was specified, move it there (overwrite if needed) + try? FileManager.default.removeItem( + at: URL(fileURLWithPath: finalOutputPath)) // Remove existing if overwriting + try FileManager.default.moveItem( + at: URL(fileURLWithPath: tempFinalPath), + to: URL(fileURLWithPath: finalOutputPath)) + Logger.info("Moved sparsely decompressed file to: \(finalOutputPath)") + } + return true + } else { + Logger.error( + "Validation failed: Decompressed file header is invalid or file couldn't be read. Cleaning up." + ) + try? 
FileManager.default.removeItem(atPath: tempFinalPath) + return false + } + } else { + Logger.error( + "Piped decompression failed: Output file '\(tempFinalPath)' not found after dd completed." + ) + return false + } + + } catch { + Logger.error("Error running decompression pipe command: \(error)") + try? FileManager.default.removeItem(atPath: tempFinalPath) // Clean up on error + return false + } + } + + // Helper function to get actual disk usage of a file + private func getActualDiskUsage(path: String) -> UInt64 { + let task = Process() + task.executableURL = URL(fileURLWithPath: "/usr/bin/du") + task.arguments = ["-k", path] // -k for 1024-byte blocks + + let pipe = Pipe() + task.standardOutput = pipe + + do { + try task.run() + task.waitUntilExit() + + let data = pipe.fileHandleForReading.readDataToEndOfFile() + if let output = String(data: data, encoding: .utf8), + let size = UInt64(output.split(separator: "\t").first ?? "0") + { + return size * 1024 // Convert from KB to bytes + } + } catch { + Logger.error("Failed to get actual disk usage: \(error)") + } + + return 0 + } } diff --git a/libs/lume/src/Errors/Errors.swift b/libs/lume/src/Errors/Errors.swift index 91b04950..b6568c10 100644 --- a/libs/lume/src/Errors/Errors.swift +++ b/libs/lume/src/Errors/Errors.swift @@ -5,6 +5,15 @@ enum HomeError: Error, LocalizedError { case directoryAccessDenied(path: String) case invalidHomeDirectory case directoryAlreadyExists(path: String) + case homeNotFound + case defaultStorageNotDefined + case storageLocationNotFound(String) + case storageLocationNotADirectory(String) + case storageLocationNotWritable(String) + case invalidStorageLocation(String) + case cannotCreateDirectory(String) + case cannotGetVMsDirectory + case vmDirectoryNotFound(String) var errorDescription: String? { switch self { @@ -16,6 +25,24 @@ enum HomeError: Error, LocalizedError { return "Invalid home directory configuration" case .directoryAlreadyExists(let path): return "Directory already exists at path: \(path)" + case .homeNotFound: + return "Home directory not found." + case .defaultStorageNotDefined: + return "Default storage location is not defined." + case .storageLocationNotFound(let path): + return "Storage location not found: \(path)" + case .storageLocationNotADirectory(let path): + return "Storage location is not a directory: \(path)" + case .storageLocationNotWritable(let path): + return "Storage location is not writable: \(path)" + case .invalidStorageLocation(let path): + return "Invalid storage location specified: \(path)" + case .cannotCreateDirectory(let path): + return "Cannot create directory: \(path)" + case .cannotGetVMsDirectory: + return "Cannot determine the VMs directory." + case .vmDirectoryNotFound(let path): + return "VM directory not found: \(path)" } } } @@ -28,23 +55,32 @@ enum PullError: Error, LocalizedError { case missingPart(Int) case decompressionFailed(String) case reassemblyFailed(String) + case fileCreationFailed(String) + case reassemblySetupFailed(path: String, underlyingError: Error) + case missingUncompressedSizeAnnotation var errorDescription: String? { switch self { case .invalidImageFormat: return "Invalid image format. Expected format: name:tag" case .tokenFetchFailed: - return "Failed to obtain authentication token" + return "Failed to fetch authentication token from registry." case .manifestFetchFailed: - return "Failed to fetch manifest" + return "Failed to fetch image manifest from registry." 
case .layerDownloadFailed(let digest): return "Failed to download layer: \(digest)" - case .missingPart(let number): - return "Missing disk image part \(number)" - case .decompressionFailed(let filename): - return "Failed to decompress file: \(filename)" + case .missingPart(let partNum): + return "Missing required part number \(partNum) for reassembly." + case .decompressionFailed(let file): + return "Failed to decompress file: \(file)" case .reassemblyFailed(let reason): return "Disk image reassembly failed: \(reason)." + case .fileCreationFailed(let path): + return "Failed to create the necessary file at path: \(path)" + case .reassemblySetupFailed(let path, let underlyingError): + return "Failed to set up for reassembly at path: \(path). Underlying error: \(underlyingError.localizedDescription)" + case .missingUncompressedSizeAnnotation: + return "Could not find the required uncompressed disk size annotation in the image config.json." } } } @@ -165,4 +201,24 @@ enum VMError: Error, LocalizedError { return "Invalid display resolution: \(resolution)" } } +} + +enum ResticError: Error { + case snapshotFailed(String) + case restoreFailed(String) + case genericError(String) +} + +enum VmrunError: Error, LocalizedError { + case commandNotFound + case operationFailed(command: String, output: String?) + + var errorDescription: String? { + switch self { + case .commandNotFound: + return "vmrun command not found. Ensure VMware Fusion is installed and in the system PATH." + case .operationFailed(let command, let output): + return "vmrun command '\(command)' failed. Output: \(output ?? "No output")" + } + } } \ No newline at end of file From a27fd1354ac793b66a7d70ab673cace90e97ec55 Mon Sep 17 00:00:00 2001 From: trospix Date: Sat, 19 Apr 2025 13:04:29 +0100 Subject: [PATCH 07/43] Address review feedback for PR #114 --- docs/FAQ.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/FAQ.md b/docs/FAQ.md index 913e665c..8b89c3fa 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -38,10 +38,10 @@ ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verif ``` **Cause:** -This usually happens when EasyOCR attempts to download its language models over HTTPS. Python's SSL module cannot verify the server's certificate because it can't locate the necessary root Certificate Authority (CA) certificates in your environment's trust store. +This usually happens when EasyOCR attempts to download its language models over HTTPS for the first time. Python's SSL module cannot verify the server's certificate because it can't locate the necessary root Certificate Authority (CA) certificates in your environment's trust store. **Solution:** -You need to explicitly tell Python where to find a trusted CA bundle. The `certifi` package provides one. Before running your Python agent script, set the following environment variables in the *same terminal session*: +You need to explicitly tell Python where to find a trusted CA bundle. The `certifi` package provides one. Before running your Python agent script **the first time it needs to download models**, set the following environment variables in the *same terminal session*: ```bash # Ensure certifi is installed: pip show certifi export SSL_CERT_FILE=$(python -m certifi) @@ -50,7 +50,7 @@ export REQUESTS_CA_BUNDLE=$(python -m certifi) # Now run your Python script that uses the agent... # python your_agent_script.py ``` -This directs Python to use the CA bundle provided by `certifi` for SSL verification. 
+This directs Python to use the CA bundle provided by `certifi` for SSL verification. **Note:** Once EasyOCR has successfully downloaded its models, you typically do not need to set these environment variables before every subsequent run.
 
 ### How do I troubleshoot the agent failing to get the VM IP address or getting stuck on "VM status changed to: stopped"?
 
@@ -87,14 +87,20 @@ This is typically due to known instability issues with the `lume serve` backgrou
         lume serve
         ```
         *(Watch this terminal to ensure it stays running).*
-    * **Terminal 2:** Run your agent script (including the `export SSL_CERT_FILE...` commands if needed for OCR):
+    * **Terminal 2:** Run your agent script (including the `export SSL_CERT_FILE...` commands if this is the *first time* using OCR):
         ```bash
-        export SSL_CERT_FILE=$(python -m certifi) # If using OCR
-        export REQUESTS_CA_BUNDLE=$(python -m certifi) # If using OCR
+        # export SSL_CERT_FILE=$(python -m certifi) # Only if first run with OCR
+        # export REQUESTS_CA_BUNDLE=$(python -m certifi) # Only if first run with OCR
         python your_agent_script.py
         ```
 4. **Retry:** Due to the intermittent nature of the Lume issues, sometimes simply repeating steps 2 and 3 allows the run to succeed if the timing avoids the status reporting bug or the `lume serve` crash.
 
+**Related Issue: "No route to host" Error (macOS Sequoia+)**
+
+* **Symptom:** Even if the `Computer` library logs show the VM has obtained an IP address, you might encounter connection errors like `No route to host` when the agent tries to connect to the internal server, especially when running the agent script from within an IDE (like VS Code or Cursor).
+* **Cause:** This is often due to macOS Sequoia's enhanced local network privacy controls. Applications need explicit permission to access the local network, which includes communicating with the VM.
+* **Solution:** Grant "Local Network" access to the application you are running the script from (e.g., your IDE or terminal application). Go to **System Settings > Privacy & Security > Local Network**, find your application in the list, and toggle the switch ON. You might need to trigger a connection attempt from the application first for it to appear in the list. See [GitHub Issue #61](https://github.com/trycua/cua/issues/61) for more details and discussion.
+
 **Note:** Improving the stability of `lume serve` is an ongoing development area.
 
 ### How do I troubleshoot Computer not connecting to lume daemon?
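+A quick first check is whether the daemon is reachable at all. A minimal sketch, assuming the default `lume serve` address of `localhost:3000` used elsewhere in these docs, and that the daemon exposes a VM listing under `/lume/vms` (an assumption; adjust to your setup):
+
+```bash
+# Should return JSON promptly if `lume serve` is up and reachable
+curl --connect-timeout 5 http://localhost:3000/lume/vms
+```
+
+If this fails, restart `lume serve` in its own terminal and watch that terminal for errors before re-running the agent.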
From 54c5ae2bd8204879b63a01868a7076ce3585aa83 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 22:15:42 -0700 Subject: [PATCH 08/43] Prevent sparse disk expand --- libs/lume/.cursorignore | 233 +++++ libs/lume/README.md | 26 +- libs/lume/docs/API-Reference.md | 36 + libs/lume/scripts/ghcr/push-ghcr.sh | 550 ++++++++--- libs/lume/src/Commands/Push.swift | 74 ++ .../ImageContainerRegistry.swift | 905 +++++++++++++++++- libs/lume/src/LumeController.swift | 98 ++ libs/lume/src/Server/Handlers.swift | 48 + libs/lume/src/Server/Requests.swift | 28 + libs/lume/src/Server/Responses.swift | 13 + libs/lume/src/Server/Server.swift | 6 + libs/lume/src/Utils/CommandRegistry.swift | 1 + 12 files changed, 1864 insertions(+), 154 deletions(-) create mode 100644 libs/lume/.cursorignore create mode 100644 libs/lume/src/Commands/Push.swift diff --git a/libs/lume/.cursorignore b/libs/lume/.cursorignore new file mode 100644 index 00000000..12e8e403 --- /dev/null +++ b/libs/lume/.cursorignore @@ -0,0 +1,233 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +!libs/lume/scripts/build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Scripts +server/scripts/ + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Conda +.conda/ + +# Local environment +.env.local + +# macOS DS_Store +.DS_Store + +weights/ +weights/icon_detect/ +weights/icon_detect/model.pt +weights/icon_detect/model.pt.zip +weights/icon_detect/model.pt.zip.part* + +libs/omniparser/weights/icon_detect/model.pt + +# Example test data and output +examples/test_data/ +examples/output/ + +/screenshots/ + +/experiments/ + +/logs/ + +# Xcode +# +# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +## User settings +xcuserdata/ + +## Obj-C/Swift specific +*.hmap + +## App packaging +*.ipa +*.dSYM.zip +*.dSYM + +## Playgrounds +timeline.xctimeline +playground.xcworkspace + +# Swift Package Manager +# +# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. +# Packages/ +# Package.pins +# Package.resolved +# *.xcodeproj +# +# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata +# hence it is not needed unless you have added a package configuration file to your project +.swiftpm/ +.build/ + +# CocoaPods +# +# We recommend against adding the Pods directory to your .gitignore. However +# you should judge for yourself, the pros and cons are mentioned at: +# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control +# +# Pods/ +# +# Add this line if you want to avoid checking in source code from the Xcode workspace +# *.xcworkspace + +# Carthage +# +# Add this line if you want to avoid checking in source code from Carthage dependencies. +# Carthage/Checkouts +Carthage/Build/ + +# fastlane +# +# It is recommended to not store the screenshots in the git repo. +# Instead, use fastlane to re-generate the screenshots whenever they are needed. 
+# For more information about the recommended setup visit: +# https://docs.fastlane.tools/best-practices/source-control/#source-control +fastlane/report.xml +fastlane/Preview.html +fastlane/screenshots/**/*.png +fastlane/test_output + +# Ignore folder +ignore + +# .release +.release/ \ No newline at end of file diff --git a/libs/lume/README.md b/libs/lume/README.md index a0da2f49..5d303d2c 100644 --- a/libs/lume/README.md +++ b/libs/lume/README.md @@ -52,6 +52,7 @@ Commands: lume stop Stop a running VM lume delete Delete a VM lume pull Pull a macOS image from container registry + lume push Push a VM image to a container registry lume clone Clone an existing VM lume config Get or set lume configuration lume images List available macOS images in local cache @@ -99,6 +100,16 @@ Command Options: --organization Organization to pull from (default: trycua) --storage VM storage location to use + push: + --additional-tags Additional tags to push the same image to + --registry Container registry URL (default: ghcr.io) + --organization Organization/user to push to (default: trycua) + --storage VM storage location to use + --chunk-size-mb Chunk size for disk image upload in MB (default: 512) + --verbose Enable verbose logging + --dry-run Prepare files and show plan without uploading + --reassemble Verify integrity by reassembling chunks (requires --dry-run) + get: -f, --format Output format (json|text) --storage VM storage location to use @@ -141,18 +152,21 @@ You can also download the `lume.pkg.tar.gz` archive from the [latest release](ht ## Prebuilt Images Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages). + +**Important Note (v0.2.0+):** Images are being re-uploaded with sparse file system optimizations enabled, resulting in significantly lower actual disk usage. Older images (without the `-sparse` suffix) are now **deprecated**. The last version of `lume` fully supporting the non-sparse images was `v0.1.x`. Starting from `lume v0.2.0`, please use the images with the `-sparse` suffix. + These images come with an SSH server pre-configured and auto-login enabled. For the security of your VM, change the default password `lume` immediately after your first login. -| Image | Tag | Description | Size | +| Image | Tag | Description | Logical Size | |-------|------------|-------------|------| -| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB | -| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB | -| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB | -| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | +| `macos-sequoia-vanilla-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB | +| `macos-sequoia-xcode-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB | +| `macos-sequoia-cua-sparse` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB | +| `ubuntu-noble-vanilla-sparse` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | -For additional disk space, resize the VM disk after pulling the image using the `lume set --disk-size ` command. 
+For additional disk space, resize the VM disk after pulling the image using the `lume set --disk-size ` command. Note that the actual disk space used by sparse images will be much lower than the logical size listed. ## Local API Server diff --git a/libs/lume/docs/API-Reference.md b/libs/lume/docs/API-Reference.md index 67ed42a4..7ab9459b 100644 --- a/libs/lume/docs/API-Reference.md +++ b/libs/lume/docs/API-Reference.md @@ -193,6 +193,42 @@ curl --connect-timeout 6000 \ ``` +
+Push Image (Async) - POST /vms/push + +```bash +# Push VM 'my-local-vm' to 'my-org/my-image:latest' and 'my-org/my-image:v1' +curl --connect-timeout 6000 \ + --max-time 5000 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "name": "my-local-vm", + "imageName": "my-image", + "tags": ["latest", "v1"], + "organization": "my-org", + "registry": "ghcr.io", + "chunkSizeMb": 512, + "storage": null + }' \ + http://localhost:3000/lume/vms/push +``` + +**Response (202 Accepted):** + +```json +{ + "message": "Push initiated in background", + "name": "my-local-vm", + "imageName": "my-image", + "tags": [ + "latest", + "v1" + ] +} +``` +
+
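+For reference, the same push can be triggered from the CLI instead of the API. A sketch assembled from the `lume push` options documented in the README (the flag list there is the source of truth):
+
+```bash
+# Push 'my-local-vm' as my-org/my-image:latest and :v1 from the command line;
+# --dry-run prepares and verifies files without uploading
+lume push my-local-vm my-image:latest --additional-tags v1 \
+  --organization my-org --chunk-size-mb 512 --dry-run --reassemble
+```
+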
Clone VM - POST /vms/:name/clone

diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh
index 33874122..d279be66 100755
--- a/libs/lume/scripts/ghcr/push-ghcr.sh
+++ b/libs/lume/scripts/ghcr/push-ghcr.sh
@@ -8,9 +8,11 @@ organization=""
 folder_path=""
 image_name=""
 image_versions=""
-chunk_size="500M" # Default chunk size for splitting large files
+chunk_size="512M" # Default chunk size for splitting large files
+dry_run=false # Default: actually push to registry
+reassemble=false # Default: don't reassemble in dry-run mode
 # Define the OCI media type for the compressed disk layer
-oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format
+oci_layer_media_type="application/octet-stream+lz4" # LZ4 compression format
 
 # Parse the command line arguments
 while [[ $# -gt 0 ]]; do
@@ -35,6 +37,15 @@
         chunk_size="$2"
         shift 2
         ;;
+    --dry-run)
+        dry_run=true
+        shift 1
+        ;;
+    --reassemble)
+        reassemble=true
+        dry_run=true # Reassemble implies dry-run
+        shift 1
+        ;;
     --help)
         echo "Usage: $0 [options]"
         echo "Options:"
@@ -42,7 +53,9 @@
         echo "  --folder-path : Path to the folder to upload (required)"
         echo "  --image-name : Name of the image to publish (required)"
         echo "  --image-versions : Comma separated list of versions of the image to publish (required)"
-        echo "  --chunk-size : Size of chunks for large files (e.g., 500M, default: 500M)"
+        echo "  --chunk-size : Size of chunks for large files (e.g., 512M, default: 512M)"
+        echo "  --dry-run : Prepare files but don't upload to registry"
+        echo "  --reassemble : In dry-run mode, also reassemble chunks to verify integrity"
         echo "Note: The script will automatically resume from the last attempt if available"
         exit 0
         ;;
@@ -54,15 +67,23 @@
 done
 
 # Ensure required arguments
-if [[ -z "$organization" || -z "$folder_path" || -z "$image_name" || -z "$image_versions" ]]; then
-    echo "Error: Missing required arguments. Use --help for usage."
+if [[ -z "$folder_path" ]]; then
+    echo "Error: Missing required folder-path argument. Use --help for usage."
     exit 1
 fi
 
-# Check if the GITHUB_TOKEN variable is set
-if [[ -z "$GITHUB_TOKEN" ]]; then
-    echo "Error: GITHUB_TOKEN is not set."
-    exit 1
+# Only check organization and other push parameters if not in dry-run mode
+if [[ "$dry_run" = false ]]; then
+    if [[ -z "$organization" || -z "$image_name" || -z "$image_versions" ]]; then
+        echo "Error: Missing required arguments for push. Use --help for usage."
+        exit 1
+    fi
+
+    # Check if the GITHUB_TOKEN variable is set
+    if [[ -z "$GITHUB_TOKEN" ]]; then
+        echo "Error: GITHUB_TOKEN is not set."
+        exit 1
+    fi
 fi
 
 # Ensure the folder exists
@@ -72,7 +93,7 @@
 if [[ ! -d "$folder_path" ]]; then
     exit 1
 fi
 
 # Check and install required tools
-for tool in "oras" "split" "pv" "jq"; do
+for tool in "oras" "split" "pv" "jq" "lz4"; do
     if ! command -v "$tool" &> /dev/null; then
         echo "$tool is not installed. Installing using Homebrew..."
         if ! command -v brew &> /dev/null; then
@@ -83,19 +104,14 @@
     fi
 done
 
-# Check if Apple Archive is available
-if !
command -v compression_tool &> /dev/null; then - echo "Error: Apple Archive (compression_tool) is required but not found" - echo "This script requires macOS with Apple Archive support" - exit 1 +echo "LZ4 detected - will use for efficient compression and decompression" +compressed_ext=".lz4" + +# Authenticate with GitHub Container Registry if not in dry-run mode +if [[ "$dry_run" = false ]]; then + echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin fi -echo "Apple Archive detected - will use for optimal sparse file handling" -compressed_ext=".aa" - -# Authenticate with GitHub Container Registry -echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin - # Use the source folder path as the working directory and get its absolute path work_dir=$(cd "$folder_path" && pwd) echo "Working directory: $work_dir" @@ -115,7 +131,7 @@ is_valid_cache() { local cache_dir="$1" # Check if it contains the necessary files [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ - [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1 + [ -f "$cache_dir/disk.img.lz4" ] || ls "$cache_dir"/disk.img.part.* 1>/dev/null 2>&1 } # Always try to find and use an existing cache @@ -123,9 +139,9 @@ existing_cache=$(find_latest_cache) if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then cache_dir="$existing_cache" - # Check if the cache contains old gzip format - if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then - echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format." + # Check if the cache contains old compressed format + if [ -f "$cache_dir/disk.img.gz" ] || [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.*.part.* 1>/dev/null 2>&1; then + echo "Error: Found legacy compressed format in cache. This script uses improved LZ4 format." echo "Please delete the cache directory and start fresh: $cache_dir" exit 1 fi @@ -162,20 +178,24 @@ mark_version_pushed() { touch "$cache_dir/.pushed_$version" } +# Function to calculate sha256 hash +calculate_sha256() { + local file="$1" + if command -v shasum &> /dev/null; then + shasum -a 256 "$file" | awk '{print "sha256:" $1}' + else + echo "sha256:$(openssl dgst -sha256 -binary "$file" | xxd -p | tr -d '\n')" + fi +} + # Copy config.json if it exists and not already in cache config_json_source="$folder_path/config.json" config_json_dest="$cache_dir/config.json" if [ -f "$config_json_source" ]; then if [ ! -f "$config_json_dest" ]; then echo "Copying config.json..." 
- # Add the uncompressed disk size annotation if disk.img exists and jq is available - if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then - echo "Adding uncompressed disk size annotation: $original_disk_size bytes" - jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \ - (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails - else - cp "$config_json_source" "$config_json_dest" - fi + # Copy config.json as is - we'll add annotations later + cp "$config_json_source" "$config_json_dest" fi fi if [ -f "$config_json_dest" ]; then @@ -207,115 +227,363 @@ if [ -f "$disk_img_orig" ]; then echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" echo " Sparseness ratio: ${sparseness_ratio}:1" - # Check if we already have compressed files in the cache - compressed_disk_img="disk.img${compressed_ext}" - already_compressed=false - - if [ -f "$cache_dir/$compressed_disk_img" ]; then - already_compressed=true - echo "Using existing compressed file from cache: $compressed_disk_img" - elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then - already_compressed=true - echo "Using existing compressed parts from cache" + # If we have config.json, update it with the uncompressed disk size annotation + if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then + echo "Adding uncompressed disk size annotation: $original_disk_size bytes" + jq --arg size "$original_disk_size" '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_dest" > "$config_json_dest.tmp" + mv "$config_json_dest.tmp" "$config_json_dest" fi - - # Only compress if not already compressed in cache - if [ "$already_compressed" = false ]; then - # Check for free disk space before compression - avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}') - avail_space_bytes=$((avail_space * 1024)) - # Assume compressed size is roughly 30% of real size as a safe estimate - estimated_compressed=$((real_size_bytes * 30 / 100)) - - if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then - echo "WARNING: Possibly insufficient disk space for compression!" - echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB" - read -p "Continue anyway? (y/n) " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "Exiting. Free up some space and try again." - exit 1 - fi - fi - # --- Compression Step --- - echo "Compressing $disk_img_orig with Apple Archive..." - - # Apple Archive compression - echo "Starting compression with Apple Archive (showing output file growth)..." - compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse & - COMP_PID=$! - - sleep 1 # Give compression a moment to start - - # Display progress based on output file growth - while kill -0 $COMP_PID 2>/dev/null; do - if [ -f "$compressed_disk_img" ]; then - current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0) - percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc) - echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r" - else - echo -ne "Preparing compression...\r" - fi - sleep 2 - done - - wait $COMP_PID - echo -e "\nCompression complete!" 
- - compressed_size=$(stat -f%z "$compressed_disk_img") - echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)" - echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%" - # --- End Compression Step --- - - # Check if splitting is needed based on *compressed* size - if [ $compressed_size -gt 524288000 ]; then # 500MB threshold - echo "Splitting compressed file into chunks of $chunk_size..." - pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part." - rm -f "$compressed_disk_img" # Remove the unsplit compressed file - # Verify that parts were created - echo "Verifying split parts..." - ls -la "$cache_dir"/disk.img${compressed_ext}.part.* - fi + # Create a temporary directory for disk processing + tmp_dir="$cache_dir/tmp_processing" + mkdir -p "$tmp_dir" + + # Split the disk image into chunks first (before compression) + split_parts_dir="$tmp_dir/split_parts" + mkdir -p "$split_parts_dir" + + # Check if we already have split parts + if [ -z "$(ls -A "$split_parts_dir" 2>/dev/null)" ]; then + echo "Splitting disk image into chunks of $chunk_size..." + cd "$split_parts_dir" + pv "$disk_img_orig" | split -b "$chunk_size" - "chunk." + cd "$cache_dir" else - echo "Using existing compressed/split files from cache" + echo "Using existing split chunks from previous run" fi - - # --- Adjust part processing --- - echo "Looking for compressed files in $cache_dir..." - # List all files in the cache directory for debugging - ls -la "$cache_dir" + # Process each chunk (compress, calculate digest, etc.) + compressed_parts_dir="$tmp_dir/compressed_parts" + mkdir -p "$compressed_parts_dir" - if [ -f "$cache_dir/$compressed_disk_img" ]; then - echo "Found single compressed file: $compressed_disk_img" - # Add the single compressed file to the list - files+=("$compressed_disk_img:${oci_layer_media_type}") - else - # Look for split parts - part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo "")) - if [ ${#part_files[@]} -gt 0 ]; then - echo "Found ${#part_files[@]} split parts" - parts_files=() - part_num=0 + # Store layer information in an array + layers=() + part_num=0 + total_parts=$(ls "$split_parts_dir"/chunk.* | wc -l) + + for chunk_file in "$split_parts_dir"/chunk.*; do + part_basename=$(basename "$chunk_file") + part_num=$((part_num + 1)) + compressed_file="$compressed_parts_dir/${part_basename}${compressed_ext}" + + if [ ! 
-f "$compressed_file" ]; then
+            echo "Compressing chunk $part_num of $total_parts: $part_basename"
-
+            # Calculate uncompressed content digest before compression
+            uncompressed_digest=$(calculate_sha256 "$chunk_file")
+
+            # Get uncompressed size
+            uncompressed_size=$(stat -f%z "$chunk_file")
+
+            # Compress the chunk with LZ4
+            lz4 -9 "$chunk_file" "$compressed_file"
+
+            # Get compressed size
+            compressed_size=$(stat -f%z "$compressed_file")
+
+            echo "Chunk $part_num: Original size: $(du -h "$chunk_file" | cut -f1), Compressed: $(du -h "$compressed_file" | cut -f1)"
+        else
+            echo "Using existing compressed chunk $part_num of $total_parts"
+
+            # Need to calculate these values for existing files
+            uncompressed_digest=$(calculate_sha256 "$chunk_file")
+            uncompressed_size=$(stat -f%z "$chunk_file")
+            compressed_size=$(stat -f%z "$compressed_file")
+        fi
+
+        # Store layer information
+        layer_info="$compressed_file:${oci_layer_media_type};uncompressed_size=$uncompressed_size;uncompressed_digest=$uncompressed_digest;part.number=$part_num;part.total=$total_parts"
+        layers+=("$layer_info")
+    done
+
+    # Generate the files array for ORAS push
+    for layer_info in "${layers[@]}"; do
+        files+=("$layer_info")
+    done
+
+    # --- Reassembly in dry-run mode ---
+    if [[ "$reassemble" = true ]]; then
+        echo "=== REASSEMBLY MODE ==="
+        echo "Reassembling chunks to verify integrity..."
+
+        # Create a directory for reassembly
+        reassembly_dir="$cache_dir/reassembly"
+        mkdir -p "$reassembly_dir"
+
+        # Prepare the reassembled file - create a properly sized sparse file first
+        reassembled_file="$reassembly_dir/reassembled_disk.img"
+        if [ -f "$reassembled_file" ]; then
+            echo "Removing previous reassembled file..."
+            rm -f "$reassembled_file"
+        fi
+
+        # Get the original disk size from config annotation or directly from image
+        if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then
+            config_size=$(jq -r '.annotations."com.trycua.lume.disk.uncompressed_size" // empty' "$config_json_dest")
+            if [ -n "$config_size" ]; then
+                original_disk_size=$config_size
+                echo "Using uncompressed size from config: $original_disk_size bytes"
+            fi
+        fi
+
+        # Create a sparse file of the exact original size
+        echo "Pre-allocating sparse file of $(du -h "$disk_img_orig" | cut -f1)..."
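+        # A zero-count dd with a large seek extends the file to the target size
+        # without allocating data blocks, so the image stays sparse on disk.
+        # Standalone sketch of the same trick (hypothetical 10 GiB target):
+        #   dd if=/dev/zero of=sparse.img bs=1 count=0 seek=10737418240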
+ dd if=/dev/zero of="$reassembled_file" bs=1 count=0 seek=$original_disk_size + + # Make sure filesystem recognizes this as a sparse file + if [[ "$OSTYPE" == "darwin"* ]]; then + # On macOS, we can use a better sparse file creation method if mkfile is available + if command -v mkfile &> /dev/null; then + rm -f "$reassembled_file" + mkfile -n ${original_disk_size}b "$reassembled_file" + echo "Created sparse file using mkfile" + fi + else + # On Linux systems, ensure sparseness with truncate if available + if command -v truncate &> /dev/null; then + rm -f "$reassembled_file" + truncate -s $original_disk_size "$reassembled_file" + echo "Created sparse file using truncate" + fi + fi + + # Create an offset tracker to keep track of where each chunk should go + current_offset=0 + + # Decompress each chunk and write it at the correct offset + for ((i=1; i<=total_parts; i++)); do + # Find the chunk file for part number i + chunk_pattern="" + chunk_uncompressed_size="" + + for layer_info in "${layers[@]}"; do + if [[ "$layer_info" == *";part.number=$i;"* ]]; then + chunk_pattern="${layer_info%%:*}" + # Extract the uncompressed size from metadata + if [[ "$layer_info" =~ uncompressed_size=([0-9]+) ]]; then + chunk_uncompressed_size="${BASH_REMATCH[1]}" + fi + break + fi done - files+=("${parts_files[@]}") + if [ -z "$chunk_pattern" ]; then + echo "Error: Could not find chunk for part $i" + exit 1 + fi + + echo "Processing part $i/$total_parts: $(basename "$chunk_pattern") at offset $current_offset..." + + # Create temp decompressed file + temp_decompressed="$reassembly_dir/temp_part_$i" + lz4 -d -f "$chunk_pattern" "$temp_decompressed" || { + echo "Error decompressing part $i" + exit 1 + } + + # Check if this chunk is all zeros (sparse data) + # Only check the first 1MB for efficiency + is_likely_sparse=false + if command -v hexdump &> /dev/null; then + # Use hexdump to check a sample of the file for non-zero content + sparse_check=$(hexdump -n 1048576 -v "$temp_decompressed" | grep -v "0000 0000 0000 0000 0000 0000 0000 0000" | head -n 1) + if [ -z "$sparse_check" ]; then + echo "Chunk appears to be all zeros (sparse data)" + is_likely_sparse=true + fi + fi + + # Use dd to write the chunk at the correct offset with sparse file handling + if [ "$is_likely_sparse" = true ]; then + # For sparse chunks, we don't need to write anything - leave as zeros + echo "Skipping write for all-zero chunk (preserving sparseness)" + elif [[ "$OSTYPE" == "darwin"* ]]; then + # macOS dd doesn't support conv=sparse, use standard approach + dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { + echo "Error writing part $i at offset $current_offset" + exit 1 + } + else + # On Linux, use conv=sparse to preserve sparseness during the write + dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=sparse,notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { + echo "Error writing part $i at offset $current_offset" + exit 1 + } + fi + + # Clean up the temporary file + rm -f "$temp_decompressed" + + # Update the offset for the next chunk + current_offset=$((current_offset + chunk_uncompressed_size)) + done + + # After all chunks are processed, ensure sparseness is preserved + if command -v cp &> /dev/null && [[ "$OSTYPE" == "darwin"* ]]; then + echo "Copying disk image to maintain sparseness..." 
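+            # On APFS, `cp -c` clones via clonefile(2): the destination shares
+            # data blocks with the source, so sparse regions are preserved and
+            # the copy costs no additional space until either file is modified.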
+ final_sparse_file="$reassembly_dir/final_disk.img" + rm -f "$final_sparse_file" 2>/dev/null + + # On macOS, use cp with the clone flag to preserve sparseness + cp -c "$reassembled_file" "$final_sparse_file" + + # Use the sparse-optimized file for verification + echo "Using sparse-optimized copy for verification" + mv "$final_sparse_file" "$reassembled_file" + sync + elif command -v cp &> /dev/null && command -v file &> /dev/null; then + # For Linux systems + echo "Optimizing file sparseness..." + final_sparse_file="$reassembly_dir/final_disk.img" + rm -f "$final_sparse_file" 2>/dev/null + + # Use cp --sparse=always on Linux + cp --sparse=always "$reassembled_file" "$final_sparse_file" + + # Use the sparse-optimized file for verification + echo "Using sparse-optimized copy for verification" + mv "$final_sparse_file" "$reassembled_file" + sync + fi + + # Make sure to sync to disk + sync + + # Calculate digests for comparison + echo "Verifying reassembled file..." + original_digest=$(calculate_sha256 "$disk_img_orig") + reassembled_digest=$(calculate_sha256 "$reassembled_file") + + # Compare the original and reassembled file sizes + original_size=$(stat -f%z "$disk_img_orig") + reassembled_size=$(stat -f%z "$reassembled_file") + + echo "Results:" + echo " Original size: $(du -h "$disk_img_orig" | cut -f1) ($original_size bytes)" + echo " Reassembled size: $(du -h "$reassembled_file" | cut -f1) ($reassembled_size bytes)" + echo " Original digest: ${original_digest#sha256:}" + echo " Reassembled digest: ${reassembled_digest#sha256:}" + + # Check if the disk is sparse + original_apparent_size=$(du -h "$disk_img_orig" | cut -f1) + original_actual_size=$(du -sh "$disk_img_orig" | cut -f1) + reassembled_apparent_size=$(du -h "$reassembled_file" | cut -f1) + reassembled_actual_size=$(du -sh "$reassembled_file" | cut -f1) + + echo " Original: Apparent size: $original_apparent_size, Actual disk usage: $original_actual_size" + echo " Reassembled: Apparent size: $reassembled_apparent_size, Actual disk usage: $reassembled_actual_size" + + if [ "$original_digest" = "$reassembled_digest" ]; then + echo "✅ VERIFICATION SUCCESSFUL: Files are identical" else - echo "ERROR: No compressed files found in cache directory: $cache_dir" - echo "Contents of cache directory:" - find "$cache_dir" -type f | sort - exit 1 + echo "❌ VERIFICATION FAILED: Files differ" + if [ "$original_size" != "$reassembled_size" ]; then + echo " Size mismatch: Original $original_size bytes, Reassembled $reassembled_size bytes" + fi + + # Try to identify where they differ + echo "Attempting to identify differences..." + if command -v cmp &> /dev/null; then + cmp_output=$(cmp -l "$disk_img_orig" "$reassembled_file" 2>&1 | head -5) + if [[ "$cmp_output" == *"differ"* ]]; then + echo " First few differences:" + echo "$cmp_output" + fi + fi + + # Check if the virtual machine will still boot despite differences + echo "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" + echo " may be handled differently between the original and reassembled files." 
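+        # Worked example with hypothetical numbers: original du -k = 41943040 KB,
+        # reassembled = 20971520 KB -> (20971520 - 41943040) * 100 / 41943040 = -50%,
+        # which would trip the ">40% difference" warning below.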
+ + # Calculate a percentage comparison of used blocks + # This helps determine if the sparse issues are severe or minor + original_used_kb=$(du -k "$disk_img_orig" | cut -f1) + reassembled_used_kb=$(du -k "$reassembled_file" | cut -f1) + + # Calculate percentage difference in used space + if [ "$original_used_kb" -ne 0 ]; then + diff_percentage=$(echo "scale=2; ($reassembled_used_kb - $original_used_kb) * 100 / $original_used_kb" | bc) + echo " Disk usage difference: $diff_percentage% ($reassembled_used_kb KB vs $original_used_kb KB)" + + # If reassembled is much smaller, this likely indicates sparse regions weren't preserved + if (( $(echo "$diff_percentage < -40" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." + echo " This indicates sparse regions weren't properly preserved and may affect VM functionality." + echo " The VM might boot but could be missing applications or data." + elif (( $(echo "$diff_percentage < -10" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." + echo " Some sparse regions may not be properly preserved but VM might still function correctly." + elif (( $(echo "$diff_percentage > 10" | bc -l) )); then + echo " ⚠️ WARNING: Reassembled disk uses more space (>10% difference)." + echo " This is unusual and may indicate improper sparse file handling." + else + echo " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." + fi + fi + fi + + echo "Reassembled file is available at: $reassembled_file" + + # If verification failed and difference is significant, try a direct copy as fallback + if [ "$original_digest" != "$reassembled_digest" ] && [ -n "$diff_percentage" ] && (( $(echo "$diff_percentage < -20" | bc -l) )); then + echo + echo "===== ATTEMPTING RECOVERY ACTION =====" + echo "Since verification failed with significant disk usage difference," + echo "trying direct copy of disk image as a fallback method." + echo + + fallback_file="$reassembly_dir/fallback_disk.img" + echo "Creating fallback disk image at: $fallback_file" + + # Use rsync with sparse option if available + if command -v rsync &> /dev/null; then + echo "Using rsync with sparse option for direct copy..." + rsync -aS --progress "$disk_img_orig" "$fallback_file" + else + # Direct cp with sparse option if available + if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Using cp -c (clone) for direct copy..." + cp -c "$disk_img_orig" "$fallback_file" + else + echo "Using cp --sparse=always for direct copy..." + cp --sparse=always "$disk_img_orig" "$fallback_file" + fi + fi + + echo "Direct copy completed. You may want to try using this fallback disk image" + echo "instead if the reassembled one has issues: $fallback_file" fi fi - + # --- Push Logic --- + if [[ "$dry_run" = true ]]; then + echo "=== DRY RUN MODE ===" + echo "The following files would be pushed to the registry:" + for file_info in "${files[@]}"; do + file_path="${file_info%%:*}" + file_metadata="${file_info#*:}" + file_size=$(du -h "$file_path" | cut -f1) + echo " - $file_path ($file_size) with metadata: $file_metadata" + done + + if [[ -n "$image_versions" ]]; then + echo "Would push to the following versions:" + IFS=',' read -ra versions <<< "$image_versions" + for version in "${versions[@]}"; do + version=$(echo "$version" | xargs) + if [[ -z "$version" ]]; then continue; fi + echo " - ghcr.io/$organization/$image_name:$version" + done + else + echo "No versions specified for dry run. Processing completed successfully." 
+    fi
+
+    echo "All processing tasks completed. No actual push performed."
+    echo "Cache directory: $cache_dir"
+    exit 0
+  fi
+
+  # Regular push logic (non-dry-run)
   push_pids=()
   IFS=',' read -ra versions <<< "$image_versions"
   for version in "${versions[@]}"; do
@@ -368,6 +636,25 @@
 else
     echo "Warning: $disk_img_orig not found."
+
+    # If in dry run mode, just show what would happen
+    if [[ "$dry_run" = true ]]; then
+        echo "=== DRY RUN MODE ==="
+        if [ ${#files[@]} -gt 0 ]; then
+            echo "The following non-disk files would be pushed:"
+            for file_info in "${files[@]}"; do
+                file_path="${file_info%%:*}"
+                file_metadata="${file_info#*:}"
+                file_size=$(du -h "$file_path" | cut -f1)
+                echo " - $file_path ($file_size) with metadata: $file_metadata"
+            done
+        else
+            echo "No files found to push."
+        fi
+        echo "All processing tasks completed. No actual push performed."
+        exit 0
+    fi
+
     # Push only config/nvram if they exist
     if [ ${#files[@]} -gt 0 ]; then
         echo "Pushing non-disk files..."
@@ -427,6 +714,11 @@ else
     fi
 fi
 
+# Skip final status check in dry-run mode
+if [[ "$dry_run" = true ]]; then
+    exit 0
+fi
+
 # Determine final status based on the success check *before* potential cleanup
 echo # Add a newline for better readability
 if [ "$all_versions_pushed" = true ]; then
diff --git a/libs/lume/src/Commands/Push.swift b/libs/lume/src/Commands/Push.swift
new file mode 100644
index 00000000..df784b39
--- /dev/null
+++ b/libs/lume/src/Commands/Push.swift
@@ -0,0 +1,74 @@
+import ArgumentParser
+import Foundation
+
+struct Push: AsyncParsableCommand {
+    static let configuration = CommandConfiguration(
+        abstract: "Push a macOS VM to GitHub Container Registry"
+    )
+
+    @Argument(help: "Name of the VM to push")
+    var name: String
+
+    @Argument(help: "Image tag to push (format: name:tag)")
+    var image: String
+
+    @Option(parsing: .upToNextOption, help: "Additional tags to push the same image to")
+    var additionalTags: [String] = []
+
+    @Option(help: "GitHub Container Registry to push to. Defaults to ghcr.io")
+    var registry: String = "ghcr.io"
+
+    @Option(help: "Organization to push to. Defaults to trycua")
+    var organization: String = "trycua"
+
+    @Option(name: .customLong("storage"), help: "VM storage location to use")
+    var storage: String?
+
+    @Option(help: "Chunk size for large files in MB. Defaults to 512.")
+    var chunkSizeMb: Int = 512
+
+    @Flag(name: .long, help: "Enable verbose logging")
+    var verbose: Bool = false
+
+    @Flag(name: .long, help: "Prepare files without uploading to registry")
+    var dryRun: Bool = false
+
+    @Flag(name: .long, help: "In dry-run mode, also reassemble chunks to verify integrity")
+    var reassemble: Bool = false
+
+    init() {}
+
+    @MainActor
+    func run() async throws {
+        let controller = LumeController()
+
+        // Parse primary image name and tag
+        let components = image.split(separator: ":")
+        guard components.count == 2, let primaryTag = components.last else {
+            throw ValidationError("Invalid primary image format. Expected format: name:tag")
+        }
+        let imageName = String(components.first!)
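+        // e.g. `lume push my-vm my-image:latest --additional-tags v1` parses to
+        // imageName == "my-image" and primaryTag == "latest" here, with
+        // additionalTags == ["v1"] merged in below ("my-vm" is a hypothetical VM name).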
+
+        // Combine primary and additional tags, ensuring uniqueness
+        var allTags: Swift.Set<String> = []
+        allTags.insert(String(primaryTag))
+        allTags.formUnion(additionalTags)
+
+        guard !allTags.isEmpty else {
+            throw ValidationError("At least one tag must be provided.")
+        }
+
+        try await controller.pushImage(
+            name: name,
+            imageName: imageName, // Pass base image name
+            tags: Array(allTags), // Pass array of all unique tags
+            registry: registry,
+            organization: organization,
+            storage: storage,
+            chunkSizeMb: chunkSizeMb,
+            verbose: verbose,
+            dryRun: dryRun,
+            reassemble: reassemble
+        )
+    }
+}
\ No newline at end of file
diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
index ac7453ca..3ba7d543 100644
--- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
+++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
@@ -2,8 +2,56 @@ import ArgumentParser
 import Darwin
 import Foundation
 import Swift
+import CommonCrypto
+import Compression // Add this import
+
+// Extension to calculate SHA256 hash
+extension Data {
+    func sha256String() -> String {
+        let hash = self.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> [UInt8] in
+            var hash = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
+            CC_SHA256(bytes.baseAddress, CC_LONG(self.count), &hash)
+            return hash
+        }
+        return hash.map { String(format: "%02x", $0) }.joined()
+    }
+}
+
+// Push-related errors
+enum PushError: Error {
+    case uploadInitiationFailed
+    case blobUploadFailed
+    case manifestPushFailed
+    case authenticationFailed
+    case missingToken
+    case invalidURL
+    case lz4NotFound // Added error case
+}
+
+struct ChunkMetadata: Codable {
+    let uncompressedDigest: String
+    let uncompressedSize: UInt64
+    let compressedDigest: String
+    let compressedSize: Int
+}
+
+struct OCIManifestLayer {
+    let mediaType: String
+    let size: Int
+    let digest: String
+    let uncompressedSize: UInt64?
+    let uncompressedContentDigest: String?
+
+    init(mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, uncompressedContentDigest: String? = nil) {
+        self.mediaType = mediaType
+        self.size = size
+        self.digest = digest
+        self.uncompressedSize = uncompressedSize
+        self.uncompressedContentDigest = uncompressedContentDigest
+    }
+}
+
 // Define struct to decode relevant parts of config.json
 struct OCIConfig: Codable {
     struct Annotations: Codable {
         let uncompressedSize: String?
// Use optional String @@ -274,6 +322,43 @@ struct DownloadStats { } } +// Renamed struct +struct UploadStats { + let totalBytes: Int64 + let uploadedBytes: Int64 // Renamed + let elapsedTime: TimeInterval + let averageSpeed: Double + let peakSpeed: Double + + func formattedSummary() -> String { + let bytesStr = ByteCountFormatter.string(fromByteCount: uploadedBytes, countStyle: .file) + let avgSpeedStr = formatSpeed(averageSpeed) + let peakSpeedStr = formatSpeed(peakSpeed) + let timeStr = formatTime(elapsedTime) + return """ + Upload Statistics: + - Total uploaded: \(bytesStr) + - Elapsed time: \(timeStr) + - Average speed: \(avgSpeedStr) + - Peak speed: \(peakSpeedStr) + """ + } + private func formatSpeed(_ bytesPerSecond: Double) -> String { + let formatter = ByteCountFormatter() + formatter.countStyle = .file + let bytesStr = formatter.string(fromByteCount: Int64(bytesPerSecond)) + return "\(bytesStr)/s" + } + private func formatTime(_ seconds: TimeInterval) -> String { + let hours = Int(seconds) / 3600 + let minutes = (Int(seconds) % 3600) / 60 + let secs = Int(seconds) % 60 + if hours > 0 { return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) } + else if minutes > 0 { return String(format: "%d minutes, %d seconds", minutes, secs) } + else { return String(format: "%d seconds", secs) } + } +} + actor TaskCounter { private var count: Int = 0 @@ -285,12 +370,17 @@ actor TaskCounter { class ImageContainerRegistry: @unchecked Sendable { private let registry: String private let organization: String - private let progress = ProgressTracker() + private let downloadProgress = ProgressTracker() // Renamed for clarity + private let uploadProgress = UploadProgressTracker() // Added upload tracker private let cacheDirectory: URL private let downloadLock = NSLock() private var activeDownloads: [String] = [] private let cachingEnabled: Bool + // Constants for zero-skipping write logic + private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros + private static let zeroChunk = Data(count: holeGranularityBytes) + // Add the createProgressBar function here as a private method private func createProgressBar(progress: Double, width: Int = 30) -> String { let completedWidth = Int(progress * Double(width)) @@ -613,7 +703,7 @@ class ImageContainerRegistry: @unchecked Sendable { $0.mediaType != "application/vnd.oci.empty.v1+json" }.count let totalSize = manifest.layers.reduce(0) { $0 + Int64($1.size) } - await progress.setTotal(totalSize, files: totalFiles) + await downloadProgress.setTotal(totalSize, files: totalFiles) // Process layers with limited concurrency Logger.info("Processing Image layers") @@ -671,7 +761,7 @@ class ImageContainerRegistry: @unchecked Sendable { // Still need to account for progress group.addTask { [self] in await counter.increment() - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) await counter.decrement() return Int64(size) } @@ -686,7 +776,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: partURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) } else { // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { @@ -696,7 +786,7 @@ class ImageContainerRegistry: @unchecked Sendable { { try FileManager.default.copyItem( at: cachedLayer, to: partURL) - await 
progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) return Int64(size) } } @@ -711,7 +801,7 @@ class ImageContainerRegistry: @unchecked Sendable { token: token, to: partURL, maxRetries: 5, - progress: progress, + progress: downloadProgress, manifestId: manifestId ) @@ -758,7 +848,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) } else { // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { @@ -767,7 +857,7 @@ class ImageContainerRegistry: @unchecked Sendable { if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem( at: cachedLayer, to: outputURL) - await progress.addProgress(Int64(size)) + await downloadProgress.addProgress(Int64(size)) return Int64(size) } } @@ -782,7 +872,7 @@ class ImageContainerRegistry: @unchecked Sendable { token: token, to: outputURL, maxRetries: 5, - progress: progress, + progress: downloadProgress, manifestId: manifestId ) @@ -808,7 +898,7 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("") // New line after progress // Display download statistics - let stats = await progress.getDownloadStats() + let stats = await downloadProgress.getDownloadStats() Logger.info(stats.formattedSummary()) // Parse config.json to get uncompressed size *before* reassembly @@ -1866,17 +1956,52 @@ class ImageContainerRegistry: @unchecked Sendable { } private func getToken(repository: String) async throws -> String { - let url = URL(string: "https://\(self.registry)/token")! - .appending(queryItems: [ - URLQueryItem(name: "service", value: self.registry), - URLQueryItem(name: "scope", value: "repository:\(repository):pull"), - ]) + let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository + // Request both pull and push scope for uploads + let url = URL(string: "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)")! + + var request = URLRequest(url: url) + request.httpMethod = "GET" // Token endpoint uses GET + request.setValue("application/json", forHTTPHeaderField: "Accept") - let (data, _) = try await URLSession.shared.data(from: url) - let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] - guard let token = json?["token"] as? 
String else { - throw PullError.tokenFetchFailed + // *** Add Basic Authentication Header if credentials exist *** + let (username, password) = getCredentialsFromEnvironment() + if let username = username, let password = password, !username.isEmpty, !password.isEmpty { + let authString = "\(username):\(password)" + if let authData = authString.data(using: .utf8) { + let base64Auth = authData.base64EncodedString() + request.setValue("Basic \(base64Auth)", forHTTPHeaderField: "Authorization") + Logger.info("Adding Basic Authentication header to token request.") + } else { + Logger.error("Failed to encode credentials for Basic Auth.") + } + } else { + Logger.info("No credentials found in environment for token request.") + // Allow anonymous request for pull scope, but push scope likely requires auth } + // *** End Basic Auth addition *** + + let (data, response) = try await URLSession.shared.data(for: request) + + // Check response status code *before* parsing JSON + guard let httpResponse = response as? HTTPURLResponse else { + throw PushError.authenticationFailed // Or a more generic network error + } + + guard httpResponse.statusCode == 200 else { + // Log detailed error including status code and potentially response body + let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" + Logger.error("Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)") + // Throw specific error based on status if needed (e.g., 401 for unauthorized) + throw PushError.authenticationFailed + } + + let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] + guard let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] as? String else { + Logger.error("Token not found in registry response.") + throw PushError.missingToken + } + return token } @@ -2618,4 +2743,746 @@ class ImageContainerRegistry: @unchecked Sendable { return 0 } + + // New push method + public func push( + vmDirPath: String, + imageName: String, + tags: [String], + chunkSizeMb: Int = 512, + verbose: Bool = false, + dryRun: Bool = false, + reassemble: Bool = false + ) async throws { + Logger.info( + "Pushing VM to registry", + metadata: [ + "vm_path": vmDirPath, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", // Log all tags + "registry": registry, + "organization": organization, + "chunk_size": "\(chunkSizeMb)MB", + "dry_run": "\(dryRun)", + "reassemble": "\(reassemble)" + ]) + + // Remove tag parsing here, imageName is now passed directly + // let components = image.split(separator: ":") ... 
+ // let imageTag = String(tag) + + // Get authentication token only if not in dry-run mode + var token: String = "" + if !dryRun { + Logger.info("Getting registry authentication token") + token = try await getToken(repository: "\(self.organization)/\(imageName)") + } else { + Logger.info("Dry run mode: skipping authentication token request") + } + + // Create working directory inside the VM folder for caching/resuming + let workDir = URL(fileURLWithPath: vmDirPath).appendingPathComponent(".lume_push_cache") + try FileManager.default.createDirectory(at: workDir, withIntermediateDirectories: true) + Logger.info("Using push cache directory: \(workDir.path)") + + // Get VM files that need to be pushed using vmDirPath + let diskPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("disk.img") + let configPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("config.json") + let nvramPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("nvram.bin") + + var layers: [OCIManifestLayer] = [] + var uncompressedDiskSize: UInt64? = nil + + // Process config.json + let cachedConfigPath = workDir.appendingPathComponent("config.json") + var configDigest: String? = nil + var configSize: Int? = nil + + if FileManager.default.fileExists(atPath: cachedConfigPath.path) { + Logger.info("Using cached config.json") + do { + let configData = try Data(contentsOf: cachedConfigPath) + configDigest = "sha256:" + configData.sha256String() + configSize = configData.count + // Try to get uncompressed disk size from cached config + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { + uncompressedDiskSize = vmConfig.diskSize + Logger.info("Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") + } + } catch { + Logger.error("Failed to read cached config.json: \(error). Will re-process.") + // Force re-processing by leaving configDigest nil + } + } else if FileManager.default.fileExists(atPath: configPath.path) { + Logger.info("Processing config.json") + let configData = try Data(contentsOf: configPath) + configDigest = "sha256:" + configData.sha256String() + configSize = configData.count + try configData.write(to: cachedConfigPath) // Save to cache + // Try to get uncompressed disk size from original config + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { + uncompressedDiskSize = vmConfig.diskSize + Logger.info("Found disk size in config: \(uncompressedDiskSize ?? 0) bytes") + } + } + + if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded + if !dryRun { + // Upload only if not in dry-run mode and blob doesn't exist + if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + Logger.info("Uploading config.json blob") + let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload + digest = try await uploadBlobFromData( + repository: "\(self.organization)/\(imageName)", + data: configData, + token: token + ) + } else { + Logger.info("Config blob already exists on registry") + } + } + // Add config layer + layers.append(OCIManifestLayer( + mediaType: "application/vnd.oci.image.config.v1+json", + size: size, + digest: digest + )) + } + + // Process nvram.bin + let cachedNvramPath = workDir.appendingPathComponent("nvram.bin") + var nvramDigest: String? = nil + var nvramSize: Int? 
= nil
+
+        if FileManager.default.fileExists(atPath: cachedNvramPath.path) {
+            Logger.info("Using cached nvram.bin")
+            do {
+                let nvramData = try Data(contentsOf: cachedNvramPath)
+                nvramDigest = "sha256:" + nvramData.sha256String()
+                nvramSize = nvramData.count
+            } catch {
+                Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.")
+            }
+        } else if FileManager.default.fileExists(atPath: nvramPath.path) {
+            Logger.info("Processing nvram.bin")
+            let nvramData = try Data(contentsOf: nvramPath)
+            nvramDigest = "sha256:" + nvramData.sha256String()
+            nvramSize = nvramData.count
+            try nvramData.write(to: cachedNvramPath) // Save to cache
+        }
+
+        if var digest = nvramDigest, let size = nvramSize { // Use 'var'
+            if !dryRun {
+                // Upload only if not in dry-run mode and blob doesn't exist
+                if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) {
+                    Logger.info("Uploading nvram.bin blob")
+                    let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache
+                    digest = try await uploadBlobFromData(
+                        repository: "\(self.organization)/\(imageName)",
+                        data: nvramData,
+                        token: token
+                    )
+                } else {
+                    Logger.info("NVRAM blob already exists on registry")
+                }
+            }
+            // Add nvram layer
+            layers.append(OCIManifestLayer(
+                mediaType: "application/octet-stream",
+                size: size,
+                digest: digest
+            ))
+        }
+
+        // Process disk.img
+        if FileManager.default.fileExists(atPath: diskPath.path) {
+            let diskAttributes = try FileManager.default.attributesOfItem(atPath: diskPath.path)
+            let diskSize = diskAttributes[.size] as? UInt64 ?? 0
+            let actualDiskSize = uncompressedDiskSize ?? diskSize
+            Logger.info("Processing disk.img in chunks", metadata: ["disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB"])
+            let chunksDir = workDir.appendingPathComponent("disk.img.parts")
+            try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true)
+            let chunkSizeBytes = chunkSizeMb * 1024 * 1024
+            let totalChunks = Int((diskSize + UInt64(chunkSizeBytes) - 1) / UInt64(chunkSizeBytes))
+            Logger.info("Splitting disk into \(totalChunks) chunks")
+            let fileHandle = try FileHandle(forReadingFrom: diskPath)
+            defer { try? fileHandle.close() }
+            var pushedDiskLayers: [(index: Int, layer: OCIManifestLayer)] = []
+            var diskChunks: [(index: Int, path: URL, digest: String)] = []
+
+            try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { group in
+                let maxConcurrency = 4
+                for chunkIndex in 0..<totalChunks {
+                    if chunkIndex >= maxConcurrency { if let res = try await group.next() { pushedDiskLayers.append((res.0, res.1)); diskChunks.append((res.0, res.2, res.3)) } }
+                    group.addTask { [token, verbose, dryRun, organization, imageName] in
+                        let chunkIndex = chunkIndex
+                        let chunkPath = chunksDir.appendingPathComponent("chunk.\(chunkIndex)")
+                        let metadataPath = chunksDir.appendingPathComponent("chunk_metadata.\(chunkIndex).json")
+                        var layer: OCIManifestLayer? = nil
+                        var finalCompressedDigest: String?
= nil + if FileManager.default.fileExists(atPath: metadataPath.path), FileManager.default.fileExists(atPath: chunkPath.path) { + do { + let metadataData = try Data(contentsOf: metadataPath) + let metadata = try JSONDecoder().decode(ChunkMetadata.self, from: metadataData) + Logger.info("Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache") + finalCompressedDigest = metadata.compressedDigest + if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: metadata.compressedDigest, token: token)) { Logger.info("Uploading cached chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: metadata.compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry") } } + layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: metadata.compressedSize, digest: metadata.compressedDigest, uncompressedSize: metadata.uncompressedSize, uncompressedContentDigest: metadata.uncompressedDigest) + } catch { Logger.info("Failed to load cached metadata/chunk for index \(chunkIndex): \(error). Re-processing."); finalCompressedDigest = nil; layer = nil } + } + if layer == nil { + Logger.info("Processing chunk \(chunkIndex + 1)/\(totalChunks)") + let localFileHandle = try FileHandle(forReadingFrom: diskPath) + defer { try? localFileHandle.close() } + try localFileHandle.seek(toOffset: UInt64(chunkIndex * chunkSizeBytes)) + let chunkData = try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() + let uncompressedSize = UInt64(chunkData.count) + let uncompressedDigest = "sha256:" + chunkData.sha256String() + let compressedData = try (chunkData as NSData).compressed(using: .lz4) as Data + let compressedSize = compressedData.count + let compressedDigest = "sha256:" + compressedData.sha256String() + try compressedData.write(to: chunkPath) + let metadata = ChunkMetadata(uncompressedDigest: uncompressedDigest, uncompressedSize: uncompressedSize, compressedDigest: compressedDigest, compressedSize: compressedSize) + let metadataData = try JSONEncoder().encode(metadata) + try metadataData.write(to: metadataPath) + finalCompressedDigest = compressedDigest + if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: compressedDigest, token: token)) { Logger.info("Uploading processed chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)") } } + layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: compressedSize, digest: compressedDigest, uncompressedSize: uncompressedSize, uncompressedContentDigest: uncompressedDigest) + } + guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { throw PushError.blobUploadFailed } + if verbose { Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } + return (chunkIndex, finalLayer, chunkPath, finalDigest) + } + } + for try await (index, layer, path, digest) in group { pushedDiskLayers.append((index, layer)); diskChunks.append((index, path, digest)) } + } + layers.append(contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) + diskChunks.sort { $0.index < $1.index } + Logger.info("All disk chunks processed successfully") + + // --- Calculate Total Upload Size & Initialize Tracker --- + if 
!dryRun { + var totalUploadSizeBytes: Int64 = 0 + var totalUploadFiles: Int = 0 + // Add config size if it exists + if let size = configSize { + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 + } + // Add nvram size if it exists + if let size = nvramSize { + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 + } + // Add sizes of all compressed disk chunks + let allChunkSizes = diskChunks.compactMap { try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 ?? 0 } + totalUploadSizeBytes += allChunkSizes.reduce(0, +) + totalUploadFiles += totalChunks // Use totalChunks calculated earlier + + if totalUploadSizeBytes > 0 { + Logger.info("Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))") + await uploadProgress.setTotal(totalUploadSizeBytes, files: totalUploadFiles) + // Print initial progress bar + print("[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... ") + fflush(stdout) + } else { + Logger.info("No files marked for upload.") + } + } + // --- End Size Calculation & Init --- + + // Perform reassembly verification if requested in dry-run mode + if dryRun && reassemble { + Logger.info("=== REASSEMBLY MODE ===") + Logger.info("Reassembling chunks to verify integrity...") + let reassemblyDir = workDir.appendingPathComponent("reassembly") + try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) + let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") + Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") + if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { throw PushError.invalidURL } + let outputHandle = try FileHandle(forWritingTo: reassembledFile) + defer { try? outputHandle.close() } + try outputHandle.truncate(atOffset: actualDiskSize) + var currentOffset: UInt64 = 0 + for (index, cachedChunkPath, _) in diskChunks { + Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") + let decompressedBytesWritten = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset) + currentOffset += decompressedBytesWritten + } + Logger.info("Verifying reassembled file...") + let originalSize = diskSize + let originalDigest = calculateSHA256(filePath: diskPath.path) + let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) + let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 
0
+            let reassembledDigest = calculateSHA256(filePath: reassembledFile.path)
+            let originalActualSize = getActualDiskUsage(path: diskPath.path)
+            let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path)
+            Logger.info("Results:")
+            Logger.info("  Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)")
+            Logger.info("  Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)")
+            Logger.info("  Original digest: \(originalDigest)")
+            Logger.info("  Reassembled digest: \(reassembledDigest)")
+            Logger.info("  Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))")
+            Logger.info("  Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))")
+            if originalDigest == reassembledDigest {
+                Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical")
+            } else {
+                Logger.info("❌ VERIFICATION FAILED: Files differ")
+                if originalSize != reassembledSize {
+                    Logger.info("  Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes")
+                }
+                Logger.info("Attempting to identify differences...")
+                Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions")
+                Logger.info("      may be handled differently between the original and reassembled files.")
+                if originalActualSize > 0 {
+                    let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0
+                    Logger.info("  Disk usage difference: \(String(format: "%.2f", diffPercentage))%")
+                    if diffPercentage < -40 {
+                        Logger.info("  ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).")
+                        Logger.info("     This indicates sparse regions weren't properly preserved and may affect VM functionality.")
+                    } else if diffPercentage < -10 {
+                        Logger.info("  ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).")
+                        Logger.info("     Some sparse regions may not be properly preserved but VM might still function correctly.")
+                    } else if diffPercentage > 10 {
+                        Logger.info("  ⚠️ WARNING: Reassembled disk uses more space (>10% difference).")
+                        Logger.info("     This is unusual and may indicate improper sparse file handling.")
+                    } else {
+                        Logger.info("  ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.")
+                    }
+                }
+            }
+            Logger.info("Reassembled file is available at: \(reassembledFile.path)")
+            if originalDigest != reassembledDigest {
+                Logger.info("")
+                Logger.info("===== ATTEMPTING RECOVERY ACTION =====")
+                Logger.info("Since verification failed, trying direct copy as a fallback method.")
+                let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img")
+                Logger.info("Creating fallback disk image at: \(fallbackFile.path)")
+                let rsyncProcess = Process()
+                rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync")
+                rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path]
+                try rsyncProcess.run()
+                rsyncProcess.waitUntilExit()
+                if rsyncProcess.terminationStatus == 0 {
+                    Logger.info("Direct copy completed. You may want to try using this fallback disk image")
+                    Logger.info("instead if the reassembled one has issues: \(fallbackFile.path)")
+                } else {
+                    Logger.info("Direct copy failed. Attempting with cp -c command...")
+                    let cpProcess = Process()
+                    cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp")
+                    cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path]
+                    try cpProcess.run()
+                    cpProcess.waitUntilExit()
+                    if cpProcess.terminationStatus == 0 {
+                        Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)")
+                    } else {
+                        Logger.info("All recovery attempts failed.")
+                    }
+                }
+            }
+        }
+        }
+
+        // --- Manifest Creation & Push ---
+        let manifest = createManifest(
+            layers: layers,
+            configLayerIndex: layers.firstIndex(where: { $0.mediaType == "application/vnd.oci.image.config.v1+json" }),
+            uncompressedDiskSize: uncompressedDiskSize
+        )
+
+        // Push manifest only if not in dry-run mode
+        if !dryRun {
+            Logger.info("Pushing manifest(s)")
+            // Serialize the manifest dictionary to Data first
+            let manifestData = try JSONSerialization.data(withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys])
+
+            // Loop through tags to push the same manifest data
+            for tag in tags {
+                Logger.info("Pushing manifest for tag: \(tag)")
+                try await pushManifest(
+                    repository: "\(self.organization)/\(imageName)",
+                    tag: tag,
+                    manifest: manifestData,
+                    token: token
+                )
+            }
+        }
+
+        // Print final upload summary if not dry run
+        if !dryRun {
+            let stats = await uploadProgress.getUploadStats()
+            Logger.info("\n\(stats.formattedSummary())")  // Add newline for separation
+        }
+
+        // Clean up cache directory only on successful non-dry-run push
+    }
+
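+    // For reference, the dictionary assembled below serializes to JSON shaped
+    // roughly like this (digests and sizes are illustrative):
+    //   {
+    //     "schemaVersion": 2,
+    //     "mediaType": "application/vnd.oci.image.manifest.v1+json",
+    //     "config": { "mediaType": "application/vnd.oci.image.config.v1+json", ... },
+    //     "layers": [
+    //       { "mediaType": "application/octet-stream+lz4",
+    //         "digest": "sha256:...", "size": 123,
+    //         "annotations": { "org.trycua.lume.uncompressed-size": "456",
+    //                          "org.trycua.lume.uncompressed-content-digest": "sha256:..." } }
+    //     ],
+    //     "annotations": { "org.trycua.lume.upload-time": "...",
+    //                      "org.trycua.lume.uncompressed-disk-size": "..." }
+    //   }
+    private func createManifest(layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64?) 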
-> [String: Any] { + var manifest: [String: Any] = [ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "layers": layers.map { layer in + var layerDict: [String: Any] = [ + "mediaType": layer.mediaType, + "size": layer.size, + "digest": layer.digest + ] + + if let uncompressedSize = layer.uncompressedSize { + var annotations: [String: String] = [:] + annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix + + if let digest = layer.uncompressedContentDigest { + annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix + } + + layerDict["annotations"] = annotations + } + + return layerDict + } + ] + + // Add config reference if available + if let configIndex = configLayerIndex { + let configLayer = layers[configIndex] + manifest["config"] = [ + "mediaType": configLayer.mediaType, + "size": configLayer.size, + "digest": configLayer.digest + ] + } + + // Add annotations + var annotations: [String: String] = [:] + annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix + + if let diskSize = uncompressedDiskSize { + annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix + } + + manifest["annotations"] = annotations + + return manifest + } + + private func uploadBlobFromData(repository: String, data: Data, token: String) async throws -> String { + // Calculate digest + let digest = "sha256:" + data.sha256String() + + // Check if blob already exists + if try await blobExists(repository: repository, digest: digest, token: token) { + Logger.info("Blob already exists: \(digest)") + return digest + } + + // Initiate upload + let uploadURL = try await startBlobUpload(repository: repository, token: token) + + // Upload blob + try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) + + // Report progress + await uploadProgress.addProgress(Int64(data.count)) + + return digest + } + + private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) async throws -> String { + // Check if blob already exists + if try await blobExists(repository: repository, digest: digest, token: token) { + Logger.info("Blob already exists: \(digest)") + return digest + } + + // Initiate upload + let uploadURL = try await startBlobUpload(repository: repository, token: token) + + // Load data from file + let data = try Data(contentsOf: path) + + // Upload blob + try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) + + // Report progress + await uploadProgress.addProgress(Int64(data.count)) + + return digest + } + + private func blobExists(repository: String, digest: String, token: String) async throws -> Bool { + let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/\(digest)")! + var request = URLRequest(url: url) + request.httpMethod = "HEAD" + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + let (_, response) = try await URLSession.shared.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + return httpResponse.statusCode == 200 + } + + return false + } + + private func startBlobUpload(repository: String, token: String) async throws -> URL { + let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/uploads/")! 
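+        // OCI distribution flow: POSTing to /v2/<repo>/blobs/uploads/ opens an
+        // upload session; the registry answers 202 Accepted with a Location
+        // header, and uploadBlob() later PUTs the bytes to that URL with a
+        // ?digest=sha256:... query parameter, expecting 201 Created.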
+        var request = URLRequest(url: url)
+        request.httpMethod = "POST"
+        request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
+        request.setValue("0", forHTTPHeaderField: "Content-Length")  // Explicitly set Content-Length to 0 for POST
+
+        let (data, response) = try await URLSession.shared.data(for: request)
+
+        guard let httpResponse = response as? HTTPURLResponse,
+            httpResponse.statusCode == 202,
+            let locationString = httpResponse.value(forHTTPHeaderField: "Location")
+        else {
+            // Log response details on failure, reusing the body already received
+            // instead of re-issuing the request
+            let responseBody = String(data: data, encoding: .utf8) ?? "(No Body)"
+            Logger.error("Failed to initiate blob upload. Status: \((response as? HTTPURLResponse)?.statusCode ?? 0). Headers: \((response as? HTTPURLResponse)?.allHeaderFields ?? [:]). Body: \(responseBody)")
+            throw PushError.uploadInitiationFailed
+        }
+
+        // Construct the base URL for the registry
+        guard let baseRegistryURL = URL(string: "https://\(registry)") else {
+            Logger.error("Failed to create base registry URL from: \(registry)")
+            throw PushError.invalidURL
+        }
+
+        // Create the final upload URL, resolving the location against the base URL
+        guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else {
+            Logger.error("Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)")
+            throw PushError.invalidURL
+        }
+
+        Logger.info("Blob upload initiated. Upload URL: \(uploadURL.absoluteString)")
+        return uploadURL.absoluteURL  // Ensure it's absolute
+    }
+
+    private func uploadBlob(url: URL, data: Data, digest: String, token: String) async throws {
+        var components = URLComponents(url: url, resolvingAgainstBaseURL: true)!
+
+        // Add digest parameter
+        var queryItems = components.queryItems ?? []
+        queryItems.append(URLQueryItem(name: "digest", value: digest))
+        components.queryItems = queryItems
+
+        guard let uploadURL = components.url else {
+            throw PushError.invalidURL
+        }
+
+        var request = URLRequest(url: uploadURL)
+        request.httpMethod = "PUT"
+        request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
+        request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type")
+        request.setValue("\(data.count)", forHTTPHeaderField: "Content-Length")
+        request.httpBody = data
+
+        let (_, response) = try await URLSession.shared.data(for: request)
+
+        guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else {
+            throw PushError.blobUploadFailed
+        }
+    }
+
+    private func pushManifest(repository: String, tag: String, manifest: Data, token: String) async throws {
+        let url = URL(string: "https://\(registry)/v2/\(repository)/manifests/\(tag)")!
+        var request = URLRequest(url: url)
+        request.httpMethod = "PUT"
+        request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
+        request.setValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type")
+        request.httpBody = manifest
+
+        let (_, response) = try await URLSession.shared.data(for: request)
+
+        guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else {
+            throw PushError.manifestPushFailed
+        }
+    }
+
+    private func getCredentialsFromEnvironment() -> (String?, String?) {
+        let username = ProcessInfo.processInfo.environment["GITHUB_USERNAME"] ??
+            ProcessInfo.processInfo.environment["GHCR_USERNAME"]
+        let password = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] ??
+ ProcessInfo.processInfo.environment["GHCR_TOKEN"] + return (username, password) + } + + // Add these helper methods for dry-run and reassemble implementation + + // NEW Helper function using Compression framework and sparse writing + private func decompressChunkAndWriteSparse(inputPath: String, outputHandle: FileHandle, startOffset: UInt64) throws -> UInt64 { + guard FileManager.default.fileExists(atPath: inputPath) else { + Logger.error("Compressed chunk not found at: \(inputPath)") + return 0 // Or throw an error + } + + let sourceData = try Data(contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) + var currentWriteOffset = startOffset + var totalDecompressedBytes: UInt64 = 0 + var sourceReadOffset = 0 // Keep track of how much compressed data we've provided + + // Use the initializer with the readingFrom closure + let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? in + let bytesAvailable = sourceData.count - sourceReadOffset + if bytesAvailable == 0 { + return nil // No more data + } + let bytesToRead = min(length, bytesAvailable) + let chunk = sourceData.subdata(in: sourceReadOffset ..< sourceReadOffset + bytesToRead) + sourceReadOffset += bytesToRead + return chunk + } + + // Process the decompressed output by reading from the filter + while let decompressedData = try filter.readData(ofLength: Self.holeGranularityBytes) { + if decompressedData.isEmpty { break } // End of stream + + // Check if the chunk is all zeros + if decompressedData.count == Self.holeGranularityBytes && decompressedData == Self.zeroChunk { + // It's a zero chunk, just advance the offset, don't write + currentWriteOffset += UInt64(decompressedData.count) + } else { + // Not a zero chunk (or a partial chunk at the end), write it + try outputHandle.seek(toOffset: currentWriteOffset) + try outputHandle.write(contentsOf: decompressedData) + currentWriteOffset += UInt64(decompressedData.count) + } + totalDecompressedBytes += UInt64(decompressedData.count) + } + + // No explicit finalize needed when initialized with source data + + return totalDecompressedBytes + } + + // Helper function to calculate SHA256 hash of a file + private func calculateSHA256(filePath: String) -> String { + guard FileManager.default.fileExists(atPath: filePath) else { + return "file-not-found" + } + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/shasum") + process.arguments = ["-a", "256", filePath] + + let outputPipe = Pipe() + process.standardOutput = outputPipe + + do { + try process.run() + process.waitUntilExit() + + if let data = try outputPipe.fileHandleForReading.readToEnd(), + let output = String(data: data, encoding: .utf8) { + return output.components(separatedBy: " ").first ?? 
"hash-calculation-failed" + } + } catch { + Logger.error("SHA256 calculation failed: \(error)") + } + + return "hash-calculation-failed" + } } + +actor UploadProgressTracker { + private var totalBytes: Int64 = 0 + private var uploadedBytes: Int64 = 0 // Renamed + private var progressLogger = ProgressLogger(threshold: 0.01) + private var totalFiles: Int = 0 // Keep track of total items + private var completedFiles: Int = 0 // Keep track of completed items + + // Upload speed tracking + private var startTime: Date = Date() + private var lastUpdateTime: Date = Date() + private var lastUpdateBytes: Int64 = 0 + private var speedSamples: [Double] = [] + private var peakSpeed: Double = 0 + private var totalElapsedTime: TimeInterval = 0 + + // Smoothing factor for speed calculation + private var speedSmoothing: Double = 0.3 + private var smoothedSpeed: Double = 0 + + func setTotal(_ total: Int64, files: Int) { + totalBytes = total + totalFiles = files + startTime = Date() + lastUpdateTime = startTime + uploadedBytes = 0 // Reset uploaded bytes + completedFiles = 0 // Reset completed files + smoothedSpeed = 0 + speedSamples = [] + peakSpeed = 0 + totalElapsedTime = 0 + } + + func addProgress(_ bytes: Int64) { + uploadedBytes += bytes + completedFiles += 1 // Increment completed files count + let now = Date() + let elapsed = now.timeIntervalSince(lastUpdateTime) + + // Show first progress update immediately, then throttle updates + let shouldUpdate = (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles) + + if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set + let currentSpeed = Double(uploadedBytes - lastUpdateBytes) / max(elapsed, 0.001) + speedSamples.append(currentSpeed) + + // Cap samples array + if speedSamples.count > 20 { + speedSamples.removeFirst(speedSamples.count - 20) + } + + peakSpeed = max(peakSpeed, currentSpeed) + + // Apply exponential smoothing + if smoothedSpeed == 0 { smoothedSpeed = currentSpeed } + else { smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed } + + let recentAvgSpeed = calculateAverageSpeed() + let totalElapsed = now.timeIntervalSince(startTime) + let overallAvgSpeed = totalElapsed > 0 ? Double(uploadedBytes) / totalElapsed : 0 + + let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero + logSpeedProgress( + current: progress, + currentSpeed: currentSpeed, + averageSpeed: recentAvgSpeed, + smoothedSpeed: smoothedSpeed, + overallSpeed: overallAvgSpeed, + peakSpeed: peakSpeed, + context: "Uploading Image" // Changed context + ) + + lastUpdateTime = now + lastUpdateBytes = uploadedBytes + totalElapsedTime = totalElapsed + } + } + + private func calculateAverageSpeed() -> Double { + guard !speedSamples.isEmpty else { return 0 } + var totalWeight = 0.0 + var weightedSum = 0.0 + let samples = speedSamples.suffix(min(8, speedSamples.count)) + for (index, speed) in samples.enumerated() { + let weight = Double(index + 1) + weightedSum += speed * weight + totalWeight += weight + } + return totalWeight > 0 ? weightedSum / totalWeight : 0 + } + + // Use the UploadStats struct + func getUploadStats() -> UploadStats { + let avgSpeed = totalElapsedTime > 0 ? 
Double(uploadedBytes) / totalElapsedTime : 0 + return UploadStats( + totalBytes: totalBytes, + uploadedBytes: uploadedBytes, // Renamed + elapsedTime: totalElapsedTime, + averageSpeed: avgSpeed, + peakSpeed: peakSpeed + ) + } + + private func logSpeedProgress( + current: Double, + currentSpeed: Double, + averageSpeed: Double, + smoothedSpeed: Double, + overallSpeed: Double, + peakSpeed: Double, + context: String + ) { + let progressPercent = Int(current * 100) + // let currentSpeedStr = formatByteSpeed(currentSpeed) // Removed unused + let avgSpeedStr = formatByteSpeed(averageSpeed) + // let peakSpeedStr = formatByteSpeed(peakSpeed) // Removed unused + let remainingBytes = totalBytes - uploadedBytes + let speedForEta = max(smoothedSpeed, averageSpeed * 0.8) + let etaSeconds = speedForEta > 0 ? Double(remainingBytes) / speedForEta : 0 + let etaStr = formatTimeRemaining(etaSeconds) + let progressBar = createProgressBar(progress: current) + let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count + + print( + "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output + terminator: "") + fflush(stdout) + } + + // Helper methods (createProgressBar, formatByteSpeed, formatTimeRemaining) remain the same + private func createProgressBar(progress: Double, width: Int = 30) -> String { + let completedWidth = Int(progress * Double(width)) + let remainingWidth = width - completedWidth + let completed = String(repeating: "█", count: completedWidth) + let remaining = String(repeating: "░", count: remainingWidth) + return "[\(completed)\(remaining)]" + } + private func formatByteSpeed(_ bytesPerSecond: Double) -> String { + let units = ["B/s", "KB/s", "MB/s", "GB/s"] + var speed = bytesPerSecond + var unitIndex = 0 + while speed > 1024 && unitIndex < units.count - 1 { speed /= 1024; unitIndex += 1 } + return String(format: "%.1f %@", speed, units[unitIndex]) + } + private func formatTimeRemaining(_ seconds: Double) -> String { + if seconds.isNaN || seconds.isInfinite || seconds <= 0 { return "calculating..." } + let hours = Int(seconds) / 3600 + let minutes = (Int(seconds) % 3600) / 60 + let secs = Int(seconds) % 60 + if hours > 0 { return String(format: "%d:%02d:%02d", hours, minutes, secs) } + else { return String(format: "%d:%02d", minutes, secs) } + } +} + diff --git a/libs/lume/src/LumeController.swift b/libs/lume/src/LumeController.swift index 4cb8253d..1329f8c5 100644 --- a/libs/lume/src/LumeController.swift +++ b/libs/lume/src/LumeController.swift @@ -452,6 +452,77 @@ final class LumeController { } } + @MainActor + public func pushImage( + name: String, + imageName: String, + tags: [String], + registry: String, + organization: String, + storage: String? = nil, + chunkSizeMb: Int = 512, + verbose: Bool = false, + dryRun: Bool = false, + reassemble: Bool = false + ) async throws { + do { + Logger.info( + "Pushing VM to registry", + metadata: [ + "name": name, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", + "registry": registry, + "organization": organization, + "location": storage ?? 
"default", + "chunk_size": "\(chunkSizeMb)MB", + "dry_run": "\(dryRun)", + "reassemble": "\(reassemble)" + ]) + + try validatePushParameters( + name: name, + imageName: imageName, + tags: tags, + registry: registry, + organization: organization + ) + + // Find the actual location of the VM + let actualLocation = try self.validateVMExists(name, storage: storage) + + // Get the VM directory + let vmDir = try home.getVMDirectory(name, storage: actualLocation) + + // Use ImageContainerRegistry to push the VM + let imageContainerRegistry = ImageContainerRegistry( + registry: registry, organization: organization) + + try await imageContainerRegistry.push( + vmDirPath: vmDir.dir.path, + imageName: imageName, + tags: tags, + chunkSizeMb: chunkSizeMb, + verbose: verbose, + dryRun: dryRun, + reassemble: reassemble + ) + + Logger.info( + "VM pushed successfully", + metadata: [ + "name": name, + "imageName": imageName, + "tags": "\(tags.joined(separator: ", "))", + "registry": registry, + "organization": organization, + ]) + } catch { + Logger.error("Failed to push VM", metadata: ["error": error.localizedDescription]) + throw error + } + } + @MainActor public func pruneImages() async throws { Logger.info("Pruning cached images") @@ -755,4 +826,31 @@ final class LumeController { break } } + + private func validatePushParameters( + name: String, + imageName: String, + tags: [String], + registry: String, + organization: String + ) throws { + guard !name.isEmpty else { + throw ValidationError("VM name cannot be empty") + } + guard !imageName.isEmpty else { + throw ValidationError("Image name cannot be empty") + } + guard !tags.isEmpty else { + throw ValidationError("At least one tag must be provided.") + } + guard !registry.isEmpty else { + throw ValidationError("Registry cannot be empty") + } + guard !organization.isEmpty else { + throw ValidationError("Organization cannot be empty") + } + + // Verify VM exists (this will throw if not found) + _ = try self.validateVMExists(name) + } } diff --git a/libs/lume/src/Server/Handlers.swift b/libs/lume/src/Server/Handlers.swift index aac16e80..c968359a 100644 --- a/libs/lume/src/Server/Handlers.swift +++ b/libs/lume/src/Server/Handlers.swift @@ -288,6 +288,54 @@ extension Server { } } + func handlePush(_ body: Data?) async throws -> HTTPResponse { + guard let body = body, + let request = try? 
JSONDecoder().decode(PushRequest.self, from: body)
+        else {
+            return HTTPResponse(
+                statusCode: .badRequest,
+                headers: ["Content-Type": "application/json"],
+                body: try JSONEncoder().encode(APIError(message: "Invalid request body"))
+            )
+        }
+
+        // Trigger push asynchronously, return Accepted immediately
+        Task.detached { @MainActor @Sendable in
+            do {
+                let vmController = LumeController()
+                try await vmController.pushImage(
+                    name: request.name,
+                    imageName: request.imageName,
+                    tags: request.tags,
+                    registry: request.registry,
+                    organization: request.organization,
+                    storage: request.storage,
+                    chunkSizeMb: request.chunkSizeMb,
+                    verbose: false,  // verbose output goes to the server logs instead
+                    dryRun: false,  // the API always performs a real push
+                    reassemble: false  // reassembly verification is CLI-only
+                )
+                Logger.info("Background push completed successfully for image: \(request.imageName):\(request.tags.joined(separator: ","))")
+            } catch {
+                Logger.error(
+                    "Background push failed for image: \(request.imageName):\(request.tags.joined(separator: ","))",
+                    metadata: ["error": error.localizedDescription]
+                )
+            }
+        }
+
+        return HTTPResponse(
+            statusCode: .accepted,
+            headers: ["Content-Type": "application/json"],
+            body: try JSONEncoder().encode([
+                "message": AnyEncodable("Push initiated in background"),
+                "name": AnyEncodable(request.name),
+                "imageName": AnyEncodable(request.imageName),
+                "tags": AnyEncodable(request.tags),
+            ])
+        )
+    }
+
     func handleGetImages(_ request: HTTPRequest) async throws -> HTTPResponse {
         let pathAndQuery = request.path.split(separator: "?", maxSplits: 1)
         let queryParams =
diff --git a/libs/lume/src/Server/Requests.swift b/libs/lume/src/Server/Requests.swift
index 19291072..da0bf681 100644
--- a/libs/lume/src/Server/Requests.swift
+++ b/libs/lume/src/Server/Requests.swift
@@ -102,3 +102,31 @@ struct CloneRequest: Codable {
     let sourceLocation: String?
     let destLocation: String?
 }
+
+struct PushRequest: Codable {
+    let name: String  // Name of the local VM
+    let imageName: String  // Base name for the image in the registry
+    let tags: [String]  // List of tags to push
+    var registry: String  // Registry URL
+    var organization: String  // Organization/user in the registry
+    let storage: String?  // Optional VM storage location
+    var chunkSizeMb: Int  // Chunk size
+    // dryRun and reassemble are CLI-only concerns; the API leaves them false.
+    // verbose is handled by server logging.
+
+    enum CodingKeys: String, CodingKey {
+        case name, imageName, tags, registry, organization, storage, chunkSizeMb
+    }
+
+    // Provide default values for optional fields during decoding
+    init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        name = try container.decode(String.self, forKey: .name)
+        imageName = try container.decode(String.self, forKey: .imageName)
+        tags = try container.decode([String].self, forKey: .tags)
+        registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io"
+        organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua"
+        storage = try container.decodeIfPresent(String.self, forKey: .storage)
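+        // For illustration (hypothetical request): a minimal body such as
+        //   {"name": "my-vm", "imageName": "macos-img", "tags": ["latest"]}
+        // decodes successfully, falling back to registry "ghcr.io",
+        // organization "trycua", and the 512 MB default chunk size below.
+        chunkSizeMb = try container.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 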
512 + } +} diff --git a/libs/lume/src/Server/Responses.swift b/libs/lume/src/Server/Responses.swift index e6d3bfe9..12a5b4c3 100644 --- a/libs/lume/src/Server/Responses.swift +++ b/libs/lume/src/Server/Responses.swift @@ -4,6 +4,19 @@ struct APIError: Codable { let message: String } +// Helper struct to encode mixed-type dictionaries +struct AnyEncodable: Encodable { + private let value: Encodable + + init(_ value: Encodable) { + self.value = value + } + + func encode(to encoder: Encoder) throws { + try value.encode(to: encoder) + } +} + extension HTTPResponse { static func json(_ value: T) throws -> HTTPResponse { let data = try JSONEncoder().encode(value) diff --git a/libs/lume/src/Server/Server.swift b/libs/lume/src/Server/Server.swift index 4ed671c5..71db4a75 100644 --- a/libs/lume/src/Server/Server.swift +++ b/libs/lume/src/Server/Server.swift @@ -261,6 +261,12 @@ final class Server { } return try await self.handleSetDefaultLocation(name) }), + Route( + method: "POST", path: "/vms/push", + handler: { [weak self] request in + guard let self else { throw HTTPError.internalError } + return try await self.handlePush(request.body) + }), ] } diff --git a/libs/lume/src/Utils/CommandRegistry.swift b/libs/lume/src/Utils/CommandRegistry.swift index a7e2a7bc..4d128971 100644 --- a/libs/lume/src/Utils/CommandRegistry.swift +++ b/libs/lume/src/Utils/CommandRegistry.swift @@ -5,6 +5,7 @@ enum CommandRegistry { [ Create.self, Pull.self, + Push.self, Images.self, Clone.self, Get.self, From b10d310e03aa1ba894da441c845eec53e0bfde4b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:04:40 -0700 Subject: [PATCH 09/43] Fix pull --- .../ImageContainerRegistry.swift | 482 +++++------------- 1 file changed, 140 insertions(+), 342 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 3ba7d543..8b668db7 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -102,6 +102,28 @@ struct ImageMetadata: Codable { let timestamp: Date } +// Actor to safely collect disk part information from concurrent tasks +actor DiskPartsCollector { + private var diskParts: [(Int, URL)] = [] + private var partCounter = 0 + + // Adds a part and returns its assigned sequential number + func addPart(url: URL) -> Int { + partCounter += 1 + let partNum = partCounter + diskParts.append((partNum, url)) + return partNum + } + + func getSortedParts() -> [(Int, URL)] { + return diskParts.sorted { $0.0 < $1.0 } + } + + func getTotalParts() -> Int { + return partCounter + } +} + actor ProgressTracker { private var totalBytes: Int64 = 0 private var downloadedBytes: Int64 = 0 @@ -716,8 +738,8 @@ class ImageContainerRegistry: @unchecked Sendable { "[░░░░░░░░░░░░░░░░░░░░] 0% | Initializing downloads... | ETA: calculating... 
") fflush(stdout) - var diskParts: [(Int, URL)] = [] - var totalParts = 0 + // Instantiate the collector + let diskPartsCollector = DiskPartsCollector() // Adaptive concurrency based on system capabilities let memoryConstrained = determineIfMemoryConstrained() @@ -742,85 +764,97 @@ class ImageContainerRegistry: @unchecked Sendable { await counter.decrement() } - if let partInfo = extractPartInfo(from: layer.mediaType) { - let (partNum, total) = partInfo - totalParts = total + // Check both media type and safely unwrap part info + if layer.mediaType == "application/octet-stream+lz4" { + let size = layer.size + // Declare cachedLayer and digest here let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest - let size = layer.size - // For memory-optimized mode - point directly to cache when possible - if memoryConstrained - && FileManager.default.fileExists(atPath: cachedLayer.path) + // For memory-constrained mode - point directly to cache when possible + if memoryConstrained // Line 777 + && FileManager.default.fileExists(atPath: cachedLayer.path) { - // Use the cached file directly - diskParts.append((partNum, cachedLayer)) + // Use the cached file *directly* without copying to temp + // Add the *cached* layer path to the collector + let partNum = await diskPartsCollector.addPart(url: cachedLayer) // Use the collector + Logger.info("Using cached layer directly for part #\(partNum): \(cachedLayer.lastPathComponent)") - // Still need to account for progress - group.addTask { [self] in - await counter.increment() - await downloadProgress.addProgress(Int64(size)) - await counter.decrement() - return Int64(size) - } - continue - } else { - let partURL = tempDownloadDir.appendingPathComponent( - "disk.img.part.\(partNum)") - diskParts.append((partNum, partURL)) - - group.addTask { [self] in - await counter.increment() + // Account for progress directly, no need for a separate task + await downloadProgress.addProgress(Int64(size)) + + // No task was added, so no need to increment/decrement counter here + + continue // Skip the download task group logic below + } else { + // Not memory constrained OR file not cached + // Add a task to handle copy/download and adding to collector + group.addTask { [self] in + await counter.increment() // Increment counter for the task + let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: partURL) - await downloadProgress.addProgress(Int64(size)) - } else { - // Check if this layer is already being downloaded and we're not skipping cache - if isDownloading(digest) { - try await waitForExistingDownload( - digest, cachedLayer: cachedLayer) - if FileManager.default.fileExists(atPath: cachedLayer.path) - { - try FileManager.default.copyItem( - at: cachedLayer, to: partURL) - await downloadProgress.addProgress(Int64(size)) - return Int64(size) - } - } + // If cached, copy to temp and use temp path for reassembly later + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) // Update progress after copy + finalPath = tempPartURL + } else { + // If not cached, download to temp path + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path + + // Check if this layer is already being downloaded + if 
isDownloading(digest) { + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + // If it finished downloading while waiting, copy from cache to temp + if FileManager.default.fileExists(atPath: cachedLayer.path) { + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) // Update progress + finalPath = tempPartURL + } else { + // If still not available after waiting (should be rare), proceed to download + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, + mediaType: layer.mediaType, // Use correct mediaType + token: token, + to: tempPartURL, + maxRetries: 5, + progress: downloadProgress, // Progress updated inside downloadLayer + manifestId: manifestId + ) + // downloadLayer handles caching and markDownloadComplete + finalPath = tempPartURL + } + } else { + // Start new download + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, + mediaType: layer.mediaType, // Use correct mediaType + token: token, + to: tempPartURL, + maxRetries: 5, + progress: downloadProgress, // Progress updated inside downloadLayer + manifestId: manifestId + ) + // downloadLayer handles caching and markDownloadComplete + finalPath = tempPartURL + } + } + + // Add the final determined path (temp path) to the collector + let partNum = await diskPartsCollector.addPart(url: finalPath) + Logger.info("Assigned part #\(partNum) for path: \(finalPath.lastPathComponent)") - // Start new download - markDownloadStarted(digest) - - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, - token: token, - to: partURL, - maxRetries: 5, - progress: downloadProgress, - manifestId: manifestId - ) - - // Cache the downloaded layer if caching is enabled - if cachingEnabled { - if FileManager.default.fileExists(atPath: cachedLayer.path) - { - try FileManager.default.removeItem(at: cachedLayer) - } - try FileManager.default.copyItem( - at: partURL, to: cachedLayer) - } - markDownloadComplete(digest) - } - - await counter.decrement() - return Int64(size) - } - continue + await counter.decrement() // Decrement counter + return Int64(size) + } + continue // Ensure we move to the next layer after adding task } } else { let mediaType = layer.mediaType @@ -894,7 +928,14 @@ class ImageContainerRegistry: @unchecked Sendable { // Wait for remaining tasks for try await _ in group {} - } + } // End TaskGroup + + // --- Safely retrieve parts AFTER TaskGroup --- + let diskParts = await diskPartsCollector.getSortedParts() + let totalParts = await diskPartsCollector.getTotalParts() + Logger.info("Finished processing layers. 
Found \(totalParts) disk parts.") + // --- End retrieving parts --- + Logger.info("") // New line after progress // Display download statistics @@ -1464,23 +1505,22 @@ class ImageContainerRegistry: @unchecked Sendable { { Logger.info("Copying from cache...") - var diskPartSources: [(Int, URL)] = [] - var totalParts = 0 + // Instantiate collector + let diskPartsCollector = DiskPartsCollector() // First identify disk parts and non-disk files for layer in manifest.layers { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: layer.digest) - if let partInfo = extractPartInfo(from: layer.mediaType) { - let (partNum, total) = partInfo - totalParts = total - // Just store the reference to source instead of copying - diskPartSources.append((partNum, cachedLayer)) - } else { + // Check if it's a disk chunk layer based on media type + if layer.mediaType == "application/octet-stream+lz4" { + // It's a disk chunk - Add to collector + _ = await diskPartsCollector.addPart(url: cachedLayer) // Ignore return value + } + else { + // Handle non-disk layers (config, nvram) let fileName: String switch layer.mediaType { - case "application/vnd.oci.image.layer.v1.tar", "application/octet-stream+gzip": - fileName = "disk.img" case "application/vnd.oci.image.config.v1+json": fileName = "config.json" case "application/octet-stream": @@ -1496,8 +1536,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } + // --- Safely retrieve parts AFTER loop --- + let diskPartSources = await diskPartsCollector.getSortedParts() + let totalParts = await diskPartsCollector.getTotalParts() + Logger.info("Found \(totalParts) disk parts in cache.") + // --- End retrieving parts --- + // Reassemble disk parts if needed - if !diskPartSources.isEmpty { + if !diskPartSources.isEmpty { // Use the retrieved array // Get the uncompressed size from cached config let configDigest = manifest.config?.digest let cachedConfigPath = @@ -1588,277 +1634,29 @@ class ImageContainerRegistry: @unchecked Sendable { for partNum in 1...totalParts { // Find the original layer info for this part number guard - let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }), + // Find layer by index approximated during collection, not media type parts let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) else { throw PullError.missingPart(partNum) } - let layerMediaType = layer.mediaType // Extract mediaType here Logger.info( - "Processing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent)" + "Decompressing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." ) - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - // Seek to the correct offset in the output sparse file - try outputHandle.seek(toOffset: currentOffset) - - if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info("Decompressing part \(partNum) with media type: \(layerMediaType)") - - // Handle Apple Archive format - let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) - let tempOutputPath = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - - // Check input file size before decompression - let inputFileSize = - (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" - ) - - // Create a process that decompresses to a temporary file - let process = Process() - process.executableURL = URL(fileURLWithPath: toolPath) - process.arguments = [ - "extract", "-i", sourceURL.path, "-o", tempOutputPath.path, - ] - - // Add error output capture - let errorPipe = Pipe() - process.standardError = errorPipe - - Logger.info( - "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" - ) - try process.run() - process.waitUntilExit() - - // Check error output if any - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !errorData.isEmpty, - let errorString = String(data: errorData, encoding: .utf8) - { - Logger.error("Decompression error output: \(errorString)") - } - - if process.terminationStatus != 0 { - Logger.error( - "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" - ) - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) - let progressBar = createProgressBar(progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Check if the output file exists and has content - let outputExists = FileManager.default.fileExists(atPath: tempOutputPath.path) - let outputFileSize = - outputExists - ? ((try? FileManager.default.attributesOfItem(atPath: tempOutputPath.path)[ - .size] as? UInt64) ?? 0) : 0 - Logger.info( - "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" - ) - - // If decompression produced an empty file, fall back to direct copy - if outputFileSize == 0 { - Logger.info( - "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" - ) - try? 
FileManager.default.removeItem(at: tempOutputPath) - - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) / Double(expectedTotalSize) - let progressBar = createProgressBar(progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Read the decompressed file and write to our output - let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) - defer { - try? tempInputHandle.close() - try? FileManager.default.removeItem(at: tempOutputPath) - } - - // Read decompressed data in chunks and write to sparse file - var partDecompressedSize: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { // Help manage memory with large files - try! tempInputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } // End of stream - - try outputHandle.write(contentsOf: data) - partDecompressedSize += UInt64(data.count) - chunkCount += 1 - - // Update progress based on decompressed size written - let totalProgress = - Double(currentOffset + partDecompressedSize) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling") - } - - Logger.info( - "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" - ) - currentOffset += partDecompressedSize // Advance offset by decompressed size - } else { - // No decompression command available, try direct copy - Logger.info( - "Copying part \(partNum) directly..." - ) - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: sourceURL) - defer { try? inputHandle.close() } - - // Get part size - let partSize = - (try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" - ) - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - } - - // Ensure data is written before processing next part (optional but safer) - try outputHandle.synchronize() + // Use the correct sparse decompression function + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + + try outputHandle.synchronize() // Optional: Synchronize after each chunk } // Finalize progress, close handle (done by defer) reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - Logger.info("") // Newline // Ensure output handle is closed before post-processing try outputHandle.close() From b9f2a73941420a36bd753b2351c0d257d035b5d6 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:33:02 -0700 Subject: [PATCH 10/43] Add sequential ordering --- .../ImageContainerRegistry.swift | 484 ++++++++++-------- libs/lume/src/Errors/Errors.swift | 3 + 2 files changed, 262 insertions(+), 225 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8b668db7..3c90b40b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -26,6 +26,18 @@ enum PushError: Error { case missingToken case invalidURL case lz4NotFound // Added error case + case invalidMediaType // Added during part refactoring + case missingUncompressedSizeAnnotation // Added for sparse file handling + case fileCreationFailed(String) // Added for sparse file handling + case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling + case missingPart(Int) // Added for sparse file handling + case layerDownloadFailed(String) // Added for download retries + case manifestFetchFailed // Added for manifest fetching +} + +// Define a specific error type for when no underlying error exists +struct NoSpecificUnderlyingError: Error, CustomStringConvertible { + var description: String { "No specific underlying error was provided." 
} } struct ChunkMetadata: Codable { @@ -104,21 +116,25 @@ struct ImageMetadata: Codable { // Actor to safely collect disk part information from concurrent tasks actor DiskPartsCollector { + // Store tuples of (sequentialPartNum, url) private var diskParts: [(Int, URL)] = [] - private var partCounter = 0 + // Restore internal counter + private var partCounter = 0 // Adds a part and returns its assigned sequential number func addPart(url: URL) -> Int { - partCounter += 1 + partCounter += 1 // Use counter logic let partNum = partCounter - diskParts.append((partNum, url)) - return partNum + diskParts.append((partNum, url)) // Store sequential number + return partNum // Return assigned sequential number } + // Sort by the sequential part number (index 0 of tuple) func getSortedParts() -> [(Int, URL)] { return diskParts.sorted { $0.0 < $1.0 } } + // Restore getTotalParts func getTotalParts() -> Int { return partCounter } @@ -752,6 +768,9 @@ class ImageContainerRegistry: @unchecked Sendable { ) let counter = TaskCounter() + // Remove totalDiskParts + // var totalDiskParts: Int? = nil + var lz4LayerCount = 0 // Count lz4 layers found try await withThrowingTaskGroup(of: Int64.self) { group in for layer in manifest.layers { @@ -764,176 +783,151 @@ class ImageContainerRegistry: @unchecked Sendable { await counter.decrement() } - // Check both media type and safely unwrap part info + // Identify disk parts by media type if layer.mediaType == "application/octet-stream+lz4" { - let size = layer.size - - // Declare cachedLayer and digest here + // --- Handle LZ4 Disk Part Layer --- + lz4LayerCount += 1 // Increment count + let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging + let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest + let size = layer.size - // For memory-constrained mode - point directly to cache when possible - if memoryConstrained // Line 777 - && FileManager.default.fileExists(atPath: cachedLayer.path) - { - // Use the cached file *directly* without copying to temp - // Add the *cached* layer path to the collector - let partNum = await diskPartsCollector.addPart(url: cachedLayer) // Use the collector - Logger.info("Using cached layer directly for part #\(partNum): \(cachedLayer.lastPathComponent)") - - // Account for progress directly, no need for a separate task - await downloadProgress.addProgress(Int64(size)) - - // No task was added, so no need to increment/decrement counter here - - continue // Skip the download task group logic below - } else { - // Not memory constrained OR file not cached - // Add a task to handle copy/download and adding to collector - group.addTask { [self] in - await counter.increment() // Increment counter for the task - - let finalPath: URL + if memoryConstrained && FileManager.default.fileExists(atPath: cachedLayer.path) { + // Add to collector, get sequential number assigned by collector + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + // Log using the sequential number from collector for clarity if needed, or the lz4LayerCount + Logger.info("Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)") + await downloadProgress.addProgress(Int64(size)) + continue + } else { + // Download/Copy Path (Task Group) + group.addTask { [self] in + await counter.increment() + let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - // If cached, copy 
to temp and use temp path for reassembly later - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) - await downloadProgress.addProgress(Int64(size)) // Update progress after copy - finalPath = tempPartURL - } else { - // If not cached, download to temp path - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") // Unique temp path - - // Check if this layer is already being downloaded - if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) - // If it finished downloading while waiting, copy from cache to temp - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) - await downloadProgress.addProgress(Int64(size)) // Update progress - finalPath = tempPartURL - } else { - // If still not available after waiting (should be rare), proceed to download - markDownloadStarted(digest) - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, // Use correct mediaType - token: token, - to: tempPartURL, - maxRetries: 5, - progress: downloadProgress, // Progress updated inside downloadLayer - manifestId: manifestId - ) - // downloadLayer handles caching and markDownloadComplete - finalPath = tempPartURL - } - } else { - // Start new download - markDownloadStarted(digest) - try await self.downloadLayer( - repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: layer.mediaType, // Use correct mediaType - token: token, - to: tempPartURL, - maxRetries: 5, - progress: downloadProgress, // Progress updated inside downloadLayer - manifestId: manifestId - ) - // downloadLayer handles caching and markDownloadComplete - finalPath = tempPartURL - } - } - - // Add the final determined path (temp path) to the collector - let partNum = await diskPartsCollector.addPart(url: finalPath) - Logger.info("Assigned part #\(partNum) for path: \(finalPath.lastPathComponent)") - - await counter.decrement() // Decrement counter - return Int64(size) - } - continue // Ensure we move to the next layer after adding task + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) + finalPath = tempPartURL + } else { + let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + if isDownloading(digest) { + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + if FileManager.default.fileExists(atPath: cachedLayer.path) { + try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + await downloadProgress.addProgress(Int64(size)) + finalPath = tempPartURL + } else { + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, mediaType: layer.mediaType, token: token, + to: tempPartURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId + ) + finalPath = tempPartURL + } + } else { + markDownloadStarted(digest) + try await self.downloadLayer( + repository: "\(self.organization)/\(imageName)", + digest: digest, mediaType: layer.mediaType, token: token, + to: tempPartURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId + ) + finalPath 
= tempPartURL + } + } + // Add to collector, get sequential number assigned by collector + let collectorPartNum = await diskPartsCollector.addPart(url: finalPath) + // Log using the sequential number from collector + Logger.info("Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)") + await counter.decrement() + return Int64(size) + } } } else { + // --- Handle Non-Disk-Part Layer --- let mediaType = layer.mediaType let digest = layer.digest let size = layer.size + // Determine output path based on media type let outputURL: URL switch mediaType { case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - outputURL = tempDownloadDir.appendingPathComponent("disk.img") + "application/octet-stream+gzip": // Might be compressed disk.img single file? + outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") - case "application/octet-stream": - outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") + case "application/octet-stream": // Could be nvram or uncompressed single disk.img + // Heuristic: If a config.json already exists or is expected, assume this is nvram. + // This might need refinement if single disk images use octet-stream. + if manifest.config != nil { + outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") + } else { + // Assume it's a single-file disk image if no config layer is present + outputURL = tempDownloadDir.appendingPathComponent("disk.img") + } default: - continue + Logger.info("Skipping unsupported layer media type: \(mediaType)") + continue // Skip to the next layer } + // Add task to download/copy the non-disk-part layer group.addTask { [self] in await counter.increment() - - let cachedLayer = getCachedLayerPath( - manifestId: manifestId, digest: digest) + let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) } else { - // Check if this layer is already being downloaded and we're not skipping cache if isDownloading(digest) { - try await waitForExistingDownload( - digest, cachedLayer: cachedLayer) + try await waitForExistingDownload(digest, cachedLayer: cachedLayer) if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem( - at: cachedLayer, to: outputURL) + try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) + await counter.decrement() // Decrement before returning return Int64(size) } } - // Start new download markDownloadStarted(digest) - try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, - mediaType: mediaType, - token: token, - to: outputURL, - maxRetries: 5, - progress: downloadProgress, - manifestId: manifestId + digest: digest, mediaType: mediaType, token: token, + to: outputURL, maxRetries: 5, + progress: downloadProgress, manifestId: manifestId ) - - // Cache the downloaded layer if caching is enabled - if cachingEnabled { - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.removeItem(at: cachedLayer) - } - try FileManager.default.copyItem(at: outputURL, to: cachedLayer) - } - markDownloadComplete(digest) + // Note: downloadLayer handles caching and 
marking download complete } - await counter.decrement() return Int64(size) } } - } + } // End for layer in manifest.layers // Wait for remaining tasks for try await _ in group {} } // End TaskGroup // --- Safely retrieve parts AFTER TaskGroup --- - let diskParts = await diskPartsCollector.getSortedParts() - let totalParts = await diskPartsCollector.getTotalParts() - Logger.info("Finished processing layers. Found \(totalParts) disk parts.") + let diskParts = await diskPartsCollector.getSortedParts() // Already sorted by logicalPartNum + // Check if totalDiskParts was set (meaning at least one lz4 layer was processed) + // Get total parts from the collector + let totalPartsFromCollector = await diskPartsCollector.getTotalParts() + // Change guard to if for logging only, as the later if condition handles the logic + if totalPartsFromCollector == 0 { + // If totalParts is 0, it means no layers matched the lz4 format. + Logger.info("No lz4 disk part layers found. Assuming single-part image or non-lz4 parts.") + // Reassembly logic below will be skipped if diskParts is empty. + // Explicitly set totalParts to 0 to prevent entering the reassembly block if diskParts might somehow be non-empty but totalParts was 0 + // This ensures consistency if the collector logic changes. + } + Logger.info("Finished processing layers. Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") // --- End retrieving parts --- Logger.info("") // New line after progress @@ -974,8 +968,9 @@ class ImageContainerRegistry: @unchecked Sendable { } // Handle disk parts if present - if !diskParts.isEmpty { - Logger.info("Reassembling disk image using sparse file technique...") + if !diskParts.isEmpty && totalPartsFromCollector > 0 { + // Use totalPartsFromCollector here + Logger.info("Reassembling \(totalPartsFromCollector) disk image parts using sparse file technique...") let outputURL = tempVMDir.appendingPathComponent("disk.img") // Wrap setup in do-catch for better error reporting @@ -1008,8 +1003,9 @@ class ImageContainerRegistry: @unchecked Sendable { } // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) + // Filter based on the correct media type now let expectedCompressedTotalSize = UInt64( - manifest.layers.filter { extractPartInfo(from: $0.mediaType) != nil }.reduce(0) + manifest.layers.filter { $0.mediaType == "application/octet-stream+lz4" }.reduce(0) { $0 + $1.size } ) Logger.info( @@ -1067,23 +1063,39 @@ class ImageContainerRegistry: @unchecked Sendable { var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - for partNum in 1...totalParts { + // Iterate using the reliable totalParts count from media type + // Use totalPartsFromCollector for the loop range + for partNum in 1...totalPartsFromCollector { // Find the original layer info for this part number - guard - let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }), - let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) - else { - throw PullError.missingPart(partNum) + // Find the part URL from our collected parts using the logical partNum + guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { + // This error should now be less likely, but good to keep + Logger.error("Missing required part number \(partNum) in collected parts during 
reassembly.") + throw PullError.missingPart(partNum) } - let layerMediaType = layer.mediaType // Extract mediaType here + let partURL = partInfo.1 // Get the URL from the tuple + + // We no longer need to find the original manifest layer here, + // as all parts collected by the collector should be the lz4 type. + // Remove the block that used extractPartInfo: + /* + guard let layer = manifest.layers.first(where: { layer in + if let info = extractPartInfo(from: layer.mediaType) { + return info.partNum == partNum + } + return false + }) else { + // Should not happen if totalParts was derived correctly + Logger.error("Could not find manifest layer for logical part number \(partNum).") + throw PullError.missingPart(partNum) // Or a different error + } + let layerMediaType = layer.mediaType + */ + // Assume the media type for decompression purposes + let layerMediaType = "application/octet-stream+lz4" Logger.info( - "Processing part \(partNum) of \(totalParts): \(partURL.lastPathComponent)") + "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") let inputHandle = try FileHandle(forReadingFrom: partURL) defer { @@ -1504,31 +1516,46 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") + + // Define output URL and expected size variable scope here + let outputURL = destination.appendingPathComponent("disk.img") + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() + // Remove totalDiskParts + // var totalDiskParts: Int? = nil + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: layer.digest) - // Check if it's a disk chunk layer based on media type + // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - // It's a disk chunk - Add to collector - _ = await diskPartsCollector.addPart(url: cachedLayer) // Ignore return value - } + lz4LayerCount += 1 // Increment count + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") + } else { - // Handle non-disk layers (config, nvram) + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": fileName = "config.json" case "application/octet-stream": - fileName = "nvram.bin" + // Assume nvram if config layer exists, otherwise assume single disk image + fileName = manifest.config != nil ? 
"nvram.bin" : "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } - // Only non-disk files are copied + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, to: destination.appendingPathComponent(fileName) @@ -1537,111 +1564,113 @@ class ImageContainerRegistry: @unchecked Sendable { } // --- Safely retrieve parts AFTER loop --- - let diskPartSources = await diskPartsCollector.getSortedParts() - let totalParts = await diskPartsCollector.getTotalParts() - Logger.info("Found \(totalParts) disk parts in cache.") + let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number + let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector + + // Remove old guard check + /* + guard let totalParts = totalDiskParts else { + Logger.info("No cached layers with valid part information found. Assuming single-part image or non-lz4 parts.") + } + */ + Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") // --- End retrieving parts --- // Reassemble disk parts if needed - if !diskPartSources.isEmpty { // Use the retrieved array - // Get the uncompressed size from cached config - let configDigest = manifest.config?.digest - let cachedConfigPath = - configDigest != nil - ? getCachedLayerPath(manifestId: manifestId, digest: configDigest!) : nil - let uncompressedSize = cachedConfigPath.flatMap { - getUncompressedSizeFromConfig(configPath: $0) - } + // Use the count from the collector + if !diskPartSources.isEmpty { + // Use totalParts from collector directly + Logger.info("Reassembling \(totalParts) disk image parts using sparse file technique...") + + // Get uncompressed size from cached config file (needs to be copied first) + let configURL = destination.appendingPathComponent("config.json") + // Parse config.json to get uncompressed size *before* reassembly + let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) - // Try to get disk size from VM config if OCI annotation not found + // Now also try to get disk size from VM config if OCI annotation not found var vmConfigDiskSize: UInt64? = nil - if uncompressedSize == nil { - // Find config.json in the copied files - let vmConfigPath = destination.appendingPathComponent("config.json") - if FileManager.default.fileExists(atPath: vmConfigPath.path) { - do { - let configData = try Data(contentsOf: vmConfigPath) - let decoder = JSONDecoder() - if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { - vmConfigDiskSize = vmConfig.diskSize - if let size = vmConfigDiskSize { - Logger.info( - "Found diskSize from cached VM config.json: \(size) bytes") - } + if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { + do { + let configData = try Data(contentsOf: configURL) + let decoder = JSONDecoder() + if let vmConfig = try? 
decoder.decode(VMConfig.self, from: configData) { + vmConfigDiskSize = vmConfig.diskSize + if let size = vmConfigDiskSize { + Logger.info("Found diskSize from VM config.json: \(size) bytes") } - } catch { - Logger.error("Failed to parse cached VM config.json for diskSize: \(error)") } + } catch { + Logger.error("Failed to parse VM config.json for diskSize: \(error)") } } - // Force explicit use - if uncompressedSize != nil { - Logger.info( - "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" + // Determine the size to use for the sparse file + // Use: annotation size > VM config diskSize > fallback (error) + if let size = uncompressedSize { + Logger.info("Using uncompressed size from annotation: \(size) bytes") + expectedTotalSize = size + } else if let size = vmConfigDiskSize { + Logger.info("Using diskSize from VM config: \(size) bytes") + expectedTotalSize = size + } else { + // If neither is found in cache scenario, throw error as we cannot determine the size + Logger.error( + "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." + + " Cannot reassemble." ) - } else if vmConfigDiskSize != nil { - Logger.info( - "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") + throw PullError.missingUncompressedSizeAnnotation } - Logger.info( - "Reassembling disk image from cached parts using sparse file technique..." - ) - let outputURL = destination.appendingPathComponent("disk.img") + // Now that expectedTotalSize is guaranteed to be non-nil, proceed with setup + guard let sizeForTruncate = expectedTotalSize else { + // This should not happen due to the checks above, but safety first + let nilError: Error? = nil + // Use nil-coalescing to provide a default error, appeasing the compiler + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) + } - // Wrap setup in do-catch for better error reporting + // Wrap file handle setup and sparse file creation within this block let outputHandle: FileHandle do { - // 1. Ensure parent directory exists - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - - // 2. Explicitly create the file first, removing old one if needed + // Ensure parent directory exists + try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + // Explicitly create the file first, removing old one if needed if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) } guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { throw PullError.fileCreationFailed(outputURL.path) } - - // 3. 
Now open the handle for writing + // Open handle for writing outputHandle = try FileHandle(forWritingTo: outputURL) - + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: sizeForTruncate) + Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") } catch { - // Catch errors during directory/file creation or handle opening - Logger.error( - "Failed during setup for disk image reassembly: \(error.localizedDescription)", - metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) } - // Calculate expected total size from the cached files - let expectedTotalSize: UInt64 = diskPartSources.reduce(UInt64(0)) { - (acc: UInt64, element) -> UInt64 in - let fileSize = - (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] - as? UInt64 ?? 0) ?? 0 - return acc + fileSize - } - Logger.info( - "Expected download size from cache: \(ByteCountFormatter.string(fromByteCount: Int64(expectedTotalSize), countStyle: .file)) (actual disk usage will be lower)" - ) + // Ensure handle is closed when exiting this scope + defer { try? outputHandle.close() } + + // ... (Get uncompressed size etc.) ... var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file + var currentOffset: UInt64 = 0 - for partNum in 1...totalParts { - // Find the original layer info for this part number - guard - // Find layer by index approximated during collection, not media type parts - let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) - else { - throw PullError.missingPart(partNum) + // Iterate from 1 up to the total number of parts found by the collector + for collectorPartNum in 1...totalParts { + // Find the source URL from our collected parts using the sequential collectorPartNum + guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { + Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + throw PullError.missingPart(collectorPartNum) } + let sourceURL = sourceInfo.1 // Get URL from tuple + // Log using the sequential collector part number Logger.info( - "Decompressing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." + "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
) // Use the correct sparse decompression function @@ -1659,7 +1688,8 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") // Ensure output handle is closed before post-processing - try outputHandle.close() + // No need for explicit close here, defer handles it + // try outputHandle.close() // Verify final size let finalSize = @@ -1669,9 +1699,10 @@ class ImageContainerRegistry: @unchecked Sendable { "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) - if finalSize != expectedTotalSize { + // Use the calculated sizeForTruncate for comparison + if finalSize != sizeForTruncate { Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(expectedTotalSize) bytes), but this doesn't affect functionality" + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" ) } @@ -1912,8 +1943,10 @@ class ImageContainerRegistry: @unchecked Sendable { throw lastError ?? PullError.layerDownloadFailed(digest) } + // Function removed as it's not applicable to the observed manifest format + /* private func extractPartInfo(from mediaType: String) -> (partNum: Int, total: Int)? { - let pattern = #"part\.number=(\d+);part\.total=(\d+)"# + let pattern = #"part\\.number=(\\d+);part\\.total=(\\d+)"# guard let regex = try? NSRegularExpression(pattern: pattern), let match = regex.firstMatch( in: mediaType, @@ -1928,6 +1961,7 @@ class ImageContainerRegistry: @unchecked Sendable { } return (partNum, total) } + */ private func listRepositories() async throws -> [String] { var request = URLRequest( diff --git a/libs/lume/src/Errors/Errors.swift b/libs/lume/src/Errors/Errors.swift index b6568c10..c769d10d 100644 --- a/libs/lume/src/Errors/Errors.swift +++ b/libs/lume/src/Errors/Errors.swift @@ -58,6 +58,7 @@ enum PullError: Error, LocalizedError { case fileCreationFailed(String) case reassemblySetupFailed(path: String, underlyingError: Error) case missingUncompressedSizeAnnotation + case invalidMediaType var errorDescription: String? { switch self { @@ -81,6 +82,8 @@ enum PullError: Error, LocalizedError { return "Failed to set up for reassembly at path: \(path). Underlying error: \(underlyingError.localizedDescription)" case .missingUncompressedSizeAnnotation: return "Could not find the required uncompressed disk size annotation in the image config.json." + case .invalidMediaType: + return "Invalid media type" } } } From f491d5a3c996225e7e1f6dc83f0131a39d9def7f Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:41:02 -0700 Subject: [PATCH 11/43] Add logs --- .../lume/src/ContainerRegistry/ImageContainerRegistry.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 3c90b40b..cd57352e 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -930,6 +930,9 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Finished processing layers. 
Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") // --- End retrieving parts --- + // Add detailed logging for debugging + Logger.info("Disk part numbers collected and sorted: \(diskParts.map { $0.0 })") + Logger.info("") // New line after progress // Display download statistics @@ -1068,9 +1071,12 @@ class ImageContainerRegistry: @unchecked Sendable { for partNum in 1...totalPartsFromCollector { // Find the original layer info for this part number // Find the part URL from our collected parts using the logical partNum + Logger.info("Reassembly loop: Looking for partNum \(partNum) in diskParts") // Log loop iteration guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { // This error should now be less likely, but good to keep Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") + // Add current state log on error + Logger.error("Current disk part numbers available: \(diskParts.map { $0.0 })") throw PullError.missingPart(partNum) } let partURL = partInfo.1 // Get the URL from the tuple From ad78a85879e134945c116ff07aa15748384f751b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sat, 19 Apr 2025 23:56:56 -0700 Subject: [PATCH 12/43] Remove legacy decompress --- .../ImageContainerRegistry.swift | 280 ++---------------- 1 file changed, 25 insertions(+), 255 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index cd57352e..4e82927b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1098,7 +1098,7 @@ class ImageContainerRegistry: @unchecked Sendable { let layerMediaType = layer.mediaType */ // Assume the media type for decompression purposes - let layerMediaType = "application/octet-stream+lz4" + // Remove unused variable: let layerMediaType = "application/octet-stream+lz4" Logger.info( "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") @@ -1115,254 +1115,29 @@ class ImageContainerRegistry: @unchecked Sendable { // Seek to the correct offset in the output sparse file try outputHandle.seek(toOffset: currentOffset) + // Always attempt decompression using decompressChunkAndWriteSparse for lz4 parts + Logger.info( + "Decompressing part \(partNum) using decompressChunkAndWriteSparse") + + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), // Use sizeForTruncate + context: "Reassembling") + + // Remove the old block that checked getDecompressionCommand and did direct copy + /* if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - Logger.info( - "Decompressing part \(partNum) with media type: \(layerMediaType)") - - // Handle Apple Archive format - let toolPath = String(decompressCmd.dropFirst("apple_archive:".count)) - let tempOutputPath = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - - // Check input file size before decompression - let inputFileSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Part \(partNum) input size: \(ByteCountFormatter.string(fromByteCount: Int64(inputFileSize), countStyle: .file))" - ) - - // Create a process that decompresses to a temporary file - let process = Process() - process.executableURL = URL(fileURLWithPath: toolPath) - process.arguments = [ - "extract", "-i", partURL.path, "-o", tempOutputPath.path, - ] - - // Add error output capture - let errorPipe = Pipe() - process.standardError = errorPipe - - Logger.info( - "Decompressing Apple Archive format with: \(toolPath) \(process.arguments?.joined(separator: " ") ?? "")" - ) - try process.run() - process.waitUntilExit() - - // Check error output if any - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !errorData.isEmpty, - let errorString = String(data: errorData, encoding: .utf8) - { - Logger.error("Decompression error output: \(errorString)") - } - - if process.terminationStatus != 0 { - Logger.error( - "Apple Archive decompression failed with status: \(process.terminationStatus), falling back to direct copy" - ) - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - let progressBar = createProgressBar( - progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Check if the output file exists and has content - let outputExists = FileManager.default.fileExists( - atPath: tempOutputPath.path) - let outputFileSize = - outputExists - ? ((try? FileManager.default.attributesOfItem( - atPath: tempOutputPath.path)[ - .size] as? UInt64) ?? 0) : 0 - Logger.info( - "Part \(partNum) - Decompressed output exists: \(outputExists), size: \(ByteCountFormatter.string(fromByteCount: Int64(outputFileSize), countStyle: .file))" - ) - - // If decompression produced an empty file, fall back to direct copy - if outputFileSize == 0 { - Logger.info( - "Decompression resulted in empty file, falling back to direct copy for part \(partNum)" - ) - try? 
FileManager.default.removeItem(at: tempOutputPath) - - // Fall back to direct copying (uncompressed) - Logger.info("Copying part \(partNum) directly without decompression...") - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - let progressBar = createProgressBar( - progress: totalProgress, width: 30) - let progressPercent = Int(totalProgress * 100) - let currentSpeed = - ByteCountFormatter.string( - fromByteCount: Int64(Double(bytesWritten) / 0.5), - countStyle: .file) + "/s" - - print( - "\r\(progressBar) \(progressPercent)% | Speed: \(currentSpeed) | Part \(partNum) | \(ByteCountFormatter.string(fromByteCount: Int64(currentOffset + bytesWritten), countStyle: .file)) ", - terminator: "") - fflush(stdout) - - // Also log to the progress logger for consistency - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten - continue - } - - // Read the decompressed file and write to our output - let tempInputHandle = try FileHandle(forReadingFrom: tempOutputPath) - defer { - try? tempInputHandle.close() - try? FileManager.default.removeItem(at: tempOutputPath) - } - - // Read decompressed data in chunks and write to sparse file - var partDecompressedSize: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { // Help manage memory with large files - try! tempInputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } // End of stream - - try outputHandle.write(contentsOf: data) - partDecompressedSize += UInt64(data.count) - chunkCount += 1 - - // Update progress based on decompressed size written - let totalProgress = - Double(currentOffset + partDecompressedSize) - / Double(expectedCompressedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Reassembling") - } - - Logger.info( - "Part \(partNum) - Wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(partDecompressedSize), countStyle: .file))" - ) - currentOffset += partDecompressedSize // Advance offset by decompressed size + // ... [removed decompression logic using external tool] ... } else { // No decompression command available, try direct copy - Logger.info( - "Copying part \(partNum) directly..." - ) - try outputHandle.seek(toOffset: currentOffset) - - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { try? inputHandle.close() } - - // Get part size - let partSize = - (try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size] - as? UInt64) ?? 
0 - Logger.info( - "Direct copy of part \(partNum) with size: \(ByteCountFormatter.string(fromByteCount: Int64(partSize), countStyle: .file))" - ) - - var bytesWritten: UInt64 = 0 - let chunkSize = 1024 * 1024 // 1MB chunks - var chunkCount = 0 - - while true { - let data = autoreleasepool { - try! inputHandle.read(upToCount: chunkSize) ?? Data() - } - if data.isEmpty { break } - - try outputHandle.write(contentsOf: data) - bytesWritten += UInt64(data.count) - chunkCount += 1 - - // Update progress - let totalProgress = - Double(currentOffset + bytesWritten) - / Double(expectedCompressedTotalSize) - reassemblyProgressLogger.logProgress( - current: totalProgress, - context: "Direct copying") - } - - Logger.info( - "Part \(partNum) - Direct copy: wrote \(chunkCount) chunks, total bytes: \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file))" - ) - currentOffset += bytesWritten + // ... [removed direct copy logic] ... } + */ // Ensure data is written before processing next part (optional but safer) try outputHandle.synchronize() @@ -1389,11 +1164,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Decompress the assembled disk image if it's in LZFSE compressed format - Logger.info( - "Checking if disk image is LZFSE compressed and decompressing if needed...") - decompressLZFSEImage(inputPath: outputURL.path) - // Create a properly formatted disk image Logger.info("Converting assembled data to proper disk image format...") @@ -1679,13 +1449,17 @@ class ImageContainerRegistry: @unchecked Sendable { "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." ) - // Use the correct sparse decompression function + // Always use the correct sparse decompression function let decompressedBytesWritten = try decompressChunkAndWriteSparse( inputPath: sourceURL.path, outputHandle: outputHandle, startOffset: currentOffset ) currentOffset += decompressedBytesWritten + // Update progress (using sizeForTruncate which should be available) + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") try outputHandle.synchronize() // Optional: Synchronize after each chunk } @@ -1712,10 +1486,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Decompress the assembled disk image if it's in LZFSE compressed format - Logger.info("Checking if disk image is LZFSE compressed and decompressing if needed...") - decompressLZFSEImage(inputPath: outputURL.path) - // Create a properly formatted disk image Logger.info("Converting assembled data to proper disk image format...") From 5134f719034ffa94efcf5d4659bf78a64455a822 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 20 Apr 2025 00:06:40 -0700 Subject: [PATCH 13/43] Remove hdutil --- .../ImageContainerRegistry.swift | 138 ------------------ 1 file changed, 138 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 4e82927b..db9597a5 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1164,76 +1164,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Create a properly formatted disk image - Logger.info("Converting assembled data to proper disk image format...") - - // Get actual disk usage of the assembled file - let assembledUsage = getActualDiskUsage(path: 
outputURL.path) - let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer - let requiredSpace = assembledUsage + bufferBytes - - // Check available disk space in the destination directory - let fileManager = FileManager.default - let availableSpace = - try? fileManager.attributesOfFileSystem( - forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] - as? UInt64 - - if let available = availableSpace, available < requiredSpace { - Logger.error( - "Insufficient disk space to convert disk image format. Skipping conversion.", - metadata: [ - "available": ByteCountFormatter.string( - fromByteCount: Int64(available), countStyle: .file), - "required": ByteCountFormatter.string( - fromByteCount: Int64(requiredSpace), countStyle: .file), - ] - ) - } else { - // Prioritize SPARSE format for better sparse file handling - Logger.info("Attempting conversion to SPARSE format...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") - process.arguments = [ - "convert", - outputURL.path, // Source: our assembled file - "-format", "SPARSE", // Format: SPARSE (best for sparse images) - "-o", outputURL.path, // Output: overwrite with converted image - ] - - let errorPipe = Pipe() - process.standardError = errorPipe - process.standardOutput = errorPipe - - try process.run() - process.waitUntilExit() - - // Check for errors - let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !outputData.isEmpty, - let outputString = String(data: outputData, encoding: .utf8) - { - Logger.info("hdiutil output: \(outputString)") - } - - if process.terminationStatus == 0 { - // Find the potentially renamed formatted file - let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL - // If the output path is different, remove the original and move the new one - if formattedFile.path != outputURL.path { - try? FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: formattedFile, to: outputURL) - } - Logger.info("Successfully converted disk image to proper format (SPARSE)") - } else { - Logger.error( - "Failed to convert disk image to SPARSE format. VM might not start properly." - ) - // If SPARSE failed, maybe try UDRW as a last resort? - // For now, we'll just log the error. - } - } - Logger.info("Disk image reassembly completed") } else { // Copy single disk image if it exists @@ -1486,74 +1416,6 @@ class ImageContainerRegistry: @unchecked Sendable { ) } - // Create a properly formatted disk image - Logger.info("Converting assembled data to proper disk image format...") - - // Get actual disk usage of the assembled file - let assembledUsage = getActualDiskUsage(path: outputURL.path) - let bufferBytes: UInt64 = 2 * 1024 * 1024 * 1024 // 2GB buffer - let requiredSpace = assembledUsage + bufferBytes - - // Check available disk space in the destination directory - let fileManager = FileManager.default - let availableSpace = - try? fileManager.attributesOfFileSystem( - forPath: outputURL.deletingLastPathComponent().path)[.systemFreeSize] as? UInt64 - - if let available = availableSpace, available < requiredSpace { - Logger.error( - "Insufficient disk space to convert disk image format. 
Skipping conversion.", - metadata: [ - "available": ByteCountFormatter.string( - fromByteCount: Int64(available), countStyle: .file), - "required": ByteCountFormatter.string( - fromByteCount: Int64(requiredSpace), countStyle: .file), - ] - ) - } else { - // Prioritize SPARSE format for better sparse file handling - Logger.info("Attempting conversion to SPARSE format...") - let process = Process() - process.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") - process.arguments = [ - "convert", - outputURL.path, // Source: our assembled file - "-format", "SPARSE", // Format: SPARSE (best for sparse images) - "-o", outputURL.path, // Output: overwrite with converted image - ] - - let errorPipe = Pipe() - process.standardError = errorPipe - process.standardOutput = errorPipe - - try process.run() - process.waitUntilExit() - - // Check for errors - let outputData = errorPipe.fileHandleForReading.readDataToEndOfFile() - if !outputData.isEmpty, let outputString = String(data: outputData, encoding: .utf8) - { - Logger.info("hdiutil output: \(outputString)") - } - - if process.terminationStatus == 0 { - // Find the potentially renamed formatted file - let formattedFile = findFormattedFile(tempFormatted: outputURL) ?? outputURL - // If the output path is different, remove the original and move the new one - if formattedFile.path != outputURL.path { - try? FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: formattedFile, to: outputURL) - } - Logger.info("Successfully converted disk image to proper format (SPARSE)") - } else { - Logger.error( - "Failed to convert disk image to SPARSE format. VM might not start properly." - ) - // If SPARSE failed, maybe try UDRW as a last resort? - // For now, we'll just log the error. - } - } - Logger.info("Disk image reassembly completed") } From da46b10ec719558305646d73fcc1818a57b7acf9 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 20 Apr 2025 11:08:14 -0700 Subject: [PATCH 14/43] Update README.md with conda --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f972a0fb..fa5dd2ea 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ - Mac with Apple Silicon (M1/M2/M3/M4 series) - macOS 15 (Sequoia) or newer -- Python 3.10+ (for Computer and Agent libraries) -- Disk space for VM images (40GB+ recommended) +- Python 3.10+ (required for the Computer, Agent, and MCP libraries). We recommend using Conda (or Anaconda) to create an ad hoc Python environment. +- Disk space for VM images (30GB+ recommended) ## Quick Start From f3cfa7574b8d36ec92d56926f7f1b0fccc4da683 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 21 Apr 2025 04:01:23 +0900 Subject: [PATCH 15/43] docs: update computer/README.md HuggingFace -> Hugging Face --- libs/computer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/computer/README.md b/libs/computer/README.md index f5b84783..1b86d122 100644 --- a/libs/computer/README.md +++ b/libs/computer/README.md @@ -67,7 +67,7 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use I ## Using the Gradio Computer UI -The computer module includes a Gradio UI for creating and sharing demonstration data. The UI provides built-in integration with HuggingFace Datasets for sharing demonstrations and incorporating them into CUA ML pipelines. +The computer module includes a Gradio UI for creating and sharing demonstration data. 
The UI provides built-in integration with Hugging Face Datasets for sharing demonstrations and incorporating them into CUA ML pipelines. ```bash # Install with UI support From 3d00091d2bcdb449e7d2441552510b2bec073101 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Sun, 20 Apr 2025 23:01:26 -0700 Subject: [PATCH 16/43] Fix reassembly --- .../ImageContainerRegistry.swift | 303 ++++++++++++++---- 1 file changed, 242 insertions(+), 61 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index db9597a5..7af60d6d 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1069,9 +1069,7 @@ class ImageContainerRegistry: @unchecked Sendable { // Iterate using the reliable totalParts count from media type // Use totalPartsFromCollector for the loop range for partNum in 1...totalPartsFromCollector { - // Find the original layer info for this part number // Find the part URL from our collected parts using the logical partNum - Logger.info("Reassembly loop: Looking for partNum \(partNum) in diskParts") // Log loop iteration guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { // This error should now be less likely, but good to keep Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") @@ -1081,65 +1079,45 @@ class ImageContainerRegistry: @unchecked Sendable { } let partURL = partInfo.1 // Get the URL from the tuple - // We no longer need to find the original manifest layer here, - // as all parts collected by the collector should be the lz4 type. - // Remove the block that used extractPartInfo: - /* - guard let layer = manifest.layers.first(where: { layer in - if let info = extractPartInfo(from: layer.mediaType) { - return info.partNum == partNum - } - return false - }) else { - // Should not happen if totalParts was derived correctly - Logger.error("Could not find manifest layer for logical part number \(partNum).") - throw PullError.missingPart(partNum) // Or a different error - } - let layerMediaType = layer.mediaType - */ - // Assume the media type for decompression purposes - // Remove unused variable: let layerMediaType = "application/octet-stream+lz4" - Logger.info( "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") - let inputHandle = try FileHandle(forReadingFrom: partURL) - defer { - try? inputHandle.close() - // Clean up temp downloaded part if not from cache - if !partURL.path.contains(cacheDirectory.path) { - try? 
FileManager.default.removeItem(at: partURL) - } - } - // Seek to the correct offset in the output sparse file try outputHandle.seek(toOffset: currentOffset) - // Always attempt decompression using decompressChunkAndWriteSparse for lz4 parts - Logger.info( - "Decompressing part \(partNum) using decompressChunkAndWriteSparse") + // Check if this chunk might be all zeros (sparse data) by sampling the compressed data + // Skip this check for now as it's an optimization we can add later if needed + let isLikelySparse = false - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), // Use sizeForTruncate - context: "Reassembling") - - // Remove the old block that checked getDecompressionCommand and did direct copy - /* - if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType - // ... [removed decompression logic using external tool] ... + // Always attempt decompression using decompressChunkAndWriteSparse for LZ4 parts + if isLikelySparse { + // For sparse chunks, we don't need to write anything - just advance the offset + // We determine the uncompressed size from the chunk metadata or estimation + + // For now, we'll still decompress to ensure correct behavior, and optimize later + Logger.info("Chunk appears to be sparse, but decompressing for reliability") + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten } else { - // No decompression command available, try direct copy - // ... [removed direct copy logic] ... + Logger.info("Decompressing part \(partNum)") + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: partURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten } - */ + + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling" + ) - // Ensure data is written before processing next part (optional but safer) + // Ensure data is written before processing next part try outputHandle.synchronize() } @@ -1147,17 +1125,54 @@ class ImageContainerRegistry: @unchecked Sendable { reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline - // Ensure output handle is closed before post-processing - try outputHandle.close() - + // Optimize sparseness after completing reassembly + try outputHandle.close() // Close handle to ensure all data is flushed + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? UInt64) ?? 
0 Logger.info( - "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + "Final disk image size: \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" ) + // Optimize sparseness if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization: \(error.localizedDescription)") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } + if finalSize != sizeForTruncate { Logger.info( "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" @@ -2468,26 +2483,117 @@ class ImageContainerRegistry: @unchecked Sendable { let reassemblyDir = workDir.appendingPathComponent("reassembly") try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") + + // Pre-allocate a sparse file with the correct size Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") - if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } - guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { throw PushError.invalidURL } + if FileManager.default.fileExists(atPath: reassembledFile.path) { + try FileManager.default.removeItem(at: reassembledFile) + } + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { + throw PushError.fileCreationFailed(reassembledFile.path) + } + let outputHandle = try FileHandle(forWritingTo: reassembledFile) defer { try? outputHandle.close() } + + // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: actualDiskSize) + + // Add test patterns at start and end to verify writability + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! 
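+            // The two writes below stamp a short marker at the first and the
+            // last bytes of the pre-allocated range. truncate(atOffset:) only
+            // set the apparent size (APFS allocates no blocks for the hole in
+            // between), so this cheaply confirms that both ends of the sparse
+            // file are seekable and writable before reassembly begins.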
+ try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: actualDiskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + Logger.info("Test patterns written to sparse file. File is ready for writing.") + + // Track reassembly progress + var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 - for (index, cachedChunkPath, _) in diskChunks { + + // Process each chunk in order + for (index, cachedChunkPath, _) in diskChunks.sorted(by: { $0.index < $1.index }) { Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") - let decompressedBytesWritten = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset) + + // Always seek to the correct position + try outputHandle.seek(toOffset: currentOffset) + + // Decompress and write the chunk + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: cachedChunkPath.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(actualDiskSize), + context: "Reassembling" + ) + + // Ensure data is written before processing next part + try outputHandle.synchronize() } + + // Finalize progress + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + Logger.info("") // Newline + + // Close handle before post-processing + try outputHandle.close() + + // Optimize sparseness if on macOS + let optimizedFile = reassemblyDir.appendingPathComponent("optimized_disk.img") + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", reassembledFile.path, optimizedFile.path] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get sizes of original and optimized files + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: reassembledFile.path) + let optimizedUsage = getActualDiskUsage(path: optimizedFile.path) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace original with optimized version + try FileManager.default.removeItem(at: reassembledFile) + try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile) + Logger.info("Using sparse-optimized file for verification") + } else { + Logger.info("Sparse optimization failed, using original file for verification") + try? FileManager.default.removeItem(at: optimizedFile) + } + } catch { + Logger.info("Error during sparse optimization: \(error.localizedDescription)") + try? 
FileManager.default.removeItem(at: optimizedFile) + } + } + + // Verification step Logger.info("Verifying reassembled file...") let originalSize = diskSize let originalDigest = calculateSHA256(filePath: diskPath.path) let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 0 let reassembledDigest = calculateSHA256(filePath: reassembledFile.path) + + // Check actual disk usage let originalActualSize = getActualDiskUsage(path: diskPath.path) let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path) + + // Report results Logger.info("Results:") Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)") Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)") @@ -2495,9 +2601,84 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info(" Reassembled digest: \(reassembledDigest)") Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))") Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))") - if originalDigest == reassembledDigest { Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") } else { Logger.info("❌ VERIFICATION FAILED: Files differ"); if originalSize != reassembledSize { Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") }; Logger.info("Attempting to identify differences..."); Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions"); Logger.info(" may be handled differently between the original and reassembled files."); if originalActualSize > 0 { let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0; Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%"); if diffPercentage < -40 { Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)."); Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") } else if diffPercentage < -10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)."); Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") } else if diffPercentage > 10 { Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference)."); Logger.info(" This is unusual and may indicate improper sparse file handling.") } else { Logger.info(" ✓ Disk usage difference is minimal (<10%). 
VM likely to function correctly.") } } } + + // Determine if verification was successful + if originalDigest == reassembledDigest { + Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") + } else { + Logger.info("❌ VERIFICATION FAILED: Files differ") + + if originalSize != reassembledSize { + Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") + } + + // Check sparse file characteristics + Logger.info("Attempting to identify differences...") + Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions") + Logger.info(" may be handled differently between the original and reassembled files.") + + if originalActualSize > 0 { + let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0 + Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%") + + if diffPercentage < -40 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).") + Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") + } else if diffPercentage < -10 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).") + Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") + } else if diffPercentage > 10 { + Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") + Logger.info(" This is unusual and may indicate improper sparse file handling.") + } else { + Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") + } + } + + // Offer recovery option + if originalDigest != reassembledDigest { + Logger.info("") + Logger.info("===== ATTEMPTING RECOVERY ACTION =====") + Logger.info("Since verification failed, trying direct copy as a fallback method.") + + let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img") + Logger.info("Creating fallback disk image at: \(fallbackFile.path)") + + // Try rsync first + let rsyncProcess = Process() + rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync") + rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path] + + do { + try rsyncProcess.run() + rsyncProcess.waitUntilExit() + + if rsyncProcess.terminationStatus == 0 { + Logger.info("Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)") + } else { + // Try cp -c as fallback + Logger.info("Rsync failed. Attempting with cp -c command...") + let cpProcess = Process() + cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp") + cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path] + + try cpProcess.run() + cpProcess.waitUntilExit() + + if cpProcess.terminationStatus == 0 { + Logger.info("Direct copy completed with cp -c. 
Fallback image available at: \(fallbackFile.path)") + } else { + Logger.info("All recovery attempts failed.") + } + } + } catch { + Logger.info("Error during recovery attempts: \(error.localizedDescription)") + Logger.info("All recovery attempts failed.") + } + } + } + Logger.info("Reassembled file is available at: \(reassembledFile.path)") - if originalDigest != reassembledDigest { Logger.info(""); Logger.info("===== ATTEMPTING RECOVERY ACTION ====="); Logger.info("Since verification failed, trying direct copy as a fallback method."); let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img"); Logger.info("Creating fallback disk image at: \(fallbackFile.path)"); let rsyncProcess = Process(); rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync"); rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path]; try rsyncProcess.run(); rsyncProcess.waitUntilExit(); if rsyncProcess.terminationStatus == 0 { Logger.info("Direct copy completed. You may want to try using this fallback disk image"); Logger.info("instead if the reassembled one has issues: \(fallbackFile.path)") } else { Logger.info("Direct copy failed. Attempting with cp -c command..."); let cpProcess = Process(); cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp"); cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path]; try cpProcess.run(); cpProcess.waitUntilExit(); if cpProcess.terminationStatus == 0 { Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") } else { Logger.info("All recovery attempts failed.") } } } } } From 360e9f3d0bf6b9a84aa618afcb3d33de2d8e2078 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 16:37:48 +0000 Subject: [PATCH 17/43] docs: update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fa5dd2ea..bb491aa2 100644 --- a/README.md +++ b/README.md @@ -222,6 +222,7 @@ Apple, macOS, and Apple Silicon are trademarks of Apple Inc. Ubuntu and Canonica Ricter Zheng
Ricter Zheng 💻
Rahul Karajgikar 💻
trospix 💻
+ Ikko Eltociear Ashimine
💻 From 1a333478c999cea16cd74b8b8b24794763505897 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 16:37:49 +0000 Subject: [PATCH 18/43] docs: update .all-contributorsrc --- .all-contributorsrc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index fd1e1c62..f0c6bab2 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -115,6 +115,15 @@ "contributions": [ "code" ] + }, + { + "login": "eltociear", + "name": "Ikko Eltociear Ashimine", + "avatar_url": "https://avatars.githubusercontent.com/u/22633385?v=4", + "profile": "https://wavee.world/invitation/b96d00e6-b802-4a1b-8a66-2e3854a01ffd", + "contributions": [ + "code" + ] } ] } From a87861a85c36f37e9ebafff2cf3513ad81d9251c Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 13:32:30 -0700 Subject: [PATCH 19/43] Fix first pull --- .../ImageContainerRegistry.swift | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 7af60d6d..8d60cdf4 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1432,6 +1432,43 @@ class ImageContainerRegistry: @unchecked Sendable { } Logger.info("Disk image reassembly completed") + + // Optimize sparseness for cached reassembly if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for cached reassembly...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced cached reassembly with optimized sparse version") + } else { + Logger.info("Sparse optimization failed for cache, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } } Logger.info("Cache copy complete") From c43a870794e16a91becd429765ee0f5d728ca4d7 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 14:44:38 -0700 Subject: [PATCH 20/43] Fix first pull --- .../ImageContainerRegistry.swift | 79 +++++++++++++++++-- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8d60cdf4..c42324ec 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1224,6 +1224,64 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) + // Apply proper ownership and permissions to ensure VM can start + Logger.info("Setting proper file permissions and ownership...") + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["-R", "u+rw", vmDir.dir.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Ensure disk image has proper permissions + let diskImgPath = URL(fileURLWithPath: vmDir.dir.path).appendingPathComponent("disk.img").path + if FileManager.default.fileExists(atPath: diskImgPath) { + let diskChmodProcess = Process() + diskChmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + diskChmodProcess.arguments = ["0644", diskImgPath] + try diskChmodProcess.run() + diskChmodProcess.waitUntilExit() + + Logger.info("Applied file permissions to disk image") + + // Ensure disk image is properly synchronized to disk + Logger.info("Ensuring disk image is properly synchronized to disk...") + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/diskutil") + syncProcess.arguments = ["apfs", "resetFusionStats"] // This forces disk cache flush + try? syncProcess.run() + syncProcess.waitUntilExit() + + // Alternative sync method if needed + let syncProcess2 = Process() + syncProcess2.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess2.run() + syncProcess2.waitUntilExit() + + Logger.info("Disk image sync complete") + + // Verify the disk image is readable + Logger.info("Verifying disk image integrity...") + let fileHandle = try? FileHandle(forReadingFrom: URL(fileURLWithPath: diskImgPath)) + if let handle = fileHandle { + // Try to read the first 512 bytes (boot sector) + if let data = try? handle.read(upToCount: 512), data.count == 512 { + Logger.info("Disk image verification: Successfully read first 512 bytes") + + // Check for boot signature (0x55AA at the end of the boot sector) + if data.count >= 512 && data[510] == 0x55 && data[511] == 0xAA { + Logger.info("Disk image verification: Boot signature valid (0x55AA)") + } else { + Logger.info("Disk image verification: No valid boot signature found") + } + } else { + Logger.error("Disk image verification: Failed to read first 512 bytes") + } + try? 
handle.close() + } else { + Logger.error("Disk image verification: Failed to open file for reading") + } + } + Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1406,16 +1464,27 @@ class ImageContainerRegistry: @unchecked Sendable { current: Double(currentOffset) / Double(sizeForTruncate), context: "Reassembling Cache") - try outputHandle.synchronize() // Optional: Synchronize after each chunk + try outputHandle.synchronize() // Explicitly synchronize after each chunk } // Finalize progress, close handle (done by defer) reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - // Ensure output handle is closed before post-processing - // No need for explicit close here, defer handles it - // try outputHandle.close() - + // Add test patterns at the beginning and end of the file + Logger.info("Writing test patterns to sparse file to verify integrity...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Ensure handle is properly synchronized before closing + try outputHandle.synchronize() + + // Close handle explicitly instead of relying on defer + try outputHandle.close() + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] From 5972ebd4257038d1643a304180911da8247fcf12 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 14:56:44 -0700 Subject: [PATCH 21/43] Fix first pull --- .../ImageContainerRegistry.swift | 225 +++++++++++++----- 1 file changed, 167 insertions(+), 58 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index c42324ec..84959104 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1203,6 +1203,11 @@ class ImageContainerRegistry: @unchecked Sendable { } } + // Simulate cache pull behavior if this is a first pull + if !cachingEnabled || !validateCache(manifest: manifest, manifestId: manifestId) { + try simulateCachePull(tempVMDir: tempVMDir) + } + // Only move to final location once everything is complete if FileManager.default.fileExists(atPath: vmDir.dir.path) { try FileManager.default.removeItem(at: URL(fileURLWithPath: vmDir.dir.path)) @@ -1224,64 +1229,6 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) - // Apply proper ownership and permissions to ensure VM can start - Logger.info("Setting proper file permissions and ownership...") - let chmodProcess = Process() - chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - chmodProcess.arguments = ["-R", "u+rw", vmDir.dir.path] - try chmodProcess.run() - chmodProcess.waitUntilExit() - - // Ensure disk image has proper permissions - let diskImgPath = URL(fileURLWithPath: vmDir.dir.path).appendingPathComponent("disk.img").path - if FileManager.default.fileExists(atPath: diskImgPath) { - let diskChmodProcess = Process() - diskChmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - 
diskChmodProcess.arguments = ["0644", diskImgPath] - try diskChmodProcess.run() - diskChmodProcess.waitUntilExit() - - Logger.info("Applied file permissions to disk image") - - // Ensure disk image is properly synchronized to disk - Logger.info("Ensuring disk image is properly synchronized to disk...") - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/diskutil") - syncProcess.arguments = ["apfs", "resetFusionStats"] // This forces disk cache flush - try? syncProcess.run() - syncProcess.waitUntilExit() - - // Alternative sync method if needed - let syncProcess2 = Process() - syncProcess2.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess2.run() - syncProcess2.waitUntilExit() - - Logger.info("Disk image sync complete") - - // Verify the disk image is readable - Logger.info("Verifying disk image integrity...") - let fileHandle = try? FileHandle(forReadingFrom: URL(fileURLWithPath: diskImgPath)) - if let handle = fileHandle { - // Try to read the first 512 bytes (boot sector) - if let data = try? handle.read(upToCount: 512), data.count == 512 { - Logger.info("Disk image verification: Successfully read first 512 bytes") - - // Check for boot signature (0x55AA at the end of the boot sector) - if data.count >= 512 && data[510] == 0x55 && data[511] == 0xAA { - Logger.info("Disk image verification: Boot signature valid (0x55AA)") - } else { - Logger.info("Disk image verification: No valid boot signature found") - } - } else { - Logger.error("Disk image verification: Failed to read first 512 bytes") - } - try? handle.close() - } else { - Logger.error("Disk image verification: Failed to open file for reading") - } - } - Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1543,6 +1490,168 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Cache copy complete") } + // Function to simulate cache pull behavior for freshly downloaded images + private func simulateCachePull(tempVMDir: URL) throws { + Logger.info("Simulating cache pull behavior for freshly downloaded image...") + + // 1. Find disk.img in tempVMDir + let diskImgPath = tempVMDir.appendingPathComponent("disk.img") + guard FileManager.default.fileExists(atPath: diskImgPath.path) else { + Logger.info("No disk.img found to simulate cache pull behavior") + return + } + + // 2. Create a temporary directory for the simulation + let simCacheDir = FileManager.default.temporaryDirectory.appendingPathComponent( + "lume_simcache_\(UUID().uuidString)") + try FileManager.default.createDirectory(at: simCacheDir, withIntermediateDirectories: true) + defer { + try? FileManager.default.removeItem(at: simCacheDir) + } + + // 3. Copy the disk.img to the simulation directory + let cachedDiskPath = simCacheDir.appendingPathComponent("cached_disk.img") + try FileManager.default.copyItem(at: diskImgPath, to: cachedDiskPath) + + // 4. Delete original disk.img (will be replaced by the simulated cache pull) + try FileManager.default.removeItem(at: diskImgPath) + + // 5. Get disk size which will be needed for the sparse file + var diskSize: UInt64 = 0 + if let attributes = try? FileManager.default.attributesOfItem(atPath: cachedDiskPath.path), + let size = attributes[.size] as? 
UInt64 { + diskSize = size + } else { + // If size can't be determined, read config.json + let configPath = tempVMDir.appendingPathComponent("config.json") + if let configDiskSize = getUncompressedSizeFromConfig(configPath: configPath) { + diskSize = configDiskSize + } else { + // Try to get from VM config + if FileManager.default.fileExists(atPath: configPath.path) { + do { + let configData = try Data(contentsOf: configPath) + if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData), + let size = vmConfig.diskSize { + diskSize = size + } + } catch { + Logger.error("Failed to read config for disk size: \(error)") + } + } + } + } + + // Fallback if no size could be determined + if diskSize == 0 { + diskSize = 10 * 1024 * 1024 * 1024 // 10GB default + Logger.error("Could not determine disk size, using default: \(diskSize) bytes") + } + + // 6. Create the sparse file with proper size + guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { + throw PullError.fileCreationFailed(diskImgPath.path) + } + + let outputHandle = try FileHandle(forWritingTo: diskImgPath) + defer { try? outputHandle.close() } + + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: diskSize) + Logger.info("Sparse file initialized for simulated cache pull with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 7. Add test patterns at beginning and end + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // 8. Copy data from the cached file + let sourceHandle = try FileHandle(forReadingFrom: cachedDiskPath) + defer { try? sourceHandle.close() } + + // Copy in 50MB chunks to maintain sparse files + let chunkSize = 50 * 1024 * 1024 + var currentOffset: UInt64 = 0 + var progressLogger = ProgressLogger(threshold: 0.05) + + while currentOffset < diskSize { + try sourceHandle.seek(toOffset: currentOffset) + if let chunkData = try sourceHandle.read(upToCount: chunkSize) { + if chunkData.isEmpty { break } + + try outputHandle.seek(toOffset: currentOffset) + try outputHandle.write(contentsOf: chunkData) + currentOffset += UInt64(chunkData.count) + + progressLogger.logProgress( + current: Double(currentOffset) / Double(diskSize), + context: "Simulating Cache Pull" + ) + } else { + break + } + } + + try outputHandle.synchronize() + try outputHandle.close() // Close explicitly before optimizing + + // 9. Optimize the sparse file with cp -c (same as in copyFromCache) + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for simulated cache pull...") + let optimizedPath = diskImgPath.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", diskImgPath.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 + let originalUsage = getActualDiskUsage(path: diskImgPath.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for simulated cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: diskImgPath) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + Logger.info("Replaced with optimized sparse version for simulated cache") + } else { + Logger.info("Sparse optimization failed for simulated cache, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for simulated cache: \(error.localizedDescription)") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // 10. Ensure disk image is properly synchronized to disk + Logger.info("Ensuring disk image is properly synchronized for simulated cache...") + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess.run() + syncProcess.waitUntilExit() + + // Set proper permissions on the disk image + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", diskImgPath.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + Logger.info("Simulated cache pull completed successfully") + } + private func getToken(repository: String) async throws -> String { let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository // Request both pull and push scope for uploads From 134afc59ee745cf2ee2de1c8427bf476add0e64a Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:17:49 -0700 Subject: [PATCH 22/43] Fix first pull --- .../ImageContainerRegistry.swift | 132 ++++++------------ 1 file changed, 42 insertions(+), 90 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 84959104..25d4c34f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,66 +1501,32 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Create a temporary directory for the simulation - let simCacheDir = FileManager.default.temporaryDirectory.appendingPathComponent( - "lume_simcache_\(UUID().uuidString)") - try FileManager.default.createDirectory(at: simCacheDir, withIntermediateDirectories: true) - defer { - try? FileManager.default.removeItem(at: simCacheDir) + // Get the file size + let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) + guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { + Logger.error("Could not determine disk.img size for simulation") + return } - // 3. Copy the disk.img to the simulation directory - let cachedDiskPath = simCacheDir.appendingPathComponent("cached_disk.img") - try FileManager.default.copyItem(at: diskImgPath, to: cachedDiskPath) + // 2. 
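
// The sparse-file mechanics these patches rely on can be sketched with plain
// Foundation: truncating a fresh file to the target size makes APFS record the
// apparent size without allocating data blocks. A minimal sketch under that
// assumption (helper names here are illustrative, not part of the patch):
import Foundation

func makeSparseFile(at url: URL, size: UInt64) throws {
    guard FileManager.default.createFile(atPath: url.path, contents: nil) else {
        throw CocoaError(.fileWriteUnknown)
    }
    let handle = try FileHandle(forWritingTo: url)
    defer { try? handle.close() }
    try handle.truncate(atOffset: size)  // grows apparent size; writes no data blocks
    try handle.synchronize()
}

// st_blocks counts 512-byte units actually allocated, so a freshly truncated
// sparse file reports a large apparent size but near-zero usage here.
func actualUsage(of path: String) -> UInt64 {
    var st = stat()
    guard stat(path, &st) == 0 else { return 0 }
    return UInt64(st.st_blocks) * 512
}
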
Rename the original file to .original + let originalPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: originalPath) - // 4. Delete original disk.img (will be replaced by the simulated cache pull) - try FileManager.default.removeItem(at: diskImgPath) - - // 5. Get disk size which will be needed for the sparse file - var diskSize: UInt64 = 0 - if let attributes = try? FileManager.default.attributesOfItem(atPath: cachedDiskPath.path), - let size = attributes[.size] as? UInt64 { - diskSize = size - } else { - // If size can't be determined, read config.json - let configPath = tempVMDir.appendingPathComponent("config.json") - if let configDiskSize = getUncompressedSizeFromConfig(configPath: configPath) { - diskSize = configDiskSize - } else { - // Try to get from VM config - if FileManager.default.fileExists(atPath: configPath.path) { - do { - let configData = try Data(contentsOf: configPath) - if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData), - let size = vmConfig.diskSize { - diskSize = size - } - } catch { - Logger.error("Failed to read config for disk size: \(error)") - } - } - } - } - - // Fallback if no size could be determined - if diskSize == 0 { - diskSize = 10 * 1024 * 1024 * 1024 // 10GB default - Logger.error("Could not determine disk size, using default: \(diskSize) bytes") - } - - // 6. Create the sparse file with proper size + // 3. Create a new empty file with the same name guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { + // If creation fails, restore the original + try? FileManager.default.moveItem(at: originalPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } + // 4. Open a file handle for writing to the new file let outputHandle = try FileHandle(forWritingTo: diskImgPath) - defer { try? outputHandle.close() } - // Set the file size (creates sparse file) + // 5. Set the total size (creates a sparse file) try outputHandle.truncate(atOffset: diskSize) - Logger.info("Sparse file initialized for simulated cache pull with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + Logger.info("Created sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 7. Add test patterns at beginning and end + // 6. Add test patterns at beginning and end (same as in copyFromCache) let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) @@ -1568,37 +1534,22 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - // 8. Copy data from the cached file - let sourceHandle = try FileHandle(forReadingFrom: cachedDiskPath) - defer { try? 
sourceHandle.close() } + Logger.info("Test patterns written, starting decompression simulation...") - // Copy in 50MB chunks to maintain sparse files - let chunkSize = 50 * 1024 * 1024 - var currentOffset: UInt64 = 0 - var progressLogger = ProgressLogger(threshold: 0.05) - - while currentOffset < diskSize { - try sourceHandle.seek(toOffset: currentOffset) - if let chunkData = try sourceHandle.read(upToCount: chunkSize) { - if chunkData.isEmpty { break } - - try outputHandle.seek(toOffset: currentOffset) - try outputHandle.write(contentsOf: chunkData) - currentOffset += UInt64(chunkData.count) - - progressLogger.logProgress( - current: Double(currentOffset) / Double(diskSize), - context: "Simulating Cache Pull" - ) - } else { - break - } - } + // 7. Use decompressChunkAndWriteSparse - the EXACT same function used by copyFromCache + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: originalPath.path, + outputHandle: outputHandle, + startOffset: 0 + ) + // 8. Make sure the file handle is properly synchronized before closing try outputHandle.synchronize() - try outputHandle.close() // Close explicitly before optimizing + try outputHandle.close() - // 9. Optimize the sparse file with cp -c (same as in copyFromCache) + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) using the same method as cache pull") + + // 9. Use the same sparse file optimization as copyFromCache if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation for simulated cache pull...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1612,43 +1563,44 @@ class ImageContainerRegistry: @unchecked Sendable { process.waitUntilExit() if process.terminationStatus == 0 { - // Get size of optimized file let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: diskImgPath.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) Logger.info( - "Sparse optimization results for simulated cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - // Replace the original with the optimized version + // Replace original with optimized try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) - Logger.info("Replaced with optimized sparse version for simulated cache") + Logger.info("Replaced with optimized sparse version") } else { - Logger.info("Sparse optimization failed for simulated cache, using original file") + Logger.info("Sparse optimization failed, using original file") try? 
FileManager.default.removeItem(atPath: optimizedPath) } } catch { - Logger.info("Error during sparse optimization for simulated cache: \(error.localizedDescription)") + Logger.info("Error during sparse optimization: \(error.localizedDescription)") try? FileManager.default.removeItem(atPath: optimizedPath) } } - // 10. Ensure disk image is properly synchronized to disk - Logger.info("Ensuring disk image is properly synchronized for simulated cache...") - let syncProcess = Process() - syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess.run() - syncProcess.waitUntilExit() - - // Set proper permissions on the disk image + // 10. Set permissions and do final sync let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() + // Final sync + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try? syncProcess.run() + syncProcess.waitUntilExit() + + // 11. Clean up + try? FileManager.default.removeItem(at: originalPath) + Logger.info("Simulated cache pull completed successfully") } From e017a7c641d2b0b8802930f5015bf153446db3a0 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:34:17 -0700 Subject: [PATCH 23/43] Fix first pull --- .../ImageContainerRegistry.swift | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 25d4c34f..057e6bed 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,32 +1501,32 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // Get the file size + // 2. Get disk size and other attributes let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 2. Rename the original file to .original - let originalPath = tempVMDir.appendingPathComponent("disk.img.original") - try FileManager.default.moveItem(at: diskImgPath, to: originalPath) + // 3. Create backup of original + let backupPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - // 3. Create a new empty file with the same name + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 4. Create empty sparse file guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original - try? FileManager.default.moveItem(at: originalPath, to: diskImgPath) + try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 4. Open a file handle for writing to the new file + // 5. Open the file and truncate to desired size (creates sparse file) let outputHandle = try FileHandle(forWritingTo: diskImgPath) - - // 5. Set the total size (creates a sparse file) try outputHandle.truncate(atOffset: diskSize) - Logger.info("Created sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 6. 
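
// The test-pattern write below has a natural read-back counterpart; a hedged
// sketch, assuming the file already has its final size (the function name is
// illustrative, not part of the patch):
import Foundation

func verifyTestPattern(at url: URL, fileSize: UInt64) throws -> Bool {
    let pattern = Data("LUME_TEST_PATTERN".utf8)
    let handle = try FileHandle(forReadingFrom: url)
    defer { try? handle.close() }
    try handle.seek(toOffset: 0)
    let head = try handle.read(upToCount: pattern.count)
    try handle.seek(toOffset: fileSize - UInt64(pattern.count))
    let tail = try handle.read(upToCount: pattern.count)
    return head == pattern && tail == pattern
}
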
Add test patterns at beginning and end (same as in copyFromCache) + // 6. Add test patterns at beginning and end exactly as in copyFromCache + Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) @@ -1534,24 +1534,24 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - Logger.info("Test patterns written, starting decompression simulation...") + // 7. Now decompress the original disk image exactly as we would with cache parts + Logger.info("Processing disk image using the same mechanism as cache pull...") - // 7. Use decompressChunkAndWriteSparse - the EXACT same function used by copyFromCache let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: originalPath.path, + inputPath: backupPath.path, outputHandle: outputHandle, startOffset: 0 ) - // 8. Make sure the file handle is properly synchronized before closing + Logger.info("Processed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + + // 8. Ensure all data is written to disk try outputHandle.synchronize() try outputHandle.close() - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) using the same method as cache pull") - - // 9. Use the same sparse file optimization as copyFromCache + // 9. Run sparse file optimization with cp -c exactly as in the cache pull process if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation for simulated cache pull...") + Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" let process = Process() @@ -1571,7 +1571,6 @@ class ImageContainerRegistry: @unchecked Sendable { "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - // Replace original with optimized try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") @@ -1585,23 +1584,23 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Set permissions and do final sync + // 10. Ensure file has correct permissions let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - // Final sync + // 11. Final sync to ensure all data is on disk let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try? syncProcess.run() + try syncProcess.run() syncProcess.waitUntilExit() - // 11. Clean up - try? FileManager.default.removeItem(at: originalPath) + // 12. 
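
// The chmod/sync Process boilerplate above recurs throughout these patches; a
// small wrapper like this hypothetical helper would collapse each call site to
// one line:
import Foundation

@discardableResult
func runTool(_ path: String, _ arguments: [String] = []) throws -> Int32 {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: path)
    process.arguments = arguments
    try process.run()
    process.waitUntilExit()
    return process.terminationStatus
}

// e.g. try runTool("/bin/chmod", ["0644", diskImgPath.path])
//      try runTool("/bin/sync")
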
Clean up the backup file + try FileManager.default.removeItem(at: backupPath) - Logger.info("Simulated cache pull completed successfully") + Logger.info("Simulation of cache pull behavior completed") } private func getToken(repository: String) async throws -> String { From a3895d424dda04e26ec5e938acc934e7636d9ac3 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 15:47:42 -0700 Subject: [PATCH 24/43] Fix first pull --- .../ImageContainerRegistry.swift | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 057e6bed..7022858b 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,55 +1501,53 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Get disk size and other attributes + // 2. Get file size and other attributes let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 3. Create backup of original + // 3. Rename the original file to backup let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - // 4. Create empty sparse file + // 4. Create a new empty file guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Open the file and truncate to desired size (creates sparse file) + // 5. Open the file handle and set size (creates sparse file) let outputHandle = try FileHandle(forWritingTo: diskImgPath) try outputHandle.truncate(atOffset: diskSize) - // 6. Add test patterns at beginning and end exactly as in copyFromCache + // 6. Add test patterns at beginning and end Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // 7. Now decompress the original disk image exactly as we would with cache parts - Logger.info("Processing disk image using the same mechanism as cache pull...") + // 7. Decompress the original disk image at offset 0 + Logger.info("Decompressing original disk image at offset 0...") let bytesWritten = try decompressChunkAndWriteSparse( inputPath: backupPath.path, outputHandle: outputHandle, startOffset: 0 ) - Logger.info("Processed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") // 8. Ensure all data is written to disk try outputHandle.synchronize() try outputHandle.close() - // 9. Run sparse file optimization with cp -c exactly as in the cache pull process + // 9. 
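
// A cp -c clone must preserve content byte for byte, which is what the SHA-256
// verification elsewhere in this file checks. A streaming digest can be
// sketched with CryptoKit so multi-GB images never sit in memory at once (a
// sketch, not the file's calculateSHA256 implementation):
import CryptoKit
import Foundation

func sha256Hex(of url: URL) throws -> String {
    let handle = try FileHandle(forReadingFrom: url)
    defer { try? handle.close() }
    var hasher = SHA256()
    while let chunk = try handle.read(upToCount: 4 * 1024 * 1024), !chunk.isEmpty {
        hasher.update(data: chunk)  // hash in 4 MB slices
    }
    return hasher.finalize().map { String(format: "%02x", $0) }.joined()
}
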
Optimize sparse representation with cp -c if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1584,7 +1582,7 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Ensure file has correct permissions + // 10. Set permissions to match cache hit (0644) let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] @@ -1600,7 +1598,7 @@ class ImageContainerRegistry: @unchecked Sendable { // 12. Clean up the backup file try FileManager.default.removeItem(at: backupPath) - Logger.info("Simulation of cache pull behavior completed") + Logger.info("Cache pull simulation completed successfully") } private func getToken(repository: String) async throws -> String { From 62f90afb2c795a8acf7348247b4ae617ab6c5d68 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:01:33 -0700 Subject: [PATCH 25/43] Fix first pull --- .../ImageContainerRegistry.swift | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 7022858b..dc1b32ef 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1501,40 +1501,39 @@ class ImageContainerRegistry: @unchecked Sendable { return } - // 2. Get file size and other attributes + // 2. Get file attributes and size let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - // 3. Rename the original file to backup + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // 3. Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - - // 4. Create a new empty file + // 4. Create empty file and prepare for sparse file creation guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { // If creation fails, restore the original try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Open the file handle and set size (creates sparse file) + // 5. Set up file handle and create sparse file let outputHandle = try FileHandle(forWritingTo: diskImgPath) try outputHandle.truncate(atOffset: diskSize) - // 6. Add test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") + // 6. Write test patterns at beginning and end let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) - // 7. Decompress the original disk image at offset 0 - Logger.info("Decompressing original disk image at offset 0...") + // 7. 
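
// decompressChunkAndWriteSparse (defined earlier in this file) combines LZ4
// decompression with sparse writing. The sparse half of that contract can be
// sketched on its own: write only blocks containing non-zero bytes, and seek
// across all-zero blocks so the filesystem keeps holes. (Assumes already
// decompressed input; names are illustrative.)
import Foundation

func writeSparse(
    _ data: Data, to handle: FileHandle, at offset: UInt64, blockSize: Int = 64 * 1024
) throws -> UInt64 {
    var index = data.startIndex
    while index < data.endIndex {
        let end = data.index(index, offsetBy: blockSize, limitedBy: data.endIndex)
            ?? data.endIndex
        let block = data[index..<end]
        if block.contains(where: { $0 != 0 }) {
            let position = UInt64(data.distance(from: data.startIndex, to: index))
            try handle.seek(toOffset: offset + position)
            try handle.write(contentsOf: block)
        }  // all-zero block: no write, leaving a hole
        index = end
    }
    return UInt64(data.count)
}
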
Decompress the original data at offset 0 + Logger.info("Decompressing original disk image with same mechanism as cache pull...") let bytesWritten = try decompressChunkAndWriteSparse( inputPath: backupPath.path, outputHandle: outputHandle, @@ -1547,7 +1546,7 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.synchronize() try outputHandle.close() - // 9. Optimize sparse representation with cp -c + // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1582,14 +1581,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // 10. Set permissions to match cache hit (0644) + // 10. Explicitly set permissions to match cache hit (0644) let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - // 11. Final sync to ensure all data is on disk + // 11. Final sync to ensure all data is flushed to disk let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try syncProcess.run() From c321c8affc28588fbb5833ef54e3058e4cacc277 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:20:10 -0700 Subject: [PATCH 26/43] Fix first pull --- .../ContainerRegistry/ImageContainerRegistry.swift | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index dc1b32ef..63dde180 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1526,11 +1526,13 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.truncate(atOffset: diskSize) // 6. Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() // 7. Decompress the original data at offset 0 Logger.info("Decompressing original disk image with same mechanism as cache pull...") @@ -1542,8 +1544,10 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - // 8. Ensure all data is written to disk + // 8. Ensure all data is written to disk with an explicit sync try outputHandle.synchronize() + + // Very important: close the handle before optimization try outputHandle.close() // 9. 
Optimize sparse file with cp -c (exactly matching cache pull process) @@ -1568,6 +1572,13 @@ class ImageContainerRegistry: @unchecked Sendable { "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) + // Before replacing the file, make sure to synchronize the filesystem + let syncBeforeReplace = Process() + syncBeforeReplace.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncBeforeReplace.run() + syncBeforeReplace.waitUntilExit() + + // Now replace the original with the optimized version try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") From ae7a8f526ec2f2a5d8856c7d9ffeeb12e0149968 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:35:56 -0700 Subject: [PATCH 27/43] Fix first pull --- .../ImageContainerRegistry.swift | 67 +++++++++++-------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 63dde180..5db9b411 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1521,40 +1521,53 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.fileCreationFailed(diskImgPath.path) } - // 5. Set up file handle and create sparse file - let outputHandle = try FileHandle(forWritingTo: diskImgPath) - try outputHandle.truncate(atOffset: diskSize) + // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly + try autoreleasepool { + // 5. Set up file handle and create sparse file + let outputHandle = try FileHandle(forWritingTo: diskImgPath) + try outputHandle.truncate(atOffset: diskSize) + + // 6. Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // 7. Decompress the original data at offset 0 + Logger.info("Decompressing original disk image with same mechanism as cache pull...") + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: backupPath.path, + outputHandle: outputHandle, + startOffset: 0 + ) + + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") + + // 8. Ensure all data is written to disk with an explicit sync + try outputHandle.synchronize() + + // Very important: explicitly close the handle here inside the autorelease pool + try outputHandle.close() + Logger.info("File handle explicitly closed after decompression and synchronization") + } - // 6. Write test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! 
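
// The autoreleasepool introduced in this patch matters because FileHandle is
// an NSObject: autoreleased references can keep its descriptor alive past the
// last use, which races with the cp -c step that follows. A generic sketch of
// the pattern (the helper name is illustrative):
import Foundation

func withWritableFile<T>(_ url: URL, _ body: (FileHandle) throws -> T) throws -> T {
    try autoreleasepool {
        let handle = try FileHandle(forWritingTo: url)
        defer { try? handle.close() }  // descriptor closed before the pool drains
        return try body(handle)
    }
}
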
- try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // 7. Decompress the original data at offset 0 - Logger.info("Decompressing original disk image with same mechanism as cache pull...") - let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: backupPath.path, - outputHandle: outputHandle, - startOffset: 0 - ) - - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - - // 8. Ensure all data is written to disk with an explicit sync - try outputHandle.synchronize() - - // Very important: close the handle before optimization - try outputHandle.close() + // Wait a moment for file system operations to complete + Thread.sleep(forTimeInterval: 0.5) // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" + // Run a sync before optimization + let syncBeforeOptimize = Process() + syncBeforeOptimize.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncBeforeOptimize.run() + syncBeforeOptimize.waitUntilExit() + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", diskImgPath.path, optimizedPath] From 661556f3d110d110871bb5193c95acd1117db76e Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 16:52:08 -0700 Subject: [PATCH 28/43] Fix first pull --- .../ImageContainerRegistry.swift | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 5db9b411..9d748aab 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1521,6 +1521,12 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.fileCreationFailed(diskImgPath.path) } + // Run an initial filesystem sync + let initialSyncProcess = Process() + initialSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try initialSyncProcess.run() + initialSyncProcess.waitUntilExit() + // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly try autoreleasepool { // 5. Set up file handle and create sparse file @@ -1534,6 +1540,8 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.write(contentsOf: testPattern) try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) + + // Make sure test patterns are synced to disk first try outputHandle.synchronize() // 7. Decompress the original data at offset 0 @@ -1546,16 +1554,30 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - // 8. Ensure all data is written to disk with an explicit sync + // 8. 
Ensure all data is written to disk with multiple explicit syncs try outputHandle.synchronize() + // Force an fsync using lower-level API for the file descriptor + let fd = outputHandle.fileDescriptor + if fd >= 0 { + fsync(fd) + Logger.info("Performed low-level fsync on file descriptor") + } + // Very important: explicitly close the handle here inside the autorelease pool try outputHandle.close() Logger.info("File handle explicitly closed after decompression and synchronization") } - // Wait a moment for file system operations to complete - Thread.sleep(forTimeInterval: 0.5) + // Perform an explicit filesystem sync after closing the file handle + let postCloseSyncProcess = Process() + postCloseSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try postCloseSyncProcess.run() + postCloseSyncProcess.waitUntilExit() + + // Wait longer to ensure all filesystem operations are complete + Logger.info("Waiting for filesystem operations to complete...") + Thread.sleep(forTimeInterval: 1.0) // 9. Optimize sparse file with cp -c (exactly matching cache pull process) if FileManager.default.fileExists(atPath: "/bin/cp") { @@ -1595,6 +1617,12 @@ class ImageContainerRegistry: @unchecked Sendable { try FileManager.default.removeItem(at: diskImgPath) try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") + + // Additional sync after replacement + let syncAfterReplace = Process() + syncAfterReplace.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncAfterReplace.run() + syncAfterReplace.waitUntilExit() } else { Logger.info("Sparse optimization failed, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) @@ -1618,6 +1646,15 @@ class ImageContainerRegistry: @unchecked Sendable { try syncProcess.run() syncProcess.waitUntilExit() + // One more filesystem sync for good measure + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + + // Wait a moment for final filesystem operations + Thread.sleep(forTimeInterval: 0.5) + // 12. 
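
// fsync(2) above flushes kernel buffers to the drive, but on macOS it does not
// force data through the drive's own cache; Apple documents F_FULLFSYNC as the
// stronger barrier. A hedged sketch of that stronger flush:
import Darwin
import Foundation

func fullSync(_ handle: FileHandle) {
    let fd = handle.fileDescriptor
    if fcntl(fd, F_FULLFSYNC) != 0 {  // not supported on every filesystem
        fsync(fd)                     // fall back to the plain flush used above
    }
}
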
Clean up the backup file try FileManager.default.removeItem(at: backupPath) From b47201f1ee90a7ad6e185e04fa1cb08d0d3bec1b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 17:44:59 -0700 Subject: [PATCH 29/43] Fix --- .../ImageContainerRegistry.swift | 573 +++++++++--------- 1 file changed, 271 insertions(+), 302 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9d748aab..d075f445 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1238,6 +1238,137 @@ class ImageContainerRegistry: @unchecked Sendable { ) } + // Shared function to handle disk image creation - can be used by both cache hit and cache miss paths + private func createDiskImageFromSource( + sourceURL: URL, // Source data to decompress + destinationURL: URL, // Where to create the disk image + diskSize: UInt64 // Total size for the sparse file + ) throws { + Logger.info("Creating sparse disk image...") + + // Create empty destination file + if FileManager.default.fileExists(atPath: destinationURL.path) { + try FileManager.default.removeItem(at: destinationURL) + } + guard FileManager.default.createFile(atPath: destinationURL.path, contents: nil) else { + throw PullError.fileCreationFailed(destinationURL.path) + } + + // Create sparse file + let outputHandle = try FileHandle(forWritingTo: destinationURL) + try outputHandle.truncate(atOffset: diskSize) + + // Write test patterns at beginning and end + Logger.info("Writing test patterns to verify writability...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Decompress the source data at offset 0 + Logger.info("Decompressing source data...") + let bytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: 0 + ) + Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data") + + // Ensure data is written and close handle + try outputHandle.synchronize() + try outputHandle.close() + + // Run sync to flush filesystem + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncProcess.run() + syncProcess.waitUntilExit() + + // Optimize with cp -c + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = destinationURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", destinationURL.path, optimizedPath] + + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get optimization results + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 + let originalUsage = getActualDiskUsage(path: destinationURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace original with optimized + try FileManager.default.removeItem(at: destinationURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: destinationURL) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // Set permissions to 0644 + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", destinationURL.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Final sync + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + } + + // Function to simulate cache pull behavior for freshly downloaded images + private func simulateCachePull(tempVMDir: URL) throws { + Logger.info("Simulating cache pull behavior for freshly downloaded image...") + + // Find disk.img in tempVMDir + let diskImgPath = tempVMDir.appendingPathComponent("disk.img") + guard FileManager.default.fileExists(atPath: diskImgPath.path) else { + Logger.info("No disk.img found to simulate cache pull behavior") + return + } + + // Get file attributes and size + let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) + guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { + Logger.error("Could not determine disk.img size for simulation") + return + } + + Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") + + // Create backup of original file + let backupPath = tempVMDir.appendingPathComponent("disk.img.original") + try FileManager.default.moveItem(at: diskImgPath, to: backupPath) + + // Use shared function to create the disk image + try createDiskImageFromSource( + sourceURL: backupPath, + destinationURL: diskImgPath, + diskSize: diskSize + ) + + // Clean up backup + try FileManager.default.removeItem(at: backupPath) + + Logger.info("Cache pull simulation completed successfully") + } + private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) async throws { @@ -1249,8 +1380,6 @@ class ImageContainerRegistry: @unchecked Sendable { // Instantiate collector let diskPartsCollector = DiskPartsCollector() - // Remove totalDiskParts - // var totalDiskParts: Int? = nil var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files @@ -1293,12 +1422,6 @@ class ImageContainerRegistry: @unchecked Sendable { let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector - // Remove old guard check - /* - guard let totalParts = totalDiskParts else { - Logger.info("No cached layers with valid part information found. 
Assuming single-part image or non-lz4 parts.") - } - */ Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") // --- End retrieving parts --- @@ -1355,312 +1478,158 @@ class ImageContainerRegistry: @unchecked Sendable { throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) } - // Wrap file handle setup and sparse file creation within this block - let outputHandle: FileHandle - do { - // Ensure parent directory exists - try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) - // Explicitly create the file first, removing old one if needed - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) - } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { - throw PullError.fileCreationFailed(outputURL.path) - } - // Open handle for writing - outputHandle = try FileHandle(forWritingTo: outputURL) - // Set the file size (creates sparse file) - try outputHandle.truncate(atOffset: sizeForTruncate) - Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") - } catch { - Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) - } - - // Ensure handle is closed when exiting this scope - defer { try? outputHandle.close() } - - // ... (Get uncompressed size etc.) ... - - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 - - // Iterate from 1 up to the total number of parts found by the collector - for collectorPartNum in 1...totalParts { - // Find the source URL from our collected parts using the sequential collectorPartNum - guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { - Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") - throw PullError.missingPart(collectorPartNum) - } - let sourceURL = sourceInfo.1 // Get URL from tuple - - // Log using the sequential collector part number - Logger.info( - "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
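// ---------------------------------------------------------------------------
// [Editor's aside - illustrative sketch, not part of this patch]
// `decompressChunkAndWriteSparse` is called throughout this series but its
// body never appears in these hunks. The sketch below shows the zero-skipping
// write pattern such a helper can rely on to keep the reassembled image
// sparse. The function name and scaffolding are assumptions; the 4 MB
// granularity mirrors the `holeGranularityBytes`/`zeroChunk` constants a later
// patch in this series adds to this class, and the output file is assumed to
// have been pre-sized with `truncate(atOffset:)` as the surrounding code does.
func writeSkippingZeroBlocks(_ data: Data, to handle: FileHandle, at offset: UInt64) throws {
    let blockSize = 4 * 1024 * 1024
    let zeroBlock = Data(count: blockSize)
    var cursor = 0
    while cursor < data.count {
        let end = min(cursor + blockSize, data.count)
        let chunk = data.subdata(in: cursor..<end)
        // Skipping the write for an all-zero block leaves a hole: the apparent
        // size is unchanged (truncate already set it), but the filesystem
        // allocates no storage for the skipped range.
        if !(chunk.count == blockSize && chunk == zeroBlock) {
            try handle.seek(toOffset: offset + UInt64(cursor))
            try handle.write(contentsOf: chunk)
        }
        cursor = end
    }
}
// ---------------------------------------------------------------------------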
+ // If we have just one disk part, use the shared function + if totalParts == 1 { + // Single part - use shared function + let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) + try createDiskImageFromSource( + sourceURL: sourceURL, + destinationURL: outputURL, + diskSize: sizeForTruncate ) - - // Always use the correct sparse decompression function - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: sourceURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - // Update progress (using sizeForTruncate which should be available) - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling Cache") - - try outputHandle.synchronize() // Explicitly synchronize after each chunk - } - - // Finalize progress, close handle (done by defer) - reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - - // Add test patterns at the beginning and end of the file - Logger.info("Writing test patterns to sparse file to verify integrity...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - // Ensure handle is properly synchronized before closing - try outputHandle.synchronize() - - // Close handle explicitly instead of relying on defer - try outputHandle.close() - - // Verify final size - let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" - ) - - // Use the calculated sizeForTruncate for comparison - if finalSize != sizeForTruncate { - Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" - ) - } - - Logger.info("Disk image reassembly completed") - - // Optimize sparseness for cached reassembly if on macOS - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation for cached reassembly...") - let optimizedPath = outputURL.path + ".optimized" - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", outputURL.path, optimizedPath] - + } else { + // Multiple parts - we need to reassemble + // Wrap file handle setup and sparse file creation within this block + let outputHandle: FileHandle do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 
0 - let originalUsage = getActualDiskUsage(path: outputURL.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Replace the original with the optimized version + // Ensure parent directory exists + try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + // Explicitly create the file first, removing old one if needed + if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) - Logger.info("Replaced cached reassembly with optimized sparse version") - } else { - Logger.info("Sparse optimization failed for cache, using original file") + } + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + throw PullError.fileCreationFailed(outputURL.path) + } + // Open handle for writing + outputHandle = try FileHandle(forWritingTo: outputURL) + // Set the file size (creates sparse file) + try outputHandle.truncate(atOffset: sizeForTruncate) + Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") + } catch { + Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + } + + // Ensure handle is closed when exiting this scope + defer { try? outputHandle.close() } + + var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) + var currentOffset: UInt64 = 0 + + // Iterate from 1 up to the total number of parts found by the collector + for collectorPartNum in 1...totalParts { + // Find the source URL from our collected parts using the sequential collectorPartNum + guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { + Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + throw PullError.missingPart(collectorPartNum) + } + let sourceURL = sourceInfo.1 // Get URL from tuple + + // Log using the sequential collector part number + Logger.info( + "Decompressing part \(collectorPartNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent) at offset \(currentOffset)..." 
+ ) + + // Always use the correct sparse decompression function + let decompressedBytesWritten = try decompressChunkAndWriteSparse( + inputPath: sourceURL.path, + outputHandle: outputHandle, + startOffset: currentOffset + ) + currentOffset += decompressedBytesWritten + // Update progress (using sizeForTruncate which should be available) + reassemblyProgressLogger.logProgress( + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") + + try outputHandle.synchronize() // Explicitly synchronize after each chunk + } + + // Finalize progress, close handle (done by defer) + reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") + + // Add test patterns at the beginning and end of the file + Logger.info("Writing test patterns to sparse file to verify integrity...") + let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! + try outputHandle.seek(toOffset: 0) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) + try outputHandle.write(contentsOf: testPattern) + try outputHandle.synchronize() + + // Ensure handle is properly synchronized before closing + try outputHandle.synchronize() + + // Close handle explicitly instead of relying on defer + try outputHandle.close() + + // Verify final size + let finalSize = + (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] + as? UInt64) ?? 0 + Logger.info( + "Final disk image size from cache (before sparse file optimization): \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" + ) + + // Use the calculated sizeForTruncate for comparison + if finalSize != sizeForTruncate { + Logger.info( + "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" + ) + } + + Logger.info("Disk image reassembly completed") + + // Optimize sparseness for cached reassembly if on macOS + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation for cached reassembly...") + let optimizedPath = outputURL.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", outputURL.path, optimizedPath] + + do { + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + // Get size of optimized file + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: outputURL.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace the original with the optimized version + try FileManager.default.removeItem(at: outputURL) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + Logger.info("Replaced cached reassembly with optimized sparse version") + } else { + Logger.info("Sparse optimization failed for cache, using original file") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } catch { + Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") try? FileManager.default.removeItem(atPath: optimizedPath) } - } catch { - Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") - try? FileManager.default.removeItem(atPath: optimizedPath) } + + // Set permissions to ensure consistency + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", outputURL.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() } } Logger.info("Cache copy complete") } - // Function to simulate cache pull behavior for freshly downloaded images - private func simulateCachePull(tempVMDir: URL) throws { - Logger.info("Simulating cache pull behavior for freshly downloaded image...") - - // 1. Find disk.img in tempVMDir - let diskImgPath = tempVMDir.appendingPathComponent("disk.img") - guard FileManager.default.fileExists(atPath: diskImgPath.path) else { - Logger.info("No disk.img found to simulate cache pull behavior") - return - } - - // 2. Get file attributes and size - let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) - guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { - Logger.error("Could not determine disk.img size for simulation") - return - } - - Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))") - - // 3. Create backup of original file - let backupPath = tempVMDir.appendingPathComponent("disk.img.original") - try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - - // 4. Create empty file and prepare for sparse file creation - guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { - // If creation fails, restore the original - try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) - throw PullError.fileCreationFailed(diskImgPath.path) - } - - // Run an initial filesystem sync - let initialSyncProcess = Process() - initialSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try initialSyncProcess.run() - initialSyncProcess.waitUntilExit() - - // IMPORTANT: Use autoreleasepool to ensure file handle is released promptly - try autoreleasepool { - // 5. Set up file handle and create sparse file - let outputHandle = try FileHandle(forWritingTo: diskImgPath) - try outputHandle.truncate(atOffset: diskSize) - - // 6. Write test patterns at beginning and end - Logger.info("Writing test patterns to verify writability...") - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.seek(toOffset: 0) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - - // Make sure test patterns are synced to disk first - try outputHandle.synchronize() - - // 7. Decompress the original data at offset 0 - Logger.info("Decompressing original disk image with same mechanism as cache pull...") - let bytesWritten = try decompressChunkAndWriteSparse( - inputPath: backupPath.path, - outputHandle: outputHandle, - startOffset: 0 - ) - - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of disk image data") - - // 8. 
Ensure all data is written to disk with multiple explicit syncs - try outputHandle.synchronize() - - // Force an fsync using lower-level API for the file descriptor - let fd = outputHandle.fileDescriptor - if fd >= 0 { - fsync(fd) - Logger.info("Performed low-level fsync on file descriptor") - } - - // Very important: explicitly close the handle here inside the autorelease pool - try outputHandle.close() - Logger.info("File handle explicitly closed after decompression and synchronization") - } - - // Perform an explicit filesystem sync after closing the file handle - let postCloseSyncProcess = Process() - postCloseSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") - try postCloseSyncProcess.run() - postCloseSyncProcess.waitUntilExit() - - // Wait longer to ensure all filesystem operations are complete - Logger.info("Waiting for filesystem operations to complete...") - Thread.sleep(forTimeInterval: 1.0) - - // 9. Optimize sparse file with cp -c (exactly matching cache pull process) - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation...") - let optimizedPath = diskImgPath.path + ".optimized" - - // Run a sync before optimization - let syncBeforeOptimize = Process() - syncBeforeOptimize.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncBeforeOptimize.run() - syncBeforeOptimize.waitUntilExit() - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", diskImgPath.path, optimizedPath] - - do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 - let originalUsage = getActualDiskUsage(path: diskImgPath.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Before replacing the file, make sure to synchronize the filesystem - let syncBeforeReplace = Process() - syncBeforeReplace.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncBeforeReplace.run() - syncBeforeReplace.waitUntilExit() - - // Now replace the original with the optimized version - try FileManager.default.removeItem(at: diskImgPath) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) - Logger.info("Replaced with optimized sparse version") - - // Additional sync after replacement - let syncAfterReplace = Process() - syncAfterReplace.executableURL = URL(fileURLWithPath: "/bin/sync") - try syncAfterReplace.run() - syncAfterReplace.waitUntilExit() - } else { - Logger.info("Sparse optimization failed, using original file") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } - - // 10. Explicitly set permissions to match cache hit (0644) - let chmodProcess = Process() - chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") - chmodProcess.arguments = ["0644", diskImgPath.path] - try chmodProcess.run() - chmodProcess.waitUntilExit() - - // 11. 
Final sync to ensure all data is flushed to disk
-        let syncProcess = Process()
-        syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync")
-        try syncProcess.run()
-        syncProcess.waitUntilExit()
-
-        // One more filesystem sync for good measure
-        let finalSyncProcess = Process()
-        finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync")
-        try finalSyncProcess.run()
-        finalSyncProcess.waitUntilExit()
-
-        // Wait a moment for final filesystem operations
-        Thread.sleep(forTimeInterval: 0.5)
-
-        // 12. Clean up the backup file
-        try FileManager.default.removeItem(at: backupPath)
-
-        Logger.info("Cache pull simulation completed successfully")
-    }
-
     private func getToken(repository: String) async throws -> String {
         let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository
         // Request both pull and push scope for uploads

From eca23e0333b9a5ef60c6d58834401bb8ccc877f5 Mon Sep 17 00:00:00 2001
From: f-trycua
Date: Mon, 21 Apr 2025 18:48:07 -0700
Subject: [PATCH 30/43] Handle disk partition

---
 .../ImageContainerRegistry.swift              | 126 ++++++++++++++--
 1 file changed, 118 insertions(+), 8 deletions(-)

diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
index d075f445..0cd61573 100644
--- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
+++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift
@@ -1350,22 +1350,132 @@ class ImageContainerRegistry: @unchecked Sendable {
             return
         }
 
-        Logger.info("Creating sparse file with size: \(ByteCountFormatter.string(fromByteCount: Int64(diskSize), countStyle: .file))")
+        Logger.info("Creating disk image clone with partition table preserved...")
 
         // Create backup of original file
         let backupPath = tempVMDir.appendingPathComponent("disk.img.original")
         try FileManager.default.moveItem(at: diskImgPath, to: backupPath)
 
-        // Use shared function to create the disk image
-        try createDiskImageFromSource(
-            sourceURL: backupPath,
-            destinationURL: diskImgPath,
-            diskSize: diskSize
-        )
+        // We'll use macOS's built-in disk cloning capabilities to preserve partition information
+        // First, create an empty destination file; dd below writes the cloned data into it
+        guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else {
+            // If creation fails, restore the original
+            try?
FileManager.default.moveItem(at: backupPath, to: diskImgPath) + throw PullError.fileCreationFailed(diskImgPath.path) + } - // Clean up backup + // Use dd to clone the disk with partition table preserved + Logger.info("Cloning disk with partition table using dd...") + let ddProcess = Process() + ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") + ddProcess.arguments = [ + "if=\(backupPath.path)", + "of=\(diskImgPath.path)", + "bs=4m", // Use a large block size for efficiency + "conv=sparse" // Ensure sparse file creation + ] + + // Capture and log output/errors + let outputPipe = Pipe() + let errorPipe = Pipe() + ddProcess.standardOutput = outputPipe + ddProcess.standardError = errorPipe + + try ddProcess.run() + ddProcess.waitUntilExit() + + // Log command output/errors + let outputData = outputPipe.fileHandleForReading.readDataToEndOfFile() + let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + + if let errorOutput = String(data: errorData, encoding: .utf8), !errorOutput.isEmpty { + Logger.info("dd command output: \(errorOutput)") + } + + if ddProcess.terminationStatus != 0 { + Logger.error("dd command failed with status \(ddProcess.terminationStatus)") + // If dd fails, try to restore the original + if FileManager.default.fileExists(atPath: diskImgPath.path) { + try? FileManager.default.removeItem(at: diskImgPath) + } + try? FileManager.default.moveItem(at: backupPath, to: diskImgPath) + throw PullError.fileCreationFailed("dd command failed") + } + + // Sync filesystem to ensure all changes are written + let syncProcess = Process() + syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try syncProcess.run() + syncProcess.waitUntilExit() + + // Optimize with cp -c to ensure best sparse file representation + if FileManager.default.fileExists(atPath: "/bin/cp") { + Logger.info("Optimizing sparse file representation...") + let optimizedPath = diskImgPath.path + ".optimized" + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/bin/cp") + process.arguments = ["-c", diskImgPath.path, optimizedPath] + + try process.run() + process.waitUntilExit() + + if process.terminationStatus == 0 { + let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let originalUsage = getActualDiskUsage(path: diskImgPath.path) + let optimizedUsage = getActualDiskUsage(path: optimizedPath) + + Logger.info( + "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" + ) + + // Replace with optimized version + try FileManager.default.removeItem(at: diskImgPath) + try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + Logger.info("Replaced with optimized sparse version") + } else { + Logger.info("Sparse optimization failed, using original file") + try? 
FileManager.default.removeItem(atPath: optimizedPath) + } + } + + // Set permissions to 0644 + let chmodProcess = Process() + chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") + chmodProcess.arguments = ["0644", diskImgPath.path] + try chmodProcess.run() + chmodProcess.waitUntilExit() + + // Final sync + let finalSyncProcess = Process() + finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") + try finalSyncProcess.run() + finalSyncProcess.waitUntilExit() + + // Clean up backup file try FileManager.default.removeItem(at: backupPath) + Logger.info("Verifying final disk image partition information...") + // Use hdiutil to verify partition information (output only for debugging) + let verifyProcess = Process() + verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + verifyProcess.arguments = ["imageinfo", diskImgPath.path] + + let verifyOutputPipe = Pipe() + verifyProcess.standardOutput = verifyOutputPipe + + try verifyProcess.run() + verifyProcess.waitUntilExit() + + let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() + if let verifyOutput = String(data: verifyOutputData, encoding: .utf8), verifyProcess.terminationStatus == 0 { + // Extract just the partition scheme information for logging + if let partitionSchemeRange = verifyOutput.range(of: "partition-scheme: .*", options: .regularExpression) { + let partitionScheme = verifyOutput[partitionSchemeRange] + Logger.info("Disk image partition scheme: \(partitionScheme)") + } + } + Logger.info("Cache pull simulation completed successfully") } From 6427ae676d02c9b0542563179fdc764340042c55 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 19:25:22 -0700 Subject: [PATCH 31/43] Handle disk partition --- .../ImageContainerRegistry.swift | 253 ++++++++++++++---- 1 file changed, 196 insertions(+), 57 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 0cd61573..9ae6f65f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1350,65 +1350,209 @@ class ImageContainerRegistry: @unchecked Sendable { return } - Logger.info("Creating disk image clone with partition table preserved...") + Logger.info("Creating true disk image clone with partition table preserved...") // Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - // We'll use macOS's built-in disk cloning capabilities to preserve partition information - // First, create an empty sparse file with the target size - guard FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) else { - // If creation fails, restore the original - try? 
FileManager.default.moveItem(at: backupPath, to: diskImgPath) - throw PullError.fileCreationFailed(diskImgPath.path) - } + // Let's first check if the original image has a partition table + Logger.info("Checking if source image has a partition table...") + let checkProcess = Process() + checkProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + checkProcess.arguments = ["imageinfo", backupPath.path] - // Use dd to clone the disk with partition table preserved - Logger.info("Cloning disk with partition table using dd...") - let ddProcess = Process() - ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") - ddProcess.arguments = [ - "if=\(backupPath.path)", - "of=\(diskImgPath.path)", - "bs=4m", // Use a large block size for efficiency - "conv=sparse" // Ensure sparse file creation - ] + let checkPipe = Pipe() + checkProcess.standardOutput = checkPipe - // Capture and log output/errors - let outputPipe = Pipe() - let errorPipe = Pipe() - ddProcess.standardOutput = outputPipe - ddProcess.standardError = errorPipe + try checkProcess.run() + checkProcess.waitUntilExit() - try ddProcess.run() - ddProcess.waitUntilExit() + let checkData = checkPipe.fileHandleForReading.readDataToEndOfFile() + let checkOutput = String(data: checkData, encoding: .utf8) ?? "" + Logger.info("Source image info: \(checkOutput)") - // Log command output/errors - let outputData = outputPipe.fileHandleForReading.readDataToEndOfFile() - let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile() + // Try different methods in sequence until one works + var success = false - if let errorOutput = String(data: errorData, encoding: .utf8), !errorOutput.isEmpty { - Logger.info("dd command output: \(errorOutput)") - } - - if ddProcess.terminationStatus != 0 { - Logger.error("dd command failed with status \(ddProcess.terminationStatus)") - // If dd fails, try to restore the original - if FileManager.default.fileExists(atPath: diskImgPath.path) { - try? FileManager.default.removeItem(at: diskImgPath) + // Method 1: Use hdiutil convert to convert the image while preserving all data + if !success { + Logger.info("Trying hdiutil convert...") + let tempPath = tempVMDir.appendingPathComponent("disk.img.temp") + + let convertProcess = Process() + convertProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + convertProcess.arguments = [ + "convert", + backupPath.path, + "-format", "UDRO", // Read-only first to preserve partition table + "-o", tempPath.path + ] + + let convertOutPipe = Pipe() + let convertErrPipe = Pipe() + convertProcess.standardOutput = convertOutPipe + convertProcess.standardError = convertErrPipe + + do { + try convertProcess.run() + convertProcess.waitUntilExit() + + let errData = convertErrPipe.fileHandleForReading.readDataToEndOfFile() + let errOutput = String(data: errData, encoding: .utf8) ?? "" + + if convertProcess.terminationStatus == 0 { + Logger.info("hdiutil convert succeeded. 
Converting to writable format...")
+                    // Now convert to writable format
+                    let convertBackProcess = Process()
+                    convertBackProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil")
+                    convertBackProcess.arguments = [
+                        "convert",
+                        tempPath.path,
+                        "-format", "UDRW",  // Read-write format
+                        "-o", diskImgPath.path
+                    ]
+
+                    try convertBackProcess.run()
+                    convertBackProcess.waitUntilExit()
+
+                    if convertBackProcess.terminationStatus == 0 {
+                        Logger.info("Successfully converted to writable format with partition table")
+                        success = true
+                    } else {
+                        Logger.error("hdiutil convert to writable format failed")
+                    }
+
+                    // Clean up temporary image
+                    try? FileManager.default.removeItem(at: tempPath)
+                } else {
+                    Logger.error("hdiutil convert failed: \(errOutput)")
+                }
+            } catch {
+                Logger.error("Error executing hdiutil convert: \(error)")
+            }
+        }
+
+        // Method 2: Direct raw copy with dd
+        if !success {
+            Logger.info("Trying direct raw copy with dd...")
+
+            // Create empty file first
+            FileManager.default.createFile(atPath: diskImgPath.path, contents: nil)
+
+            let ddProcess = Process()
+            ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd")
+            ddProcess.arguments = [
+                "if=\(backupPath.path)",
+                "of=\(diskImgPath.path)",
+                "bs=1m",  // Large block size
+                "count=81920"  // Caps the copy at 80 GiB (81920 x 1 MiB blocks); dd stops earlier at end of input
+            ]
+
+            let ddErrPipe = Pipe()
+            ddProcess.standardError = ddErrPipe
+
+            do {
+                try ddProcess.run()
+                ddProcess.waitUntilExit()
+
+                let errData = ddErrPipe.fileHandleForReading.readDataToEndOfFile()
+                let errOutput = String(data: errData, encoding: .utf8) ?? ""
+
+                if ddProcess.terminationStatus == 0 {
+                    Logger.info("Raw dd copy completed: \(errOutput)")
+                    success = true
+                } else {
+                    Logger.error("Raw dd copy failed: \(errOutput)")
+                }
+            } catch {
+                Logger.error("Error executing dd: \(error)")
+            }
+        }
+
+        // Method 3: Use a more complex approach with disk mounting
+        if !success {
+            Logger.info("Trying advanced disk attach/detach approach...")
+
+            // Mount the source disk image
+            let attachProcess = Process()
+            attachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil")
+            attachProcess.arguments = ["attach", backupPath.path, "-nomount"]
+
+            let attachPipe = Pipe()
+            attachProcess.standardOutput = attachPipe
+
+            try attachProcess.run()
+            attachProcess.waitUntilExit()
+
+            let attachData = attachPipe.fileHandleForReading.readDataToEndOfFile()
+            let attachOutput = String(data: attachData, encoding: .utf8) ?? ""
+
+            // Extract the disk device from output (/dev/diskN)
+            var diskDevice: String?
= nil + if let diskMatch = attachOutput.range(of: "/dev/disk[0-9]+", options: .regularExpression) { + diskDevice = String(attachOutput[diskMatch]) + } + + if let device = diskDevice { + Logger.info("Source disk attached at \(device)") + + // Create a bootable disk image clone + let createProcess = Process() + createProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/asr") + createProcess.arguments = [ + "restore", + "--source", device, + "--target", diskImgPath.path, + "--erase", + "--noprompt" + ] + + let createPipe = Pipe() + createProcess.standardOutput = createPipe + + do { + try createProcess.run() + createProcess.waitUntilExit() + + let createOutput = String(data: createPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? "" + Logger.info("asr output: \(createOutput)") + + if createProcess.terminationStatus == 0 { + Logger.info("Successfully created bootable disk image clone!") + success = true + } else { + Logger.error("Failed to create bootable disk image clone") + } + } catch { + Logger.error("Error executing asr: \(error)") + } + + // Always detach the disk when done + let detachProcess = Process() + detachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") + detachProcess.arguments = ["detach", device] + try? detachProcess.run() + detachProcess.waitUntilExit() + } else { + Logger.error("Failed to extract disk device from hdiutil attach output") + } + } + + // Fallback: If none of the methods worked, revert to our previous method just to ensure we have a usable image + if !success { + Logger.info("All specialized methods failed. Reverting to basic copy...") + + // If the disk image file exists (from a failed attempt), remove it + if FileManager.default.fileExists(atPath: diskImgPath.path) { + try FileManager.default.removeItem(at: diskImgPath) + } + + // Attempt a basic file copy which will at least give us something to work with + try FileManager.default.copyItem(at: backupPath, to: diskImgPath) + } + + // Optimize sparseness if possible if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" @@ -1452,11 +1596,8 @@ class ImageContainerRegistry: @unchecked Sendable { try finalSyncProcess.run() finalSyncProcess.waitUntilExit() - // Clean up backup file - try FileManager.default.removeItem(at: backupPath) - + // Verify the final disk image Logger.info("Verifying final disk image partition information...") - // Use hdiutil to verify partition information (output only for debugging) let verifyProcess = Process() verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") verifyProcess.arguments = ["imageinfo", diskImgPath.path] @@ -1468,15 +1609,13 @@ class ImageContainerRegistry: @unchecked Sendable { verifyProcess.waitUntilExit() let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() - if let verifyOutput = String(data: verifyOutputData, encoding: .utf8), verifyProcess.terminationStatus == 0 { - // Extract just the partition scheme information for logging - if let partitionSchemeRange = verifyOutput.range(of: "partition-scheme: .*", options: .regularExpression) { - let partitionScheme = verifyOutput[partitionSchemeRange] - Logger.info("Disk image partition scheme: \(partitionScheme)") - } - } + let verifyOutput = String(data: verifyOutputData, encoding: .utf8) ?? 
"" + Logger.info("Final disk image verification:\n\(verifyOutput)") - Logger.info("Cache pull simulation completed successfully") + // Clean up backup file + try FileManager.default.removeItem(at: backupPath) + + Logger.info("Cache pull simulation completed successfully with partition table preservation") } private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) From 5b0ff2f2187870da5fbe5e2a923e5ba1a121dfb1 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 19:52:28 -0700 Subject: [PATCH 32/43] Fix first pull --- .../ImageContainerRegistry.swift | 1505 ++++++++--------- 1 file changed, 752 insertions(+), 753 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 9ae6f65f..ddf5ea1f 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1,9 +1,9 @@ import ArgumentParser +import CommonCrypto +import Compression // Add this import import Darwin import Foundation import Swift -import CommonCrypto -import Compression // Add this import // Extension to calculate SHA256 hash extension Data { @@ -25,14 +25,14 @@ enum PushError: Error { case authenticationFailed case missingToken case invalidURL - case lz4NotFound // Added error case - case invalidMediaType // Added during part refactoring - case missingUncompressedSizeAnnotation // Added for sparse file handling - case fileCreationFailed(String) // Added for sparse file handling - case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling - case missingPart(Int) // Added for sparse file handling - case layerDownloadFailed(String) // Added for download retries - case manifestFetchFailed // Added for manifest fetching + case lz4NotFound // Added error case + case invalidMediaType // Added during part refactoring + case missingUncompressedSizeAnnotation // Added for sparse file handling + case fileCreationFailed(String) // Added for sparse file handling + case reassemblySetupFailed(path: String, underlyingError: Error?) // Added for sparse file handling + case missingPart(Int) // Added for sparse file handling + case layerDownloadFailed(String) // Added for download retries + case manifestFetchFailed // Added for manifest fetching } // Define a specific error type for when no underlying error exists @@ -54,8 +54,11 @@ struct OCIManifestLayer { let digest: String let uncompressedSize: UInt64? let uncompressedContentDigest: String? - - init(mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, uncompressedContentDigest: String? = nil) { + + init( + mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, + uncompressedContentDigest: String? 
= nil + ) { self.mediaType = mediaType self.size = size self.digest = digest @@ -119,21 +122,21 @@ actor DiskPartsCollector { // Store tuples of (sequentialPartNum, url) private var diskParts: [(Int, URL)] = [] // Restore internal counter - private var partCounter = 0 + private var partCounter = 0 // Adds a part and returns its assigned sequential number func addPart(url: URL) -> Int { - partCounter += 1 // Use counter logic - let partNum = partCounter - diskParts.append((partNum, url)) // Store sequential number - return partNum // Return assigned sequential number + partCounter += 1 // Use counter logic + let partNum = partCounter + diskParts.append((partNum, url)) // Store sequential number + return partNum // Return assigned sequential number } // Sort by the sequential part number (index 0 of tuple) func getSortedParts() -> [(Int, URL)] { return diskParts.sorted { $0.0 < $1.0 } } - + // Restore getTotalParts func getTotalParts() -> Int { return partCounter @@ -363,7 +366,7 @@ struct DownloadStats { // Renamed struct struct UploadStats { let totalBytes: Int64 - let uploadedBytes: Int64 // Renamed + let uploadedBytes: Int64 // Renamed let elapsedTime: TimeInterval let averageSpeed: Double let peakSpeed: Double @@ -391,9 +394,13 @@ struct UploadStats { let hours = Int(seconds) / 3600 let minutes = (Int(seconds) % 3600) / 60 let secs = Int(seconds) % 60 - if hours > 0 { return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) } - else if minutes > 0 { return String(format: "%d minutes, %d seconds", minutes, secs) } - else { return String(format: "%d seconds", secs) } + if hours > 0 { + return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) + } else if minutes > 0 { + return String(format: "%d minutes, %d seconds", minutes, secs) + } else { + return String(format: "%d seconds", secs) + } } } @@ -408,15 +415,15 @@ actor TaskCounter { class ImageContainerRegistry: @unchecked Sendable { private let registry: String private let organization: String - private let downloadProgress = ProgressTracker() // Renamed for clarity - private let uploadProgress = UploadProgressTracker() // Added upload tracker + private let downloadProgress = ProgressTracker() // Renamed for clarity + private let uploadProgress = UploadProgressTracker() // Added upload tracker private let cacheDirectory: URL private let downloadLock = NSLock() private var activeDownloads: [String] = [] private let cachingEnabled: Bool // Constants for zero-skipping write logic - private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros + private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros private static let zeroChunk = Data(count: holeGranularityBytes) // Add the createProgressBar function here as a private method @@ -768,9 +775,7 @@ class ImageContainerRegistry: @unchecked Sendable { ) let counter = TaskCounter() - // Remove totalDiskParts - // var totalDiskParts: Int? 
= nil - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found try await withThrowingTaskGroup(of: Int64.self) { group in for layer in manifest.layers { @@ -785,45 +790,57 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts by media type if layer.mediaType == "application/octet-stream+lz4" { - // --- Handle LZ4 Disk Part Layer --- - lz4LayerCount += 1 // Increment count - let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging - + // --- Handle LZ4 Disk Part Layer --- + lz4LayerCount += 1 // Increment count + let currentPartNum = lz4LayerCount // Use the current count as the logical number for logging + let cachedLayer = getCachedLayerPath( manifestId: manifestId, digest: layer.digest) let digest = layer.digest let size = layer.size - if memoryConstrained && FileManager.default.fileExists(atPath: cachedLayer.path) { + if memoryConstrained + && FileManager.default.fileExists(atPath: cachedLayer.path) + { // Add to collector, get sequential number assigned by collector - let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + let collectorPartNum = await diskPartsCollector.addPart( + url: cachedLayer) // Log using the sequential number from collector for clarity if needed, or the lz4LayerCount - Logger.info("Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)") + Logger.info( + "Using cached lz4 layer (part \(currentPartNum)) directly: \(cachedLayer.lastPathComponent) -> Collector #\(collectorPartNum)" + ) await downloadProgress.addProgress(Int64(size)) - continue + continue } else { // Download/Copy Path (Task Group) group.addTask { [self] in await counter.increment() let finalPath: URL if FileManager.default.fileExists(atPath: cachedLayer.path) { - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + let tempPartURL = tempDownloadDir.appendingPathComponent( + "disk.img.part.\(UUID().uuidString)") + try FileManager.default.copyItem( + at: cachedLayer, to: tempPartURL) await downloadProgress.addProgress(Int64(size)) finalPath = tempPartURL } else { - let tempPartURL = tempDownloadDir.appendingPathComponent("disk.img.part.\(UUID().uuidString)") + let tempPartURL = tempDownloadDir.appendingPathComponent( + "disk.img.part.\(UUID().uuidString)") if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) - if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: tempPartURL) + try await waitForExistingDownload( + digest, cachedLayer: cachedLayer) + if FileManager.default.fileExists(atPath: cachedLayer.path) + { + try FileManager.default.copyItem( + at: cachedLayer, to: tempPartURL) await downloadProgress.addProgress(Int64(size)) finalPath = tempPartURL } else { markDownloadStarted(digest) try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, mediaType: layer.mediaType, token: token, + digest: digest, mediaType: layer.mediaType, + token: token, to: tempPartURL, maxRetries: 5, progress: downloadProgress, manifestId: manifestId ) @@ -833,7 +850,8 @@ class ImageContainerRegistry: @unchecked Sendable { markDownloadStarted(digest) try await self.downloadLayer( repository: "\(self.organization)/\(imageName)", - digest: digest, mediaType: layer.mediaType, 
token: token, + digest: digest, mediaType: layer.mediaType, + token: token, to: tempPartURL, maxRetries: 5, progress: downloadProgress, manifestId: manifestId ) @@ -841,15 +859,18 @@ class ImageContainerRegistry: @unchecked Sendable { } } // Add to collector, get sequential number assigned by collector - let collectorPartNum = await diskPartsCollector.addPart(url: finalPath) + let collectorPartNum = await diskPartsCollector.addPart( + url: finalPath) // Log using the sequential number from collector - Logger.info("Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)") + Logger.info( + "Assigned path for lz4 layer (part \(currentPartNum)): \(finalPath.lastPathComponent) -> Collector #\(collectorPartNum)" + ) await counter.decrement() return Int64(size) } } } else { - // --- Handle Non-Disk-Part Layer --- + // --- Handle Non-Disk-Part Layer --- let mediaType = layer.mediaType let digest = layer.digest let size = layer.size @@ -858,39 +879,42 @@ class ImageContainerRegistry: @unchecked Sendable { let outputURL: URL switch mediaType { case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": // Might be compressed disk.img single file? - outputURL = tempDownloadDir.appendingPathComponent("disk.img") + "application/octet-stream+gzip": // Might be compressed disk.img single file? + outputURL = tempDownloadDir.appendingPathComponent("disk.img") case "application/vnd.oci.image.config.v1+json": outputURL = tempDownloadDir.appendingPathComponent("config.json") - case "application/octet-stream": // Could be nvram or uncompressed single disk.img - // Heuristic: If a config.json already exists or is expected, assume this is nvram. - // This might need refinement if single disk images use octet-stream. - if manifest.config != nil { + case "application/octet-stream": // Could be nvram or uncompressed single disk.img + // Heuristic: If a config.json already exists or is expected, assume this is nvram. + // This might need refinement if single disk images use octet-stream. 
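// ---------------------------------------------------------------------------
// [Editor's aside - illustrative sketch, not part of this patch]
// The heuristic being reformatted here maps each non-disk-part layer's media
// type to a destination file name, disambiguating "application/octet-stream"
// by whether the manifest also carries a config layer. Restated as a small
// pure function (a hypothetical helper, not code from this repository):
func destinationFileName(mediaType: String, hasConfigLayer: Bool) -> String? {
    switch mediaType {
    case "application/vnd.oci.image.layer.v1.tar",
        "application/octet-stream+gzip":
        return "disk.img"  // single-file (possibly compressed) disk image
    case "application/vnd.oci.image.config.v1+json":
        return "config.json"
    case "application/octet-stream":
        // Ambiguous type: alongside a config layer, assume it is nvram;
        // on its own, treat it as an uncompressed single-file disk image.
        return hasConfigLayer ? "nvram.bin" : "disk.img"
    default:
        return nil  // unsupported media type; the caller skips the layer
    }
}
// ---------------------------------------------------------------------------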
+ if manifest.config != nil { outputURL = tempDownloadDir.appendingPathComponent("nvram.bin") - } else { + } else { // Assume it's a single-file disk image if no config layer is present outputURL = tempDownloadDir.appendingPathComponent("disk.img") - } + } default: - Logger.info("Skipping unsupported layer media type: \(mediaType)") - continue // Skip to the next layer + Logger.info("Skipping unsupported layer media type: \(mediaType)") + continue // Skip to the next layer } // Add task to download/copy the non-disk-part layer group.addTask { [self] in await counter.increment() - let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) + let cachedLayer = getCachedLayerPath( + manifestId: manifestId, digest: digest) if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.copyItem(at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) } else { if isDownloading(digest) { - try await waitForExistingDownload(digest, cachedLayer: cachedLayer) + try await waitForExistingDownload( + digest, cachedLayer: cachedLayer) if FileManager.default.fileExists(atPath: cachedLayer.path) { - try FileManager.default.copyItem(at: cachedLayer, to: outputURL) + try FileManager.default.copyItem( + at: cachedLayer, to: outputURL) await downloadProgress.addProgress(Int64(size)) - await counter.decrement() // Decrement before returning + await counter.decrement() // Decrement before returning return Int64(size) } } @@ -908,304 +932,62 @@ class ImageContainerRegistry: @unchecked Sendable { return Int64(size) } } - } // End for layer in manifest.layers + } // End for layer in manifest.layers // Wait for remaining tasks for try await _ in group {} - } // End TaskGroup - - // --- Safely retrieve parts AFTER TaskGroup --- - let diskParts = await diskPartsCollector.getSortedParts() // Already sorted by logicalPartNum - // Check if totalDiskParts was set (meaning at least one lz4 layer was processed) - // Get total parts from the collector - let totalPartsFromCollector = await diskPartsCollector.getTotalParts() - // Change guard to if for logging only, as the later if condition handles the logic - if totalPartsFromCollector == 0 { - // If totalParts is 0, it means no layers matched the lz4 format. - Logger.info("No lz4 disk part layers found. Assuming single-part image or non-lz4 parts.") - // Reassembly logic below will be skipped if diskParts is empty. - // Explicitly set totalParts to 0 to prevent entering the reassembly block if diskParts might somehow be non-empty but totalParts was 0 - // This ensures consistency if the collector logic changes. - } - Logger.info("Finished processing layers. Found \(diskParts.count) disk parts to reassemble (Total Lz4 Layers: \(totalPartsFromCollector)).") - // --- End retrieving parts --- - - // Add detailed logging for debugging - Logger.info("Disk part numbers collected and sorted: \(diskParts.map { $0.0 })") - - Logger.info("") // New line after progress + } // End TaskGroup // Display download statistics let stats = await downloadProgress.getDownloadStats() + Logger.info("") // New line after progress Logger.info(stats.formattedSummary()) - // Parse config.json to get uncompressed size *before* reassembly - let configURL = tempDownloadDir.appendingPathComponent("config.json") - let uncompressedSize = getUncompressedSizeFromConfig(configPath: configURL) - - // Now also try to get disk size from VM config if OCI annotation not found - var vmConfigDiskSize: UInt64? 
= nil - if uncompressedSize == nil && FileManager.default.fileExists(atPath: configURL.path) { - do { - let configData = try Data(contentsOf: configURL) - let decoder = JSONDecoder() - if let vmConfig = try? decoder.decode(VMConfig.self, from: configData) { - vmConfigDiskSize = vmConfig.diskSize - if let size = vmConfigDiskSize { - Logger.info("Found diskSize from VM config.json: \(size) bytes") - } - } - } catch { - Logger.error("Failed to parse VM config.json for diskSize: \(error)") - } - } - - // Force explicit use - if uncompressedSize != nil { - Logger.info( - "Will use uncompressed size from annotation for sparse file: \(uncompressedSize!) bytes" - ) - } else if vmConfigDiskSize != nil { - Logger.info( - "Will use diskSize from VM config for sparse file: \(vmConfigDiskSize!) bytes") - } - - // Handle disk parts if present - if !diskParts.isEmpty && totalPartsFromCollector > 0 { - // Use totalPartsFromCollector here - Logger.info("Reassembling \(totalPartsFromCollector) disk image parts using sparse file technique...") - let outputURL = tempVMDir.appendingPathComponent("disk.img") - - // Wrap setup in do-catch for better error reporting - let outputHandle: FileHandle - do { - // 1. Ensure parent directory exists - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true - ) - - // 2. Explicitly create the file first, removing old one if needed - if FileManager.default.fileExists(atPath: outputURL.path) { - try FileManager.default.removeItem(at: outputURL) - } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) - else { - throw PullError.fileCreationFailed(outputURL.path) - } - - // 3. Now open the handle for writing - outputHandle = try FileHandle(forWritingTo: outputURL) - - } catch { - // Catch errors during directory/file creation or handle opening - Logger.error( - "Failed during setup for disk image reassembly: \(error.localizedDescription)", - metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed( - path: outputURL.path, underlyingError: error) - } - - // Calculate expected size from the manifest layers (sum of compressed parts - for logging only now) - // Filter based on the correct media type now - let expectedCompressedTotalSize = UInt64( - manifest.layers.filter { $0.mediaType == "application/octet-stream+lz4" }.reduce(0) - { $0 + $1.size } - ) - Logger.info( - "Total compressed parts size: \(ByteCountFormatter.string(fromByteCount: Int64(expectedCompressedTotalSize), countStyle: .file))" - ) - - // Calculate fallback size (sum of compressed parts) - let _: UInt64 = diskParts.reduce(UInt64(0)) { - (acc: UInt64, element) -> UInt64 in - let fileSize = - (try? FileManager.default.attributesOfItem(atPath: element.1.path)[.size] - as? UInt64 ?? 0) ?? 0 - return acc + fileSize - } - - // Use: annotation size > VM config diskSize > fallback size - let sizeForTruncate: UInt64 - if let size = uncompressedSize { - Logger.info("Using uncompressed size from annotation: \(size) bytes") - sizeForTruncate = size - } else if let size = vmConfigDiskSize { - Logger.info("Using diskSize from VM config: \(size) bytes") - sizeForTruncate = size - } else { - Logger.error( - "Missing both uncompressed size annotation and VM config diskSize for multi-part image." - ) - throw PullError.missingUncompressedSizeAnnotation - } - - defer { try? 
outputHandle.close() } - - // Set the file size without writing data (creates a sparse file) - try outputHandle.truncate(atOffset: sizeForTruncate) - - // Verify the sparse file was created with the correct size - let initialSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Sparse file initialized with size: \(ByteCountFormatter.string(fromByteCount: Int64(initialSize), countStyle: .file))" - ) - - // Add a simple test pattern at the beginning and end of the file to verify it's writable - try outputHandle.seek(toOffset: 0) - let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! - try outputHandle.write(contentsOf: testPattern) - - try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) - try outputHandle.write(contentsOf: testPattern) - try outputHandle.synchronize() - - Logger.info("Test patterns written to sparse file. File is ready for writing.") - - var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) - var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file - - // Iterate using the reliable totalParts count from media type - // Use totalPartsFromCollector for the loop range - for partNum in 1...totalPartsFromCollector { - // Find the part URL from our collected parts using the logical partNum - guard let partInfo = diskParts.first(where: { $0.0 == partNum }) else { - // This error should now be less likely, but good to keep - Logger.error("Missing required part number \(partNum) in collected parts during reassembly.") - // Add current state log on error - Logger.error("Current disk part numbers available: \(diskParts.map { $0.0 })") - throw PullError.missingPart(partNum) - } - let partURL = partInfo.1 // Get the URL from the tuple - - Logger.info( - "Processing part \(partNum) of \(totalPartsFromCollector): \(partURL.lastPathComponent)") - - // Seek to the correct offset in the output sparse file - try outputHandle.seek(toOffset: currentOffset) - - // Check if this chunk might be all zeros (sparse data) by sampling the compressed data - // Skip this check for now as it's an optimization we can add later if needed - let isLikelySparse = false - - // Always attempt decompression using decompressChunkAndWriteSparse for LZ4 parts - if isLikelySparse { - // For sparse chunks, we don't need to write anything - just advance the offset - // We determine the uncompressed size from the chunk metadata or estimation - - // For now, we'll still decompress to ensure correct behavior, and optimize later - Logger.info("Chunk appears to be sparse, but decompressing for reliability") - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - } else { - Logger.info("Decompressing part \(partNum)") - let decompressedBytesWritten = try decompressChunkAndWriteSparse( - inputPath: partURL.path, - outputHandle: outputHandle, - startOffset: currentOffset - ) - currentOffset += decompressedBytesWritten - } - - reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling" - ) - - // Ensure data is written before processing next part - try outputHandle.synchronize() - } - - // Finalize progress, close handle (done by defer) - reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") - Logger.info("") // Newline - - // Optimize sparseness after completing 
reassembly - try outputHandle.close() // Close handle to ensure all data is flushed - - // Verify final size - let finalSize = - (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] - as? UInt64) ?? 0 - Logger.info( - "Final disk image size: \(ByteCountFormatter.string(fromByteCount: Int64(finalSize), countStyle: .file))" - ) - - // Optimize sparseness if on macOS - if FileManager.default.fileExists(atPath: "/bin/cp") { - Logger.info("Optimizing sparse file representation...") - let optimizedPath = outputURL.path + ".optimized" - - let process = Process() - process.executableURL = URL(fileURLWithPath: "/bin/cp") - process.arguments = ["-c", outputURL.path, optimizedPath] - - do { - try process.run() - process.waitUntilExit() - - if process.terminationStatus == 0 { - // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 - let originalUsage = getActualDiskUsage(path: outputURL.path) - let optimizedUsage = getActualDiskUsage(path: optimizedPath) - - Logger.info( - "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" - ) - - // Replace the original with the optimized version - try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) - Logger.info("Replaced with optimized sparse version") - } else { - Logger.info("Sparse optimization failed, using original file") - try? FileManager.default.removeItem(atPath: optimizedPath) - } - } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") - try? 
FileManager.default.removeItem(atPath: optimizedPath) - } - } - - if finalSize != sizeForTruncate { - Logger.info( - "Warning: Final reported size (\(finalSize) bytes) differs from expected size (\(sizeForTruncate) bytes), but this doesn't affect functionality" - ) - } - - Logger.info("Disk image reassembly completed") + // Now that we've downloaded everything to the cache, use copyFromCache to create final VM files + if cachingEnabled { + Logger.info("Using copyFromCache method to properly preserve partition tables") + try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } else { - // Copy single disk image if it exists + // If caching is disabled, just copy files directly to tempVMDir + Logger.info("Caching disabled - copying downloaded files directly to VM directory") + + // Copy non-disk files first + for file in ["config.json", "nvram.bin"] { + let sourceURL = tempDownloadDir.appendingPathComponent(file) + if FileManager.default.fileExists(atPath: sourceURL.path) { + try FileManager.default.copyItem( + at: sourceURL, + to: tempVMDir.appendingPathComponent(file) + ) + } + } + + // For the disk image, we have two cases - either a single file or parts let diskURL = tempDownloadDir.appendingPathComponent("disk.img") if FileManager.default.fileExists(atPath: diskURL.path) { + // Single file disk image try FileManager.default.copyItem( at: diskURL, to: tempVMDir.appendingPathComponent("disk.img") ) + Logger.info("Copied single disk.img file to VM directory") + } else { + // Multiple parts case - use the partitioned disk.img from reassembly + let diskParts = await diskPartsCollector.getSortedParts() + if !diskParts.isEmpty { + Logger.info("Using most recently assembled disk image for VM") + let assembledDiskURL = tempVMDir.appendingPathComponent("disk.img") + if FileManager.default.fileExists(atPath: assembledDiskURL.path) { + Logger.info("Assembled disk.img already exists in VM directory") + } else { + Logger.error( + "Could not find assembled disk image - VM may not boot properly") + } + } else { + Logger.error("No disk image found - VM may not boot properly") + } } } - - // Copy config and nvram files if they exist - for file in ["config.json", "nvram.bin"] { - let sourceURL = tempDownloadDir.appendingPathComponent(file) - if FileManager.default.fileExists(atPath: sourceURL.path) { - try FileManager.default.copyItem( - at: sourceURL, - to: tempVMDir.appendingPathComponent(file) - ) - } - } - } - - // Simulate cache pull behavior if this is a first pull - if !cachingEnabled || !validateCache(manifest: manifest, manifestId: manifestId) { - try simulateCachePull(tempVMDir: tempVMDir) } // Only move to final location once everything is complete @@ -1242,10 +1024,10 @@ class ImageContainerRegistry: @unchecked Sendable { private func createDiskImageFromSource( sourceURL: URL, // Source data to decompress destinationURL: URL, // Where to create the disk image - diskSize: UInt64 // Total size for the sparse file + diskSize: UInt64 // Total size for the sparse file ) throws { Logger.info("Creating sparse disk image...") - + // Create empty destination file if FileManager.default.fileExists(atPath: destinationURL.path) { try FileManager.default.removeItem(at: destinationURL) @@ -1253,11 +1035,11 @@ class ImageContainerRegistry: @unchecked Sendable { guard FileManager.default.createFile(atPath: destinationURL.path, contents: nil) else { throw PullError.fileCreationFailed(destinationURL.path) } - + // Create sparse file let outputHandle = try FileHandle(forWritingTo: 
destinationURL) try outputHandle.truncate(atOffset: diskSize) - + // Write test patterns at beginning and end Logger.info("Writing test patterns to verify writability...") let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! @@ -1266,7 +1048,7 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: diskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + // Decompress the source data at offset 0 Logger.info("Decompressing source data...") let bytesWritten = try decompressChunkAndWriteSparse( @@ -1274,57 +1056,62 @@ class ImageContainerRegistry: @unchecked Sendable { outputHandle: outputHandle, startOffset: 0 ) - Logger.info("Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data") - + Logger.info( + "Decompressed \(ByteCountFormatter.string(fromByteCount: Int64(bytesWritten), countStyle: .file)) of data" + ) + // Ensure data is written and close handle try outputHandle.synchronize() try outputHandle.close() - + // Run sync to flush filesystem let syncProcess = Process() syncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try syncProcess.run() syncProcess.waitUntilExit() - + // Optimize with cp -c if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = destinationURL.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", destinationURL.path, optimizedPath] - + try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get optimization results - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] + as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: destinationURL.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace original with optimized try FileManager.default.removeItem(at: destinationURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: destinationURL) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: destinationURL) Logger.info("Replaced with optimized sparse version") } else { Logger.info("Sparse optimization failed, using original file") try? 
FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to 0644 let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", destinationURL.path] try chmodProcess.run() chmodProcess.waitUntilExit() - + // Final sync let finalSyncProcess = Process() finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") @@ -1335,94 +1122,95 @@ class ImageContainerRegistry: @unchecked Sendable { // Function to simulate cache pull behavior for freshly downloaded images private func simulateCachePull(tempVMDir: URL) throws { Logger.info("Simulating cache pull behavior for freshly downloaded image...") - + // Find disk.img in tempVMDir let diskImgPath = tempVMDir.appendingPathComponent("disk.img") guard FileManager.default.fileExists(atPath: diskImgPath.path) else { Logger.info("No disk.img found to simulate cache pull behavior") return } - + // Get file attributes and size let attributes = try FileManager.default.attributesOfItem(atPath: diskImgPath.path) guard let diskSize = attributes[.size] as? UInt64, diskSize > 0 else { Logger.error("Could not determine disk.img size for simulation") return } - + Logger.info("Creating true disk image clone with partition table preserved...") - + // Create backup of original file let backupPath = tempVMDir.appendingPathComponent("disk.img.original") try FileManager.default.moveItem(at: diskImgPath, to: backupPath) - + // Let's first check if the original image has a partition table Logger.info("Checking if source image has a partition table...") let checkProcess = Process() checkProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") checkProcess.arguments = ["imageinfo", backupPath.path] - + let checkPipe = Pipe() checkProcess.standardOutput = checkPipe - + try checkProcess.run() checkProcess.waitUntilExit() - + let checkData = checkPipe.fileHandleForReading.readDataToEndOfFile() let checkOutput = String(data: checkData, encoding: .utf8) ?? "" Logger.info("Source image info: \(checkOutput)") - + // Try different methods in sequence until one works var success = false - + // Method 1: Use hdiutil convert to convert the image while preserving all data if !success { Logger.info("Trying hdiutil convert...") let tempPath = tempVMDir.appendingPathComponent("disk.img.temp") - + let convertProcess = Process() convertProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") convertProcess.arguments = [ - "convert", - backupPath.path, - "-format", "UDRO", // Read-only first to preserve partition table - "-o", tempPath.path + "convert", + backupPath.path, + "-format", "UDRO", // Read-only first to preserve partition table + "-o", tempPath.path, ] - + let convertOutPipe = Pipe() let convertErrPipe = Pipe() convertProcess.standardOutput = convertOutPipe convertProcess.standardError = convertErrPipe - + do { try convertProcess.run() convertProcess.waitUntilExit() - + let errData = convertErrPipe.fileHandleForReading.readDataToEndOfFile() let errOutput = String(data: errData, encoding: .utf8) ?? "" - + if convertProcess.terminationStatus == 0 { Logger.info("hdiutil convert succeeded. 
Converting to writable format...") // Now convert to writable format let convertBackProcess = Process() convertBackProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") convertBackProcess.arguments = [ - "convert", - tempPath.path, - "-format", "UDRW", // Read-write format - "-o", diskImgPath.path + "convert", + tempPath.path, + "-format", "UDRW", // Read-write format + "-o", diskImgPath.path, ] - + try convertBackProcess.run() convertBackProcess.waitUntilExit() - + if convertBackProcess.terminationStatus == 0 { - Logger.info("Successfully converted to writable format with partition table") + Logger.info( + "Successfully converted to writable format with partition table") success = true } else { Logger.error("hdiutil convert to writable format failed") } - + // Clean up temporary image try? FileManager.default.removeItem(at: tempPath) } else { @@ -1432,33 +1220,33 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Error executing hdiutil convert: \(error)") } } - + // Method 2: Try direct raw copy method if !success { Logger.info("Trying direct raw copy with dd...") - + // Create empty file first FileManager.default.createFile(atPath: diskImgPath.path, contents: nil) - + let ddProcess = Process() ddProcess.executableURL = URL(fileURLWithPath: "/bin/dd") ddProcess.arguments = [ "if=\(backupPath.path)", "of=\(diskImgPath.path)", - "bs=1m", // Large block size - "count=81920" // Ensure we copy everything (80GB+ should be sufficient) + "bs=1m", // Large block size + "count=81920", // Ensure we copy everything (80GB+ should be sufficient) ] - + let ddErrPipe = Pipe() ddProcess.standardError = ddErrPipe - + do { try ddProcess.run() ddProcess.waitUntilExit() - + let errData = ddErrPipe.fileHandleForReading.readDataToEndOfFile() let errOutput = String(data: errData, encoding: .utf8) ?? "" - + if ddProcess.terminationStatus == 0 { Logger.info("Raw dd copy completed: \(errOutput)") success = true @@ -1469,34 +1257,36 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Error executing dd: \(error)") } } - + // Method 3: Use a more complex approach with disk mounting if !success { Logger.info("Trying advanced disk attach/detach approach...") - + // Mount the source disk image let attachProcess = Process() attachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") attachProcess.arguments = ["attach", backupPath.path, "-nomount"] - + let attachPipe = Pipe() attachProcess.standardOutput = attachPipe - + try attachProcess.run() attachProcess.waitUntilExit() - + let attachData = attachPipe.fileHandleForReading.readDataToEndOfFile() let attachOutput = String(data: attachData, encoding: .utf8) ?? "" - + // Extract the disk device from output (/dev/diskN) var diskDevice: String? 
= nil - if let diskMatch = attachOutput.range(of: "/dev/disk[0-9]+", options: .regularExpression) { + if let diskMatch = attachOutput.range( + of: "/dev/disk[0-9]+", options: .regularExpression) + { diskDevice = String(attachOutput[diskMatch]) } - + if let device = diskDevice { Logger.info("Source disk attached at \(device)") - + // Create a bootable disk image clone let createProcess = Process() createProcess.executableURL = URL(fileURLWithPath: "/usr/sbin/asr") @@ -1505,19 +1295,22 @@ class ImageContainerRegistry: @unchecked Sendable { "--source", device, "--target", diskImgPath.path, "--erase", - "--noprompt" + "--noprompt", ] - + let createPipe = Pipe() createProcess.standardOutput = createPipe - + do { try createProcess.run() createProcess.waitUntilExit() - - let createOutput = String(data: createPipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? "" + + let createOutput = + String( + data: createPipe.fileHandleForReading.readDataToEndOfFile(), + encoding: .utf8) ?? "" Logger.info("asr output: \(createOutput)") - + if createProcess.terminationStatus == 0 { Logger.info("Successfully created bootable disk image clone!") success = true @@ -1527,7 +1320,7 @@ class ImageContainerRegistry: @unchecked Sendable { } catch { Logger.error("Error executing asr: \(error)") } - + // Always detach the disk when done let detachProcess = Process() detachProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") @@ -1538,98 +1331,102 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.error("Failed to extract disk device from hdiutil attach output") } } - + // Fallback: If none of the methods worked, revert to our previous method just to ensure we have a usable image if !success { Logger.info("All specialized methods failed. Reverting to basic copy...") - + // If the disk image file exists (from a failed attempt), remove it if FileManager.default.fileExists(atPath: diskImgPath.path) { try FileManager.default.removeItem(at: diskImgPath) } - + // Attempt a basic file copy which will at least give us something to work with try FileManager.default.copyItem(at: backupPath, to: diskImgPath) } - + // Optimize sparseness if possible if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") let optimizedPath = diskImgPath.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", diskImgPath.path, optimizedPath] - + try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] + as? UInt64) ?? 
0 let originalUsage = getActualDiskUsage(path: diskImgPath.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace with optimized version try FileManager.default.removeItem(at: diskImgPath) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: diskImgPath) Logger.info("Replaced with optimized sparse version") } else { Logger.info("Sparse optimization failed, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to 0644 let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") chmodProcess.arguments = ["0644", diskImgPath.path] try chmodProcess.run() chmodProcess.waitUntilExit() - + // Final sync let finalSyncProcess = Process() finalSyncProcess.executableURL = URL(fileURLWithPath: "/bin/sync") try finalSyncProcess.run() finalSyncProcess.waitUntilExit() - + // Verify the final disk image Logger.info("Verifying final disk image partition information...") let verifyProcess = Process() verifyProcess.executableURL = URL(fileURLWithPath: "/usr/bin/hdiutil") verifyProcess.arguments = ["imageinfo", diskImgPath.path] - + let verifyOutputPipe = Pipe() verifyProcess.standardOutput = verifyOutputPipe - + try verifyProcess.run() verifyProcess.waitUntilExit() - + let verifyOutputData = verifyOutputPipe.fileHandleForReading.readDataToEndOfFile() let verifyOutput = String(data: verifyOutputData, encoding: .utf8) ?? "" Logger.info("Final disk image verification:\n\(verifyOutput)") - + // Clean up backup file try FileManager.default.removeItem(at: backupPath) - - Logger.info("Cache pull simulation completed successfully with partition table preservation") + + Logger.info( + "Cache pull simulation completed successfully with partition table preservation") } private func copyFromCache(manifest: Manifest, manifestId: String, to destination: URL) async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1637,13 +1434,14 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - // Add to collector. It will assign the sequential part number. 
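
// A minimal sketch of the DiskPartsCollector shape assumed by the call sites
// above and below (addPart(url:), getSortedParts(), getTotalParts()); the real
// actor is defined elsewhere in this file and may differ. All details here are
// inferred, not taken from the patch.
import Foundation

actor DiskPartsCollector {
    private var parts: [(Int, URL)] = []

    // Assign the next sequential part number and remember the URL.
    func addPart(url: URL) -> Int {
        let partNum = parts.count + 1
        parts.append((partNum, url))
        return partNum
    }

    // Parts sorted by their assigned sequential number.
    func getSortedParts() -> [(Int, URL)] {
        parts.sorted { $0.0 < $1.0 }
    }

    func getTotalParts() -> Int {
        parts.count
    }
}
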
- let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") - } - else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info( + "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" + ) + } else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1651,12 +1449,12 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? "nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } // Copy the non-disk file directly from cache to destination @@ -1667,19 +1465,20 @@ class ImageContainerRegistry: @unchecked Sendable { } } - // --- Safely retrieve parts AFTER loop --- - let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number - let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector + // --- Safely retrieve parts AFTER loop --- + let diskPartSources = await diskPartsCollector.getSortedParts() // Sorted by assigned sequential number + let totalParts = await diskPartsCollector.getTotalParts() // Get total count from collector Logger.info("Found \(totalParts) lz4 disk parts in cache to reassemble.") - // --- End retrieving parts --- + // --- End retrieving parts --- // Reassemble disk parts if needed // Use the count from the collector if !diskPartSources.isEmpty { // Use totalParts from collector directly - Logger.info("Reassembling \(totalParts) disk image parts using sparse file technique...") - + Logger.info( + "Reassembling \(totalParts) disk image parts using sparse file technique...") + // Get uncompressed size from cached config file (needs to be copied first) let configURL = destination.appendingPathComponent("config.json") // Parse config.json to get uncompressed size *before* reassembly @@ -1713,24 +1512,25 @@ class ImageContainerRegistry: @unchecked Sendable { } else { // If neither is found in cache scenario, throw error as we cannot determine the size Logger.error( - "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." - + " Cannot reassemble." + "Missing both uncompressed size annotation and VM config diskSize for cached multi-part image." + + " Cannot reassemble." 
) throw PullError.missingUncompressedSizeAnnotation } // Now that expectedTotalSize is guaranteed to be non-nil, proceed with setup guard let sizeForTruncate = expectedTotalSize else { - // This should not happen due to the checks above, but safety first - let nilError: Error? = nil - // Use nil-coalescing to provide a default error, appeasing the compiler - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) + // This should not happen due to the checks above, but safety first + let nilError: Error? = nil + // Use nil-coalescing to provide a default error, appeasing the compiler + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: nilError ?? NoSpecificUnderlyingError()) } // If we have just one disk part, use the shared function if totalParts == 1 { // Single part - use shared function - let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) + let sourceURL = diskPartSources[0].1 // Get the first source URL (index 1 of the tuple) try createDiskImageFromSource( sourceURL: sourceURL, destinationURL: outputURL, @@ -1742,22 +1542,30 @@ class ImageContainerRegistry: @unchecked Sendable { let outputHandle: FileHandle do { // Ensure parent directory exists - try FileManager.default.createDirectory(at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), withIntermediateDirectories: true + ) // Explicitly create the file first, removing old one if needed if FileManager.default.fileExists(atPath: outputURL.path) { try FileManager.default.removeItem(at: outputURL) } - guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) else { + guard FileManager.default.createFile(atPath: outputURL.path, contents: nil) + else { throw PullError.fileCreationFailed(outputURL.path) } // Open handle for writing outputHandle = try FileHandle(forWritingTo: outputURL) // Set the file size (creates sparse file) try outputHandle.truncate(atOffset: sizeForTruncate) - Logger.info("Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))") + Logger.info( + "Sparse file initialized for cache reassembly with size: \(ByteCountFormatter.string(fromByteCount: Int64(sizeForTruncate), countStyle: .file))" + ) } catch { - Logger.error("Failed during setup for cached disk image reassembly: \(error.localizedDescription)", metadata: ["path": outputURL.path]) - throw PullError.reassemblySetupFailed(path: outputURL.path, underlyingError: error) + Logger.error( + "Failed during setup for cached disk image reassembly: \(error.localizedDescription)", + metadata: ["path": outputURL.path]) + throw PullError.reassemblySetupFailed( + path: outputURL.path, underlyingError: error) } // Ensure handle is closed when exiting this scope @@ -1769,11 +1577,15 @@ class ImageContainerRegistry: @unchecked Sendable { // Iterate from 1 up to the total number of parts found by the collector for collectorPartNum in 1...totalParts { // Find the source URL from our collected parts using the sequential collectorPartNum - guard let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) else { - Logger.error("Missing required cached part number \(collectorPartNum) in collected parts during reassembly.") + guard + let sourceInfo = diskPartSources.first(where: { $0.0 == collectorPartNum }) + else { + 
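
// A rough sketch of what decompressChunkAndWriteSparse (called in the loop
// below, implemented elsewhere in this file) has to do: decompress an LZ4
// chunk and write only non-zero blocks, so holes in the pre-truncated sparse
// file stay unallocated. This sketch assumes each chunk fits in memory and was
// compressed with Foundation's .lz4, as push() does later in this file; the
// real implementation may stream instead.
import Foundation

func decompressChunkAndWriteSparseSketch(
    inputPath: String, outputHandle: FileHandle, startOffset: UInt64
) throws -> UInt64 {
    let compressed = try Data(contentsOf: URL(fileURLWithPath: inputPath))
    let data = try (compressed as NSData).decompressed(using: .lz4) as Data
    let blockSize = 1 << 20  // 1 MiB write granularity
    var offset = 0
    while offset < data.count {
        let end = min(offset + blockSize, data.count)
        let block = data.subdata(in: offset..<end)
        // Skip all-zero blocks: the truncated sparse file already reads as zeros.
        if block.contains(where: { $0 != 0 }) {
            try outputHandle.seek(toOffset: startOffset + UInt64(offset))
            try outputHandle.write(contentsOf: block)
        }
        offset = end
    }
    // Return the decompressed byte count so the caller can advance its offset.
    return UInt64(data.count)
}
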
Logger.error( + "Missing required cached part number \(collectorPartNum) in collected parts during reassembly." + ) throw PullError.missingPart(collectorPartNum) } - let sourceURL = sourceInfo.1 // Get URL from tuple + let sourceURL = sourceInfo.1 // Get URL from tuple // Log using the sequential collector part number Logger.info( @@ -1789,10 +1601,10 @@ class ImageContainerRegistry: @unchecked Sendable { currentOffset += decompressedBytesWritten // Update progress (using sizeForTruncate which should be available) reassemblyProgressLogger.logProgress( - current: Double(currentOffset) / Double(sizeForTruncate), - context: "Reassembling Cache") - - try outputHandle.synchronize() // Explicitly synchronize after each chunk + current: Double(currentOffset) / Double(sizeForTruncate), + context: "Reassembling Cache") + + try outputHandle.synchronize() // Explicitly synchronize after each chunk } // Finalize progress, close handle (done by defer) @@ -1806,13 +1618,13 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: sizeForTruncate - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + // Ensure handle is properly synchronized before closing try outputHandle.synchronize() - + // Close handle explicitly instead of relying on defer try outputHandle.close() - + // Verify final size let finalSize = (try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] @@ -1829,44 +1641,49 @@ class ImageContainerRegistry: @unchecked Sendable { } Logger.info("Disk image reassembly completed") - + // Optimize sparseness for cached reassembly if on macOS if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation for cached reassembly...") let optimizedPath = outputURL.path + ".optimized" - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", outputURL.path, optimizedPath] - + do { try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get size of optimized file - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem(atPath: optimizedPath)[ + .size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: outputURL.path) let optimizedUsage = getActualDiskUsage(path: optimizedPath) - + Logger.info( "Sparse optimization results for cache: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace the original with the optimized version try FileManager.default.removeItem(at: outputURL) - try FileManager.default.moveItem(at: URL(fileURLWithPath: optimizedPath), to: outputURL) + try FileManager.default.moveItem( + at: URL(fileURLWithPath: optimizedPath), to: outputURL) Logger.info("Replaced cached reassembly with optimized sparse version") } else { Logger.info("Sparse optimization failed for cache, using original file") try? FileManager.default.removeItem(atPath: optimizedPath) } } catch { - Logger.info("Error during sparse optimization for cache: \(error.localizedDescription)") + Logger.info( + "Error during sparse optimization for cache: \(error.localizedDescription)" + ) try? 
FileManager.default.removeItem(atPath: optimizedPath) } } - + // Set permissions to ensure consistency let chmodProcess = Process() chmodProcess.executableURL = URL(fileURLWithPath: "/bin/chmod") @@ -1880,12 +1697,16 @@ class ImageContainerRegistry: @unchecked Sendable { } private func getToken(repository: String) async throws -> String { - let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository + let encodedRepo = + repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository // Request both pull and push scope for uploads - let url = URL(string: "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)")! - + let url = URL( + string: + "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)" + )! + var request = URLRequest(url: url) - request.httpMethod = "GET" // Token endpoint uses GET + request.httpMethod = "GET" // Token endpoint uses GET request.setValue("application/json", forHTTPHeaderField: "Accept") // *** Add Basic Authentication Header if credentials exist *** @@ -1906,26 +1727,31 @@ class ImageContainerRegistry: @unchecked Sendable { // *** End Basic Auth addition *** let (data, response) = try await URLSession.shared.data(for: request) - + // Check response status code *before* parsing JSON guard let httpResponse = response as? HTTPURLResponse else { - throw PushError.authenticationFailed // Or a more generic network error + throw PushError.authenticationFailed // Or a more generic network error } - + guard httpResponse.statusCode == 200 else { // Log detailed error including status code and potentially response body let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" - Logger.error("Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)") + Logger.error( + "Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)" + ) // Throw specific error based on status if needed (e.g., 401 for unauthorized) - throw PushError.authenticationFailed + throw PushError.authenticationFailed } - + let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] - guard let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] as? String else { + guard + let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] + as? String + else { Logger.error("Token not found in registry response.") throw PushError.missingToken } - + return token } @@ -2673,9 +2499,9 @@ class ImageContainerRegistry: @unchecked Sendable { // New push method public func push( - vmDirPath: String, - imageName: String, - tags: [String], + vmDirPath: String, + imageName: String, + tags: [String], chunkSizeMb: Int = 512, verbose: Bool = false, dryRun: Bool = false, @@ -2686,18 +2512,18 @@ class ImageContainerRegistry: @unchecked Sendable { metadata: [ "vm_path": vmDirPath, "imageName": imageName, - "tags": "\(tags.joined(separator: ", "))", // Log all tags + "tags": "\(tags.joined(separator: ", "))", // Log all tags "registry": registry, "organization": organization, "chunk_size": "\(chunkSizeMb)MB", "dry_run": "\(dryRun)", - "reassemble": "\(reassemble)" + "reassemble": "\(reassemble)", ]) - + // Remove tag parsing here, imageName is now passed directly // let components = image.split(separator: ":") ... 
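
// Every blob pushed below is addressed by an OCI digest: "sha256:" plus the
// lowercase hex SHA-256 of the blob bytes. The sha256String() helper on Data
// is defined elsewhere in this codebase; a minimal CryptoKit-based sketch of
// that assumed interface:
import CryptoKit
import Foundation

extension Data {
    // Hex-encode the 32-byte SHA-256 digest of this data.
    func sha256String() -> String {
        SHA256.hash(data: self).map { String(format: "%02x", $0) }.joined()
    }
}

// Usage, mirroring the call sites below: "sha256:" + configData.sha256String()
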
// let imageTag = String(tag) - + // Get authentication token only if not in dry-run mode var token: String = "" if !dryRun { @@ -2706,17 +2532,17 @@ class ImageContainerRegistry: @unchecked Sendable { } else { Logger.info("Dry run mode: skipping authentication token request") } - + // Create working directory inside the VM folder for caching/resuming let workDir = URL(fileURLWithPath: vmDirPath).appendingPathComponent(".lume_push_cache") try FileManager.default.createDirectory(at: workDir, withIntermediateDirectories: true) Logger.info("Using push cache directory: \(workDir.path)") - + // Get VM files that need to be pushed using vmDirPath let diskPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("disk.img") let configPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("config.json") let nvramPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("nvram.bin") - + var layers: [OCIManifestLayer] = [] var uncompressedDiskSize: UInt64? = nil @@ -2724,7 +2550,7 @@ class ImageContainerRegistry: @unchecked Sendable { let cachedConfigPath = workDir.appendingPathComponent("config.json") var configDigest: String? = nil var configSize: Int? = nil - + if FileManager.default.fileExists(atPath: cachedConfigPath.path) { Logger.info("Using cached config.json") do { @@ -2734,7 +2560,8 @@ class ImageContainerRegistry: @unchecked Sendable { // Try to get uncompressed disk size from cached config if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { uncompressedDiskSize = vmConfig.diskSize - Logger.info("Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") + Logger.info( + "Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes") } } catch { Logger.error("Failed to read cached config.json: \(error). Will re-process.") @@ -2745,20 +2572,22 @@ class ImageContainerRegistry: @unchecked Sendable { let configData = try Data(contentsOf: configPath) configDigest = "sha256:" + configData.sha256String() configSize = configData.count - try configData.write(to: cachedConfigPath) // Save to cache + try configData.write(to: cachedConfigPath) // Save to cache // Try to get uncompressed disk size from original config if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) { uncompressedDiskSize = vmConfig.diskSize Logger.info("Found disk size in config: \(uncompressedDiskSize ?? 
0) bytes") } } - - if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded - if !dryRun { + + if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded + if !dryRun { // Upload only if not in dry-run mode and blob doesn't exist - if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + if !(try await blobExists( + repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) + { Logger.info("Uploading config.json blob") - let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload + let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload digest = try await uploadBlobFromData( repository: "\(self.organization)/\(imageName)", data: configData, @@ -2769,13 +2598,14 @@ class ImageContainerRegistry: @unchecked Sendable { } } // Add config layer - layers.append(OCIManifestLayer( - mediaType: "application/vnd.oci.image.config.v1+json", - size: size, - digest: digest - )) + layers.append( + OCIManifestLayer( + mediaType: "application/vnd.oci.image.config.v1+json", + size: size, + digest: digest + )) } - + // Process nvram.bin let cachedNvramPath = workDir.appendingPathComponent("nvram.bin") var nvramDigest: String? = nil @@ -2788,47 +2618,56 @@ class ImageContainerRegistry: @unchecked Sendable { nvramDigest = "sha256:" + nvramData.sha256String() nvramSize = nvramData.count } catch { - Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.") + Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.") } } else if FileManager.default.fileExists(atPath: nvramPath.path) { Logger.info("Processing nvram.bin") let nvramData = try Data(contentsOf: nvramPath) nvramDigest = "sha256:" + nvramData.sha256String() nvramSize = nvramData.count - try nvramData.write(to: cachedNvramPath) // Save to cache + try nvramData.write(to: cachedNvramPath) // Save to cache } - - if var digest = nvramDigest, let size = nvramSize { // Use 'var' - if !dryRun { - // Upload only if not in dry-run mode and blob doesn't exist - if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) { + + if var digest = nvramDigest, let size = nvramSize { // Use 'var' + if !dryRun { + // Upload only if not in dry-run mode and blob doesn't exist + if !(try await blobExists( + repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) + { Logger.info("Uploading nvram.bin blob") - let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache + let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache digest = try await uploadBlobFromData( repository: "\(self.organization)/\(imageName)", data: nvramData, token: token ) } else { - Logger.info("NVRAM blob already exists on registry") + Logger.info("NVRAM blob already exists on registry") } } // Add nvram layer - layers.append(OCIManifestLayer( - mediaType: "application/octet-stream", - size: size, - digest: digest - )) + layers.append( + OCIManifestLayer( + mediaType: "application/octet-stream", + size: size, + digest: digest + )) } - + // Process disk.img if FileManager.default.fileExists(atPath: diskPath.path) { let diskAttributes = try FileManager.default.attributesOfItem(atPath: diskPath.path) let diskSize = diskAttributes[.size] as? UInt64 ?? 0 let actualDiskSize = uncompressedDiskSize ?? 
diskSize
- Logger.info("Processing disk.img in chunks", metadata: ["disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB"])
+ Logger.info(
+ "Processing disk.img in chunks",
+ metadata: [
+ "disk_path": diskPath.path, "disk_size": "\(diskSize) bytes",
+ "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB",
+ ])
 let chunksDir = workDir.appendingPathComponent("disk.img.parts")
- try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true)
+ try FileManager.default.createDirectory(
+ at: chunksDir, withIntermediateDirectories: true)
 let chunkSizeBytes = chunkSizeMb * 1024 * 1024
 let totalChunks = Int((diskSize + UInt64(chunkSizeBytes) - 1) / UInt64(chunkSizeBytes))
 Logger.info("Splitting disk into \(totalChunks) chunks")
@@ -2836,58 +2675,125 @@ class ImageContainerRegistry: @unchecked Sendable {
 defer { try? fileHandle.close() }
 var pushedDiskLayers: [(index: Int, layer: OCIManifestLayer)] = []
 var diskChunks: [(index: Int, path: URL, digest: String)] = []
-
- try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { group in
+
+ try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) {
+ group in
 let maxConcurrency = 4
 for chunkIndex in 0..<totalChunks {
- if chunkIndex >= maxConcurrency { if let res = try await group.next() { pushedDiskLayers.append((res.0, res.1)); diskChunks.append((res.0, res.2, res.3)) } }
+ if chunkIndex >= maxConcurrency {
+ if let res = try await group.next() {
+ pushedDiskLayers.append((res.0, res.1))
+ diskChunks.append((res.0, res.2, res.3))
+ }
+ }
 group.addTask { [token, verbose, dryRun, organization, imageName] in
 let chunkIndex = chunkIndex
 let chunkPath = chunksDir.appendingPathComponent("chunk.\(chunkIndex)")
- let metadataPath = chunksDir.appendingPathComponent("chunk_metadata.\(chunkIndex).json")
+ let metadataPath = chunksDir.appendingPathComponent(
+ "chunk_metadata.\(chunkIndex).json")
 var layer: OCIManifestLayer? = nil
 var finalCompressedDigest: String? = nil
- if FileManager.default.fileExists(atPath: metadataPath.path), FileManager.default.fileExists(atPath: chunkPath.path) {
+ if FileManager.default.fileExists(atPath: metadataPath.path),
+ FileManager.default.fileExists(atPath: chunkPath.path)
+ {
 do {
 let metadataData = try Data(contentsOf: metadataPath)
- let metadata = try JSONDecoder().decode(ChunkMetadata.self, from: metadataData)
- Logger.info("Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache")
+ let metadata = try JSONDecoder().decode(
+ ChunkMetadata.self, from: metadataData)
+ Logger.info(
+ "Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache")
 finalCompressedDigest = metadata.compressedDigest
- if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: metadata.compressedDigest, token: token)) { Logger.info("Uploading cached chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: metadata.compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry") } }
- layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: metadata.compressedSize, digest: metadata.compressedDigest, uncompressedSize: metadata.uncompressedSize, uncompressedContentDigest: metadata.uncompressedDigest)
- } catch { Logger.info("Failed to load cached metadata/chunk for index \(chunkIndex): \(error). 
Re-processing."); finalCompressedDigest = nil; layer = nil } + if !dryRun { + if !(try await self.blobExists( + repository: "\(organization)/\(imageName)", + digest: metadata.compressedDigest, token: token)) + { + Logger.info("Uploading cached chunk \(chunkIndex + 1) blob") + _ = try await self.uploadBlobFromPath( + repository: "\(organization)/\(imageName)", + path: chunkPath, digest: metadata.compressedDigest, + token: token) + } else { + Logger.info( + "Chunk \(chunkIndex + 1) blob already exists on registry" + ) + } + } + layer = OCIManifestLayer( + mediaType: "application/octet-stream+lz4", + size: metadata.compressedSize, + digest: metadata.compressedDigest, + uncompressedSize: metadata.uncompressedSize, + uncompressedContentDigest: metadata.uncompressedDigest) + } catch { + Logger.info( + "Failed to load cached metadata/chunk for index \(chunkIndex): \(error). Re-processing." + ) + finalCompressedDigest = nil + layer = nil + } } if layer == nil { Logger.info("Processing chunk \(chunkIndex + 1)/\(totalChunks)") let localFileHandle = try FileHandle(forReadingFrom: diskPath) defer { try? localFileHandle.close() } try localFileHandle.seek(toOffset: UInt64(chunkIndex * chunkSizeBytes)) - let chunkData = try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() + let chunkData = + try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data() let uncompressedSize = UInt64(chunkData.count) let uncompressedDigest = "sha256:" + chunkData.sha256String() - let compressedData = try (chunkData as NSData).compressed(using: .lz4) as Data + let compressedData = + try (chunkData as NSData).compressed(using: .lz4) as Data let compressedSize = compressedData.count let compressedDigest = "sha256:" + compressedData.sha256String() try compressedData.write(to: chunkPath) - let metadata = ChunkMetadata(uncompressedDigest: uncompressedDigest, uncompressedSize: uncompressedSize, compressedDigest: compressedDigest, compressedSize: compressedSize) + let metadata = ChunkMetadata( + uncompressedDigest: uncompressedDigest, + uncompressedSize: uncompressedSize, + compressedDigest: compressedDigest, compressedSize: compressedSize) let metadataData = try JSONEncoder().encode(metadata) try metadataData.write(to: metadataPath) finalCompressedDigest = compressedDigest - if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: compressedDigest, token: token)) { Logger.info("Uploading processed chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)") } } - layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: compressedSize, digest: compressedDigest, uncompressedSize: uncompressedSize, uncompressedContentDigest: uncompressedDigest) + if !dryRun { + if !(try await self.blobExists( + repository: "\(organization)/\(imageName)", + digest: compressedDigest, token: token)) + { + Logger.info("Uploading processed chunk \(chunkIndex + 1) blob") + _ = try await self.uploadBlobFromPath( + repository: "\(organization)/\(imageName)", path: chunkPath, + digest: compressedDigest, token: token) + } else { + Logger.info( + "Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)" + ) + } + } + layer = OCIManifestLayer( + mediaType: "application/octet-stream+lz4", size: compressedSize, + digest: compressedDigest, uncompressedSize: uncompressedSize, + 
uncompressedContentDigest: uncompressedDigest) + } + guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { + throw PushError.blobUploadFailed + } + if verbose { + Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } - guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { throw PushError.blobUploadFailed } - if verbose { Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") } return (chunkIndex, finalLayer, chunkPath, finalDigest) } } - for try await (index, layer, path, digest) in group { pushedDiskLayers.append((index, layer)); diskChunks.append((index, path, digest)) } + for try await (index, layer, path, digest) in group { + pushedDiskLayers.append((index, layer)) + diskChunks.append((index, path, digest)) + } } - layers.append(contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) + layers.append( + contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer }) diskChunks.sort { $0.index < $1.index } Logger.info("All disk chunks processed successfully") - // --- Calculate Total Upload Size & Initialize Tracker --- + // --- Calculate Total Upload Size & Initialize Tracker --- if !dryRun { var totalUploadSizeBytes: Int64 = 0 var totalUploadFiles: Int = 0 @@ -2898,49 +2804,60 @@ class ImageContainerRegistry: @unchecked Sendable { } // Add nvram size if it exists if let size = nvramSize { - totalUploadSizeBytes += Int64(size) - totalUploadFiles += 1 + totalUploadSizeBytes += Int64(size) + totalUploadFiles += 1 } // Add sizes of all compressed disk chunks - let allChunkSizes = diskChunks.compactMap { try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 ?? 0 } + let allChunkSizes = diskChunks.compactMap { + try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 + ?? 0 + } totalUploadSizeBytes += allChunkSizes.reduce(0, +) - totalUploadFiles += totalChunks // Use totalChunks calculated earlier - + totalUploadFiles += totalChunks // Use totalChunks calculated earlier + if totalUploadSizeBytes > 0 { - Logger.info("Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))") + Logger.info( + "Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))" + ) await uploadProgress.setTotal(totalUploadSizeBytes, files: totalUploadFiles) // Print initial progress bar - print("[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... ") - fflush(stdout) - } else { - Logger.info("No files marked for upload.") - } + print( + "[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... 
" + ) + fflush(stdout) + } else { + Logger.info("No files marked for upload.") + } } - // --- End Size Calculation & Init --- + // --- End Size Calculation & Init --- // Perform reassembly verification if requested in dry-run mode if dryRun && reassemble { Logger.info("=== REASSEMBLY MODE ===") Logger.info("Reassembling chunks to verify integrity...") let reassemblyDir = workDir.appendingPathComponent("reassembly") - try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: reassemblyDir, withIntermediateDirectories: true) let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img") - + // Pre-allocate a sparse file with the correct size - Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...") + Logger.info( + "Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))..." + ) if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) } - guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { + guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) + else { throw PushError.fileCreationFailed(reassembledFile.path) } - + let outputHandle = try FileHandle(forWritingTo: reassembledFile) defer { try? outputHandle.close() } - + // Set the file size without writing data (creates a sparse file) try outputHandle.truncate(atOffset: actualDiskSize) - + // Add test patterns at start and end to verify writability let testPattern = "LUME_TEST_PATTERN".data(using: .utf8)! try outputHandle.seek(toOffset: 0) @@ -2948,217 +2865,266 @@ class ImageContainerRegistry: @unchecked Sendable { try outputHandle.seek(toOffset: actualDiskSize - UInt64(testPattern.count)) try outputHandle.write(contentsOf: testPattern) try outputHandle.synchronize() - + Logger.info("Test patterns written to sparse file. File is ready for writing.") - + // Track reassembly progress var reassemblyProgressLogger = ProgressLogger(threshold: 0.05) var currentOffset: UInt64 = 0 - + // Process each chunk in order for (index, cachedChunkPath, _) in diskChunks.sorted(by: { $0.index < $1.index }) { - Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...") - + Logger.info( + "Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)..." 
+ ) + // Always seek to the correct position try outputHandle.seek(toOffset: currentOffset) - + // Decompress and write the chunk let decompressedBytesWritten = try decompressChunkAndWriteSparse( inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset ) - + currentOffset += decompressedBytesWritten reassemblyProgressLogger.logProgress( current: Double(currentOffset) / Double(actualDiskSize), context: "Reassembling" ) - + // Ensure data is written before processing next part try outputHandle.synchronize() } - + // Finalize progress reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete") Logger.info("") // Newline - + // Close handle before post-processing try outputHandle.close() - + // Optimize sparseness if on macOS let optimizedFile = reassemblyDir.appendingPathComponent("optimized_disk.img") if FileManager.default.fileExists(atPath: "/bin/cp") { Logger.info("Optimizing sparse file representation...") - + let process = Process() process.executableURL = URL(fileURLWithPath: "/bin/cp") process.arguments = ["-c", reassembledFile.path, optimizedFile.path] - + do { try process.run() process.waitUntilExit() - + if process.terminationStatus == 0 { // Get sizes of original and optimized files - let optimizedSize = (try? FileManager.default.attributesOfItem(atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 + let optimizedSize = + (try? FileManager.default.attributesOfItem( + atPath: optimizedFile.path)[.size] as? UInt64) ?? 0 let originalUsage = getActualDiskUsage(path: reassembledFile.path) let optimizedUsage = getActualDiskUsage(path: optimizedFile.path) - + Logger.info( "Sparse optimization results: Before: \(ByteCountFormatter.string(fromByteCount: Int64(originalUsage), countStyle: .file)) actual usage, After: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedUsage), countStyle: .file)) actual usage (Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(optimizedSize), countStyle: .file)))" ) - + // Replace original with optimized version try FileManager.default.removeItem(at: reassembledFile) try FileManager.default.moveItem(at: optimizedFile, to: reassembledFile) Logger.info("Using sparse-optimized file for verification") } else { - Logger.info("Sparse optimization failed, using original file for verification") + Logger.info( + "Sparse optimization failed, using original file for verification") try? FileManager.default.removeItem(at: optimizedFile) } } catch { - Logger.info("Error during sparse optimization: \(error.localizedDescription)") + Logger.info( + "Error during sparse optimization: \(error.localizedDescription)") try? FileManager.default.removeItem(at: optimizedFile) } } - + // Verification step Logger.info("Verifying reassembled file...") let originalSize = diskSize let originalDigest = calculateSHA256(filePath: diskPath.path) - let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path) + let reassembledAttributes = try FileManager.default.attributesOfItem( + atPath: reassembledFile.path) let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 
0 let reassembledDigest = calculateSHA256(filePath: reassembledFile.path) - + // Check actual disk usage let originalActualSize = getActualDiskUsage(path: diskPath.path) let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path) - + // Report results Logger.info("Results:") - Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)") - Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)") + Logger.info( + " Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)" + ) + Logger.info( + " Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)" + ) Logger.info(" Original digest: \(originalDigest)") Logger.info(" Reassembled digest: \(reassembledDigest)") - Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))") - Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))") - + Logger.info( + " Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))" + ) + Logger.info( + " Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))" + ) + // Determine if verification was successful if originalDigest == reassembledDigest { Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical") } else { Logger.info("❌ VERIFICATION FAILED: Files differ") - + if originalSize != reassembledSize { - Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes") + Logger.info( + " Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes" + ) } - + // Check sparse file characteristics Logger.info("Attempting to identify differences...") - Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions") - Logger.info(" may be handled differently between the original and reassembled files.") - + Logger.info( + "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" + ) + Logger.info( + " may be handled differently between the original and reassembled files." 
+ ) + if originalActualSize > 0 { - let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0 - Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%") - + let diffPercentage = + ((Double(reassembledActualSize) - Double(originalActualSize)) + / Double(originalActualSize)) * 100.0 + Logger.info( + " Disk usage difference: \(String(format: "%.2f", diffPercentage))%") + if diffPercentage < -40 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).") - Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." + ) + Logger.info( + " This indicates sparse regions weren't properly preserved and may affect VM functionality." + ) } else if diffPercentage < -10 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).") - Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." + ) + Logger.info( + " Some sparse regions may not be properly preserved but VM might still function correctly." + ) } else if diffPercentage > 10 { - Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") - Logger.info(" This is unusual and may indicate improper sparse file handling.") + Logger.info( + " ⚠️ WARNING: Reassembled disk uses more space (>10% difference).") + Logger.info( + " This is unusual and may indicate improper sparse file handling.") } else { - Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.") + Logger.info( + " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." + ) } } - + // Offer recovery option if originalDigest != reassembledDigest { Logger.info("") Logger.info("===== ATTEMPTING RECOVERY ACTION =====") - Logger.info("Since verification failed, trying direct copy as a fallback method.") - + Logger.info( + "Since verification failed, trying direct copy as a fallback method.") + let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img") Logger.info("Creating fallback disk image at: \(fallbackFile.path)") - + // Try rsync first let rsyncProcess = Process() rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync") - rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path] - + rsyncProcess.arguments = [ + "-aS", "--progress", diskPath.path, fallbackFile.path, + ] + do { try rsyncProcess.run() rsyncProcess.waitUntilExit() - + if rsyncProcess.terminationStatus == 0 { - Logger.info("Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)") + Logger.info( + "Direct copy completed with rsync. Fallback image available at: \(fallbackFile.path)" + ) } else { // Try cp -c as fallback Logger.info("Rsync failed. Attempting with cp -c command...") let cpProcess = Process() cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp") cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path] - + try cpProcess.run() cpProcess.waitUntilExit() - + if cpProcess.terminationStatus == 0 { - Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)") + Logger.info( + "Direct copy completed with cp -c. 
Fallback image available at: \(fallbackFile.path)" + ) } else { Logger.info("All recovery attempts failed.") } } } catch { - Logger.info("Error during recovery attempts: \(error.localizedDescription)") + Logger.info( + "Error during recovery attempts: \(error.localizedDescription)") Logger.info("All recovery attempts failed.") } } } - + Logger.info("Reassembled file is available at: \(reassembledFile.path)") } } - // --- Manifest Creation & Push --- + // --- Manifest Creation & Push --- let manifest = createManifest( layers: layers, - configLayerIndex: layers.firstIndex(where: { $0.mediaType == "application/vnd.oci.image.config.v1+json" }), + configLayerIndex: layers.firstIndex(where: { + $0.mediaType == "application/vnd.oci.image.config.v1+json" + }), uncompressedDiskSize: uncompressedDiskSize ) // Push manifest only if not in dry-run mode if !dryRun { - Logger.info("Pushing manifest(s)") // Updated log + Logger.info("Pushing manifest(s)") // Updated log // Serialize the manifest dictionary to Data first - let manifestData = try JSONSerialization.data(withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys]) + let manifestData = try JSONSerialization.data( + withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys]) // Loop through tags to push the same manifest data for tag in tags { - Logger.info("Pushing manifest for tag: \(tag)") - try await pushManifest( - repository: "\(self.organization)/\(imageName)", - tag: tag, // Use the current tag from the loop - manifest: manifestData, // Pass the serialized Data - token: token // Token should be in scope here now - ) + Logger.info("Pushing manifest for tag: \(tag)") + try await pushManifest( + repository: "\(self.organization)/\(imageName)", + tag: tag, // Use the current tag from the loop + manifest: manifestData, // Pass the serialized Data + token: token // Token should be in scope here now + ) } } // Print final upload summary if not dry run if !dryRun { let stats = await uploadProgress.getUploadStats() - Logger.info("\n\(stats.formattedSummary())") // Add newline for separation + Logger.info("\n\(stats.formattedSummary())") // Add newline for separation } // Clean up cache directory only on successful non-dry-run push } - - private func createManifest(layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64?) -> [String: Any] { + + private func createManifest( + layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64? 
+ ) -> [String: Any] { var manifest: [String: Any] = [ "schemaVersion": 2, "mediaType": "application/vnd.oci.image.manifest.v1+json", @@ -3166,221 +3132,244 @@ class ImageContainerRegistry: @unchecked Sendable { var layerDict: [String: Any] = [ "mediaType": layer.mediaType, "size": layer.size, - "digest": layer.digest + "digest": layer.digest, ] - + if let uncompressedSize = layer.uncompressedSize { var annotations: [String: String] = [:] - annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix - + annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix + if let digest = layer.uncompressedContentDigest { - annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix + annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix } - + layerDict["annotations"] = annotations } - + return layerDict - } + }, ] - + // Add config reference if available if let configIndex = configLayerIndex { let configLayer = layers[configIndex] manifest["config"] = [ "mediaType": configLayer.mediaType, "size": configLayer.size, - "digest": configLayer.digest + "digest": configLayer.digest, ] } - + // Add annotations var annotations: [String: String] = [:] - annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix - + annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix + if let diskSize = uncompressedDiskSize { - annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix + annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix } - + manifest["annotations"] = annotations - + return manifest } - - private func uploadBlobFromData(repository: String, data: Data, token: String) async throws -> String { + + private func uploadBlobFromData(repository: String, data: Data, token: String) async throws + -> String + { // Calculate digest let digest = "sha256:" + data.sha256String() - + // Check if blob already exists if try await blobExists(repository: repository, digest: digest, token: token) { Logger.info("Blob already exists: \(digest)") return digest } - + // Initiate upload let uploadURL = try await startBlobUpload(repository: repository, token: token) - + // Upload blob try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) - + // Report progress await uploadProgress.addProgress(Int64(data.count)) - + return digest } - - private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) async throws -> String { + + private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) + async throws -> String + { // Check if blob already exists if try await blobExists(repository: repository, digest: digest, token: token) { Logger.info("Blob already exists: \(digest)") return digest } - + // Initiate upload let uploadURL = try await startBlobUpload(repository: repository, token: token) - + // Load data from file let data = try Data(contentsOf: path) - + // Upload blob try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token) - + // Report progress await uploadProgress.addProgress(Int64(data.count)) - + return digest } - - private func blobExists(repository: String, digest: String, token: String) async throws -> Bool { + + private func blobExists(repository: String, digest: String, token: String) async throws -> Bool + { let url = URL(string: 
"https://\(registry)/v2/\(repository)/blobs/\(digest)")! var request = URLRequest(url: url) request.httpMethod = "HEAD" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - + let (_, response) = try await URLSession.shared.data(for: request) - + if let httpResponse = response as? HTTPURLResponse { return httpResponse.statusCode == 200 } - + return false } - + private func startBlobUpload(repository: String, token: String) async throws -> URL { let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/uploads/")! var request = URLRequest(url: url) request.httpMethod = "POST" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST - + request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST + let (_, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - httpResponse.statusCode == 202, - let locationString = httpResponse.value(forHTTPHeaderField: "Location") else { + + guard let httpResponse = response as? HTTPURLResponse, + httpResponse.statusCode == 202, + let locationString = httpResponse.value(forHTTPHeaderField: "Location") + else { // Log response details on failure - let responseBody = String(data: (try? await URLSession.shared.data(for: request).0) ?? Data(), encoding: .utf8) ?? "(No Body)" - Logger.error("Failed to initiate blob upload. Status: \( (response as? HTTPURLResponse)?.statusCode ?? 0 ). Headers: \( (response as? HTTPURLResponse)?.allHeaderFields ?? [:] ). Body: \(responseBody)") + let responseBody = + String( + data: (try? await URLSession.shared.data(for: request).0) ?? Data(), + encoding: .utf8) ?? "(No Body)" + Logger.error( + "Failed to initiate blob upload. Status: \( (response as? HTTPURLResponse)?.statusCode ?? 0 ). Headers: \( (response as? HTTPURLResponse)?.allHeaderFields ?? [:] ). Body: \(responseBody)" + ) throw PushError.uploadInitiationFailed } - + // Construct the base URL for the registry guard let baseRegistryURL = URL(string: "https://\(registry)") else { Logger.error("Failed to create base registry URL from: \(registry)") - throw PushError.invalidURL - } - - // Create the final upload URL, resolving the location against the base URL - guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else { - Logger.error("Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)") throw PushError.invalidURL } - + + // Create the final upload URL, resolving the location against the base URL + guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else { + Logger.error( + "Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)" + ) + throw PushError.invalidURL + } + Logger.info("Blob upload initiated. Upload URL: \(uploadURL.absoluteString)") - return uploadURL.absoluteURL // Ensure it's absolute + return uploadURL.absoluteURL // Ensure it's absolute } - + private func uploadBlob(url: URL, data: Data, digest: String, token: String) async throws { var components = URLComponents(url: url, resolvingAgainstBaseURL: true)! - + // Add digest parameter var queryItems = components.queryItems ?? 
[] queryItems.append(URLQueryItem(name: "digest", value: digest)) components.queryItems = queryItems - + guard let uploadURL = components.url else { throw PushError.invalidURL } - + var request = URLRequest(url: uploadURL) request.httpMethod = "PUT" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type") request.setValue("\(data.count)", forHTTPHeaderField: "Content-Length") request.httpBody = data - + let (_, response) = try await URLSession.shared.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { throw PushError.blobUploadFailed } } - - private func pushManifest(repository: String, tag: String, manifest: Data, token: String) async throws { + + private func pushManifest(repository: String, tag: String, manifest: Data, token: String) + async throws + { let url = URL(string: "https://\(registry)/v2/\(repository)/manifests/\(tag)")! var request = URLRequest(url: url) request.httpMethod = "PUT" request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - request.setValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type") + request.setValue( + "application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type") request.httpBody = manifest - + let (_, response) = try await URLSession.shared.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else { throw PushError.manifestPushFailed } } - + private func getCredentialsFromEnvironment() -> (String?, String?) { - let username = ProcessInfo.processInfo.environment["GITHUB_USERNAME"] ?? - ProcessInfo.processInfo.environment["GHCR_USERNAME"] - let password = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] ?? - ProcessInfo.processInfo.environment["GHCR_TOKEN"] + let username = + ProcessInfo.processInfo.environment["GITHUB_USERNAME"] + ?? ProcessInfo.processInfo.environment["GHCR_USERNAME"] + let password = + ProcessInfo.processInfo.environment["GITHUB_TOKEN"] + ?? ProcessInfo.processInfo.environment["GHCR_TOKEN"] return (username, password) } // Add these helper methods for dry-run and reassemble implementation - + // NEW Helper function using Compression framework and sparse writing - private func decompressChunkAndWriteSparse(inputPath: String, outputHandle: FileHandle, startOffset: UInt64) throws -> UInt64 { + private func decompressChunkAndWriteSparse( + inputPath: String, outputHandle: FileHandle, startOffset: UInt64 + ) throws -> UInt64 { guard FileManager.default.fileExists(atPath: inputPath) else { Logger.error("Compressed chunk not found at: \(inputPath)") - return 0 // Or throw an error + return 0 // Or throw an error } - let sourceData = try Data(contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) + let sourceData = try Data( + contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped) var currentWriteOffset = startOffset var totalDecompressedBytes: UInt64 = 0 - var sourceReadOffset = 0 // Keep track of how much compressed data we've provided + var sourceReadOffset = 0 // Keep track of how much compressed data we've provided // Use the initializer with the readingFrom closure let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? 
in
                let bytesAvailable = sourceData.count - sourceReadOffset
                if bytesAvailable == 0 {
-                return nil // No more data
+                return nil  // No more data
                }
                let bytesToRead = min(length, bytesAvailable)
-                let chunk = sourceData.subdata(in: sourceReadOffset ..< sourceReadOffset + bytesToRead)
+                let chunk = sourceData.subdata(in: sourceReadOffset..<sourceReadOffset + bytesToRead)

-            let shouldUpdate = (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles)
+            let shouldUpdate =
+                (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles)

-            if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set
+            if shouldUpdate && totalBytes > 0 {  // Ensure totalBytes is set
                let currentSpeed = Double(uploadedBytes - lastUpdateBytes) / max(elapsed, 0.001)
                speedSamples.append(currentSpeed)
@@ -3479,14 +3470,17 @@ actor UploadProgressTracker {
                peakSpeed = max(peakSpeed, currentSpeed)

                // Apply exponential smoothing
-                if smoothedSpeed == 0 { smoothedSpeed = currentSpeed }
-                else { smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed }
+                if smoothedSpeed == 0 {
+                    smoothedSpeed = currentSpeed
+                } else {
+                    smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed
+                }

                let recentAvgSpeed = calculateAverageSpeed()
                let totalElapsed = now.timeIntervalSince(startTime)
                let overallAvgSpeed = totalElapsed > 0 ? Double(uploadedBytes) / totalElapsed : 0

-                let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero
+                let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0  // Avoid division by zero
                logSpeedProgress(
                    current: progress,
                    currentSpeed: currentSpeed,
@@ -3494,7 +3488,7 @@ actor UploadProgressTracker {
                    smoothedSpeed: smoothedSpeed,
                    overallSpeed: overallAvgSpeed,
                    peakSpeed: peakSpeed,
-                    context: "Uploading Image" // Changed context
+                    context: "Uploading Image"  // Changed context
                )

                lastUpdateTime = now
@@ -3521,7 +3515,7 @@ actor UploadProgressTracker {
        let avgSpeed = totalElapsedTime > 0 ? Double(uploadedBytes) / totalElapsedTime : 0
        return UploadStats(
            totalBytes: totalBytes,
-            uploadedBytes: uploadedBytes, // Renamed
+            uploadedBytes: uploadedBytes,  // Renamed
            elapsedTime: totalElapsedTime,
            averageSpeed: avgSpeed,
            peakSpeed: peakSpeed
@@ -3546,10 +3540,10 @@ actor UploadProgressTracker {
        let etaSeconds = speedForEta > 0 ?
Double(remainingBytes) / speedForEta : 0 let etaStr = formatTimeRemaining(etaSeconds) let progressBar = createProgressBar(progress: current) - let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count + let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count print( - "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output + "\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output terminator: "") fflush(stdout) } @@ -3566,7 +3560,10 @@ actor UploadProgressTracker { let units = ["B/s", "KB/s", "MB/s", "GB/s"] var speed = bytesPerSecond var unitIndex = 0 - while speed > 1024 && unitIndex < units.count - 1 { speed /= 1024; unitIndex += 1 } + while speed > 1024 && unitIndex < units.count - 1 { + speed /= 1024 + unitIndex += 1 + } return String(format: "%.1f %@", speed, units[unitIndex]) } private func formatTimeRemaining(_ seconds: Double) -> String { @@ -3574,8 +3571,10 @@ actor UploadProgressTracker { let hours = Int(seconds) / 3600 let minutes = (Int(seconds) % 3600) / 60 let secs = Int(seconds) % 60 - if hours > 0 { return String(format: "%d:%02d:%02d", hours, minutes, secs) } - else { return String(format: "%d:%02d", minutes, secs) } + if hours > 0 { + return String(format: "%d:%02d:%02d", hours, minutes, secs) + } else { + return String(format: "%d:%02d", minutes, secs) + } } } - From b77e58244c22575caaca3cd8735e8fdf394699d2 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 20:09:46 -0700 Subject: [PATCH 33/43] Fix first pull --- .../ImageContainerRegistry.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index ddf5ea1f..d51d92b3 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1011,6 +1011,12 @@ class ImageContainerRegistry: @unchecked Sendable { // Move files to final location try FileManager.default.moveItem(at: tempVMDir, to: URL(fileURLWithPath: vmDir.dir.path)) + // If caching is disabled, clean up the cache entry + if !cachingEnabled { + Logger.info("Caching disabled - cleaning up temporary cache entry") + try? 
cleanupCacheEntry(manifestId: manifestId) + } + Logger.info("Download complete: Files extracted to \(vmDir.dir.path)") Logger.info( "Note: Actual disk usage is significantly lower than reported size due to macOS sparse file system" @@ -1020,6 +1026,16 @@ class ImageContainerRegistry: @unchecked Sendable { ) } + // Helper function to clean up a specific cache entry + private func cleanupCacheEntry(manifestId: String) throws { + let cacheDir = getImageCacheDirectory(manifestId: manifestId) + + if FileManager.default.fileExists(atPath: cacheDir.path) { + Logger.info("Removing cache entry for manifest ID: \(manifestId)") + try FileManager.default.removeItem(at: cacheDir) + } + } + // Shared function to handle disk image creation - can be used by both cache hit and cache miss paths private func createDiskImageFromSource( sourceURL: URL, // Source data to decompress From e2aff16432465464e6f8fec9e297ca0fe733344b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 21:29:42 -0700 Subject: [PATCH 34/43] Fix first pull --- .../ImageContainerRegistry.swift | 43 ++----------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index d51d92b3..8f59eff0 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -948,45 +948,10 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Using copyFromCache method to properly preserve partition tables") try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } else { - // If caching is disabled, just copy files directly to tempVMDir - Logger.info("Caching disabled - copying downloaded files directly to VM directory") - - // Copy non-disk files first - for file in ["config.json", "nvram.bin"] { - let sourceURL = tempDownloadDir.appendingPathComponent(file) - if FileManager.default.fileExists(atPath: sourceURL.path) { - try FileManager.default.copyItem( - at: sourceURL, - to: tempVMDir.appendingPathComponent(file) - ) - } - } - - // For the disk image, we have two cases - either a single file or parts - let diskURL = tempDownloadDir.appendingPathComponent("disk.img") - if FileManager.default.fileExists(atPath: diskURL.path) { - // Single file disk image - try FileManager.default.copyItem( - at: diskURL, - to: tempVMDir.appendingPathComponent("disk.img") - ) - Logger.info("Copied single disk.img file to VM directory") - } else { - // Multiple parts case - use the partitioned disk.img from reassembly - let diskParts = await diskPartsCollector.getSortedParts() - if !diskParts.isEmpty { - Logger.info("Using most recently assembled disk image for VM") - let assembledDiskURL = tempVMDir.appendingPathComponent("disk.img") - if FileManager.default.fileExists(atPath: assembledDiskURL.path) { - Logger.info("Assembled disk.img already exists in VM directory") - } else { - Logger.error( - "Could not find assembled disk image - VM may not boot properly") - } - } else { - Logger.error("No disk image found - VM may not boot properly") - } - } + // Even if caching is disabled, we need to use copyFromCache to assemble the disk image + // correctly with partition tables, then we'll clean up the cache afterward + Logger.info("Caching disabled - using temporary cache to assemble VM files") + try await copyFromCache(manifest: manifest, manifestId: manifestId, to: tempVMDir) } } From 
1ebf14f30467576053f9275337e0cdcb5c96681b Mon Sep 17 00:00:00 2001 From: f-trycua Date: Mon, 21 Apr 2025 21:42:14 -0700 Subject: [PATCH 35/43] Fix first pull --- .../ImageContainerRegistry.swift | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 8f59eff0..731a706a 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -1400,14 +1400,14 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1415,14 +1415,21 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - // Add to collector. It will assign the sequential part number. - let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info( - "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" - ) - } else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + + // When caching is disabled, the file might not exist with the cache path name + // Check if the file exists before trying to use it + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") + } + else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1430,14 +1437,21 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? 
"nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } + + // When caching is disabled, the file might not exist with the cache path name + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, @@ -1815,9 +1829,16 @@ class ImageContainerRegistry: @unchecked Sendable { try FileManager.default.moveItem(at: tempURL, to: url) progress.addProgress(Int64(httpResponse.expectedContentLength)) - // Cache the downloaded layer if caching is enabled - if cachingEnabled, let manifestId = manifestId { + // Always save a copy to the cache directory for use by copyFromCache, + // even if caching is disabled + if let manifestId = manifestId { let cachedLayer = getCachedLayerPath(manifestId: manifestId, digest: digest) + // Make sure cache directory exists + try FileManager.default.createDirectory( + at: cachedLayer.deletingLastPathComponent(), + withIntermediateDirectories: true + ) + if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.removeItem(at: cachedLayer) } From 2efbcb4f8f8f25b98c2a819591e94e3049a05211 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 00:29:46 -0700 Subject: [PATCH 36/43] Remove push, pull old scripts --- libs/lume/scripts/ghcr/pull-ghcr.sh | 205 -------- libs/lume/scripts/ghcr/push-ghcr.sh | 745 ---------------------------- 2 files changed, 950 deletions(-) delete mode 100755 libs/lume/scripts/ghcr/pull-ghcr.sh delete mode 100755 libs/lume/scripts/ghcr/push-ghcr.sh diff --git a/libs/lume/scripts/ghcr/pull-ghcr.sh b/libs/lume/scripts/ghcr/pull-ghcr.sh deleted file mode 100755 index 8b10fae1..00000000 --- a/libs/lume/scripts/ghcr/pull-ghcr.sh +++ /dev/null @@ -1,205 +0,0 @@ -#!/bin/bash - -# Exit immediately if a command exits with a non-zero status -set -e - -# Default parameters -organization="" -image_name="" -image_version="" -target_folder_path="" - -# Parse the command line arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --organization) - organization="$2" - shift 2 - ;; - --image-name) - image_name="$2" - shift 2 - ;; - --image-version) - image_version="$2" - shift 2 - ;; - --target-folder-path) - target_folder_path="$2" - shift 2 - ;; - --help) - echo "Usage: $0 [options]" - echo "Options:" - echo " --organization : GitHub organization (required)" - echo " --image-name : Name of the image to pull (required)" - echo " --image-version : Version of the image to pull (required)" - echo " --target-folder-path : Path where to extract the files (required)" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Ensure required arguments -if [[ -z "$organization" || -z "$image_name" || -z "$image_version" || -z "$target_folder_path" ]]; then - echo "Error: Missing required arguments. Use --help for usage." 
- exit 1 -fi - -# Check and install required tools -for tool in "jq" "pv" "parallel"; do - if ! command -v "$tool" &> /dev/null; then - echo "$tool is not installed. Installing using Homebrew..." - if ! command -v brew &> /dev/null; then - echo "Homebrew is not installed. Please install Homebrew first: https://brew.sh/" - exit 1 - fi - brew install "$tool" - fi -done - -# Create target folder if it doesn't exist -mkdir -p "$target_folder_path" - -# Create a temporary directory for processing files -work_dir=$(mktemp -d) -echo "Working directory: $work_dir" -trap 'rm -rf "$work_dir"' EXIT - -# Registry details -REGISTRY="ghcr.io" -REPOSITORY="$organization/$image_name" -TAG="$image_version" - -# Get anonymous token -echo "Getting authentication token..." -curl -s "https://$REGISTRY/token?service=ghcr.io&scope=repository:$REPOSITORY:pull" -o "$work_dir/token.json" -TOKEN=$(cat "$work_dir/token.json" | jq -r ".token") - -if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then - echo "Failed to obtain token" - exit 1 -fi - -echo "Token obtained successfully" - -# Fetch manifest -echo "Fetching manifest..." -MANIFEST_RESPONSE=$(curl -s \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: application/vnd.oci.image.manifest.v1+json" \ - "https://$REGISTRY/v2/$REPOSITORY/manifests/$TAG") - -echo "Processing manifest..." - -# Create a directory for all files -cd "$work_dir" - -# Create a download function for parallel execution -download_layer() { - local media_type="$1" - local digest="$2" - local output_file="$3" - - echo "Downloading $output_file..." - curl -s -L \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: $media_type" \ - "https://$REGISTRY/v2/$REPOSITORY/blobs/$digest" | \ - pv > "$output_file" -} -export -f download_layer -export TOKEN REGISTRY REPOSITORY - -# Process layers and create download jobs -echo "$MANIFEST_RESPONSE" | jq -c '.layers[]' | while read -r layer; do - media_type=$(echo "$layer" | jq -r '.mediaType') - digest=$(echo "$layer" | jq -r '.digest') - - # Skip empty layers - if [[ "$media_type" == "application/vnd.oci.empty.v1+json" ]]; then - continue - fi - - # Extract part information if present - if [[ $media_type =~ part\.number=([0-9]+)\;part\.total=([0-9]+) ]]; then - part_num="${BASH_REMATCH[1]}" - total_parts="${BASH_REMATCH[2]}" - echo "Found part $part_num of $total_parts" - output_file="disk.img.part.$part_num" - else - case "$media_type" in - "application/vnd.oci.image.layer.v1.tar") - output_file="disk.img" - ;; - "application/vnd.oci.image.config.v1+json") - output_file="config.json" - ;; - "application/octet-stream") - output_file="nvram.bin" - ;; - *) - echo "Unknown media type: $media_type" - continue - ;; - esac - fi - - # Add to download queue - echo "$media_type"$'\t'"$digest"$'\t'"$output_file" >> download_queue.txt -done - -# Download all files in parallel -echo "Downloading files in parallel..." -parallel --colsep $'\t' -a download_queue.txt download_layer {1} {2} {3} - -# Check if we have disk parts to reassemble -if ls disk.img.part.* 1> /dev/null 2>&1; then - echo "Found disk parts, reassembling..." - - # Get total parts from the first part's filename - first_part=$(ls disk.img.part.* | head -n 1) - total_parts=$(echo "$MANIFEST_RESPONSE" | jq -r '.layers[] | select(.mediaType | contains("part.total")) | .mediaType' | grep -o 'part\.total=[0-9]*' | cut -d= -f2 | head -n 1) - - echo "Total parts to reassemble: $total_parts" - - # Concatenate parts in order - echo "Reassembling disk image..." 
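-    # The subshell below emits each part strictly in ascending index order;
-    # cat preserves byte order and pv only adds a progress meter, so the
-    # concatenated stream should be byte-identical to the originally split image.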
- { - for i in $(seq 1 "$total_parts"); do - part_file="disk.img.part.$i" - if [ -f "$part_file" ]; then - cat "$part_file" - else - echo "Error: Missing part $i" - exit 1 - fi - done - } | pv > "$target_folder_path/disk.img" - - echo "Disk image reassembled successfully" -else - # If no parts found, just copy disk.img if it exists - if [ -f disk.img ]; then - echo "Copying disk image..." - pv disk.img > "$target_folder_path/disk.img" - fi -fi - -# Copy config.json if it exists -if [ -f config.json ]; then - echo "Copying config.json..." - cp config.json "$target_folder_path/" -fi - -# Copy nvram.bin if it exists -if [ -f nvram.bin ]; then - echo "Copying nvram.bin..." - cp nvram.bin "$target_folder_path/" -fi - -echo "Download complete: Files extracted to $target_folder_path" \ No newline at end of file diff --git a/libs/lume/scripts/ghcr/push-ghcr.sh b/libs/lume/scripts/ghcr/push-ghcr.sh deleted file mode 100755 index d279be66..00000000 --- a/libs/lume/scripts/ghcr/push-ghcr.sh +++ /dev/null @@ -1,745 +0,0 @@ -#!/bin/bash - -# Exit immediately if a command exits with a non-zero status -set -e - -# Default parameters -organization="" -folder_path="" -image_name="" -image_versions="" -chunk_size="512M" # Default chunk size for splitting large files -dry_run=true # Default: actually push to registry -reassemble=true # Default: don't reassemble in dry-run mode -# Define the OCI media type for the compressed disk layer -oci_layer_media_type="application/octet-stream+lz4" # LZ4 compression format - -# Parse the command line arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --organization) - organization="$2" - shift 2 - ;; - --folder-path) - folder_path="$2" - shift 2 - ;; - --image-name) - image_name="$2" - shift 2 - ;; - --image-versions) - image_versions="$2" - shift 2 - ;; - --chunk-size) - chunk_size="$2" - shift 2 - ;; - --dry-run) - dry_run=true - shift 1 - ;; - --reassemble) - reassemble=true - dry_run=true # Reassemble implies dry-run - shift 1 - ;; - --help) - echo "Usage: $0 [options]" - echo "Options:" - echo " --organization : GitHub organization (required if not using token)" - echo " --folder-path : Path to the folder to upload (required)" - echo " --image-name : Name of the image to publish (required)" - echo " --image-versions : Comma separated list of versions of the image to publish (required)" - echo " --chunk-size : Size of chunks for large files (e.g., 512M, default: 512M)" - echo " --dry-run : Prepare files but don't upload to registry" - echo " --reassemble : In dry-run mode, also reassemble chunks to verify integrity" - echo "Note: The script will automatically resume from the last attempt if available" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Ensure required arguments -if [[ -z "$folder_path" ]]; then - echo "Error: Missing required folder-path argument. Use --help for usage." - exit 1 -fi - -# Only check organization and other push parameters if not in dry-run mode -if [[ "$dry_run" = false ]]; then - if [[ -z "$organization" || -z "$image_name" || -z "$image_versions" ]]; then - echo "Error: Missing required arguments for push. Use --help for usage." - exit 1 - fi - - # Check if the GITHUB_TOKEN variable is set - if [[ -z "$GITHUB_TOKEN" ]]; then - echo "Error: GITHUB_TOKEN is not set." - exit 1 - fi -fi - -# Ensure the folder exists -if [[ ! -d "$folder_path" ]]; then - echo "Error: Folder $folder_path does not exist." - exit 1 -fi - -# Check and install required tools -for tool in "oras" "split" "pv" "jq" "lz4"; do - if ! 
command -v "$tool" &> /dev/null; then - echo "$tool is not installed. Installing using Homebrew..." - if ! command -v brew &> /dev/null; then - echo "Homebrew is not installed. Please install Homebrew first: https://brew.sh/" - exit 1 - fi - brew install "$tool" - fi -done - -echo "LZ4 detected - will use for efficient compression and decompression" -compressed_ext=".lz4" - -# Authenticate with GitHub Container Registry if not in dry-run mode -if [[ "$dry_run" = false ]]; then - echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin -fi - -# Use the source folder path as the working directory and get its absolute path -work_dir=$(cd "$folder_path" && pwd) -echo "Working directory: $work_dir" - -# Function to find the most recent cache directory -find_latest_cache() { - local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1) - if [ -n "$latest_cache" ]; then - echo "$latest_cache" - else - echo "" - fi -} - -# Function to check if a cache directory is valid for resuming -is_valid_cache() { - local cache_dir="$1" - # Check if it contains the necessary files - [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \ - [ -f "$cache_dir/disk.img.lz4" ] || ls "$cache_dir"/disk.img.part.* 1>/dev/null 2>&1 -} - -# Always try to find and use an existing cache -existing_cache=$(find_latest_cache) -if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then - cache_dir="$existing_cache" - - # Check if the cache contains old compressed format - if [ -f "$cache_dir/disk.img.gz" ] || [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.*.part.* 1>/dev/null 2>&1; then - echo "Error: Found legacy compressed format in cache. This script uses improved LZ4 format." - echo "Please delete the cache directory and start fresh: $cache_dir" - exit 1 - fi - - echo "Resuming from existing cache: $cache_dir" -else - echo "No valid cache found. Starting fresh." - cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)" - mkdir -p "$cache_dir" -fi - -echo "Using cache directory: $cache_dir" - -# Display space information -echo "=== DISK SPACE INFORMATION ===" -df -h "$cache_dir" | head -1 -df -h "$cache_dir" | grep -v "Filesystem" -echo - -# Change to the cache directory -cd "$cache_dir" -files=() # Initialize files array here - -# Function to check if a version was already pushed -version_pushed() { - local version="$1" - local version_file="$cache_dir/.pushed_$version" - [ -f "$version_file" ] -} - -# Function to mark a version as pushed -mark_version_pushed() { - local version="$1" - touch "$cache_dir/.pushed_$version" -} - -# Function to calculate sha256 hash -calculate_sha256() { - local file="$1" - if command -v shasum &> /dev/null; then - shasum -a 256 "$file" | awk '{print "sha256:" $1}' - else - echo "sha256:$(openssl dgst -sha256 -binary "$file" | xxd -p | tr -d '\n')" - fi -} - -# Copy config.json if it exists and not already in cache -config_json_source="$folder_path/config.json" -config_json_dest="$cache_dir/config.json" -if [ -f "$config_json_source" ]; then - if [ ! -f "$config_json_dest" ]; then - echo "Copying config.json..." - # Copy config.json as is - we'll add annotations later - cp "$config_json_source" "$config_json_dest" - fi -fi -if [ -f "$config_json_dest" ]; then - files+=("config.json:application/vnd.oci.image.config.v1+json") -fi - -# Copy nvram.bin if it exists and not already in cache -if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then - echo "Copying nvram.bin..." 
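-    # nvram.bin holds the VM's firmware variable store; it is small enough to
-    # copy as-is, and it is pushed later as a plain application/octet-stream layer.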
- cp "$folder_path/nvram.bin" nvram.bin -fi -if [ -f "$cache_dir/nvram.bin" ]; then - files+=("nvram.bin:application/octet-stream") -fi - -# Process disk.img if it exists -disk_img_orig="$folder_path/disk.img" -original_disk_size="" -if [ -f "$disk_img_orig" ]; then - # Get original size *before* compression - original_disk_size=$(stat -f%z "$disk_img_orig") - - # Get real (non-sparse) size - real_size=$(du -k "$disk_img_orig" | cut -f1) - real_size_bytes=$((real_size * 1024)) - sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc) - echo "Disk image: $disk_img_orig" - echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))" - echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB" - echo " Sparseness ratio: ${sparseness_ratio}:1" - - # If we have config.json, update it with the uncompressed disk size annotation - if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then - echo "Adding uncompressed disk size annotation: $original_disk_size bytes" - jq --arg size "$original_disk_size" '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_dest" > "$config_json_dest.tmp" - mv "$config_json_dest.tmp" "$config_json_dest" - fi - - # Create a temporary directory for disk processing - tmp_dir="$cache_dir/tmp_processing" - mkdir -p "$tmp_dir" - - # Split the disk image into chunks first (before compression) - split_parts_dir="$tmp_dir/split_parts" - mkdir -p "$split_parts_dir" - - # Check if we already have split parts - if [ -z "$(ls -A "$split_parts_dir" 2>/dev/null)" ]; then - echo "Splitting disk image into chunks of $chunk_size..." - cd "$split_parts_dir" - pv "$disk_img_orig" | split -b "$chunk_size" - "chunk." - cd "$cache_dir" - else - echo "Using existing split chunks from previous run" - fi - - # Process each chunk (compress, calculate digest, etc.) - compressed_parts_dir="$tmp_dir/compressed_parts" - mkdir -p "$compressed_parts_dir" - - # Store layer information in an array - layers=() - part_num=0 - total_parts=$(ls "$split_parts_dir"/chunk.* | wc -l) - - for chunk_file in "$split_parts_dir"/chunk.*; do - part_basename=$(basename "$chunk_file") - part_num=$((part_num + 1)) - compressed_file="$compressed_parts_dir/${part_basename}${compressed_ext}" - - if [ ! 
-f "$compressed_file" ]; then - echo "Compressing chunk $part_num of $total_parts: $part_basename" - - # Calculate uncompressed content digest before compression - uncompressed_digest=$(calculate_sha256 "$chunk_file") - - # Get uncompressed size - uncompressed_size=$(stat -f%z "$chunk_file") - - # Compress the chunk with LZ4 - lz4 -9 "$chunk_file" "$compressed_file" - - # Get compressed size - compressed_size=$(stat -f%z "$compressed_file") - - echo "Chunk $part_num: Original size: $(du -h "$chunk_file" | cut -f1), Compressed: $(du -h "$compressed_file" | cut -f1)" - else - echo "Using existing compressed chunk $part_num of $total_parts" - - # Need to calculate these values for existing files - uncompressed_digest=$(calculate_sha256 "$chunk_file") - uncompressed_size=$(stat -f%z "$chunk_file") - compressed_size=$(stat -f%z "$compressed_file") - fi - - # Store layer information - layer_info="$compressed_file:${oci_layer_media_type};uncompressed_size=$uncompressed_size;uncompressed_digest=$uncompressed_digest;part.number=$part_num;part.total=$total_parts" - layers+=("$layer_info") - done - - # Generate the files array for ORAS push - for layer_info in "${layers[@]}"; do - files+=("$layer_info") - done - - # --- Reassembly in dry-run mode --- - if [[ "$reassemble" = true ]]; then - echo "=== REASSEMBLY MODE ===" - echo "Reassembling chunks to verify integrity..." - - # Create a directory for reassembly - reassembly_dir="$cache_dir/reassembly" - mkdir -p "$reassembly_dir" - - # Prepare the reassembled file - create a properly sized sparse file first - reassembled_file="$reassembly_dir/reassembled_disk.img" - if [ -f "$reassembled_file" ]; then - echo "Removing previous reassembled file..." - rm -f "$reassembled_file" - fi - - # Get the original disk size from config annotation or directly from image - if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then - config_size=$(jq -r '.annotations."com.trycua.lume.disk.uncompressed_size" // empty' "$config_json_dest") - if [ -n "$config_size" ]; then - original_disk_size_bytes=$config_size - echo "Using uncompressed size from config: $original_disk_size_bytes bytes" - fi - fi - - # Create a sparse file of the exact original size - echo "Pre-allocating sparse file of $(du -h "$disk_img_orig" | cut -f1)..." 
- dd if=/dev/zero of="$reassembled_file" bs=1 count=0 seek=$original_disk_size - - # Make sure filesystem recognizes this as a sparse file - if [[ "$OSTYPE" == "darwin"* ]]; then - # On macOS, we can use a better sparse file creation method if mkfile is available - if command -v mkfile &> /dev/null; then - rm -f "$reassembled_file" - mkfile -n ${original_disk_size}b "$reassembled_file" - echo "Created sparse file using mkfile" - fi - else - # On Linux systems, ensure sparseness with truncate if available - if command -v truncate &> /dev/null; then - rm -f "$reassembled_file" - truncate -s $original_disk_size "$reassembled_file" - echo "Created sparse file using truncate" - fi - fi - - # Create an offset tracker to keep track of where each chunk should go - current_offset=0 - - # Decompress each chunk and write it at the correct offset - for ((i=1; i<=total_parts; i++)); do - # Find the chunk file for part number i - chunk_pattern="" - chunk_uncompressed_size="" - - for layer_info in "${layers[@]}"; do - if [[ "$layer_info" == *";part.number=$i;"* ]]; then - chunk_pattern="${layer_info%%:*}" - # Extract the uncompressed size from metadata - if [[ "$layer_info" =~ uncompressed_size=([0-9]+) ]]; then - chunk_uncompressed_size="${BASH_REMATCH[1]}" - fi - break - fi - done - - if [ -z "$chunk_pattern" ]; then - echo "Error: Could not find chunk for part $i" - exit 1 - fi - - echo "Processing part $i/$total_parts: $(basename "$chunk_pattern") at offset $current_offset..." - - # Create temp decompressed file - temp_decompressed="$reassembly_dir/temp_part_$i" - lz4 -d -f "$chunk_pattern" "$temp_decompressed" || { - echo "Error decompressing part $i" - exit 1 - } - - # Check if this chunk is all zeros (sparse data) - # Only check the first 1MB for efficiency - is_likely_sparse=false - if command -v hexdump &> /dev/null; then - # Use hexdump to check a sample of the file for non-zero content - sparse_check=$(hexdump -n 1048576 -v "$temp_decompressed" | grep -v "0000 0000 0000 0000 0000 0000 0000 0000" | head -n 1) - if [ -z "$sparse_check" ]; then - echo "Chunk appears to be all zeros (sparse data)" - is_likely_sparse=true - fi - fi - - # Use dd to write the chunk at the correct offset with sparse file handling - if [ "$is_likely_sparse" = true ]; then - # For sparse chunks, we don't need to write anything - leave as zeros - echo "Skipping write for all-zero chunk (preserving sparseness)" - elif [[ "$OSTYPE" == "darwin"* ]]; then - # macOS dd doesn't support conv=sparse, use standard approach - dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { - echo "Error writing part $i at offset $current_offset" - exit 1 - } - else - # On Linux, use conv=sparse to preserve sparseness during the write - dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=sparse,notrunc seek=$((current_offset / 1024 / 1024)) status=progress || { - echo "Error writing part $i at offset $current_offset" - exit 1 - } - fi - - # Clean up the temporary file - rm -f "$temp_decompressed" - - # Update the offset for the next chunk - current_offset=$((current_offset + chunk_uncompressed_size)) - done - - # After all chunks are processed, ensure sparseness is preserved - if command -v cp &> /dev/null && [[ "$OSTYPE" == "darwin"* ]]; then - echo "Copying disk image to maintain sparseness..." 
- final_sparse_file="$reassembly_dir/final_disk.img" - rm -f "$final_sparse_file" 2>/dev/null - - # On macOS, use cp with the clone flag to preserve sparseness - cp -c "$reassembled_file" "$final_sparse_file" - - # Use the sparse-optimized file for verification - echo "Using sparse-optimized copy for verification" - mv "$final_sparse_file" "$reassembled_file" - sync - elif command -v cp &> /dev/null && command -v file &> /dev/null; then - # For Linux systems - echo "Optimizing file sparseness..." - final_sparse_file="$reassembly_dir/final_disk.img" - rm -f "$final_sparse_file" 2>/dev/null - - # Use cp --sparse=always on Linux - cp --sparse=always "$reassembled_file" "$final_sparse_file" - - # Use the sparse-optimized file for verification - echo "Using sparse-optimized copy for verification" - mv "$final_sparse_file" "$reassembled_file" - sync - fi - - # Make sure to sync to disk - sync - - # Calculate digests for comparison - echo "Verifying reassembled file..." - original_digest=$(calculate_sha256 "$disk_img_orig") - reassembled_digest=$(calculate_sha256 "$reassembled_file") - - # Compare the original and reassembled file sizes - original_size=$(stat -f%z "$disk_img_orig") - reassembled_size=$(stat -f%z "$reassembled_file") - - echo "Results:" - echo " Original size: $(du -h "$disk_img_orig" | cut -f1) ($original_size bytes)" - echo " Reassembled size: $(du -h "$reassembled_file" | cut -f1) ($reassembled_size bytes)" - echo " Original digest: ${original_digest#sha256:}" - echo " Reassembled digest: ${reassembled_digest#sha256:}" - - # Check if the disk is sparse - original_apparent_size=$(du -h "$disk_img_orig" | cut -f1) - original_actual_size=$(du -sh "$disk_img_orig" | cut -f1) - reassembled_apparent_size=$(du -h "$reassembled_file" | cut -f1) - reassembled_actual_size=$(du -sh "$reassembled_file" | cut -f1) - - echo " Original: Apparent size: $original_apparent_size, Actual disk usage: $original_actual_size" - echo " Reassembled: Apparent size: $reassembled_apparent_size, Actual disk usage: $reassembled_actual_size" - - if [ "$original_digest" = "$reassembled_digest" ]; then - echo "✅ VERIFICATION SUCCESSFUL: Files are identical" - else - echo "❌ VERIFICATION FAILED: Files differ" - if [ "$original_size" != "$reassembled_size" ]; then - echo " Size mismatch: Original $original_size bytes, Reassembled $reassembled_size bytes" - fi - - # Try to identify where they differ - echo "Attempting to identify differences..." - if command -v cmp &> /dev/null; then - cmp_output=$(cmp -l "$disk_img_orig" "$reassembled_file" 2>&1 | head -5) - if [[ "$cmp_output" == *"differ"* ]]; then - echo " First few differences:" - echo "$cmp_output" - fi - fi - - # Check if the virtual machine will still boot despite differences - echo "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions" - echo " may be handled differently between the original and reassembled files." 
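-        # A digest match would have proven the bytes identical; since it failed,
-        # compare allocated blocks (via du) to judge whether only the sparse-hole
-        # layout differs rather than the actual content.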
- - # Calculate a percentage comparison of used blocks - # This helps determine if the sparse issues are severe or minor - original_used_kb=$(du -k "$disk_img_orig" | cut -f1) - reassembled_used_kb=$(du -k "$reassembled_file" | cut -f1) - - # Calculate percentage difference in used space - if [ "$original_used_kb" -ne 0 ]; then - diff_percentage=$(echo "scale=2; ($reassembled_used_kb - $original_used_kb) * 100 / $original_used_kb" | bc) - echo " Disk usage difference: $diff_percentage% ($reassembled_used_kb KB vs $original_used_kb KB)" - - # If reassembled is much smaller, this likely indicates sparse regions weren't preserved - if (( $(echo "$diff_percentage < -40" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)." - echo " This indicates sparse regions weren't properly preserved and may affect VM functionality." - echo " The VM might boot but could be missing applications or data." - elif (( $(echo "$diff_percentage < -10" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)." - echo " Some sparse regions may not be properly preserved but VM might still function correctly." - elif (( $(echo "$diff_percentage > 10" | bc -l) )); then - echo " ⚠️ WARNING: Reassembled disk uses more space (>10% difference)." - echo " This is unusual and may indicate improper sparse file handling." - else - echo " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly." - fi - fi - fi - - echo "Reassembled file is available at: $reassembled_file" - - # If verification failed and difference is significant, try a direct copy as fallback - if [ "$original_digest" != "$reassembled_digest" ] && [ -n "$diff_percentage" ] && (( $(echo "$diff_percentage < -20" | bc -l) )); then - echo - echo "===== ATTEMPTING RECOVERY ACTION =====" - echo "Since verification failed with significant disk usage difference," - echo "trying direct copy of disk image as a fallback method." - echo - - fallback_file="$reassembly_dir/fallback_disk.img" - echo "Creating fallback disk image at: $fallback_file" - - # Use rsync with sparse option if available - if command -v rsync &> /dev/null; then - echo "Using rsync with sparse option for direct copy..." - rsync -aS --progress "$disk_img_orig" "$fallback_file" - else - # Direct cp with sparse option if available - if [[ "$OSTYPE" == "darwin"* ]]; then - echo "Using cp -c (clone) for direct copy..." - cp -c "$disk_img_orig" "$fallback_file" - else - echo "Using cp --sparse=always for direct copy..." - cp --sparse=always "$disk_img_orig" "$fallback_file" - fi - fi - - echo "Direct copy completed. You may want to try using this fallback disk image" - echo "instead if the reassembled one has issues: $fallback_file" - fi - fi - - # --- Push Logic --- - if [[ "$dry_run" = true ]]; then - echo "=== DRY RUN MODE ===" - echo "The following files would be pushed to the registry:" - for file_info in "${files[@]}"; do - file_path="${file_info%%:*}" - file_metadata="${file_info#*:}" - file_size=$(du -h "$file_path" | cut -f1) - echo " - $file_path ($file_size) with metadata: $file_metadata" - done - - if [[ -n "$image_versions" ]]; then - echo "Would push to the following versions:" - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo " - ghcr.io/$organization/$image_name:$version" - done - else - echo "No versions specified for dry run. Processing completed successfully." 
- fi - - echo "All processing tasks completed. No actual push performed." - echo "Cache directory: $cache_dir" - exit 0 - fi - - # Regular push logic (non-dry-run) - push_pids=() - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - - # Skip if version was already pushed - if version_pushed "$version"; then - echo "Version $version was already pushed, skipping..." - continue - fi - - echo "Pushing version $version..." - ( - # Use process substitution to feed file list safely if it gets long - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - mark_version_pushed "$version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done - - # --- Cleanup only if all versions were pushed successfully --- - all_versions_pushed=true - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - if ! version_pushed "$version"; then - all_versions_pushed=false - break - fi - done - - if [ "$all_versions_pushed" = true ]; then - echo "All versions pushed successfully, cleaning up cache directory..." - cd "$work_dir" - rm -rf "$cache_dir" - else - echo "Some versions failed to push. Cache directory preserved at: $cache_dir" - echo "Run again to resume from this point" - fi - -else - echo "Warning: $disk_img_orig not found." - - # If in dry run mode, just show what would happen - if [[ "$dry_run" = true ]]; then - echo "=== DRY RUN MODE ===" - if [ ${#files[@]} -gt 0 ]; then - echo "The following non-disk files would be pushed:" - for file_info in "${files[@]}"; do - file_path="${file_info%%:*}" - file_metadata="${file_info#*:}" - file_size=$(du -h "$file_path" | cut -f1) - echo " - $file_path ($file_size) with metadata: $file_metadata" - done - else - echo "No files found to push." - fi - echo "All processing tasks completed. No actual push performed." - exit 0 - fi - - # Push only config/nvram if they exist - if [ ${#files[@]} -gt 0 ]; then - echo "Pushing non-disk files..." - push_pids=() - IFS=',' read -ra versions <<< "$image_versions" - for version in "${versions[@]}"; do - # Trim whitespace if any from version splitting - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - - # Skip if version was already pushed - if version_pushed "$version"; then - echo "Version $version was already pushed, skipping..." - continue - fi - - echo "Pushing version $version (config/nvram only)..." - ( - oras push --disable-path-validation \ - "ghcr.io/$organization/$image_name:$version" \ - "${files[@]}" - echo "Completed push for version $version" - mark_version_pushed "$version" - ) & - push_pids+=($!) - done - - # Wait for all pushes to complete - for pid in "${push_pids[@]}"; do - wait "$pid" - done - - # --- Cleanup only if all versions were pushed successfully --- - all_versions_pushed=true - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - if ! version_pushed "$version"; then - all_versions_pushed=false - break - fi - done - - if [ "$all_versions_pushed" = true ]; then - echo "All non-disk versions pushed successfully, cleaning up cache directory..." 
- cd "$work_dir" - rm -rf "$cache_dir" - else - echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir" - echo "Run again to resume from this point" - fi - else - echo "No files found to push." - cd "$work_dir" - rm -rf "$cache_dir" - exit 1 - fi -fi - -# Skip final status check in dry-run mode -if [[ "$dry_run" = true ]]; then - exit 0 -fi - -# Determine final status based on the success check *before* potential cleanup -echo # Add a newline for better readability -if [ "$all_versions_pushed" = true ]; then - echo "All versions pushed successfully:" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - echo " Upload complete: ghcr.io/$organization/$image_name:$version" - done -else - echo "Final upload status:" - for version in "${versions[@]}"; do - version=$(echo "$version" | xargs) - if [[ -z "$version" ]]; then continue; fi - # Check the marker file only if the overall process failed (cache preserved) - if version_pushed "$version"; then - echo " Upload complete: ghcr.io/$organization/$image_name:$version" - else - echo " Upload failed: ghcr.io/$organization/$image_name:$version" - fi - done - # Exit with error code if any version failed - exit 1 -fi From 6092a51844a980344896c458ae17667e5c81903a Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 13:09:03 -0700 Subject: [PATCH 37/43] Fix auth for public images --- .../ImageContainerRegistry.swift | 143 +++++++++--------- 1 file changed, 75 insertions(+), 68 deletions(-) diff --git a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift index 731a706a..ee4375f0 100644 --- a/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift +++ b/libs/lume/src/ContainerRegistry/ImageContainerRegistry.swift @@ -677,7 +677,8 @@ class ImageContainerRegistry: @unchecked Sendable { // Get anonymous token Logger.info("Getting registry authentication token") - let token = try await getToken(repository: "\(self.organization)/\(imageName)") + let token = try await getToken( + repository: "\(self.organization)/\(imageName)", scopes: ["pull"]) // Fetch manifest Logger.info("Fetching Image manifest") @@ -1400,14 +1401,14 @@ class ImageContainerRegistry: @unchecked Sendable { async throws { Logger.info("Copying from cache...") - + // Define output URL and expected size variable scope here let outputURL = destination.appendingPathComponent("disk.img") - var expectedTotalSize: UInt64? = nil // Use optional to handle missing config + var expectedTotalSize: UInt64? = nil // Use optional to handle missing config // Instantiate collector let diskPartsCollector = DiskPartsCollector() - var lz4LayerCount = 0 // Count lz4 layers found + var lz4LayerCount = 0 // Count lz4 layers found // First identify disk parts and non-disk files for layer in manifest.layers { @@ -1415,21 +1416,22 @@ class ImageContainerRegistry: @unchecked Sendable { // Identify disk parts simply by media type if layer.mediaType == "application/octet-stream+lz4" { - lz4LayerCount += 1 // Increment count - - // When caching is disabled, the file might not exist with the cache path name - // Check if the file exists before trying to use it - if !FileManager.default.fileExists(atPath: cachedLayer.path) { - Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") - continue - } - - // Add to collector. It will assign the sequential part number. 
- let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) - Logger.info("Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)") - } - else { - // --- Handle Non-Disk-Part Layer (from cache) --- + lz4LayerCount += 1 // Increment count + + // When caching is disabled, the file might not exist with the cache path name + // Check if the file exists before trying to use it + if !FileManager.default.fileExists(atPath: cachedLayer.path) { + Logger.info("Layer file not found in cache: \(cachedLayer.path) - skipping") + continue + } + + // Add to collector. It will assign the sequential part number. + let collectorPartNum = await diskPartsCollector.addPart(url: cachedLayer) + Logger.info( + "Adding cached lz4 layer (part \(lz4LayerCount)) -> Collector #\(collectorPartNum): \(cachedLayer.lastPathComponent)" + ) + } else { + // --- Handle Non-Disk-Part Layer (from cache) --- let fileName: String switch layer.mediaType { case "application/vnd.oci.image.config.v1+json": @@ -1437,21 +1439,22 @@ class ImageContainerRegistry: @unchecked Sendable { case "application/octet-stream": // Assume nvram if config layer exists, otherwise assume single disk image fileName = manifest.config != nil ? "nvram.bin" : "disk.img" - case "application/vnd.oci.image.layer.v1.tar", - "application/octet-stream+gzip": - // Assume disk image for these types as well if encountered in cache scenario - fileName = "disk.img" + case "application/vnd.oci.image.layer.v1.tar", + "application/octet-stream+gzip": + // Assume disk image for these types as well if encountered in cache scenario + fileName = "disk.img" default: - Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") + Logger.info("Skipping unsupported cached layer media type: \(layer.mediaType)") continue } - + // When caching is disabled, the file might not exist with the cache path name if !FileManager.default.fileExists(atPath: cachedLayer.path) { - Logger.info("Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") + Logger.info( + "Non-disk layer file not found in cache: \(cachedLayer.path) - skipping") continue } - + // Copy the non-disk file directly from cache to destination try FileManager.default.copyItem( at: cachedLayer, @@ -1691,51 +1694,45 @@ class ImageContainerRegistry: @unchecked Sendable { Logger.info("Cache copy complete") } - private func getToken(repository: String) async throws -> String { - let encodedRepo = - repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository - // Request both pull and push scope for uploads + private func getToken(repository: String, scopes: [String] = ["pull", "push"]) async throws + -> String + { + let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)! + + // Build scope string from scopes array + let scopeString = scopes.joined(separator: ",") + let url = URL( string: - "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)" + "https://\(self.registry)/token?scope=repository:\(encodedRepo):\(scopeString)&service=\(self.registry)" )! 
var request = URLRequest(url: url) - request.httpMethod = "GET" // Token endpoint uses GET - request.setValue("application/json", forHTTPHeaderField: "Accept") + request.httpMethod = "GET" - // *** Add Basic Authentication Header if credentials exist *** - let (username, password) = getCredentialsFromEnvironment() - if let username = username, let password = password, !username.isEmpty, !password.isEmpty { - let authString = "\(username):\(password)" - if let authData = authString.data(using: .utf8) { - let base64Auth = authData.base64EncodedString() - request.setValue("Basic \(base64Auth)", forHTTPHeaderField: "Authorization") - Logger.info("Adding Basic Authentication header to token request.") - } else { - Logger.error("Failed to encode credentials for Basic Auth.") + let session = URLSession.shared + let (data, response) = try await session.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + if httpResponse.statusCode != 200 { + // If we get 403 and we're requesting both pull and push, retry with just pull + if httpResponse.statusCode == 403 && scopes.contains("push") + && scopes.contains("pull") + { + return try await getToken(repository: repository, scopes: ["pull"]) + } + + // For pull scope only, if authentication fails, assume this is a public image + // and continue without a token (empty string) + if scopes == ["pull"] { + Logger.info( + "Authentication failed for pull scope, assuming public image and continuing without token" + ) + return "" + } + + throw PushError.authenticationFailed } - } else { - Logger.info("No credentials found in environment for token request.") - // Allow anonymous request for pull scope, but push scope likely requires auth - } - // *** End Basic Auth addition *** - - let (data, response) = try await URLSession.shared.data(for: request) - - // Check response status code *before* parsing JSON - guard let httpResponse = response as? HTTPURLResponse else { - throw PushError.authenticationFailed // Or a more generic network error - } - - guard httpResponse.statusCode == 200 else { - // Log detailed error including status code and potentially response body - let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)" - Logger.error( - "Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)" - ) - // Throw specific error based on status if needed (e.g., 401 for unauthorized) - throw PushError.authenticationFailed } let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any] @@ -1755,7 +1752,12 @@ class ImageContainerRegistry: @unchecked Sendable { ) { var request = URLRequest( url: URL(string: "https://\(self.registry)/v2/\(repository)/manifests/\(tag)")!) - request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + // Only add Authorization header if token is not empty + if !token.isEmpty { + request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + request.addValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Accept") let (data, response) = try await URLSession.shared.data(for: request) @@ -1808,7 +1810,12 @@ class ImageContainerRegistry: @unchecked Sendable { do { var request = URLRequest( url: URL(string: "https://\(self.registry)/v2/\(repository)/blobs/\(digest)")!) 
- request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + // Only add Authorization header if token is not empty + if !token.isEmpty { + request.addValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + request.addValue(mediaType, forHTTPHeaderField: "Accept") request.timeoutInterval = 60 @@ -1838,7 +1845,7 @@ class ImageContainerRegistry: @unchecked Sendable { at: cachedLayer.deletingLastPathComponent(), withIntermediateDirectories: true ) - + if FileManager.default.fileExists(atPath: cachedLayer.path) { try FileManager.default.removeItem(at: cachedLayer) } From 35879b4bb999b261ac5ff2778cf109c43a8b1124 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 14:34:21 -0700 Subject: [PATCH 38/43] Fix set latest release --- .github/workflows/publish-agent.yml | 1 - .github/workflows/publish-computer-server.yml | 1 - .github/workflows/publish-computer.yml | 1 - .github/workflows/publish-core.yml | 1 - .github/workflows/publish-mcp-server.yml | 1 - .github/workflows/publish-pylume.yml | 1 - .github/workflows/publish-som.yml | 1 - .github/workflows/reusable-publish.yml | 2 +- 8 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/publish-agent.yml b/.github/workflows/publish-agent.yml index 2a4b3be0..1566880b 100644 --- a/.github/workflows/publish-agent.yml +++ b/.github/workflows/publish-agent.yml @@ -148,7 +148,6 @@ jobs: version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-agent" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/publish-computer-server.yml b/.github/workflows/publish-computer-server.yml index d2ba14d2..15eca348 100644 --- a/.github/workflows/publish-computer-server.yml +++ b/.github/workflows/publish-computer-server.yml @@ -68,7 +68,6 @@ jobs: version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-computer-server" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/publish-computer.yml b/.github/workflows/publish-computer.yml index 6933793a..9175907e 100644 --- a/.github/workflows/publish-computer.yml +++ b/.github/workflows/publish-computer.yml @@ -135,7 +135,6 @@ jobs: version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-computer" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/publish-core.yml b/.github/workflows/publish-core.yml index f3fd4037..4f868f26 100644 --- a/.github/workflows/publish-core.yml +++ b/.github/workflows/publish-core.yml @@ -59,6 +59,5 @@ jobs: version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-core" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/publish-mcp-server.yml b/.github/workflows/publish-mcp-server.yml index 05e51456..d23bf8c0 100644 --- a/.github/workflows/publish-mcp-server.yml +++ b/.github/workflows/publish-mcp-server.yml @@ -144,7 +144,6 @@ jobs: version: ${{ needs.prepare.outputs.version }} is_lume_package: false base_package_name: "cua-mcp-server" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/publish-pylume.yml b/.github/workflows/publish-pylume.yml index c1de45e8..dddde233 100644 --- a/.github/workflows/publish-pylume.yml +++ b/.github/workflows/publish-pylume.yml @@ -63,6 +63,5 @@ jobs: version: ${{ needs.determine-version.outputs.version 
}} is_lume_package: true base_package_name: "pylume" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/publish-som.yml b/.github/workflows/publish-som.yml index 6dccec49..b1d53ac8 100644 --- a/.github/workflows/publish-som.yml +++ b/.github/workflows/publish-som.yml @@ -63,6 +63,5 @@ jobs: version: ${{ needs.determine-version.outputs.version }} is_lume_package: false base_package_name: "cua-som" - make_latest: false secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/reusable-publish.yml b/.github/workflows/reusable-publish.yml index 2e92cf02..fa76636a 100644 --- a/.github/workflows/reusable-publish.yml +++ b/.github/workflows/reusable-publish.yml @@ -63,7 +63,7 @@ jobs: uses: pdm-project/setup-pdm@v3 with: python-version: '3.10' - cache: true + cache: 'pdm' - name: Set version id: set-version From 5653c86670cd3c1de2c9f57044ce3b41997a32fc Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 15:19:45 -0700 Subject: [PATCH 39/43] Handle computer sparse image --- libs/computer/computer/computer.py | 20 ++++------- libs/lume/src/LumeController.swift | 57 +++++++++++++++++++++++------- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/libs/computer/computer/computer.py b/libs/computer/computer/computer.py index 6c1119ac..f4d9d9bf 100644 --- a/libs/computer/computer/computer.py +++ b/libs/computer/computer/computer.py @@ -227,24 +227,18 @@ class Computer: self.logger.error(f"Failed to initialize PyLume context: {e}") raise RuntimeError(f"Failed to initialize PyLume: {e}") - # Try to get the VM, if it doesn't exist, create it and pull the image + # Try to get the VM, if it doesn't exist, return an error try: vm = await self.config.pylume.get_vm(self.config.name) # type: ignore[attr-defined] self.logger.verbose(f"Found existing VM: {self.config.name}") except Exception as e: - self.logger.verbose(f"VM not found, pulling image: {e}") - image_ref = ImageRef( - image=self.config.image, - tag=self.config.tag, - registry="ghcr.io", - organization="trycua", + self.logger.error(f"VM not found: {self.config.name}") + self.logger.error( + f"Please pull the VM first with lume pull macos-sequoia-cua-sparse:latest: {e}" + ) + raise RuntimeError( + f"VM not found: {self.config.name}. Please pull the VM first." ) - self.logger.info(f"Pulling image {self.config.image}:{self.config.tag}...") - try: - await self.config.pylume.pull_image(image_ref, name=self.config.name) # type: ignore[attr-defined] - except Exception as pull_error: - self.logger.error(f"Failed to pull image: {pull_error}") - raise RuntimeError(f"Failed to pull VM image: {pull_error}") # Convert paths to SharedDirectory objects shared_directories = [] diff --git a/libs/lume/src/LumeController.swift b/libs/lume/src/LumeController.swift index 1329f8c5..c50edd90 100644 --- a/libs/lume/src/LumeController.swift +++ b/libs/lume/src/LumeController.swift @@ -399,20 +399,53 @@ final class LumeController { storage: String? = nil ) async throws { do { - let vmName: String = name ?? normalizeVMName(name: image) + // Convert non-sparse image to sparse version if needed + var actualImage = image + var actualName = name + + // Check if image is a non-sparse version (doesn't contain -sparse) + if !image.contains("-sparse") { + // Split the image to get name and tag + let components = image.split(separator: ":") + guard components.count == 2 else { + throw ValidationError("Invalid image format. 
Expected format: name:tag") + } + + let originalName = String(components[0]) + let tag = String(components[1]) + + // Create sparse version of the image name + actualImage = "\(originalName)-sparse:\(tag)" + + // If name wasn't explicitly provided, use the original image name (without -sparse) + if actualName == nil { + actualName = originalName + } + + Logger.info( + "Converting to sparse image", + metadata: [ + "original": image, + "sparse": actualImage, + "vm_name": actualName ?? "default", + ] + ) + } + + let vmName: String = actualName ?? normalizeVMName(name: actualImage) Logger.info( "Pulling image", metadata: [ - "image": image, - "name": name ?? "default", + "image": actualImage, + "name": actualName ?? "default", "registry": registry, "organization": organization, "location": storage ?? "default", ]) try self.validatePullParameters( - image: image, + image: actualImage, name: vmName, registry: registry, organization: organization, @@ -422,7 +455,7 @@ final class LumeController { let imageContainerRegistry = ImageContainerRegistry( registry: registry, organization: organization) try await imageContainerRegistry.pull( - image: image, + image: actualImage, name: vmName, locationName: storage) @@ -440,7 +473,7 @@ final class LumeController { Logger.info( "Image pulled successfully", metadata: [ - "image": image, + "image": actualImage, "name": vmName, "registry": registry, "organization": organization, @@ -477,7 +510,7 @@ final class LumeController { "location": storage ?? "default", "chunk_size": "\(chunkSizeMb)MB", "dry_run": "\(dryRun)", - "reassemble": "\(reassemble)" + "reassemble": "\(reassemble)", ]) try validatePushParameters( @@ -490,14 +523,14 @@ final class LumeController { // Find the actual location of the VM let actualLocation = try self.validateVMExists(name, storage: storage) - + // Get the VM directory let vmDir = try home.getVMDirectory(name, storage: actualLocation) - - // Use ImageContainerRegistry to push the VM + + // Use ImageContainerRegistry to push the VM let imageContainerRegistry = ImageContainerRegistry( registry: registry, organization: organization) - + try await imageContainerRegistry.push( vmDirPath: vmDir.dir.path, imageName: imageName, @@ -849,7 +882,7 @@ final class LumeController { guard !organization.isEmpty else { throw ValidationError("Organization cannot be empty") } - + // Verify VM exists (this will throw if not found) _ = try self.validateVMExists(name) } From 6e5c4a34b682698d425b70243a8bd88ab5305a79 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 15:56:33 -0700 Subject: [PATCH 40/43] Fix yaml schema --- .github/workflows/reusable-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-publish.yml b/.github/workflows/reusable-publish.yml index fa76636a..2e92cf02 100644 --- a/.github/workflows/reusable-publish.yml +++ b/.github/workflows/reusable-publish.yml @@ -63,7 +63,7 @@ jobs: uses: pdm-project/setup-pdm@v3 with: python-version: '3.10' - cache: 'pdm' + cache: true - name: Set version id: set-version From a863766a5eadb38429151768d3cb694f763e7804 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 16:03:05 -0700 Subject: [PATCH 41/43] Update to action-gh-release@v2 --- .github/workflows/reusable-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable-publish.yml b/.github/workflows/reusable-publish.yml index 2e92cf02..892e3250 100644 --- a/.github/workflows/reusable-publish.yml +++ 
b/.github/workflows/reusable-publish.yml @@ -267,7 +267,7 @@ jobs: cat release_notes.md - name: Create GitHub Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: name: "${{ inputs.base_package_name }} v${{ env.VERSION }}" @@ -275,6 +275,6 @@ jobs: files: ${{ inputs.package_dir }}/${{ env.WHEEL_FILE }} draft: false prerelease: false - make_latest: ${{ inputs.make_latest }} + make_latest: ${{ inputs.package_name == 'lume' }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From f5121a6f4df500d68dad73a9b150e0a4843ce11f Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 16:48:01 -0700 Subject: [PATCH 42/43] Fix name normalization --- libs/lume/src/LumeController.swift | 44 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/libs/lume/src/LumeController.swift b/libs/lume/src/LumeController.swift index c50edd90..b3ad25a3 100644 --- a/libs/lume/src/LumeController.swift +++ b/libs/lume/src/LumeController.swift @@ -403,25 +403,33 @@ final class LumeController { var actualImage = image var actualName = name - // Check if image is a non-sparse version (doesn't contain -sparse) + // Split the image to get name and tag for both sparse and non-sparse cases + let components = image.split(separator: ":") + guard components.count == 2 else { + throw ValidationError("Invalid image format. Expected format: name:tag") + } + + let originalName = String(components[0]) + let tag = String(components[1]) + + // For consistent VM naming, strip "-sparse" suffix if present when no name provided + let normalizedBaseName: String + if originalName.hasSuffix("-sparse") { + normalizedBaseName = String(originalName.dropLast(7)) // drop "-sparse" + } else { + normalizedBaseName = originalName + } + + // Set default VM name if not provided + if actualName == nil { + actualName = "\(normalizedBaseName)_\(tag)" + } + + // Convert non-sparse image to sparse version if needed if !image.contains("-sparse") { - // Split the image to get name and tag - let components = image.split(separator: ":") - guard components.count == 2 else { - throw ValidationError("Invalid image format. Expected format: name:tag") - } - - let originalName = String(components[0]) - let tag = String(components[1]) - // Create sparse version of the image name actualImage = "\(originalName)-sparse:\(tag)" - // If name wasn't explicitly provided, use the original image name (without -sparse) - if actualName == nil { - actualName = originalName - } - Logger.info( "Converting to sparse image", metadata: [ @@ -432,13 +440,13 @@ final class LumeController { ) } - let vmName: String = actualName ?? normalizeVMName(name: actualImage) + let vmName = actualName ?? "default" // Just use actualName as it's already normalized Logger.info( "Pulling image", metadata: [ "image": actualImage, - "name": actualName ?? "default", + "name": vmName, "registry": registry, "organization": organization, "location": storage ?? 
"default", @@ -753,7 +761,7 @@ final class LumeController { throw ValidationError("Unsupported OS type: \(os)") } - let vmDir = try home.getVMDirectory(name, storage: storage) + let vmDir: VMDirectory = try home.getVMDirectory(name, storage: storage) if vmDir.exists() { throw VMError.alreadyExists(name) } From 6335af78a8180b3950a498f3c80714e3c1a140a2 Mon Sep 17 00:00:00 2001 From: f-trycua Date: Tue, 22 Apr 2025 18:55:34 -0700 Subject: [PATCH 43/43] Add MCP pre-requisites --- libs/mcp-server/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libs/mcp-server/README.md b/libs/mcp-server/README.md index e5312700..8d55d59a 100644 --- a/libs/mcp-server/README.md +++ b/libs/mcp-server/README.md @@ -18,6 +18,17 @@ **cua-mcp-server** is a MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients. ### Get started with Agent +## Prerequisites + +Before installing the MCP server, you'll need to set up the full Computer-Use Agent capabilities as described in [Option 2 of the main README](../../README.md#option-2-full-computer-use-agent-capabilities). This includes: + +1. Installing the Lume CLI +2. Pulling the latest macOS CUA image +3. Starting the Lume daemon service +4. Installing the required Python libraries (Optional: only needed if you want to verify the agent is working before installing MCP server) + +Make sure these steps are completed and working before proceeding with the MCP server installation. + ## Installation Install the package from PyPI: