Use sparse files

f-trycua
2025-04-15 10:02:13 -07:00
parent 33b3bf3ef5
commit 18f92c6a85
2 changed files with 278 additions and 200 deletions
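The idea behind this change: instead of writing the reassembled disk image front to back, the output file is created sparse (sized up front via truncate) and each part is written at its offset, so regions that are never written consume no disk blocks. A minimal shell illustration of the effect on macOS (sparse-disk.img is a hypothetical name, not part of this commit):

# Truncating a brand-new file out to its final size allocates no data blocks.
dd if=/dev/null of=sparse-disk.img bs=1m count=0 seek=40960   # 40 GiB logical size
ls -lh sparse-disk.img   # apparent (logical) size: 40G
du -h sparse-disk.img    # allocated size: ~0, only written blocks count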
@@ -83,19 +83,19 @@ done
# Authenticate with GitHub Container Registry
echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin
# Create a temporary directory for processing files
work_dir=$(mktemp -d)
echo "Working directory: $work_dir"
trap 'rm -rf "$work_dir"' EXIT
# Use the source folder path as the working directory and get its absolute path
work_dir=$(cd "$folder_path" && pwd)
echo "Working directory (persistent cache): $work_dir"
# Create a directory for all files
mkdir -p "$work_dir/files"
cd "$work_dir/files"
# Change to the working directory
cd "$work_dir"
files=() # Initialize files array here
# Copy config.json if it exists
if [ -f "$folder_path/config.json" ]; then
echo "Copying config.json..."
cp "$folder_path/config.json" config.json
files+=("config.json:application/vnd.oci.image.config.v1+json")
fi
# Copy nvram.bin if it exists
@@ -103,106 +103,104 @@ nvram_bin="$folder_path/nvram.bin"
if [ -f "$nvram_bin" ]; then
echo "Copying nvram.bin..."
cp "$nvram_bin" nvram.bin
files+=("nvram.bin:application/octet-stream")
fi
# Process disk.img if it exists and needs splitting
disk_img="$folder_path/disk.img"
if [ -f "$disk_img" ]; then
file_size=$(stat -f%z "$disk_img")
if [ $file_size -gt 524288000 ]; then # 500MB in bytes
echo "Splitting large file: disk.img"
echo "Original disk.img size: $(du -h "$disk_img" | cut -f1)"
# Copy and split the file with progress monitoring
echo "Copying disk image..."
pv "$disk_img" > disk.img
echo "Splitting file..."
split -b "$chunk_size" disk.img disk.img.part.
rm disk.img
# Process disk.img if it exists
disk_img_orig="disk.img" # Already in work_dir
if [ -f "$disk_img_orig" ]; then
# --- Compression Step ---
echo "Compressing $disk_img_orig..."
compressed_ext=".gz"
compressor="gzip"
compress_opts="-k -f"
compressed_disk_img="disk.img${compressed_ext}"
pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img"
compressed_size=$(stat -f%z "$compressed_disk_img")
echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)"
# --- End Compression Step ---
# Get original file size for verification
original_size=$(stat -f%z "$disk_img")
echo "Original disk.img size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')"
# Check if splitting is needed based on *compressed* size
if [ $compressed_size -gt 524288000 ]; then # 500MB threshold
echo "Splitting compressed file: $compressed_disk_img"
split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part."
# Keep the compressed file and parts in work_dir
# Verify split parts total size
total_size=0
total_parts=$(ls disk.img.part.* | wc -l | tr -d ' ')
# --- Adjust part processing ---
parts_files=()
total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ')
part_num=0
# Create array for files and their annotations
files=()
for part in disk.img.part.*; do
part_size=$(stat -f%z "$part")
total_size=$((total_size + part_size))
for part in "$compressed_disk_img.part."*; do
part_num=$((part_num + 1))
echo "Part $part: $(awk -v size=$part_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')"
files+=("$part:application/vnd.oci.image.layer.v1.tar;part.number=$part_num;part.total=$total_parts")
# *** IMPORTANT: Use the *compressed* OCI media type with part info ***
parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts")
echo "Part $part: $(du -h "$part" | cut -f1)"
done
# Combine non-disk files with disk parts
files+=("${parts_files[@]}")
# --- End Adjust part processing ---
echo "Total size of parts: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')"
# Verify total size matches original
if [ $total_size -ne $original_size ]; then
echo "ERROR: Size mismatch!"
echo "Original file size: $(awk -v size=$original_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')"
echo "Sum of parts size: $(awk -v size=$total_size 'BEGIN {printf "%.2f GB", size/1024/1024/1024}')"
echo "Difference: $(awk -v orig=$original_size -v total=$total_size 'BEGIN {printf "%.2f GB", (orig-total)/1024/1024/1024}')"
exit 1
fi
# Add remaining files
if [ -f "config.json" ]; then
files+=("config.json:application/vnd.oci.image.config.v1+json")
fi
if [ -f "nvram.bin" ]; then
files+=("nvram.bin:application/octet-stream")
fi
# Push versions in parallel
push_pids=()
for version in $image_versions; do
(
echo "Pushing version $version..."
oras push --disable-path-validation \
"ghcr.io/$organization/$image_name:$version" \
"${files[@]}"
echo "Completed push for version $version"
) &
push_pids+=($!)
done
# Wait for all pushes to complete
for pid in "${push_pids[@]}"; do
wait "$pid"
done
else
# Push disk.img directly if it's small enough
echo "Copying disk image..."
pv "$disk_img" > disk.img
# Push all files together
echo "Pushing all files..."
files=("disk.img:application/vnd.oci.image.layer.v1.tar")
if [ -f "config.json" ]; then
files+=("config.json:application/vnd.oci.image.config.v1+json")
fi
if [ -f "nvram.bin" ]; then
files+=("nvram.bin:application/octet-stream")
fi
# Add the single compressed file to the list
# *** IMPORTANT: Use the *compressed* OCI media type ***
files+=("$compressed_disk_img:${oci_layer_media_type}")
fi
for version in $image_versions; do
# Push all files in one command
# --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) ---
push_pids=()
IFS=',' read -ra versions <<< "$image_versions"
for version in "${versions[@]}"; do
# Trim any whitespace left over from splitting the version list
version=$(echo "$version" | xargs)
if [[ -z "$version" ]]; then continue; fi
echo "Pushing version $version..."
(
# Push all files for this version in a single oras invocation
oras push --disable-path-validation \
"ghcr.io/$organization/$image_name:$version" \
"${files[@]}"
done
echo "Completed push for version $version"
) &
push_pids+=($!)
done
# Wait for all pushes to complete
for pid in "${push_pids[@]}"; do
wait "$pid"
done
# --- Cleanup compressed files after successful push ---
echo "Push successful, cleaning up compressed artifacts..."
# Check if parts exist first
parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null)
if [ -n "$parts_exist" ]; then
echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img"
rm -f "$compressed_disk_img.part."*
# Also remove the original compressed file that was split
rm -f "$compressed_disk_img"
elif [ -f "$compressed_disk_img" ]; then
echo "Removing compressed file: $compressed_disk_img"
rm -f "$compressed_disk_img"
fi
# --- End Push Logic ---
else
echo "Warning: $disk_img_orig not found."
# Push only config/nvram if they exist
if [ ${#files[@]} -gt 0 ]; then
# (Add push logic here too if you want to push even without disk.img)
echo "Pushing non-disk files..."
# ... (similar push loop as above) ...
else
echo "No files found to push."
exit 1
fi
fi
for version in $image_versions; do
for version in "${versions[@]}"; do
# Trim any whitespace left over from splitting the version list
version=$(echo "$version" | xargs)
if [[ -z "$version" ]]; then continue; fi
echo "Upload complete: ghcr.io/$organization/$image_name:$version"
done
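Note that split emits consecutive byte ranges of a single gzip stream, so the parts are only decompressible after being concatenated in order. A quick local sanity check (disk_check.img is a hypothetical name; part names follow the scheme above):

# Shell globs sort part.aa, part.ab, ... lexicographically, matching split's output order.
cat disk.img.gz.part.* | gunzip > disk_check.img
cmp disk.img disk_check.img && echo "parts verified"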
@@ -809,94 +809,118 @@ class ImageContainerRegistry: @unchecked Sendable {
)
// Create sparse file of the required size
FileManager.default.createFile(atPath: outputURL.path, contents: nil)
let outputHandle = try FileHandle(forWritingTo: outputURL)
defer { try? outputHandle.close() }
// Set the file size without writing data (creates a sparse file)
try outputHandle.truncate(atOffset: expectedTotalSize)
var reassemblyProgressLogger = ProgressLogger(threshold: 0.05)
var processedSize: UInt64 = 0
var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file
// Process each part in order
for partNum in 1...totalParts {
guard let (_, partURL) = diskParts.first(where: { $0.0 == partNum }) else {
// Find the original layer info for this part number
guard
let layer = manifest.layers.first(where: { layer in
if let info = extractPartInfo(from: layer.mediaType) {
return info.partNum == partNum
}
return false
}),
let (_, partURL) = diskParts.first(where: { $0.0 == partNum })
else {
throw PullError.missingPart(partNum)
}
let layerMediaType = layer.mediaType // The media type encodes whether (and how) this part is compressed
Logger.info(
"Processing part \(partNum) of \(totalParts): \(partURL.lastPathComponent)")
// Get part file size
let partAttributes = try FileManager.default.attributesOfItem(
atPath: partURL.path)
let partSize = partAttributes[.size] as? UInt64 ?? 0
// Calculate the offset in the final file (parts are sequential)
let partOffset = processedSize
// Open input file
let inputHandle = try FileHandle(forReadingFrom: partURL)
defer {
try? inputHandle.close()
// Clean up the downloaded part afterwards, unless it came from the cache
if !partURL.path.contains(cacheDirectory.path) {
try? FileManager.default.removeItem(at: partURL)
}
}
// Seek to the appropriate offset in output file
try outputHandle.seek(toOffset: partOffset)
// Seek to the correct offset in the output sparse file
try outputHandle.seek(toOffset: currentOffset)
// Copy data in chunks to avoid memory issues
let chunkSize: UInt64 =
determineIfMemoryConstrained() ? 256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB)
var bytesWritten: UInt64 = 0
if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType
Logger.info("Decompressing part \(partNum)...")
let process = Process()
let pipe = Pipe()
process.executableURL = URL(fileURLWithPath: "/bin/sh")
process.arguments = ["-c", "\(decompressCmd) < \"\(partURL.path)\""] // Feed file via stdin redirection
process.standardOutput = pipe // Capture decompressed output
while bytesWritten < partSize {
// Use Foundation's autoreleasepool for proper memory management
Foundation.autoreleasepool {
let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten)
if let chunk = try? inputHandle.read(upToCount: Int(readSize)) {
if !chunk.isEmpty {
try? outputHandle.write(contentsOf: chunk)
bytesWritten += UInt64(chunk.count)
try process.run()
// Update progress less frequently to reduce overhead
if bytesWritten % (chunkSize * 4) == 0
|| bytesWritten == partSize
{
let totalProgress =
Double(processedSize + bytesWritten)
/ Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling disk image")
}
}
let reader = pipe.fileHandleForReading
var partDecompressedSize: UInt64 = 0
// Read decompressed data in chunks and write to sparse file
while true {
let data = autoreleasepool { // Help manage memory with large files
reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks
}
if data.isEmpty { break } // End of stream
// Add a small delay every few MB to allow memory cleanup
if partDecompressedSize % (chunkSize * 16) == 0 && partDecompressedSize > 0 {
// Use Thread.sleep for now, but ideally this would use a non-blocking approach
// that is appropriate for the context (sync/async)
Thread.sleep(forTimeInterval: 0.01)
}
try outputHandle.write(contentsOf: data)
partDecompressedSize += UInt64(data.count)
// Update progress based on decompressed size written
let totalProgress =
Double(currentOffset + partDecompressedSize)
/ Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling/Decompressing")
}
process.waitUntilExit()
if process.terminationStatus != 0 {
throw PullError.decompressionFailed("Part \(partNum)")
}
currentOffset += partDecompressedSize // Advance offset by decompressed size
} else {
// --- Handle non-compressed parts (if any, or the single file case) ---
// This part is similar to your original copy logic, writing directly
// from inputHandle to outputHandle at currentOffset
Logger.info("Copying non-compressed part \(partNum)...")
let partSize =
(try? FileManager.default.attributesOfItem(atPath: partURL.path)[.size]
as? UInt64) ?? 0
var bytesWritten: UInt64 = 0
let chunkSize = 1024 * 1024
while bytesWritten < partSize {
let data = autoreleasepool {
(try? inputHandle.read(upToCount: chunkSize)) ?? Data() // treat a read error as end-of-stream
}
if data.isEmpty { break }
try outputHandle.write(contentsOf: data)
bytesWritten += UInt64(data.count)
// Update progress
let totalProgress =
Double(currentOffset + bytesWritten) / Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling")
}
currentOffset += bytesWritten
// --- End non-compressed handling ---
}
// Update processed size
processedSize += partSize
// Ensure data is written before processing next part (optional but safer)
try outputHandle.synchronize()
}
// Finalize progress
reassemblyProgressLogger.logProgress(
current: 1.0, context: "Reassembling disk image")
Logger.info("") // Newline after progress
// Close the output file
try outputHandle.synchronize()
try outputHandle.close()
// Finalize progress, close handle (done by defer)
reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete")
Logger.info("") // Newline
// Verify final size
let finalSize =
@@ -1031,86 +1055,112 @@ class ImageContainerRegistry: @unchecked Sendable {
)
// Create sparse file of the required size
FileManager.default.createFile(atPath: outputURL.path, contents: nil)
let outputHandle = try FileHandle(forWritingTo: outputURL)
defer { try? outputHandle.close() }
// Set the file size without writing data (creates a sparse file)
try outputHandle.truncate(atOffset: expectedTotalSize)
var reassemblyProgressLogger = ProgressLogger(threshold: 0.05)
var processedSize: UInt64 = 0
var currentOffset: UInt64 = 0 // Track position in the final *decompressed* file
// Process each part in order
for partNum in 1...totalParts {
guard let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum }) else {
// Find the original layer info for this part number
guard
let layer = manifest.layers.first(where: { layer in
if let info = extractPartInfo(from: layer.mediaType) {
return info.partNum == partNum
}
return false
}),
let (_, sourceURL) = diskPartSources.first(where: { $0.0 == partNum })
else {
throw PullError.missingPart(partNum)
}
let layerMediaType = layer.mediaType // The media type encodes whether (and how) this part is compressed
Logger.info(
"Processing part \(partNum) of \(totalParts) from cache: \(sourceURL.lastPathComponent)"
)
// Get part file size
let partAttributes = try FileManager.default.attributesOfItem(
atPath: sourceURL.path)
let partSize = partAttributes[.size] as? UInt64 ?? 0
// Calculate the offset in the final file (parts are sequential)
let partOffset = processedSize
// Open input file
let inputHandle = try FileHandle(forReadingFrom: sourceURL)
defer { try? inputHandle.close() }
// Seek to the appropriate offset in output file
try outputHandle.seek(toOffset: partOffset)
// Seek to the correct offset in the output sparse file
try outputHandle.seek(toOffset: currentOffset)
// Copy data in chunks to avoid memory issues
let chunkSize: UInt64 = determineIfMemoryConstrained() ? 256 * 1024 : 1024 * 1024 // Use smaller chunks (256KB-1MB)
var bytesWritten: UInt64 = 0
if let decompressCmd = getDecompressionCommand(for: layerMediaType) { // Use extracted mediaType
Logger.info("Decompressing part \(partNum)...")
let process = Process()
let pipe = Pipe()
process.executableURL = URL(fileURLWithPath: "/bin/sh")
process.arguments = ["-c", "\(decompressCmd) < \"\(sourceURL.path)\""] // Feed file via stdin redirection
process.standardOutput = pipe // Capture decompressed output
while bytesWritten < partSize {
// Use Foundation's autoreleasepool for proper memory management
Foundation.autoreleasepool {
let readSize: UInt64 = min(UInt64(chunkSize), partSize - bytesWritten)
if let chunk = try? inputHandle.read(upToCount: Int(readSize)) {
if !chunk.isEmpty {
try? outputHandle.write(contentsOf: chunk)
bytesWritten += UInt64(chunk.count)
try process.run()
// Update progress less frequently to reduce overhead
if bytesWritten % (chunkSize * 4) == 0 || bytesWritten == partSize {
let totalProgress =
Double(processedSize + bytesWritten)
/ Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling disk image from cache")
}
}
let reader = pipe.fileHandleForReading
var partDecompressedSize: UInt64 = 0
// Read decompressed data in chunks and write to sparse file
while true {
let data = autoreleasepool { // Help manage memory with large files
reader.readData(ofLength: 1024 * 1024) // Read 1MB chunks
}
if data.isEmpty { break } // End of stream
// Add a small delay every few MB to allow memory cleanup
if partDecompressedSize % (chunkSize * 16) == 0 && partDecompressedSize > 0 {
// Use Thread.sleep for now, but ideally this would use a non-blocking approach
// that is appropriate for the context (sync/async)
Thread.sleep(forTimeInterval: 0.01)
}
try outputHandle.write(contentsOf: data)
partDecompressedSize += UInt64(data.count)
// Update progress based on decompressed size written
let totalProgress =
Double(currentOffset + partDecompressedSize) / Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling")
}
process.waitUntilExit()
if process.terminationStatus != 0 {
throw PullError.decompressionFailed("Part \(partNum)")
}
currentOffset += partDecompressedSize // Advance offset by decompressed size
} else {
// --- Handle non-compressed parts (if any, or the single file case) ---
// This part is similar to your original copy logic, writing directly
// from inputHandle to outputHandle at currentOffset
Logger.info("Copying non-compressed part \(partNum)...")
let partSize =
(try? FileManager.default.attributesOfItem(atPath: sourceURL.path)[.size]
as? UInt64) ?? 0
var bytesWritten: UInt64 = 0
let chunkSize = 1024 * 1024
while bytesWritten < partSize {
let data = autoreleasepool {
(try? inputHandle.read(upToCount: chunkSize)) ?? Data() // treat a read error as end-of-stream
}
if data.isEmpty { break }
try outputHandle.write(contentsOf: data)
bytesWritten += UInt64(data.count)
// Update progress
let totalProgress =
Double(currentOffset + bytesWritten) / Double(expectedTotalSize)
reassemblyProgressLogger.logProgress(
current: totalProgress,
context: "Reassembling")
}
currentOffset += bytesWritten
// --- End non-compressed handling ---
}
// Update processed size
processedSize += partSize
// Ensure data is written before processing next part (optional but safer)
try outputHandle.synchronize()
}
// Finalize progress
reassemblyProgressLogger.logProgress(
current: 1.0, context: "Reassembling disk image from cache")
Logger.info("") // Newline after progress
// Close the output file
try outputHandle.synchronize()
try outputHandle.close()
// Finalize progress, close handle (done by defer)
reassemblyProgressLogger.logProgress(current: 1.0, context: "Reassembly Complete")
Logger.info("") // Newline
// Verify final size
let finalSize =
@@ -1646,4 +1696,34 @@ class ImageContainerRegistry: @unchecked Sendable {
return nil
}
// Add helper to check media type and get decompress command
private func getDecompressionCommand(for mediaType: String) -> String? {
if mediaType.hasSuffix("+gzip") {
return "/usr/bin/gunzip -c" // -c writes to stdout
} else if mediaType.hasSuffix("+zstd") {
// zstd is not bundled with macOS; look it up on PATH and fall back to a common Homebrew/MacPorts location
let zstdPath = findExecutablePath(named: "zstd") ?? "/usr/local/bin/zstd"
return "\(zstdPath) -dc" // -d decompress, -c stdout
}
return nil // Not compressed or unknown compression
}
// Helper to locate an executable by searching PATH (with common fallback directories)
private func findExecutablePath(named executableName: String) -> String? {
let pathEnv =
ProcessInfo.processInfo.environment["PATH"]
?? "/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin"
let paths = pathEnv.split(separator: ":")
for path in paths {
let executablePath = URL(fileURLWithPath: String(path)).appendingPathComponent(
executableName
).path
if FileManager.default.isExecutableFile(atPath: executablePath) {
return executablePath
}
}
return nil
}
}
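For quick experimentation outside Swift, the media-type suffix convention that getDecompressionCommand relies on can be mirrored in the shell. A sketch (decompress_cmd is a hypothetical helper, assuming the +gzip/+zstd suffixes used above):

decompress_cmd() {
  # Map an OCI media-type suffix to a streaming decompressor.
  case "$1" in
    *+gzip) echo "gunzip -c" ;;   # -c writes to stdout
    *+zstd) echo "zstd -dc" ;;    # -d decompress, -c to stdout
    *) return 1 ;;                # not compressed, or unknown scheme
  esac
}
decompress_cmd "application/vnd.oci.image.layer.v1.tar+gzip"   # prints: gunzip -c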