mirror of https://github.com/trycua/computer.git
synced 2026-01-06 05:20:02 -06:00
Sparse file optimizations
@@ -9,6 +9,8 @@ folder_path=""
image_name=""
image_versions=""
chunk_size="500M" # Default chunk size for splitting large files
# Define the OCI media type for the compressed disk layer
oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format

# Parse the command line arguments
while [[ $# -gt 0 ]]; do
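oras attaches a media type to each file argument with a file:mediaType suffix, which is how the oci_layer_media_type defined above ends up on the pushed layer. A minimal sketch of that syntax, with a hypothetical registry path and file name:

# Hypothetical example of the file:mediaType syntax oras uses for layers.
oras push ghcr.io/acme/example-image:0.0.1 \
    "disk.img.aa:application/octet-stream+lzfse"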
@@ -41,6 +43,7 @@ while [[ $# -gt 0 ]]; do
        echo " --image-name <name> : Name of the image to publish (required)"
        echo " --image-versions <versions> : Comma separated list of versions of the image to publish (required)"
        echo " --chunk-size <size> : Size of chunks for large files (e.g., 500M, default: 500M)"
        echo "Note: The script will automatically resume from the last attempt if available"
        exit 0
        ;;
    *)
@@ -69,7 +72,7 @@ if [[ ! -d "$folder_path" ]]; then
fi

# Check and install required tools
for tool in "oras" "split" "pv" "gzip"; do
for tool in "oras" "split" "pv" "jq"; do
    if ! command -v "$tool" &> /dev/null; then
        echo "$tool is not installed. Installing using Homebrew..."
        if ! command -v brew &> /dev/null; then
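The brew handling is cut off by the hunk boundary above. A minimal sketch of how such a check-and-install loop typically completes (the exact committed lines are not shown here):

for tool in "oras" "split" "pv" "jq"; do
    if ! command -v "$tool" &> /dev/null; then
        if ! command -v brew &> /dev/null; then
            echo "Error: Homebrew is required to install $tool" >&2
            exit 1
        fi
        brew install "$tool"
    fi
done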
@@ -80,80 +83,252 @@ for tool in "oras" "split" "pv" "gzip"; do
    fi
done

# Check if Apple Archive is available
if ! command -v compression_tool &> /dev/null; then
    echo "Error: Apple Archive (compression_tool) is required but not found"
    echo "This script requires macOS with Apple Archive support"
    exit 1
fi

echo "Apple Archive detected - will use for optimal sparse file handling"
compressed_ext=".aa"
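compression_tool ships with macOS and is invoked later in this diff with -encode. A hedged round-trip sketch, assuming the decode flags mirror the encode form used below (file names hypothetical):

# Encode a file as Apple Archive with lzfse, then decode it back.
compression_tool -encode -i disk.img -o disk.img.aa -a lzfse
compression_tool -decode -i disk.img.aa -o disk.img.restored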

# Authenticate with GitHub Container Registry
echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin

# Use the source folder path as the working directory and get its absolute path
work_dir=$(cd "$folder_path" && pwd)
echo "Working directory (persistent cache): $work_dir"
echo "Working directory: $work_dir"

# Change to the working directory
cd "$work_dir"
# Function to find the most recent cache directory
find_latest_cache() {
    local latest_cache=$(ls -td "$work_dir"/.ghcr_cache_* 2>/dev/null | head -n1)
    if [ -n "$latest_cache" ]; then
        echo "$latest_cache"
    else
        echo ""
    fi
}

# Function to check if a cache directory is valid for resuming
is_valid_cache() {
    local cache_dir="$1"
    # Check if it contains the necessary files
    [ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \
    [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1
}
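ls -td depends on mtime ordering and on shell-safe path names. An alternative sketch (not the committed approach) that sorts on explicit epoch mtimes with BSD stat:

# Alternative sketch: pick the newest cache directory by epoch mtime.
find_latest_cache() {
    find "$work_dir" -maxdepth 1 -type d -name '.ghcr_cache_*' \
        -exec stat -f '%m %N' {} + 2>/dev/null | sort -rn | head -n1 | cut -d' ' -f2-
}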

# Always try to find and use an existing cache
existing_cache=$(find_latest_cache)
if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then
    cache_dir="$existing_cache"

    # Check if the cache contains old gzip format
    if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then
        echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format."
        echo "Please delete the cache directory and start fresh: $cache_dir"
        exit 1
    fi

    echo "Resuming from existing cache: $cache_dir"
else
    echo "No valid cache found. Starting fresh."
    cache_dir="$work_dir/.ghcr_cache_$(date +%Y%m%d_%H%M%S)"
    mkdir -p "$cache_dir"
fi

echo "Using cache directory: $cache_dir"

# Display space information
echo "=== DISK SPACE INFORMATION ==="
df -h "$cache_dir" | head -1
df -h "$cache_dir" | grep -v "Filesystem"
echo

# Change to the cache directory
cd "$cache_dir"
files=() # Initialize files array here

# Copy config.json if it exists
if [ -f "$folder_path/config.json" ]; then
echo "Copying config.json..."
cp "$folder_path/config.json" config.json
# Function to check if a version was already pushed
version_pushed() {
    local version="$1"
    local version_file="$cache_dir/.pushed_$version"
    [ -f "$version_file" ]
}

# Function to mark a version as pushed
mark_version_pushed() {
    local version="$1"
    touch "$cache_dir/.pushed_$version"
}
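A short usage sketch of the marker-file idiom above (hypothetical version string; assumes cache_dir is set as in this script): each successful push drops an empty .pushed_<version> file, so a rerun skips that version.

if version_pushed "0.9.9"; then               # hypothetical version
    echo "0.9.9 already pushed, skipping"
else
    echo "pushing 0.9.9..."                   # the oras push would run here
    mark_version_pushed "0.9.9"
fi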

# Copy config.json if it exists and not already in cache
config_json_source="$folder_path/config.json"
config_json_dest="$cache_dir/config.json"
if [ -f "$config_json_source" ]; then
    if [ ! -f "$config_json_dest" ]; then
        echo "Copying config.json..."
        # Add the uncompressed disk size annotation if disk.img exists and jq is available
        if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then
            echo "Adding uncompressed disk size annotation: $original_disk_size bytes"
            jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \
            (echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails
        else
            cp "$config_json_source" "$config_json_dest"
        fi
    fi
fi
if [ -f "$config_json_dest" ]; then
    files+=("config.json:application/vnd.oci.image.config.v1+json")
fi
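A standalone demo of the jq idiom used above; the size value and temp path are illustrative. jq treats a null .annotations as empty for +=, so the key is added either way:

printf '{"annotations": {}}' > /tmp/config.json
jq --arg size "68719476736" \
    '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' /tmp/config.json
# -> {"annotations": {"com.trycua.lume.disk.uncompressed_size": "68719476736"}}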

# Copy nvram.bin if it exists
nvram_bin="$folder_path/nvram.bin"
if [ -f "$nvram_bin" ]; then
# Copy nvram.bin if it exists and not already in cache
if [ -f "$folder_path/nvram.bin" ] && [ ! -f "$cache_dir/nvram.bin" ]; then
    echo "Copying nvram.bin..."
cp "$nvram_bin" nvram.bin
    cp "$folder_path/nvram.bin" nvram.bin
fi
if [ -f "$cache_dir/nvram.bin" ]; then
    files+=("nvram.bin:application/octet-stream")
fi

# Process disk.img if it exists
disk_img_orig="disk.img" # Already in work_dir
disk_img_orig="$folder_path/disk.img"
original_disk_size=""
if [ -f "$disk_img_orig" ]; then
# --- Compression Step ---
echo "Compressing $disk_img_orig..."
compressed_ext=".gz"
compressor="gzip"
compress_opts="-k -f"
    # Get original size *before* compression
    original_disk_size=$(stat -f%z "$disk_img_orig")

    # Get real (non-sparse) size
    real_size=$(du -k "$disk_img_orig" | cut -f1)
    real_size_bytes=$((real_size * 1024))
    sparseness_ratio=$(echo "scale=2; $original_disk_size / $real_size_bytes" | bc)
    echo "Disk image: $disk_img_orig"
    echo " Logical size: $original_disk_size bytes ($(du -h "$disk_img_orig" | cut -f1))"
    echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB"
    echo " Sparseness ratio: ${sparseness_ratio}:1"
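The stat/du pair above is the heart of the sparse handling: stat -f%z reports the apparent (logical) length, while du reports the blocks actually allocated. A self-contained sketch on macOS (file path hypothetical):

# Create a 1 GiB sparse file, then compare logical vs allocated size.
dd if=/dev/zero of=/tmp/sparse.img bs=1 count=0 seek=1g 2>/dev/null
logical=$(stat -f%z /tmp/sparse.img)            # apparent size in bytes
physical_kb=$(du -k /tmp/sparse.img | cut -f1)  # allocated KiB on disk
echo "logical=$logical bytes, physical=$((physical_kb * 1024)) bytes"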

# Check if we already have compressed files in the cache
compressed_disk_img="disk.img${compressed_ext}"
pv "$disk_img_orig" | $compressor $compress_opts > "$compressed_disk_img"
compressed_size=$(stat -f%z "$compressed_disk_img")
echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)"
# --- End Compression Step ---

# Check if splitting is needed based on *compressed* size
if [ $compressed_size -gt 524288000 ]; then # 500MB threshold
echo "Splitting compressed file: $compressed_disk_img"
split -b "$chunk_size" "$compressed_disk_img" "$compressed_disk_img.part."
# Keep the compressed file and parts in work_dir

# --- Adjust part processing ---
parts_files=()
total_parts=$(ls "$compressed_disk_img.part."* | wc -l | tr -d ' ')
part_num=0
for part in "$compressed_disk_img.part."*; do
    part_num=$((part_num + 1))
    # *** IMPORTANT: Use the *compressed* OCI media type with part info ***
    parts_files+=("$part:${oci_layer_media_type};part.number=$part_num;part.total=$total_parts")
    echo "Part $part: $(du -h "$part" | cut -f1)"
done
# Combine non-disk files with disk parts
files+=("${parts_files[@]}")
# --- End Adjust part processing ---

else
# Add the single compressed file to the list
# *** IMPORTANT: Use the *compressed* OCI media type ***
files+=("$compressed_disk_img:${oci_layer_media_type}")
    already_compressed=false

    if [ -f "$cache_dir/$compressed_disk_img" ]; then
        already_compressed=true
        echo "Using existing compressed file from cache: $compressed_disk_img"
    elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then
        already_compressed=true
        echo "Using existing compressed parts from cache"
    fi

    # --- Push Logic (Remains largely the same, but $files now contains compressed parts/file) ---
    # Only compress if not already compressed in cache
    if [ "$already_compressed" = false ]; then
        # Check for free disk space before compression
        avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}')
        avail_space_bytes=$((avail_space * 1024))
        # Assume compressed size is roughly 30% of real size as a safe estimate
        estimated_compressed=$((real_size_bytes * 30 / 100))

        if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then
            echo "WARNING: Possibly insufficient disk space for compression!"
            echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB"
            read -p "Continue anyway? (y/n) " -n 1 -r
            echo
            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                echo "Exiting. Free up some space and try again."
                exit 1
            fi
        fi
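The df/awk probe above generalizes to a small helper; column 4 of df -k is the available 1K-blocks on both macOS and GNU coreutils (a sketch, not part of the commit):

free_bytes() {
    echo $(( $(df -k "$1" | tail -1 | awk '{print $4}') * 1024 ))
}
echo "free in /tmp: $(free_bytes /tmp) bytes"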

        # --- Compression Step ---
        echo "Compressing $disk_img_orig with Apple Archive..."

        # Apple Archive compression
        echo "Starting compression with Apple Archive (showing output file growth)..."
        compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse &
        COMP_PID=$!

        sleep 1 # Give compression a moment to start

        # Display progress based on output file growth
        while kill -0 $COMP_PID 2>/dev/null; do
            if [ -f "$compressed_disk_img" ]; then
                current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0)
                percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc)
                echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r"
            else
                echo -ne "Preparing compression...\r"
            fi
            sleep 2
        done

        wait $COMP_PID
        echo -e "\nCompression complete!"

        compressed_size=$(stat -f%z "$compressed_disk_img")
        echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)"
        echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%"
        # --- End Compression Step ---
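The kill -0 polling loop above generalizes to any background job whose output file grows. A hedged sketch as a reusable helper, reusing the sparse file from the earlier sketch (producer command illustrative):

watch_growth() { # usage: watch_growth <pid> <file>
    while kill -0 "$1" 2>/dev/null; do
        [ -f "$2" ] && echo -ne "$(du -h "$2" | cut -f1) written\r"
        sleep 2
    done
    echo
}
gzip -k /tmp/sparse.img &            # any long-running producer works here
watch_growth $! /tmp/sparse.img.gz
wait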

        # Check if splitting is needed based on *compressed* size
        if [ $compressed_size -gt 524288000 ]; then # 500MB threshold
            echo "Splitting compressed file into chunks of $chunk_size..."
            pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part."
            rm -f "$compressed_disk_img" # Remove the unsplit compressed file
            # Verify that parts were created
            echo "Verifying split parts..."
            ls -la "$cache_dir"/disk.img${compressed_ext}.part.*
        fi
    else
        echo "Using existing compressed/split files from cache"
    fi
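On the consumer side, split(1)'s suffixes (.aa, .ab, ...) sort lexically, so a plain glob concatenation restores byte order before decoding. A hedged sketch of the pull-side counterpart (decode flags assumed symmetric to the encode above):

cat disk.img.aa.part.* > disk.img.aa
compression_tool -decode -i disk.img.aa -o disk.img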

    # --- Adjust part processing ---
    echo "Looking for compressed files in $cache_dir..."

    # List all files in the cache directory for debugging
    ls -la "$cache_dir"

    if [ -f "$cache_dir/$compressed_disk_img" ]; then
        echo "Found single compressed file: $compressed_disk_img"
        # Add the single compressed file to the list
        files+=("$compressed_disk_img:${oci_layer_media_type}")
    else
        # Look for split parts
        part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo ""))
        if [ ${#part_files[@]} -gt 0 ]; then
            echo "Found ${#part_files[@]} split parts"
            parts_files=()
            part_num=0

            for part in "${part_files[@]}"; do
                part_num=$((part_num + 1))
                part_basename=$(basename "$part")
                parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}")
                echo "Part $part_num: $(du -h "$part" | cut -f1)"
            done

            files+=("${parts_files[@]}")
        else
            echo "ERROR: No compressed files found in cache directory: $cache_dir"
            echo "Contents of cache directory:"
            find "$cache_dir" -type f | sort
            exit 1
        fi
    fi
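The basename step matters because oras records each file argument's path as the layer's org.opencontainers.image.title annotation; pushing relative names from inside the cache directory keeps those titles clean, and absolute paths would additionally require --disable-path-validation. A hypothetical illustration:

cd "$cache_dir"
oras push ghcr.io/acme/example-image:0.0.1 \
    "disk.img.aa.part.aa:application/octet-stream+lzfse"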

    # --- Push Logic ---
    push_pids=()
    IFS=',' read -ra versions <<< "$image_versions"
    for version in "${versions[@]}"; do
        # Trim whitespace if any from version splitting
        version=$(echo "$version" | xargs)
        if [[ -z "$version" ]]; then continue; fi

        # Skip if version was already pushed
        if version_pushed "$version"; then
            echo "Version $version was already pushed, skipping..."
            continue
        fi

        echo "Pushing version $version..."
        (
            # Use process substitution to feed file list safely if it gets long
@@ -161,6 +336,7 @@ if [ -f "$disk_img_orig" ]; then
                "ghcr.io/$organization/$image_name:$version" \
                "${files[@]}"
            echo "Completed push for version $version"
            mark_version_pushed "$version"
        ) &
        push_pids+=($!)
    done
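One caveat with the fan-out above: wait "$pid" discards each job's exit status here, and mark_version_pushed runs after oras push regardless of its outcome (unless the script runs under set -e), so a failed push may be recorded as done. A hedged sketch of explicit status aggregation (versions hypothetical):

pids=()
for v in 0.9.8 0.9.9; do                 # hypothetical versions
    ( echo "pushing $v"; sleep 1 ) &     # stand-in for the push subshell
    pids+=($!)
done
failed=0
for pid in "${pids[@]}"; do
    wait "$pid" || failed=1
done
[ "$failed" -eq 0 ] && echo "all pushes succeeded"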
@@ -170,37 +346,108 @@ if [ -f "$disk_img_orig" ]; then
        wait "$pid"
    done

# --- Cleanup compressed files after successful push ---
echo "Push successful, cleaning up compressed artifacts..."
# Check if parts exist first
parts_exist=$(ls "$compressed_disk_img.part."* 2>/dev/null)
if [ -n "$parts_exist" ]; then
echo "Removing split parts: $compressed_disk_img.part.* and $compressed_disk_img"
rm -f "$compressed_disk_img.part."*
# Also remove the original compressed file that was split
rm -f "$compressed_disk_img"
elif [ -f "$compressed_disk_img" ]; then
echo "Removing compressed file: $compressed_disk_img"
rm -f "$compressed_disk_img"
    # --- Cleanup only if all versions were pushed successfully ---
    all_versions_pushed=true
    for version in "${versions[@]}"; do
        version=$(echo "$version" | xargs)
        if [[ -z "$version" ]]; then continue; fi
        if ! version_pushed "$version"; then
            all_versions_pushed=false
            break
        fi
    done

    if [ "$all_versions_pushed" = true ]; then
        echo "All versions pushed successfully, cleaning up cache directory..."
        cd "$work_dir"
        rm -rf "$cache_dir"
    else
        echo "Some versions failed to push. Cache directory preserved at: $cache_dir"
        echo "Run again to resume from this point"
    fi
    # --- End Push Logic ---

else
    echo "Warning: $disk_img_orig not found."
    # Push only config/nvram if they exist
    if [ ${#files[@]} -gt 0 ]; then
# (Add push logic here too if you want to push even without disk.img)
echo "Pushing non-disk files..."
# ... (similar push loop as above) ...
        echo "Pushing non-disk files..."
        push_pids=()
        IFS=',' read -ra versions <<< "$image_versions"
        for version in "${versions[@]}"; do
            # Trim whitespace if any from version splitting
            version=$(echo "$version" | xargs)
            if [[ -z "$version" ]]; then continue; fi

            # Skip if version was already pushed
            if version_pushed "$version"; then
                echo "Version $version was already pushed, skipping..."
                continue
            fi

            echo "Pushing version $version (config/nvram only)..."
            (
                oras push --disable-path-validation \
                    "ghcr.io/$organization/$image_name:$version" \
                    "${files[@]}"
                echo "Completed push for version $version"
                mark_version_pushed "$version"
            ) &
            push_pids+=($!)
        done

        # Wait for all pushes to complete
        for pid in "${push_pids[@]}"; do
            wait "$pid"
        done

        # --- Cleanup only if all versions were pushed successfully ---
        all_versions_pushed=true
        for version in "${versions[@]}"; do
            version=$(echo "$version" | xargs)
            if [[ -z "$version" ]]; then continue; fi
            if ! version_pushed "$version"; then
                all_versions_pushed=false
                break
            fi
        done

        if [ "$all_versions_pushed" = true ]; then
            echo "All non-disk versions pushed successfully, cleaning up cache directory..."
            cd "$work_dir"
            rm -rf "$cache_dir"
        else
            echo "Some non-disk versions failed to push. Cache directory preserved at: $cache_dir"
            echo "Run again to resume from this point"
        fi
    else
        echo "No files found to push."
        cd "$work_dir"
        rm -rf "$cache_dir"
        exit 1
    fi
fi

for version in "${versions[@]}"; do
# Trim whitespace if any from version splitting
version=$(echo "$version" | xargs)
if [[ -z "$version" ]]; then continue; fi
echo "Upload complete: ghcr.io/$organization/$image_name:$version"
done
# Determine final status based on the success check *before* potential cleanup
echo # Add a newline for better readability
if [ "$all_versions_pushed" = true ]; then
    echo "All versions pushed successfully:"
    for version in "${versions[@]}"; do
        version=$(echo "$version" | xargs)
        if [[ -z "$version" ]]; then continue; fi
        echo " Upload complete: ghcr.io/$organization/$image_name:$version"
    done
else
    echo "Final upload status:"
    for version in "${versions[@]}"; do
        version=$(echo "$version" | xargs)
        if [[ -z "$version" ]]; then continue; fi
        # Check the marker file only if the overall process failed (cache preserved)
        if version_pushed "$version"; then
            echo " Upload complete: ghcr.io/$organization/$image_name:$version"
        else
            echo " Upload failed: ghcr.io/$organization/$image_name:$version"
        fi
    done
    # Exit with error code if any version failed
    exit 1
fi