Prevent sparse disk expansion

f-trycua
2025-04-19 22:15:42 -07:00
parent 353f3cf45d
commit 54c5ae2bd8
12 changed files with 1864 additions and 154 deletions

libs/lume/.cursorignore (new file, 233 lines)

@@ -0,0 +1,233 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
!libs/lume/scripts/build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Scripts
server/scripts/
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Conda
.conda/
# Local environment
.env.local
# macOS DS_Store
.DS_Store
weights/
weights/icon_detect/
weights/icon_detect/model.pt
weights/icon_detect/model.pt.zip
weights/icon_detect/model.pt.zip.part*
libs/omniparser/weights/icon_detect/model.pt
# Example test data and output
examples/test_data/
examples/output/
/screenshots/
/experiments/
/logs/
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
## User settings
xcuserdata/
## Obj-C/Swift specific
*.hmap
## App packaging
*.ipa
*.dSYM.zip
*.dSYM
## Playgrounds
timeline.xctimeline
playground.xcworkspace
# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
.swiftpm/
.build/
# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
# *.xcworkspace
# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts
Carthage/Build/
# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control
fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output
# Ignore folder
ignore
# .release
.release/

View File

@@ -52,6 +52,7 @@ Commands:
lume stop <name> Stop a running VM
lume delete <name> Delete a VM
lume pull <image> Pull a macOS image from container registry
lume push <name> <image:tag> Push a VM image to a container registry
lume clone <name> <new-name> Clone an existing VM
lume config Get or set lume configuration
lume images List available macOS images in local cache
@@ -99,6 +100,16 @@ Command Options:
--organization <org> Organization to pull from (default: trycua)
--storage <name> VM storage location to use
push:
--additional-tags <tags...> Additional tags to push the same image to
--registry <url> Container registry URL (default: ghcr.io)
--organization <org> Organization/user to push to (default: trycua)
--storage <name> VM storage location to use
--chunk-size-mb <size> Chunk size for disk image upload in MB (default: 512)
--verbose Enable verbose logging
--dry-run Prepare files and show plan without uploading
--reassemble Verify integrity by reassembling chunks (requires --dry-run)
get:
-f, --format <format> Output format (json|text)
--storage <name> VM storage location to use
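
For reference, the new `push` options above compose with the positional arguments like this; the VM and image names below are purely illustrative:
```bash
# Push local VM "my-vm" as my-image:latest and my-image:v1
# to the default registry/organization (ghcr.io/trycua)
lume push my-vm my-image:latest \
  --additional-tags v1 \
  --chunk-size-mb 512 \
  --verbose
```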
@@ -141,18 +152,21 @@ You can also download the `lume.pkg.tar.gz` archive from the [latest release](ht
## Prebuilt Images
Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages).
**Important Note (v0.2.0+):** Images are being re-uploaded with sparse file system optimizations enabled, resulting in significantly lower actual disk usage. Older images (without the `-sparse` suffix) are now **deprecated**. The last version of `lume` fully supporting the non-sparse images was `v0.1.x`. Starting from `lume v0.2.0`, please use the images with the `-sparse` suffix.
These images come with an SSH server pre-configured and auto-login enabled.
For the security of your VM, change the default password `lume` immediately after your first login.
| Image | Tag | Description | Size |
| Image | Tag | Description | Logical Size |
|-------|------------|-------------|------|
| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB |
| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB |
| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB |
| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB |
| `macos-sequoia-vanilla-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image | 40GB |
| `macos-sequoia-xcode-sparse` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 50GB |
| `macos-sequoia-cua-sparse` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 80GB |
| `ubuntu-noble-vanilla-sparse` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB |
For additional disk space, resize the VM disk after pulling the image using the `lume set <name> --disk-size <size>` command.
For additional disk space, resize the VM disk after pulling the image using the `lume set <name> --disk-size <size>` command. Note that the actual disk space used by sparse images will be much lower than the logical size listed.
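As a quick sketch (the VM name and target size are placeholders), pulling a sparse image and then growing its disk looks like:
```bash
# Pull a sparse image, then enlarge its disk; the added space stays sparse
# until the guest actually writes to it, so real disk usage remains low
lume pull macos-sequoia-vanilla-sparse:latest
lume set <name> --disk-size 100GB
```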
## Local API Server

View File

@@ -193,6 +193,42 @@ curl --connect-timeout 6000 \
```
</details>
<details open>
<summary><strong>Push Image (Async)</strong> - POST /vms/push</summary>
```bash
# Push VM 'my-local-vm' to 'my-org/my-image:latest' and 'my-org/my-image:v1'
curl --connect-timeout 6000 \
--max-time 5000 \
-X POST \
-H "Content-Type: application/json" \
-d '{
"name": "my-local-vm",
"imageName": "my-image",
"tags": ["latest", "v1"],
"organization": "my-org",
"registry": "ghcr.io",
"chunkSizeMb": 512,
"storage": null
}' \
http://localhost:3000/lume/vms/push
```
**Response (202 Accepted):**
```json
{
"message": "Push initiated in background",
"name": "my-local-vm",
"imageName": "my-image",
"tags": [
"latest",
"v1"
]
}
```
</details>
<details open>
<summary><strong>Clone VM</strong> - POST /vms/:name/clone</summary>

View File

@@ -8,9 +8,11 @@ organization=""
folder_path=""
image_name=""
image_versions=""
chunk_size="500M" # Default chunk size for splitting large files
chunk_size="512M" # Default chunk size for splitting large files
dry_run=false # Default: actually push to registry
reassemble=false # Default: don't reassemble in dry-run mode
# Define the OCI media type for the compressed disk layer
oci_layer_media_type="application/octet-stream+lzfse" # Apple Archive format
oci_layer_media_type="application/octet-stream+lz4" # LZ4 compression format
# Parse the command line arguments
while [[ $# -gt 0 ]]; do
@@ -35,6 +37,15 @@ while [[ $# -gt 0 ]]; do
chunk_size="$2"
shift 2
;;
--dry-run)
dry_run=true
shift 1
;;
--reassemble)
reassemble=true
dry_run=true # Reassemble implies dry-run
shift 1
;;
--help)
echo "Usage: $0 [options]"
echo "Options:"
@@ -42,7 +53,9 @@ while [[ $# -gt 0 ]]; do
echo " --folder-path <path> : Path to the folder to upload (required)"
echo " --image-name <name> : Name of the image to publish (required)"
echo " --image-versions <versions> : Comma separated list of versions of the image to publish (required)"
echo " --chunk-size <size> : Size of chunks for large files (e.g., 500M, default: 500M)"
echo " --chunk-size <size> : Size of chunks for large files (e.g., 512M, default: 512M)"
echo " --dry-run : Prepare files but don't upload to registry"
echo " --reassemble : In dry-run mode, also reassemble chunks to verify integrity"
echo "Note: The script will automatically resume from the last attempt if available"
exit 0
;;
@@ -54,15 +67,23 @@ while [[ $# -gt 0 ]]; do
done
# Ensure required arguments
if [[ -z "$organization" || -z "$folder_path" || -z "$image_name" || -z "$image_versions" ]]; then
echo "Error: Missing required arguments. Use --help for usage."
if [[ -z "$folder_path" ]]; then
echo "Error: Missing required folder-path argument. Use --help for usage."
exit 1
fi
# Check if the GITHUB_TOKEN variable is set
if [[ -z "$GITHUB_TOKEN" ]]; then
echo "Error: GITHUB_TOKEN is not set."
exit 1
# Only check organization and other push parameters if not in dry-run mode
if [[ "$dry_run" = false ]]; then
if [[ -z "$organization" || -z "$image_name" || -z "$image_versions" ]]; then
echo "Error: Missing required arguments for push. Use --help for usage."
exit 1
fi
# Check if the GITHUB_TOKEN variable is set
if [[ -z "$GITHUB_TOKEN" ]]; then
echo "Error: GITHUB_TOKEN is not set."
exit 1
fi
fi
# Ensure the folder exists
@@ -72,7 +93,7 @@ if [[ ! -d "$folder_path" ]]; then
fi
# Check and install required tools
for tool in "oras" "split" "pv" "jq"; do
for tool in "oras" "split" "pv" "jq" "lz4"; do
if ! command -v "$tool" &> /dev/null; then
echo "$tool is not installed. Installing using Homebrew..."
if ! command -v brew &> /dev/null; then
@@ -83,19 +104,14 @@ for tool in "oras" "split" "pv" "jq"; do
fi
done
# Check if Apple Archive is available
if ! command -v compression_tool &> /dev/null; then
echo "Error: Apple Archive (compression_tool) is required but not found"
echo "This script requires macOS with Apple Archive support"
exit 1
echo "LZ4 detected - will use for efficient compression and decompression"
compressed_ext=".lz4"
# Authenticate with GitHub Container Registry if not in dry-run mode
if [[ "$dry_run" = false ]]; then
echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin
fi
echo "Apple Archive detected - will use for optimal sparse file handling"
compressed_ext=".aa"
# Authenticate with GitHub Container Registry
echo "$GITHUB_TOKEN" | oras login ghcr.io -u "$organization" --password-stdin
# Use the source folder path as the working directory and get its absolute path
work_dir=$(cd "$folder_path" && pwd)
echo "Working directory: $work_dir"
@@ -115,7 +131,7 @@ is_valid_cache() {
local cache_dir="$1"
# Check if it contains the necessary files
[ -f "$cache_dir/config.json" ] || [ -f "$cache_dir/nvram.bin" ] || \
[ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.aa.part.* 1>/dev/null 2>&1
[ -f "$cache_dir/disk.img.lz4" ] || ls "$cache_dir"/disk.img.part.* 1>/dev/null 2>&1
}
# Always try to find and use an existing cache
@@ -123,9 +139,9 @@ existing_cache=$(find_latest_cache)
if [ -n "$existing_cache" ] && is_valid_cache "$existing_cache"; then
cache_dir="$existing_cache"
# Check if the cache contains old gzip format
if [ -f "$cache_dir/disk.img.gz" ] || ls "$cache_dir"/disk.img.gz.part.* 1>/dev/null 2>&1; then
echo "Error: Found legacy gzip format in cache. This script only supports Apple Archive format."
# Check if the cache contains old compressed format
if [ -f "$cache_dir/disk.img.gz" ] || [ -f "$cache_dir/disk.img.aa" ] || ls "$cache_dir"/disk.img.*.part.* 1>/dev/null 2>&1; then
echo "Error: Found legacy compressed format in cache. This script uses improved LZ4 format."
echo "Please delete the cache directory and start fresh: $cache_dir"
exit 1
fi
@@ -162,20 +178,24 @@ mark_version_pushed() {
touch "$cache_dir/.pushed_$version"
}
# Function to calculate sha256 hash
calculate_sha256() {
local file="$1"
if command -v shasum &> /dev/null; then
shasum -a 256 "$file" | awk '{print "sha256:" $1}'
else
echo "sha256:$(openssl dgst -sha256 -binary "$file" | xxd -p | tr -d '\n')"
fi
}
# Copy config.json if it exists and not already in cache
config_json_source="$folder_path/config.json"
config_json_dest="$cache_dir/config.json"
if [ -f "$config_json_source" ]; then
if [ ! -f "$config_json_dest" ]; then
echo "Copying config.json..."
# Add the uncompressed disk size annotation if disk.img exists and jq is available
if [ -n "$original_disk_size" ] && command -v jq &> /dev/null; then
echo "Adding uncompressed disk size annotation: $original_disk_size bytes"
jq --arg size "$original_disk_size" '.annotations += {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_source" > "$config_json_dest" || \
(echo "jq failed, copying original config.json"; cp "$config_json_source" "$config_json_dest") # Fallback to copy if jq fails
else
cp "$config_json_source" "$config_json_dest"
fi
# Copy config.json as is - we'll add annotations later
cp "$config_json_source" "$config_json_dest"
fi
fi
if [ -f "$config_json_dest" ]; then
@@ -207,115 +227,363 @@ if [ -f "$disk_img_orig" ]; then
echo " Actual disk usage: $((real_size_bytes / 1073741824)) GB"
echo " Sparseness ratio: ${sparseness_ratio}:1"
# Check if we already have compressed files in the cache
compressed_disk_img="disk.img${compressed_ext}"
already_compressed=false
if [ -f "$cache_dir/$compressed_disk_img" ]; then
already_compressed=true
echo "Using existing compressed file from cache: $compressed_disk_img"
elif ls "$cache_dir"/disk.img${compressed_ext}.part.* 1>/dev/null 2>&1; then
already_compressed=true
echo "Using existing compressed parts from cache"
# If we have config.json, update it with the uncompressed disk size annotation
if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then
echo "Adding uncompressed disk size annotation: $original_disk_size bytes"
jq --arg size "$original_disk_size" '.annotations = (.annotations // {}) + {"com.trycua.lume.disk.uncompressed_size": $size}' "$config_json_dest" > "$config_json_dest.tmp"
mv "$config_json_dest.tmp" "$config_json_dest"
fi
# Only compress if not already compressed in cache
if [ "$already_compressed" = false ]; then
# Check for free disk space before compression
avail_space=$(df -k "$cache_dir" | tail -1 | awk '{print $4}')
avail_space_bytes=$((avail_space * 1024))
# Assume compressed size is roughly 30% of real size as a safe estimate
estimated_compressed=$((real_size_bytes * 30 / 100))
if [ "$avail_space_bytes" -lt "$estimated_compressed" ]; then
echo "WARNING: Possibly insufficient disk space for compression!"
echo "Available: $((avail_space_bytes / 1073741824)) GB, Estimated required: $((estimated_compressed / 1073741824)) GB"
read -p "Continue anyway? (y/n) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
echo "Exiting. Free up some space and try again."
exit 1
fi
fi
# --- Compression Step ---
echo "Compressing $disk_img_orig with Apple Archive..."
# Apple Archive compression
echo "Starting compression with Apple Archive (showing output file growth)..."
compression_tool -encode -i "$disk_img_orig" -o "$compressed_disk_img" -a lzfse &
COMP_PID=$!
sleep 1 # Give compression a moment to start
# Display progress based on output file growth
while kill -0 $COMP_PID 2>/dev/null; do
if [ -f "$compressed_disk_img" ]; then
current_size=$(stat -f%z "$compressed_disk_img" 2>/dev/null || echo 0)
percent=$(echo "scale=2; 100 * $current_size / $original_disk_size" | bc)
echo -ne "Progress: $percent% ($(du -h "$compressed_disk_img" 2>/dev/null | cut -f1 || echo "0"))\r"
else
echo -ne "Preparing compression...\r"
fi
sleep 2
done
wait $COMP_PID
echo -e "\nCompression complete!"
compressed_size=$(stat -f%z "$compressed_disk_img")
echo "Compressed disk image size: $(du -h "$compressed_disk_img" | cut -f1)"
echo "Compression ratio: $(echo "scale=2; $compressed_size * 100 / $original_disk_size" | bc)%"
# --- End Compression Step ---
# Check if splitting is needed based on *compressed* size
if [ $compressed_size -gt 524288000 ]; then # 500MB threshold
echo "Splitting compressed file into chunks of $chunk_size..."
pv "$compressed_disk_img" | split -b "$chunk_size" - "$compressed_disk_img.part."
rm -f "$compressed_disk_img" # Remove the unsplit compressed file
# Verify that parts were created
echo "Verifying split parts..."
ls -la "$cache_dir"/disk.img${compressed_ext}.part.*
fi
# Create a temporary directory for disk processing
tmp_dir="$cache_dir/tmp_processing"
mkdir -p "$tmp_dir"
# Split the disk image into chunks first (before compression)
split_parts_dir="$tmp_dir/split_parts"
mkdir -p "$split_parts_dir"
# Check if we already have split parts
if [ -z "$(ls -A "$split_parts_dir" 2>/dev/null)" ]; then
echo "Splitting disk image into chunks of $chunk_size..."
cd "$split_parts_dir"
pv "$disk_img_orig" | split -b "$chunk_size" - "chunk."
cd "$cache_dir"
else
echo "Using existing compressed/split files from cache"
echo "Using existing split chunks from previous run"
fi
# --- Adjust part processing ---
echo "Looking for compressed files in $cache_dir..."
# List all files in the cache directory for debugging
ls -la "$cache_dir"
# Process each chunk (compress, calculate digest, etc.)
compressed_parts_dir="$tmp_dir/compressed_parts"
mkdir -p "$compressed_parts_dir"
if [ -f "$cache_dir/$compressed_disk_img" ]; then
echo "Found single compressed file: $compressed_disk_img"
# Add the single compressed file to the list
files+=("$compressed_disk_img:${oci_layer_media_type}")
else
# Look for split parts
part_files=($(ls "$cache_dir"/disk.img${compressed_ext}.part.* 2>/dev/null || echo ""))
if [ ${#part_files[@]} -gt 0 ]; then
echo "Found ${#part_files[@]} split parts"
parts_files=()
part_num=0
# Store layer information in an array
layers=()
part_num=0
total_parts=$(ls "$split_parts_dir"/chunk.* | wc -l)
for chunk_file in "$split_parts_dir"/chunk.*; do
part_basename=$(basename "$chunk_file")
part_num=$((part_num + 1))
compressed_file="$compressed_parts_dir/${part_basename}${compressed_ext}"
if [ ! -f "$compressed_file" ]; then
echo "Compressing chunk $part_num of $total_parts: $part_basename"
for part in "${part_files[@]}"; do
part_num=$((part_num + 1))
part_basename=$(basename "$part")
parts_files+=("$part_basename:${oci_layer_media_type};part.number=$part_num;part.total=${#part_files[@]}")
echo "Part $part_num: $(du -h "$part" | cut -f1)"
# Calculate uncompressed content digest before compression
uncompressed_digest=$(calculate_sha256 "$chunk_file")
# Get uncompressed size
uncompressed_size=$(stat -f%z "$chunk_file")
# Compress the chunk with LZ4
lz4 -9 "$chunk_file" "$compressed_file"
# Get compressed size
compressed_size=$(stat -f%z "$compressed_file")
echo "Chunk $part_num: Original size: $(du -h "$chunk_file" | cut -f1), Compressed: $(du -h "$compressed_file" | cut -f1)"
else
echo "Using existing compressed chunk $part_num of $total_parts"
# Need to calculate these values for existing files
uncompressed_digest=$(calculate_sha256 "$chunk_file")
uncompressed_size=$(stat -f%z "$chunk_file")
compressed_size=$(stat -f%z "$compressed_file")
fi
# Store layer information
layer_info="$compressed_file:${oci_layer_media_type};uncompressed_size=$uncompressed_size;uncompressed_digest=$uncompressed_digest;part.number=$part_num;part.total=$total_parts"
layers+=("$layer_info")
done
# Generate the files array for ORAS push
for layer_info in "${layers[@]}"; do
files+=("$layer_info")
done
# --- Reassembly in dry-run mode ---
if [[ "$reassemble" = true ]]; then
echo "=== REASSEMBLY MODE ==="
echo "Reassembling chunks to verify integrity..."
# Create a directory for reassembly
reassembly_dir="$cache_dir/reassembly"
mkdir -p "$reassembly_dir"
# Prepare the reassembled file - create a properly sized sparse file first
reassembled_file="$reassembly_dir/reassembled_disk.img"
if [ -f "$reassembled_file" ]; then
echo "Removing previous reassembled file..."
rm -f "$reassembled_file"
fi
# Get the original disk size from config annotation or directly from image
if [ -f "$config_json_dest" ] && command -v jq &> /dev/null; then
config_size=$(jq -r '.annotations."com.trycua.lume.disk.uncompressed_size" // empty' "$config_json_dest")
if [ -n "$config_size" ]; then
original_disk_size_bytes=$config_size
echo "Using uncompressed size from config: $original_disk_size_bytes bytes"
fi
fi
# Create a sparse file of the exact original size
echo "Pre-allocating sparse file of $(du -h "$disk_img_orig" | cut -f1)..."
dd if=/dev/zero of="$reassembled_file" bs=1 count=0 seek=$original_disk_size
# Make sure filesystem recognizes this as a sparse file
if [[ "$OSTYPE" == "darwin"* ]]; then
# On macOS, we can use a better sparse file creation method if mkfile is available
if command -v mkfile &> /dev/null; then
rm -f "$reassembled_file"
mkfile -n ${original_disk_size}b "$reassembled_file"
echo "Created sparse file using mkfile"
fi
else
# On Linux systems, ensure sparseness with truncate if available
if command -v truncate &> /dev/null; then
rm -f "$reassembled_file"
truncate -s $original_disk_size "$reassembled_file"
echo "Created sparse file using truncate"
fi
fi
# Create an offset tracker to keep track of where each chunk should go
current_offset=0
# Decompress each chunk and write it at the correct offset
for ((i=1; i<=total_parts; i++)); do
# Find the chunk file for part number i
chunk_pattern=""
chunk_uncompressed_size=""
for layer_info in "${layers[@]}"; do
if [[ "$layer_info" == *";part.number=$i;"* ]]; then
chunk_pattern="${layer_info%%:*}"
# Extract the uncompressed size from metadata
if [[ "$layer_info" =~ uncompressed_size=([0-9]+) ]]; then
chunk_uncompressed_size="${BASH_REMATCH[1]}"
fi
break
fi
done
files+=("${parts_files[@]}")
if [ -z "$chunk_pattern" ]; then
echo "Error: Could not find chunk for part $i"
exit 1
fi
echo "Processing part $i/$total_parts: $(basename "$chunk_pattern") at offset $current_offset..."
# Create temp decompressed file
temp_decompressed="$reassembly_dir/temp_part_$i"
lz4 -d -f "$chunk_pattern" "$temp_decompressed" || {
echo "Error decompressing part $i"
exit 1
}
# Check if this chunk is all zeros (sparse data)
# Only check the first 1MB for efficiency
is_likely_sparse=false
if command -v hexdump &> /dev/null; then
# Use hexdump to check a sample of the file for non-zero content
sparse_check=$(hexdump -n 1048576 -v "$temp_decompressed" | grep -v "0000 0000 0000 0000 0000 0000 0000 0000" | head -n 1)
if [ -z "$sparse_check" ]; then
echo "Chunk appears to be all zeros (sparse data)"
is_likely_sparse=true
fi
fi
# Use dd to write the chunk at the correct offset with sparse file handling
if [ "$is_likely_sparse" = true ]; then
# For sparse chunks, we don't need to write anything - leave as zeros
echo "Skipping write for all-zero chunk (preserving sparseness)"
elif [[ "$OSTYPE" == "darwin"* ]]; then
# macOS dd doesn't support conv=sparse, use standard approach
dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=notrunc seek=$((current_offset / 1024 / 1024)) status=progress || {
echo "Error writing part $i at offset $current_offset"
exit 1
}
else
# On Linux, use conv=sparse to preserve sparseness during the write
dd if="$temp_decompressed" of="$reassembled_file" bs=1M conv=sparse,notrunc seek=$((current_offset / 1024 / 1024)) status=progress || {
echo "Error writing part $i at offset $current_offset"
exit 1
}
fi
# Clean up the temporary file
rm -f "$temp_decompressed"
# Update the offset for the next chunk
current_offset=$((current_offset + chunk_uncompressed_size))
done
# After all chunks are processed, ensure sparseness is preserved
if command -v cp &> /dev/null && [[ "$OSTYPE" == "darwin"* ]]; then
echo "Copying disk image to maintain sparseness..."
final_sparse_file="$reassembly_dir/final_disk.img"
rm -f "$final_sparse_file" 2>/dev/null
# On macOS, use cp with the clone flag to preserve sparseness
cp -c "$reassembled_file" "$final_sparse_file"
# Use the sparse-optimized file for verification
echo "Using sparse-optimized copy for verification"
mv "$final_sparse_file" "$reassembled_file"
sync
elif command -v cp &> /dev/null && command -v file &> /dev/null; then
# For Linux systems
echo "Optimizing file sparseness..."
final_sparse_file="$reassembly_dir/final_disk.img"
rm -f "$final_sparse_file" 2>/dev/null
# Use cp --sparse=always on Linux
cp --sparse=always "$reassembled_file" "$final_sparse_file"
# Use the sparse-optimized file for verification
echo "Using sparse-optimized copy for verification"
mv "$final_sparse_file" "$reassembled_file"
sync
fi
# Make sure to sync to disk
sync
# Calculate digests for comparison
echo "Verifying reassembled file..."
original_digest=$(calculate_sha256 "$disk_img_orig")
reassembled_digest=$(calculate_sha256 "$reassembled_file")
# Compare the original and reassembled file sizes
original_size=$(stat -f%z "$disk_img_orig")
reassembled_size=$(stat -f%z "$reassembled_file")
echo "Results:"
echo " Original size: $(du -h "$disk_img_orig" | cut -f1) ($original_size bytes)"
echo " Reassembled size: $(du -h "$reassembled_file" | cut -f1) ($reassembled_size bytes)"
echo " Original digest: ${original_digest#sha256:}"
echo " Reassembled digest: ${reassembled_digest#sha256:}"
# Check if the disk is sparse
original_apparent_size=$(du -h "$disk_img_orig" | cut -f1)
original_actual_size=$(du -sh "$disk_img_orig" | cut -f1)
reassembled_apparent_size=$(du -h "$reassembled_file" | cut -f1)
reassembled_actual_size=$(du -sh "$reassembled_file" | cut -f1)
echo " Original: Apparent size: $original_apparent_size, Actual disk usage: $original_actual_size"
echo " Reassembled: Apparent size: $reassembled_apparent_size, Actual disk usage: $reassembled_actual_size"
if [ "$original_digest" = "$reassembled_digest" ]; then
echo "✅ VERIFICATION SUCCESSFUL: Files are identical"
else
echo "ERROR: No compressed files found in cache directory: $cache_dir"
echo "Contents of cache directory:"
find "$cache_dir" -type f | sort
exit 1
echo "❌ VERIFICATION FAILED: Files differ"
if [ "$original_size" != "$reassembled_size" ]; then
echo " Size mismatch: Original $original_size bytes, Reassembled $reassembled_size bytes"
fi
# Try to identify where they differ
echo "Attempting to identify differences..."
if command -v cmp &> /dev/null; then
cmp_output=$(cmp -l "$disk_img_orig" "$reassembled_file" 2>&1 | head -5)
if [[ "$cmp_output" == *"differ"* ]]; then
echo " First few differences:"
echo "$cmp_output"
fi
fi
# Check if the virtual machine will still boot despite differences
echo "NOTE: This might be a sparse file issue. The content may be identical, but sparse regions"
echo " may be handled differently between the original and reassembled files."
# Calculate a percentage comparison of used blocks
# This helps determine if the sparse issues are severe or minor
original_used_kb=$(du -k "$disk_img_orig" | cut -f1)
reassembled_used_kb=$(du -k "$reassembled_file" | cut -f1)
# Calculate percentage difference in used space
if [ "$original_used_kb" -ne 0 ]; then
diff_percentage=$(echo "scale=2; ($reassembled_used_kb - $original_used_kb) * 100 / $original_used_kb" | bc)
echo " Disk usage difference: $diff_percentage% ($reassembled_used_kb KB vs $original_used_kb KB)"
# If reassembled is much smaller, this likely indicates sparse regions weren't preserved
if (( $(echo "$diff_percentage < -40" | bc -l) )); then
echo " ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference)."
echo " This indicates sparse regions weren't properly preserved and may affect VM functionality."
echo " The VM might boot but could be missing applications or data."
elif (( $(echo "$diff_percentage < -10" | bc -l) )); then
echo " ⚠️ WARNING: Reassembled disk uses less space (10-40% difference)."
echo " Some sparse regions may not be properly preserved but VM might still function correctly."
elif (( $(echo "$diff_percentage > 10" | bc -l) )); then
echo " ⚠️ WARNING: Reassembled disk uses more space (>10% difference)."
echo " This is unusual and may indicate improper sparse file handling."
else
echo " ✓ Disk usage difference is minimal (<10%). VM likely to function correctly."
fi
fi
fi
echo "Reassembled file is available at: $reassembled_file"
# If verification failed and difference is significant, try a direct copy as fallback
if [ "$original_digest" != "$reassembled_digest" ] && [ -n "$diff_percentage" ] && (( $(echo "$diff_percentage < -20" | bc -l) )); then
echo
echo "===== ATTEMPTING RECOVERY ACTION ====="
echo "Since verification failed with significant disk usage difference,"
echo "trying direct copy of disk image as a fallback method."
echo
fallback_file="$reassembly_dir/fallback_disk.img"
echo "Creating fallback disk image at: $fallback_file"
# Use rsync with sparse option if available
if command -v rsync &> /dev/null; then
echo "Using rsync with sparse option for direct copy..."
rsync -aS --progress "$disk_img_orig" "$fallback_file"
else
# Direct cp with sparse option if available
if [[ "$OSTYPE" == "darwin"* ]]; then
echo "Using cp -c (clone) for direct copy..."
cp -c "$disk_img_orig" "$fallback_file"
else
echo "Using cp --sparse=always for direct copy..."
cp --sparse=always "$disk_img_orig" "$fallback_file"
fi
fi
echo "Direct copy completed. You may want to try using this fallback disk image"
echo "instead if the reassembled one has issues: $fallback_file"
fi
fi
# --- Push Logic ---
if [[ "$dry_run" = true ]]; then
echo "=== DRY RUN MODE ==="
echo "The following files would be pushed to the registry:"
for file_info in "${files[@]}"; do
file_path="${file_info%%:*}"
file_metadata="${file_info#*:}"
file_size=$(du -h "$file_path" | cut -f1)
echo " - $file_path ($file_size) with metadata: $file_metadata"
done
if [[ -n "$image_versions" ]]; then
echo "Would push to the following versions:"
IFS=',' read -ra versions <<< "$image_versions"
for version in "${versions[@]}"; do
version=$(echo "$version" | xargs)
if [[ -z "$version" ]]; then continue; fi
echo " - ghcr.io/$organization/$image_name:$version"
done
else
echo "No versions specified for dry run. Processing completed successfully."
fi
echo "All processing tasks completed. No actual push performed."
echo "Cache directory: $cache_dir"
exit 0
fi
# Regular push logic (non-dry-run)
push_pids=()
IFS=',' read -ra versions <<< "$image_versions"
for version in "${versions[@]}"; do
@@ -368,6 +636,25 @@ if [ -f "$disk_img_orig" ]; then
else
echo "Warning: $disk_img_orig not found."
# If in dry run mode, just show what would happen
if [[ "$dry_run" = true ]]; then
echo "=== DRY RUN MODE ==="
if [ ${#files[@]} -gt 0 ]; then
echo "The following non-disk files would be pushed:"
for file_info in "${files[@]}"; do
file_path="${file_info%%:*}"
file_metadata="${file_info#*:}"
file_size=$(du -h "$file_path" | cut -f1)
echo " - $file_path ($file_size) with metadata: $file_metadata"
done
else
echo "No files found to push."
fi
echo "All processing tasks completed. No actual push performed."
exit 0
fi
# Push only config/nvram if they exist
if [ ${#files[@]} -gt 0 ]; then
echo "Pushing non-disk files..."
@@ -427,6 +714,11 @@ else
fi
fi
# Skip final status check in dry-run mode
if [[ "$dry_run" = true ]]; then
exit 0
fi
# Determine final status based on the success check *before* potential cleanup
echo # Add a newline for better readability
if [ "$all_versions_pushed" = true ]; then
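
Taken together, the new `--dry-run`/`--reassemble` flags let the whole split, compress, and reassemble pipeline be exercised locally without credentials or registry access. A sketch of such a verification run, with the script path and VM folder left as placeholders:
```bash
# Chunk the disk image, compress each chunk with lz4, then reassemble and
# compare digests; nothing is uploaded and GITHUB_TOKEN is not required
./publish.sh \
  --folder-path <path-to-vm-folder> \
  --chunk-size 512M \
  --dry-run --reassemble
```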

View File

@@ -0,0 +1,74 @@
import ArgumentParser
import Foundation
struct Push: AsyncParsableCommand {
static let configuration = CommandConfiguration(
abstract: "Push a macOS VM to GitHub Container Registry"
)
@Argument(help: "Name of the VM to push")
var name: String
@Argument(help: "Image tag to push (format: name:tag)")
var image: String
@Option(parsing: .upToNextOption, help: "Additional tags to push the same image to")
var additionalTags: [String] = []
@Option(help: "Container registry to push to. Defaults to ghcr.io")
var registry: String = "ghcr.io"
@Option(help: "Organization to push to. Defaults to trycua")
var organization: String = "trycua"
@Option(name: .customLong("storage"), help: "VM storage location to use")
var storage: String?
@Option(help: "Chunk size for large files in MB. Defaults to 512.")
var chunkSizeMb: Int = 512
@Flag(name: .long, help: "Enable verbose logging")
var verbose: Bool = false
@Flag(name: .long, help: "Prepare files without uploading to registry")
var dryRun: Bool = false
@Flag(name: .long, help: "In dry-run mode, also reassemble chunks to verify integrity")
var reassemble: Bool = false
init() {}
@MainActor
func run() async throws {
let controller = LumeController()
// Parse primary image name and tag
let components = image.split(separator: ":")
guard components.count == 2, let primaryTag = components.last else {
throw ValidationError("Invalid primary image format. Expected format: name:tag")
}
let imageName = String(components.first!)
// Combine primary and additional tags, ensuring uniqueness
var allTags: Swift.Set<String> = []
allTags.insert(String(primaryTag))
allTags.formUnion(additionalTags)
guard !allTags.isEmpty else {
throw ValidationError("At least one tag must be provided.")
}
try await controller.pushImage(
name: name,
imageName: imageName, // Pass base image name
tags: Array(allTags), // Pass array of all unique tags
registry: registry,
organization: organization,
storage: storage,
chunkSizeMb: chunkSizeMb,
verbose: verbose,
dryRun: dryRun,
reassemble: reassemble
)
}
}
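
Assuming this subcommand is exposed as `lume push` (as documented in the README section above), the dry-run verification path can be driven from the CLI like so; the names are illustrative:
```bash
# Chunk and compress the disk locally, then reassemble and compare digests;
# with --dry-run no registry token is requested and nothing is uploaded
lume push my-vm my-image:latest --dry-run --reassemble --verbose
```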

View File

@@ -2,8 +2,56 @@ import ArgumentParser
import Darwin
import Foundation
import Swift
import CommonCrypto
import Compression // LZ4 compression support for disk chunks
// Extension to calculate SHA256 hash
extension Data {
func sha256String() -> String {
let hash = self.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> [UInt8] in
var hash = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
CC_SHA256(bytes.baseAddress, CC_LONG(self.count), &hash)
return hash
}
return hash.map { String(format: "%02x", $0) }.joined()
}
}
// Push-related errors
enum PushError: Error {
case uploadInitiationFailed
case blobUploadFailed
case manifestPushFailed
case authenticationFailed
case missingToken
case invalidURL
case lz4NotFound
}
struct ChunkMetadata: Codable {
let uncompressedDigest: String
let uncompressedSize: UInt64
let compressedDigest: String
let compressedSize: Int
}
// Describes an OCI manifest layer, including optional uncompressed size/digest metadata
struct OCIManifestLayer {
let mediaType: String
let size: Int
let digest: String
let uncompressedSize: UInt64?
let uncompressedContentDigest: String?
init(mediaType: String, size: Int, digest: String, uncompressedSize: UInt64? = nil, uncompressedContentDigest: String? = nil) {
self.mediaType = mediaType
self.size = size
self.digest = digest
self.uncompressedSize = uncompressedSize
self.uncompressedContentDigest = uncompressedContentDigest
}
}
// Define struct to decode relevant parts of config.json
struct OCIConfig: Codable {
struct Annotations: Codable {
let uncompressedSize: String? // Use optional String
@@ -274,6 +322,43 @@ struct DownloadStats {
}
}
// Renamed struct
struct UploadStats {
let totalBytes: Int64
let uploadedBytes: Int64 // Renamed
let elapsedTime: TimeInterval
let averageSpeed: Double
let peakSpeed: Double
func formattedSummary() -> String {
let bytesStr = ByteCountFormatter.string(fromByteCount: uploadedBytes, countStyle: .file)
let avgSpeedStr = formatSpeed(averageSpeed)
let peakSpeedStr = formatSpeed(peakSpeed)
let timeStr = formatTime(elapsedTime)
return """
Upload Statistics:
- Total uploaded: \(bytesStr)
- Elapsed time: \(timeStr)
- Average speed: \(avgSpeedStr)
- Peak speed: \(peakSpeedStr)
"""
}
private func formatSpeed(_ bytesPerSecond: Double) -> String {
let formatter = ByteCountFormatter()
formatter.countStyle = .file
let bytesStr = formatter.string(fromByteCount: Int64(bytesPerSecond))
return "\(bytesStr)/s"
}
private func formatTime(_ seconds: TimeInterval) -> String {
let hours = Int(seconds) / 3600
let minutes = (Int(seconds) % 3600) / 60
let secs = Int(seconds) % 60
if hours > 0 { return String(format: "%d hours, %d minutes, %d seconds", hours, minutes, secs) }
else if minutes > 0 { return String(format: "%d minutes, %d seconds", minutes, secs) }
else { return String(format: "%d seconds", secs) }
}
}
actor TaskCounter {
private var count: Int = 0
@@ -285,12 +370,17 @@ actor TaskCounter {
class ImageContainerRegistry: @unchecked Sendable {
private let registry: String
private let organization: String
private let progress = ProgressTracker()
private let downloadProgress = ProgressTracker() // Renamed for clarity
private let uploadProgress = UploadProgressTracker() // Added upload tracker
private let cacheDirectory: URL
private let downloadLock = NSLock()
private var activeDownloads: [String] = []
private let cachingEnabled: Bool
// Constants for zero-skipping write logic
private static let holeGranularityBytes = 4 * 1024 * 1024 // 4MB block size for checking zeros
private static let zeroChunk = Data(count: holeGranularityBytes)
// Add the createProgressBar function here as a private method
private func createProgressBar(progress: Double, width: Int = 30) -> String {
let completedWidth = Int(progress * Double(width))
@@ -613,7 +703,7 @@ class ImageContainerRegistry: @unchecked Sendable {
$0.mediaType != "application/vnd.oci.empty.v1+json"
}.count
let totalSize = manifest.layers.reduce(0) { $0 + Int64($1.size) }
await progress.setTotal(totalSize, files: totalFiles)
await downloadProgress.setTotal(totalSize, files: totalFiles)
// Process layers with limited concurrency
Logger.info("Processing Image layers")
@@ -671,7 +761,7 @@ class ImageContainerRegistry: @unchecked Sendable {
// Still need to account for progress
group.addTask { [self] in
await counter.increment()
await progress.addProgress(Int64(size))
await downloadProgress.addProgress(Int64(size))
await counter.decrement()
return Int64(size)
}
@@ -686,7 +776,7 @@ class ImageContainerRegistry: @unchecked Sendable {
if FileManager.default.fileExists(atPath: cachedLayer.path) {
try FileManager.default.copyItem(at: cachedLayer, to: partURL)
await progress.addProgress(Int64(size))
await downloadProgress.addProgress(Int64(size))
} else {
// Check if this layer is already being downloaded and we're not skipping cache
if isDownloading(digest) {
@@ -696,7 +786,7 @@ class ImageContainerRegistry: @unchecked Sendable {
{
try FileManager.default.copyItem(
at: cachedLayer, to: partURL)
await progress.addProgress(Int64(size))
await downloadProgress.addProgress(Int64(size))
return Int64(size)
}
}
@@ -711,7 +801,7 @@ class ImageContainerRegistry: @unchecked Sendable {
token: token,
to: partURL,
maxRetries: 5,
progress: progress,
progress: downloadProgress,
manifestId: manifestId
)
@@ -758,7 +848,7 @@ class ImageContainerRegistry: @unchecked Sendable {
if FileManager.default.fileExists(atPath: cachedLayer.path) {
try FileManager.default.copyItem(at: cachedLayer, to: outputURL)
await progress.addProgress(Int64(size))
await downloadProgress.addProgress(Int64(size))
} else {
// Check if this layer is already being downloaded and we're not skipping cache
if isDownloading(digest) {
@@ -767,7 +857,7 @@ class ImageContainerRegistry: @unchecked Sendable {
if FileManager.default.fileExists(atPath: cachedLayer.path) {
try FileManager.default.copyItem(
at: cachedLayer, to: outputURL)
await progress.addProgress(Int64(size))
await downloadProgress.addProgress(Int64(size))
return Int64(size)
}
}
@@ -782,7 +872,7 @@ class ImageContainerRegistry: @unchecked Sendable {
token: token,
to: outputURL,
maxRetries: 5,
progress: progress,
progress: downloadProgress,
manifestId: manifestId
)
@@ -808,7 +898,7 @@ class ImageContainerRegistry: @unchecked Sendable {
Logger.info("") // New line after progress
// Display download statistics
let stats = await progress.getDownloadStats()
let stats = await downloadProgress.getDownloadStats()
Logger.info(stats.formattedSummary())
// Parse config.json to get uncompressed size *before* reassembly
@@ -1866,17 +1956,52 @@ class ImageContainerRegistry: @unchecked Sendable {
}
private func getToken(repository: String) async throws -> String {
let url = URL(string: "https://\(self.registry)/token")!
.appending(queryItems: [
URLQueryItem(name: "service", value: self.registry),
URLQueryItem(name: "scope", value: "repository:\(repository):pull"),
])
let encodedRepo = repository.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? repository
// Request both pull and push scope for uploads
let url = URL(string: "https://\(self.registry)/token?scope=repository:\(encodedRepo):pull,push&service=\(self.registry)")!
var request = URLRequest(url: url)
request.httpMethod = "GET" // Token endpoint uses GET
request.setValue("application/json", forHTTPHeaderField: "Accept")
let (data, _) = try await URLSession.shared.data(from: url)
let json = try JSONSerialization.jsonObject(with: data) as? [String: Any]
guard let token = json?["token"] as? String else {
throw PullError.tokenFetchFailed
// *** Add Basic Authentication Header if credentials exist ***
let (username, password) = getCredentialsFromEnvironment()
if let username = username, let password = password, !username.isEmpty, !password.isEmpty {
let authString = "\(username):\(password)"
if let authData = authString.data(using: .utf8) {
let base64Auth = authData.base64EncodedString()
request.setValue("Basic \(base64Auth)", forHTTPHeaderField: "Authorization")
Logger.info("Adding Basic Authentication header to token request.")
} else {
Logger.error("Failed to encode credentials for Basic Auth.")
}
} else {
Logger.info("No credentials found in environment for token request.")
// Allow anonymous request for pull scope, but push scope likely requires auth
}
// *** End Basic Auth addition ***
let (data, response) = try await URLSession.shared.data(for: request)
// Check response status code *before* parsing JSON
guard let httpResponse = response as? HTTPURLResponse else {
throw PushError.authenticationFailed // Or a more generic network error
}
guard httpResponse.statusCode == 200 else {
// Log detailed error including status code and potentially response body
let responseBody = String(data: data, encoding: .utf8) ?? "(Could not decode body)"
Logger.error("Token request failed with status code: \(httpResponse.statusCode). Response: \(responseBody)")
// Throw specific error based on status if needed (e.g., 401 for unauthorized)
throw PushError.authenticationFailed
}
let jsonResponse = try JSONSerialization.jsonObject(with: data) as? [String: Any]
guard let token = jsonResponse?["token"] as? String ?? jsonResponse?["access_token"] as? String else {
Logger.error("Token not found in registry response.")
throw PushError.missingToken
}
return token
}
@@ -2618,4 +2743,746 @@ class ImageContainerRegistry: @unchecked Sendable {
return 0
}
// New push method
public func push(
vmDirPath: String,
imageName: String,
tags: [String],
chunkSizeMb: Int = 512,
verbose: Bool = false,
dryRun: Bool = false,
reassemble: Bool = false
) async throws {
Logger.info(
"Pushing VM to registry",
metadata: [
"vm_path": vmDirPath,
"imageName": imageName,
"tags": "\(tags.joined(separator: ", "))", // Log all tags
"registry": registry,
"organization": organization,
"chunk_size": "\(chunkSizeMb)MB",
"dry_run": "\(dryRun)",
"reassemble": "\(reassemble)"
])
// Remove tag parsing here, imageName is now passed directly
// let components = image.split(separator: ":") ...
// let imageTag = String(tag)
// Get authentication token only if not in dry-run mode
var token: String = ""
if !dryRun {
Logger.info("Getting registry authentication token")
token = try await getToken(repository: "\(self.organization)/\(imageName)")
} else {
Logger.info("Dry run mode: skipping authentication token request")
}
// Create working directory inside the VM folder for caching/resuming
let workDir = URL(fileURLWithPath: vmDirPath).appendingPathComponent(".lume_push_cache")
try FileManager.default.createDirectory(at: workDir, withIntermediateDirectories: true)
Logger.info("Using push cache directory: \(workDir.path)")
// Get VM files that need to be pushed using vmDirPath
let diskPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("disk.img")
let configPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("config.json")
let nvramPath = URL(fileURLWithPath: vmDirPath).appendingPathComponent("nvram.bin")
var layers: [OCIManifestLayer] = []
var uncompressedDiskSize: UInt64? = nil
// Process config.json
let cachedConfigPath = workDir.appendingPathComponent("config.json")
var configDigest: String? = nil
var configSize: Int? = nil
if FileManager.default.fileExists(atPath: cachedConfigPath.path) {
Logger.info("Using cached config.json")
do {
let configData = try Data(contentsOf: cachedConfigPath)
configDigest = "sha256:" + configData.sha256String()
configSize = configData.count
// Try to get uncompressed disk size from cached config
if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) {
uncompressedDiskSize = vmConfig.diskSize
Logger.info("Found disk size in cached config: \(uncompressedDiskSize ?? 0) bytes")
}
} catch {
Logger.error("Failed to read cached config.json: \(error). Will re-process.")
// Force re-processing by leaving configDigest nil
}
} else if FileManager.default.fileExists(atPath: configPath.path) {
Logger.info("Processing config.json")
let configData = try Data(contentsOf: configPath)
configDigest = "sha256:" + configData.sha256String()
configSize = configData.count
try configData.write(to: cachedConfigPath) // Save to cache
// Try to get uncompressed disk size from original config
if let vmConfig = try? JSONDecoder().decode(VMConfig.self, from: configData) {
uncompressedDiskSize = vmConfig.diskSize
Logger.info("Found disk size in config: \(uncompressedDiskSize ?? 0) bytes")
}
}
if var digest = configDigest, let size = configSize { // Use 'var' to modify if uploaded
if !dryRun {
// Upload only if not in dry-run mode and blob doesn't exist
if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) {
Logger.info("Uploading config.json blob")
let configData = try Data(contentsOf: cachedConfigPath) // Read from cache for upload
digest = try await uploadBlobFromData(
repository: "\(self.organization)/\(imageName)",
data: configData,
token: token
)
} else {
Logger.info("Config blob already exists on registry")
}
}
// Add config layer
layers.append(OCIManifestLayer(
mediaType: "application/vnd.oci.image.config.v1+json",
size: size,
digest: digest
))
}
// Process nvram.bin
let cachedNvramPath = workDir.appendingPathComponent("nvram.bin")
var nvramDigest: String? = nil
var nvramSize: Int? = nil
if FileManager.default.fileExists(atPath: cachedNvramPath.path) {
Logger.info("Using cached nvram.bin")
do {
let nvramData = try Data(contentsOf: cachedNvramPath)
nvramDigest = "sha256:" + nvramData.sha256String()
nvramSize = nvramData.count
} catch {
Logger.error("Failed to read cached nvram.bin: \(error). Will re-process.")
}
} else if FileManager.default.fileExists(atPath: nvramPath.path) {
Logger.info("Processing nvram.bin")
let nvramData = try Data(contentsOf: nvramPath)
nvramDigest = "sha256:" + nvramData.sha256String()
nvramSize = nvramData.count
try nvramData.write(to: cachedNvramPath) // Save to cache
}
if var digest = nvramDigest, let size = nvramSize { // Use 'var'
if !dryRun {
// Upload only if not in dry-run mode and blob doesn't exist
if !(try await blobExists(repository: "\(self.organization)/\(imageName)", digest: digest, token: token)) {
Logger.info("Uploading nvram.bin blob")
let nvramData = try Data(contentsOf: cachedNvramPath) // Read from cache
digest = try await uploadBlobFromData(
repository: "\(self.organization)/\(imageName)",
data: nvramData,
token: token
)
} else {
Logger.info("NVRAM blob already exists on registry")
}
}
// Add nvram layer
layers.append(OCIManifestLayer(
mediaType: "application/octet-stream",
size: size,
digest: digest
))
}
// Process disk.img
if FileManager.default.fileExists(atPath: diskPath.path) {
let diskAttributes = try FileManager.default.attributesOfItem(atPath: diskPath.path)
let diskSize = diskAttributes[.size] as? UInt64 ?? 0
let actualDiskSize = uncompressedDiskSize ?? diskSize
Logger.info("Processing disk.img in chunks", metadata: ["disk_path": diskPath.path, "disk_size": "\(diskSize) bytes", "actual_size": "\(actualDiskSize) bytes", "chunk_size": "\(chunkSizeMb)MB"])
let chunksDir = workDir.appendingPathComponent("disk.img.parts")
try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true)
let chunkSizeBytes = chunkSizeMb * 1024 * 1024
let totalChunks = Int((diskSize + UInt64(chunkSizeBytes) - 1) / UInt64(chunkSizeBytes))
Logger.info("Splitting disk into \(totalChunks) chunks")
let fileHandle = try FileHandle(forReadingFrom: diskPath)
defer { try? fileHandle.close() }
var pushedDiskLayers: [(index: Int, layer: OCIManifestLayer)] = []
var diskChunks: [(index: Int, path: URL, digest: String)] = []
try await withThrowingTaskGroup(of: (Int, OCIManifestLayer, URL, String).self) { group in
let maxConcurrency = 4
for chunkIndex in 0..<totalChunks {
if chunkIndex >= maxConcurrency { if let res = try await group.next() { pushedDiskLayers.append((res.0, res.1)); diskChunks.append((res.0, res.2, res.3)) } }
group.addTask { [token, verbose, dryRun, organization, imageName] in
let chunkIndex = chunkIndex
let chunkPath = chunksDir.appendingPathComponent("chunk.\(chunkIndex)")
let metadataPath = chunksDir.appendingPathComponent("chunk_metadata.\(chunkIndex).json")
var layer: OCIManifestLayer? = nil
var finalCompressedDigest: String? = nil
if FileManager.default.fileExists(atPath: metadataPath.path), FileManager.default.fileExists(atPath: chunkPath.path) {
do {
let metadataData = try Data(contentsOf: metadataPath)
let metadata = try JSONDecoder().decode(ChunkMetadata.self, from: metadataData)
Logger.info("Resuming chunk \(chunkIndex + 1)/\(totalChunks) from cache")
finalCompressedDigest = metadata.compressedDigest
if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: metadata.compressedDigest, token: token)) { Logger.info("Uploading cached chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: metadata.compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry") } }
layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: metadata.compressedSize, digest: metadata.compressedDigest, uncompressedSize: metadata.uncompressedSize, uncompressedContentDigest: metadata.uncompressedDigest)
} catch { Logger.info("Failed to load cached metadata/chunk for index \(chunkIndex): \(error). Re-processing."); finalCompressedDigest = nil; layer = nil }
}
if layer == nil {
Logger.info("Processing chunk \(chunkIndex + 1)/\(totalChunks)")
let localFileHandle = try FileHandle(forReadingFrom: diskPath)
defer { try? localFileHandle.close() }
try localFileHandle.seek(toOffset: UInt64(chunkIndex * chunkSizeBytes))
let chunkData = try localFileHandle.read(upToCount: chunkSizeBytes) ?? Data()
let uncompressedSize = UInt64(chunkData.count)
let uncompressedDigest = "sha256:" + chunkData.sha256String()
let compressedData = try (chunkData as NSData).compressed(using: .lz4) as Data
let compressedSize = compressedData.count
let compressedDigest = "sha256:" + compressedData.sha256String()
try compressedData.write(to: chunkPath)
let metadata = ChunkMetadata(uncompressedDigest: uncompressedDigest, uncompressedSize: uncompressedSize, compressedDigest: compressedDigest, compressedSize: compressedSize)
let metadataData = try JSONEncoder().encode(metadata)
try metadataData.write(to: metadataPath)
finalCompressedDigest = compressedDigest
if !dryRun { if !(try await self.blobExists(repository: "\(organization)/\(imageName)", digest: compressedDigest, token: token)) { Logger.info("Uploading processed chunk \(chunkIndex + 1) blob"); _ = try await self.uploadBlobFromPath(repository: "\(organization)/\(imageName)", path: chunkPath, digest: compressedDigest, token: token) } else { Logger.info("Chunk \(chunkIndex + 1) blob already exists on registry (processed fresh)") } }
layer = OCIManifestLayer(mediaType: "application/octet-stream+lz4", size: compressedSize, digest: compressedDigest, uncompressedSize: uncompressedSize, uncompressedContentDigest: uncompressedDigest)
}
guard let finalLayer = layer, let finalDigest = finalCompressedDigest else { throw PushError.blobUploadFailed }
if verbose { Logger.info("Finished chunk \(chunkIndex + 1)/\(totalChunks)") }
return (chunkIndex, finalLayer, chunkPath, finalDigest)
}
}
for try await (index, layer, path, digest) in group { pushedDiskLayers.append((index, layer)); diskChunks.append((index, path, digest)) }
}
layers.append(contentsOf: pushedDiskLayers.sorted { $0.index < $1.index }.map { $0.layer })
diskChunks.sort { $0.index < $1.index }
Logger.info("All disk chunks processed successfully")
// --- Calculate Total Upload Size & Initialize Tracker ---
if !dryRun {
var totalUploadSizeBytes: Int64 = 0
var totalUploadFiles: Int = 0
// Add config size if it exists
if let size = configSize {
totalUploadSizeBytes += Int64(size)
totalUploadFiles += 1
}
// Add nvram size if it exists
if let size = nvramSize {
totalUploadSizeBytes += Int64(size)
totalUploadFiles += 1
}
// Add sizes of all compressed disk chunks
let allChunkSizes = diskChunks.compactMap { try? FileManager.default.attributesOfItem(atPath: $0.path.path)[.size] as? Int64 ?? 0 }
totalUploadSizeBytes += allChunkSizes.reduce(0, +)
totalUploadFiles += totalChunks // Use totalChunks calculated earlier
if totalUploadSizeBytes > 0 {
Logger.info("Initializing upload progress: \(totalUploadFiles) files, total size: \(ByteCountFormatter.string(fromByteCount: totalUploadSizeBytes, countStyle: .file))")
await uploadProgress.setTotal(totalUploadSizeBytes, files: totalUploadFiles)
// Print initial progress bar
print("[░░░░░░░░░░░░░░░░░░░░] 0% (0/\(totalUploadFiles)) | Initializing upload... | ETA: calculating... ")
fflush(stdout)
} else {
Logger.info("No files marked for upload.")
}
}
// --- End Size Calculation & Init ---
// Perform reassembly verification if requested in dry-run mode
if dryRun && reassemble {
Logger.info("=== REASSEMBLY MODE ===")
Logger.info("Reassembling chunks to verify integrity...")
let reassemblyDir = workDir.appendingPathComponent("reassembly")
try FileManager.default.createDirectory(at: reassemblyDir, withIntermediateDirectories: true)
let reassembledFile = reassemblyDir.appendingPathComponent("reassembled_disk.img")
Logger.info("Pre-allocating sparse file of \(ByteCountFormatter.string(fromByteCount: Int64(actualDiskSize), countStyle: .file))...")
if FileManager.default.fileExists(atPath: reassembledFile.path) { try FileManager.default.removeItem(at: reassembledFile) }
guard FileManager.default.createFile(atPath: reassembledFile.path, contents: nil) else { throw PushError.invalidURL }
let outputHandle = try FileHandle(forWritingTo: reassembledFile)
defer { try? outputHandle.close() }
try outputHandle.truncate(atOffset: actualDiskSize)
var currentOffset: UInt64 = 0
for (index, cachedChunkPath, _) in diskChunks {
Logger.info("Decompressing & writing part \(index + 1)/\(diskChunks.count): \(cachedChunkPath.lastPathComponent) at offset \(currentOffset)...")
let decompressedBytesWritten = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path, outputHandle: outputHandle, startOffset: currentOffset)
currentOffset += decompressedBytesWritten
}
Logger.info("Verifying reassembled file...")
let originalSize = diskSize
let originalDigest = calculateSHA256(filePath: diskPath.path)
let reassembledAttributes = try FileManager.default.attributesOfItem(atPath: reassembledFile.path)
let reassembledSize = reassembledAttributes[.size] as? UInt64 ?? 0
let reassembledDigest = calculateSHA256(filePath: reassembledFile.path)
let originalActualSize = getActualDiskUsage(path: diskPath.path)
let reassembledActualSize = getActualDiskUsage(path: reassembledFile.path)
Logger.info("Results:")
Logger.info(" Original size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)) (\(originalSize) bytes)")
Logger.info(" Reassembled size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)) (\(reassembledSize) bytes)")
Logger.info(" Original digest: \(originalDigest)")
Logger.info(" Reassembled digest: \(reassembledDigest)")
Logger.info(" Original: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(originalSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(originalActualSize), countStyle: .file))")
Logger.info(" Reassembled: Apparent size: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledSize), countStyle: .file)), Actual disk usage: \(ByteCountFormatter.string(fromByteCount: Int64(reassembledActualSize), countStyle: .file))")
if originalDigest == reassembledDigest {
Logger.info("✅ VERIFICATION SUCCESSFUL: Files are identical")
} else {
Logger.info("❌ VERIFICATION FAILED: Files differ")
if originalSize != reassembledSize {
Logger.info(" Size mismatch: Original \(originalSize) bytes, Reassembled \(reassembledSize) bytes")
}
Logger.info("Attempting to identify differences...")
Logger.info("NOTE: This might be a sparse file issue. The content may be identical, but sparse regions")
Logger.info(" may be handled differently between the original and reassembled files.")
if originalActualSize > 0 {
let diffPercentage = ((Double(reassembledActualSize) - Double(originalActualSize)) / Double(originalActualSize)) * 100.0
Logger.info(" Disk usage difference: \(String(format: "%.2f", diffPercentage))%")
if diffPercentage < -40 {
Logger.info(" ⚠️ WARNING: Reassembled disk uses significantly less space (>40% difference).")
Logger.info(" This indicates sparse regions weren't properly preserved and may affect VM functionality.")
} else if diffPercentage < -10 {
Logger.info(" ⚠️ WARNING: Reassembled disk uses less space (10-40% difference).")
Logger.info(" Some sparse regions may not be properly preserved but VM might still function correctly.")
} else if diffPercentage > 10 {
Logger.info(" ⚠️ WARNING: Reassembled disk uses more space (>10% difference).")
Logger.info(" This is unusual and may indicate improper sparse file handling.")
} else {
Logger.info(" ✓ Disk usage difference is minimal (<10%). VM likely to function correctly.")
}
}
}
Logger.info("Reassembled file is available at: \(reassembledFile.path)")
if originalDigest != reassembledDigest {
Logger.info("")
Logger.info("===== ATTEMPTING RECOVERY ACTION =====")
Logger.info("Since verification failed, trying direct copy as a fallback method.")
let fallbackFile = reassemblyDir.appendingPathComponent("fallback_disk.img")
Logger.info("Creating fallback disk image at: \(fallbackFile.path)")
let rsyncProcess = Process()
rsyncProcess.executableURL = URL(fileURLWithPath: "/usr/bin/rsync")
rsyncProcess.arguments = ["-aS", "--progress", diskPath.path, fallbackFile.path]
try rsyncProcess.run()
rsyncProcess.waitUntilExit()
if rsyncProcess.terminationStatus == 0 {
Logger.info("Direct copy completed. You may want to try using this fallback disk image")
Logger.info("instead if the reassembled one has issues: \(fallbackFile.path)")
} else {
Logger.info("Direct copy failed. Attempting with cp -c command...")
let cpProcess = Process()
cpProcess.executableURL = URL(fileURLWithPath: "/bin/cp")
cpProcess.arguments = ["-c", diskPath.path, fallbackFile.path]
try cpProcess.run()
cpProcess.waitUntilExit()
if cpProcess.terminationStatus == 0 {
Logger.info("Direct copy completed with cp -c. Fallback image available at: \(fallbackFile.path)")
} else {
Logger.info("All recovery attempts failed.")
}
}
}
}
}
// --- Manifest Creation & Push ---
let manifest = createManifest(
layers: layers,
configLayerIndex: layers.firstIndex(where: { $0.mediaType == "application/vnd.oci.image.config.v1+json" }),
uncompressedDiskSize: uncompressedDiskSize
)
// Push manifest only if not in dry-run mode
if !dryRun {
Logger.info("Pushing manifest(s)") // Updated log
// Serialize the manifest dictionary to Data first
let manifestData = try JSONSerialization.data(withJSONObject: manifest, options: [.prettyPrinted, .sortedKeys])
// Loop through tags to push the same manifest data
for tag in tags {
Logger.info("Pushing manifest for tag: \(tag)")
try await pushManifest(
repository: "\(self.organization)/\(imageName)",
tag: tag, // Use the current tag from the loop
manifest: manifestData, // Pass the serialized Data
token: token // Token should be in scope here now
)
}
}
// Print final upload summary if not dry run
if !dryRun {
let stats = await uploadProgress.getUploadStats()
Logger.info("\n\(stats.formattedSummary())") // Add newline for separation
}
// Clean up cache directory only on successful non-dry-run push
}
private func createManifest(layers: [OCIManifestLayer], configLayerIndex: Int?, uncompressedDiskSize: UInt64?) -> [String: Any] {
var manifest: [String: Any] = [
"schemaVersion": 2,
"mediaType": "application/vnd.oci.image.manifest.v1+json",
"layers": layers.map { layer in
var layerDict: [String: Any] = [
"mediaType": layer.mediaType,
"size": layer.size,
"digest": layer.digest
]
if let uncompressedSize = layer.uncompressedSize {
var annotations: [String: String] = [:]
annotations["org.trycua.lume.uncompressed-size"] = "\(uncompressedSize)" // Updated prefix
if let digest = layer.uncompressedContentDigest {
annotations["org.trycua.lume.uncompressed-content-digest"] = digest // Updated prefix
}
layerDict["annotations"] = annotations
}
return layerDict
}
]
// Add config reference if available
if let configIndex = configLayerIndex {
let configLayer = layers[configIndex]
manifest["config"] = [
"mediaType": configLayer.mediaType,
"size": configLayer.size,
"digest": configLayer.digest
]
}
// Add annotations
var annotations: [String: String] = [:]
annotations["org.trycua.lume.upload-time"] = ISO8601DateFormatter().string(from: Date()) // Updated prefix
if let diskSize = uncompressedDiskSize {
annotations["org.trycua.lume.uncompressed-disk-size"] = "\(diskSize)" // Updated prefix
}
manifest["annotations"] = annotations
return manifest
}
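// The helpers below implement the OCI Distribution Spec blob upload flow used during push:
// 1. HEAD /v2/<repo>/blobs/<digest>      - skip the upload if the blob already exists (blobExists)
// 2. POST /v2/<repo>/blobs/uploads/      - open an upload session and get its URL (startBlobUpload)
// 3. PUT  <upload-url>?digest=<digest>   - monolithic upload of the blob content (uploadBlob)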
private func uploadBlobFromData(repository: String, data: Data, token: String) async throws -> String {
// Calculate digest
let digest = "sha256:" + data.sha256String()
// Check if blob already exists
if try await blobExists(repository: repository, digest: digest, token: token) {
Logger.info("Blob already exists: \(digest)")
return digest
}
// Initiate upload
let uploadURL = try await startBlobUpload(repository: repository, token: token)
// Upload blob
try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token)
// Report progress
await uploadProgress.addProgress(Int64(data.count))
return digest
}
private func uploadBlobFromPath(repository: String, path: URL, digest: String, token: String) async throws -> String {
// Check if blob already exists
if try await blobExists(repository: repository, digest: digest, token: token) {
Logger.info("Blob already exists: \(digest)")
return digest
}
// Initiate upload
let uploadURL = try await startBlobUpload(repository: repository, token: token)
// Load data from file
let data = try Data(contentsOf: path)
// Upload blob
try await uploadBlob(url: uploadURL, data: data, digest: digest, token: token)
// Report progress
await uploadProgress.addProgress(Int64(data.count))
return digest
}
private func blobExists(repository: String, digest: String, token: String) async throws -> Bool {
let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/\(digest)")!
var request = URLRequest(url: url)
request.httpMethod = "HEAD"
request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
let (_, response) = try await URLSession.shared.data(for: request)
if let httpResponse = response as? HTTPURLResponse {
return httpResponse.statusCode == 200
}
return false
}
private func startBlobUpload(repository: String, token: String) async throws -> URL {
let url = URL(string: "https://\(registry)/v2/\(repository)/blobs/uploads/")!
var request = URLRequest(url: url)
request.httpMethod = "POST"
request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
request.setValue("0", forHTTPHeaderField: "Content-Length") // Explicitly set Content-Length to 0 for POST
let (data, response) = try await URLSession.shared.data(for: request)
guard let httpResponse = response as? HTTPURLResponse,
httpResponse.statusCode == 202,
let locationString = httpResponse.value(forHTTPHeaderField: "Location") else {
// Log response details on failure
let responseBody = String(data: data, encoding: .utf8) ?? "(No Body)"
Logger.error("Failed to initiate blob upload. Status: \((response as? HTTPURLResponse)?.statusCode ?? 0). Headers: \((response as? HTTPURLResponse)?.allHeaderFields ?? [:]). Body: \(responseBody)")
throw PushError.uploadInitiationFailed
}
// Construct the base URL for the registry
guard let baseRegistryURL = URL(string: "https://\(registry)") else {
Logger.error("Failed to create base registry URL from: \(registry)")
throw PushError.invalidURL
}
// Create the final upload URL, resolving the location against the base URL
guard let uploadURL = URL(string: locationString, relativeTo: baseRegistryURL) else {
Logger.error("Failed to create absolute upload URL from location: \(locationString) relative to base: \(baseRegistryURL.absoluteString)")
throw PushError.invalidURL
}
Logger.info("Blob upload initiated. Upload URL: \(uploadURL.absoluteString)")
return uploadURL.absoluteURL // Ensure it's absolute
}
private func uploadBlob(url: URL, data: Data, digest: String, token: String) async throws {
var components = URLComponents(url: url, resolvingAgainstBaseURL: true)!
// Add digest parameter
var queryItems = components.queryItems ?? []
queryItems.append(URLQueryItem(name: "digest", value: digest))
components.queryItems = queryItems
guard let uploadURL = components.url else {
throw PushError.invalidURL
}
var request = URLRequest(url: uploadURL)
request.httpMethod = "PUT"
request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type")
request.setValue("\(data.count)", forHTTPHeaderField: "Content-Length")
request.httpBody = data
let (_, response) = try await URLSession.shared.data(for: request)
guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else {
throw PushError.blobUploadFailed
}
}
private func pushManifest(repository: String, tag: String, manifest: Data, token: String) async throws {
let url = URL(string: "https://\(registry)/v2/\(repository)/manifests/\(tag)")!
var request = URLRequest(url: url)
request.httpMethod = "PUT"
request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
request.setValue("application/vnd.oci.image.manifest.v1+json", forHTTPHeaderField: "Content-Type")
request.httpBody = manifest
let (_, response) = try await URLSession.shared.data(for: request)
guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 201 else {
throw PushError.manifestPushFailed
}
}
private func getCredentialsFromEnvironment() -> (String?, String?) {
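// GITHUB_USERNAME/GITHUB_TOKEN take precedence over GHCR_USERNAME/GHCR_TOKEN when both are set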
let username = ProcessInfo.processInfo.environment["GITHUB_USERNAME"] ??
ProcessInfo.processInfo.environment["GHCR_USERNAME"]
let password = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] ??
ProcessInfo.processInfo.environment["GHCR_TOKEN"]
return (username, password)
}
// Add these helper methods for dry-run and reassemble implementation
// NEW Helper function using Compression framework and sparse writing
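// Streams an LZ4-compressed chunk through an InputFilter and writes the decompressed data
// at the correct offset of the pre-truncated output file. Blocks that are entirely zero are
// skipped instead of written so that holes in the original sparse image are reproduced.
// Self.holeGranularityBytes and Self.zeroChunk are assumed to be defined elsewhere on this
// type (a fixed block size plus a zero-filled buffer of that size used for comparison).
// Typical call site (see the reassembly loop above):
//   let written = try decompressChunkAndWriteSparse(inputPath: cachedChunkPath.path,
//                                                   outputHandle: outputHandle,
//                                                   startOffset: currentOffset)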
private func decompressChunkAndWriteSparse(inputPath: String, outputHandle: FileHandle, startOffset: UInt64) throws -> UInt64 {
guard FileManager.default.fileExists(atPath: inputPath) else {
Logger.error("Compressed chunk not found at: \(inputPath)")
return 0 // Or throw an error
}
let sourceData = try Data(contentsOf: URL(fileURLWithPath: inputPath), options: .alwaysMapped)
var currentWriteOffset = startOffset
var totalDecompressedBytes: UInt64 = 0
var sourceReadOffset = 0 // Keep track of how much compressed data we've provided
// Use the initializer with the readingFrom closure
let filter = try InputFilter(.decompress, using: .lz4) { (length: Int) -> Data? in
let bytesAvailable = sourceData.count - sourceReadOffset
if bytesAvailable == 0 {
return nil // No more data
}
let bytesToRead = min(length, bytesAvailable)
let chunk = sourceData.subdata(in: sourceReadOffset ..< sourceReadOffset + bytesToRead)
sourceReadOffset += bytesToRead
return chunk
}
// Process the decompressed output by reading from the filter
while let decompressedData = try filter.readData(ofLength: Self.holeGranularityBytes) {
if decompressedData.isEmpty { break } // End of stream
// Check if the chunk is all zeros
if decompressedData.count == Self.holeGranularityBytes && decompressedData == Self.zeroChunk {
// It's a zero chunk, just advance the offset, don't write
currentWriteOffset += UInt64(decompressedData.count)
} else {
// Not a zero chunk (or a partial chunk at the end), write it
try outputHandle.seek(toOffset: currentWriteOffset)
try outputHandle.write(contentsOf: decompressedData)
currentWriteOffset += UInt64(decompressedData.count)
}
totalDecompressedBytes += UInt64(decompressedData.count)
}
// No explicit finalize is needed; the filter ends the stream once the read closure returns nil
return totalDecompressedBytes
}
// Helper function to calculate SHA256 hash of a file
private func calculateSHA256(filePath: String) -> String {
guard FileManager.default.fileExists(atPath: filePath) else {
return "file-not-found"
}
let process = Process()
process.executableURL = URL(fileURLWithPath: "/usr/bin/shasum")
process.arguments = ["-a", "256", filePath]
let outputPipe = Pipe()
process.standardOutput = outputPipe
do {
try process.run()
process.waitUntilExit()
if let data = try outputPipe.fileHandleForReading.readToEnd(),
let output = String(data: data, encoding: .utf8) {
return output.components(separatedBy: " ").first ?? "hash-calculation-failed"
}
} catch {
Logger.error("SHA256 calculation failed: \(error)")
}
return "hash-calculation-failed"
}
}
actor UploadProgressTracker {
private var totalBytes: Int64 = 0
private var uploadedBytes: Int64 = 0 // Renamed
private var progressLogger = ProgressLogger(threshold: 0.01)
private var totalFiles: Int = 0 // Keep track of total items
private var completedFiles: Int = 0 // Keep track of completed items
// Upload speed tracking
private var startTime: Date = Date()
private var lastUpdateTime: Date = Date()
private var lastUpdateBytes: Int64 = 0
private var speedSamples: [Double] = []
private var peakSpeed: Double = 0
private var totalElapsedTime: TimeInterval = 0
// Smoothing factor for speed calculation
private var speedSmoothing: Double = 0.3
private var smoothedSpeed: Double = 0
func setTotal(_ total: Int64, files: Int) {
totalBytes = total
totalFiles = files
startTime = Date()
lastUpdateTime = startTime
uploadedBytes = 0 // Reset uploaded bytes
completedFiles = 0 // Reset completed files
smoothedSpeed = 0
speedSamples = []
peakSpeed = 0
totalElapsedTime = 0
}
func addProgress(_ bytes: Int64) {
uploadedBytes += bytes
completedFiles += 1 // Increment completed files count
let now = Date()
let elapsed = now.timeIntervalSince(lastUpdateTime)
// Show first progress update immediately, then throttle updates
let shouldUpdate = (uploadedBytes <= bytes) || (elapsed >= 0.5) || (completedFiles == totalFiles)
if shouldUpdate && totalBytes > 0 { // Ensure totalBytes is set
let currentSpeed = Double(uploadedBytes - lastUpdateBytes) / max(elapsed, 0.001)
speedSamples.append(currentSpeed)
// Cap samples array
if speedSamples.count > 20 {
speedSamples.removeFirst(speedSamples.count - 20)
}
peakSpeed = max(peakSpeed, currentSpeed)
// Apply exponential smoothing
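// smoothed = α·current + (1 − α)·previous, with α = speedSmoothing (0.3); this damps
// per-chunk spikes so the ETA derived from it doesn't oscillate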
if smoothedSpeed == 0 { smoothedSpeed = currentSpeed }
else { smoothedSpeed = speedSmoothing * currentSpeed + (1 - speedSmoothing) * smoothedSpeed }
let recentAvgSpeed = calculateAverageSpeed()
let totalElapsed = now.timeIntervalSince(startTime)
let overallAvgSpeed = totalElapsed > 0 ? Double(uploadedBytes) / totalElapsed : 0
let progress = totalBytes > 0 ? Double(uploadedBytes) / Double(totalBytes) : 1.0 // Avoid division by zero
logSpeedProgress(
current: progress,
currentSpeed: currentSpeed,
averageSpeed: recentAvgSpeed,
smoothedSpeed: smoothedSpeed,
overallSpeed: overallAvgSpeed,
peakSpeed: peakSpeed,
context: "Uploading Image" // Changed context
)
lastUpdateTime = now
lastUpdateBytes = uploadedBytes
totalElapsedTime = totalElapsed
}
}
private func calculateAverageSpeed() -> Double {
guard !speedSamples.isEmpty else { return 0 }
var totalWeight = 0.0
var weightedSum = 0.0
let samples = speedSamples.suffix(min(8, speedSamples.count))
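// Weight the most recent (up to 8) samples linearly (1, 2, …, n) so current throughput dominates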
for (index, speed) in samples.enumerated() {
let weight = Double(index + 1)
weightedSum += speed * weight
totalWeight += weight
}
return totalWeight > 0 ? weightedSum / totalWeight : 0
}
// Use the UploadStats struct
func getUploadStats() -> UploadStats {
let avgSpeed = totalElapsedTime > 0 ? Double(uploadedBytes) / totalElapsedTime : 0
return UploadStats(
totalBytes: totalBytes,
uploadedBytes: uploadedBytes, // Renamed
elapsedTime: totalElapsedTime,
averageSpeed: avgSpeed,
peakSpeed: peakSpeed
)
}
private func logSpeedProgress(
current: Double,
currentSpeed: Double,
averageSpeed: Double,
smoothedSpeed: Double,
overallSpeed: Double,
peakSpeed: Double,
context: String
) {
let progressPercent = Int(current * 100)
// let currentSpeedStr = formatByteSpeed(currentSpeed) // Removed unused
let avgSpeedStr = formatByteSpeed(averageSpeed)
// let peakSpeedStr = formatByteSpeed(peakSpeed) // Removed unused
let remainingBytes = totalBytes - uploadedBytes
let speedForEta = max(smoothedSpeed, averageSpeed * 0.8)
let etaSeconds = speedForEta > 0 ? Double(remainingBytes) / speedForEta : 0
let etaStr = formatTimeRemaining(etaSeconds)
let progressBar = createProgressBar(progress: current)
let fileProgress = "(\(completedFiles)/\(totalFiles))" // Add file count
print(
"\r\(progressBar) \(progressPercent)% \(fileProgress) | Speed: \(avgSpeedStr) (Avg) | ETA: \(etaStr) ", // Simplified output
terminator: "")
fflush(stdout)
}
// Helper methods (createProgressBar, formatByteSpeed, formatTimeRemaining) remain the same
private func createProgressBar(progress: Double, width: Int = 30) -> String {
let completedWidth = Int(progress * Double(width))
let remainingWidth = width - completedWidth
let completed = String(repeating: "█", count: completedWidth)
let remaining = String(repeating: "░", count: remainingWidth)
return "[\(completed)\(remaining)]"
}
private func formatByteSpeed(_ bytesPerSecond: Double) -> String {
let units = ["B/s", "KB/s", "MB/s", "GB/s"]
var speed = bytesPerSecond
var unitIndex = 0
while speed > 1024 && unitIndex < units.count - 1 { speed /= 1024; unitIndex += 1 }
return String(format: "%.1f %@", speed, units[unitIndex])
}
private func formatTimeRemaining(_ seconds: Double) -> String {
if seconds.isNaN || seconds.isInfinite || seconds <= 0 { return "calculating..." }
let hours = Int(seconds) / 3600
let minutes = (Int(seconds) % 3600) / 60
let secs = Int(seconds) % 60
if hours > 0 { return String(format: "%d:%02d:%02d", hours, minutes, secs) }
else { return String(format: "%d:%02d", minutes, secs) }
}
}

View File

@@ -452,6 +452,77 @@ final class LumeController {
}
}
@MainActor
public func pushImage(
name: String,
imageName: String,
tags: [String],
registry: String,
organization: String,
storage: String? = nil,
chunkSizeMb: Int = 512,
verbose: Bool = false,
dryRun: Bool = false,
reassemble: Bool = false
) async throws {
do {
Logger.info(
"Pushing VM to registry",
metadata: [
"name": name,
"imageName": imageName,
"tags": "\(tags.joined(separator: ", "))",
"registry": registry,
"organization": organization,
"location": storage ?? "default",
"chunk_size": "\(chunkSizeMb)MB",
"dry_run": "\(dryRun)",
"reassemble": "\(reassemble)"
])
try validatePushParameters(
name: name,
imageName: imageName,
tags: tags,
registry: registry,
organization: organization
)
// Find the actual location of the VM
let actualLocation = try self.validateVMExists(name, storage: storage)
// Get the VM directory
let vmDir = try home.getVMDirectory(name, storage: actualLocation)
// Use ImageContainerRegistry to push the VM
let imageContainerRegistry = ImageContainerRegistry(
registry: registry, organization: organization)
try await imageContainerRegistry.push(
vmDirPath: vmDir.dir.path,
imageName: imageName,
tags: tags,
chunkSizeMb: chunkSizeMb,
verbose: verbose,
dryRun: dryRun,
reassemble: reassemble
)
Logger.info(
"VM pushed successfully",
metadata: [
"name": name,
"imageName": imageName,
"tags": "\(tags.joined(separator: ", "))",
"registry": registry,
"organization": organization,
])
} catch {
Logger.error("Failed to push VM", metadata: ["error": error.localizedDescription])
throw error
}
}
@MainActor
public func pruneImages() async throws {
Logger.info("Pruning cached images")
@@ -755,4 +826,31 @@ final class LumeController {
break
}
}
private func validatePushParameters(
name: String,
imageName: String,
tags: [String],
registry: String,
organization: String
) throws {
guard !name.isEmpty else {
throw ValidationError("VM name cannot be empty")
}
guard !imageName.isEmpty else {
throw ValidationError("Image name cannot be empty")
}
guard !tags.isEmpty else {
throw ValidationError("At least one tag must be provided.")
}
guard !registry.isEmpty else {
throw ValidationError("Registry cannot be empty")
}
guard !organization.isEmpty else {
throw ValidationError("Organization cannot be empty")
}
// Verify VM exists (this will throw if not found)
_ = try self.validateVMExists(name)
}
}

View File

@@ -288,6 +288,54 @@ extension Server {
}
}
func handlePush(_ body: Data?) async throws -> HTTPResponse {
guard let body = body,
let request = try? JSONDecoder().decode(PushRequest.self, from: body)
else {
return HTTPResponse(
statusCode: .badRequest,
headers: ["Content-Type": "application/json"],
body: try JSONEncoder().encode(APIError(message: "Invalid request body"))
)
}
// Trigger push asynchronously, return Accepted immediately
Task.detached { @MainActor @Sendable in
do {
let vmController = LumeController()
try await vmController.pushImage(
name: request.name,
imageName: request.imageName,
tags: request.tags,
registry: request.registry,
organization: request.organization,
storage: request.storage,
chunkSizeMb: request.chunkSizeMb,
verbose: false, // Verbose typically handled by server logs
dryRun: false, // The API always performs a real (non-dry-run) push
reassemble: false // Reassembly verification is not exposed through the API
)
Logger.info("Background push completed successfully for image: \(request.imageName):\(request.tags.joined(separator: ","))")
} catch {
Logger.error(
"Background push failed for image: \(request.imageName):\(request.tags.joined(separator: ","))",
metadata: ["error": error.localizedDescription]
)
}
}
return HTTPResponse(
statusCode: .accepted,
headers: ["Content-Type": "application/json"],
body: try JSONEncoder().encode([
"message": AnyEncodable("Push initiated in background"),
"name": AnyEncodable(request.name),
"imageName": AnyEncodable(request.imageName),
"tags": AnyEncodable(request.tags),
])
)
}
func handleGetImages(_ request: HTTPRequest) async throws -> HTTPResponse {
let pathAndQuery = request.path.split(separator: "?", maxSplits: 1)
let queryParams =

View File

@@ -102,3 +102,31 @@ struct CloneRequest: Codable {
let sourceLocation: String?
let destLocation: String?
}
struct PushRequest: Codable {
let name: String // Name of the local VM
let imageName: String // Base name for the image in the registry
let tags: [String] // List of tags to push
var registry: String // Registry URL
var organization: String // Organization/user in the registry
let storage: String? // Optional VM storage location
var chunkSizeMb: Int // Chunk size
// dryRun and reassemble are not part of the API request; the handler defaults them to false
// verbose is usually handled by server logging
enum CodingKeys: String, CodingKey {
case name, imageName, tags, registry, organization, storage, chunkSizeMb
}
// Provide default values for optional fields during decoding
init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
name = try container.decode(String.self, forKey: .name)
imageName = try container.decode(String.self, forKey: .imageName)
tags = try container.decode([String].self, forKey: .tags)
registry = try container.decodeIfPresent(String.self, forKey: .registry) ?? "ghcr.io"
organization = try container.decodeIfPresent(String.self, forKey: .organization) ?? "trycua"
storage = try container.decodeIfPresent(String.self, forKey: .storage)
chunkSizeMb = try container.decodeIfPresent(Int.self, forKey: .chunkSizeMb) ?? 512
}
}
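// Illustrative minimal body for POST /vms/push (names are placeholders); registry,
// organization and chunkSizeMb fall back to "ghcr.io", "trycua" and 512 when omitted:
// { "name": "my-vm", "imageName": "macos-image", "tags": ["latest"] }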

View File

@@ -4,6 +4,19 @@ struct APIError: Codable {
let message: String
}
// Helper struct to encode mixed-type dictionaries
struct AnyEncodable: Encodable {
private let value: Encodable
init(_ value: Encodable) {
self.value = value
}
func encode(to encoder: Encoder) throws {
try value.encode(to: encoder)
}
}
extension HTTPResponse {
static func json<T: Encodable>(_ value: T) throws -> HTTPResponse {
let data = try JSONEncoder().encode(value)

View File

@@ -261,6 +261,12 @@ final class Server {
}
return try await self.handleSetDefaultLocation(name)
}),
Route(
method: "POST", path: "/vms/push",
handler: { [weak self] request in
guard let self else { throw HTTPError.internalError }
return try await self.handlePush(request.body)
}),
]
}

View File

@@ -5,6 +5,7 @@ enum CommandRegistry {
[
Create.self,
Pull.self,
Push.self,
Images.self,
Clone.self,
Get.self,