Improve cache lookup time

f-trycua
2025-05-13 22:36:47 -07:00
parent 254bc7f1a5
commit 681fb21993
5 changed files with 285 additions and 1254 deletions


@@ -2,7 +2,6 @@ import os
import asyncio
from pathlib import Path
import sys
import json
import traceback
# Load environment variables from .env file
@@ -32,15 +31,14 @@ async def main():
         display="1024x768",
         memory="8GB",
         cpu="4",
-        os_type="linux",
-        image="ubuntu-noble-vanilla:latest",
-        name="ubuntu-noble-vanilla_latest",
-        verbosity=LogLevel.NORMAL, # Use QUIET to suppress most logs
+        os_type="macos",
+        name="macos",
+        verbosity=LogLevel.VERBOSE,
         provider_type=VMProviderType.LUME,
-        # storage="/Users/francescobonacci/repos/trycua/computer/examples/storage",
-        # shared_directories=[
-        #     "/Users/francescobonacci/repos/trycua/computer/examples/shared"
-        # ],
+        storage="/Users/<USER>/repos/trycua/computer/examples/storage",
+        shared_directories=[
+            "/Users/<USER>/repos/trycua/computer/examples/shared"
+        ],
         ephemeral=False,
     )

File diff suppressed because it is too large.


@@ -215,53 +215,54 @@ class Computer:
         provider_type_name = self.provider_type.name if isinstance(self.provider_type, VMProviderType) else self.provider_type
         self.logger.verbose(f"Initializing {provider_type_name} provider context...")
-        # Configure provider based on initialization parameters
-        provider_kwargs = {
-            "verbose": self.verbosity >= LogLevel.DEBUG,
-            "ephemeral": self.ephemeral,  # Pass ephemeral flag to provider
-        }
-        # Handle storage path separately from ephemeral flag
-        if self.ephemeral:
-            self.logger.info("Using ephemeral storage and ephemeral VMs")
-            # Use ephemeral storage location
-            provider_kwargs["storage"] = "ephemeral"
-        else:
-            # Use explicitly configured storage
-            provider_kwargs["storage"] = self.storage
-        # VM name is already set in self.config.name and will be used when calling provider methods
-        # For Lumier provider, add specific configuration
-        if self.provider_type == VMProviderType.LUMIER:
-            # Pass VM image to LumierProvider
-            provider_kwargs["image"] = self.image
-            self.logger.info(f"Using VM image for Lumier provider: {self.image}")
-            # Add shared_path if specified (for file sharing between host and VM)
-            if self.shared_path:
-                provider_kwargs["shared_path"] = self.shared_path
-                self.logger.info(f"Using shared path for Lumier provider: {self.shared_path}")
-            # Add noVNC_port if specified (for web interface)
-            if self.noVNC_port:
-                provider_kwargs["noVNC_port"] = self.noVNC_port
-                self.logger.info(f"Using noVNC port for Lumier provider: {self.noVNC_port}")
-        elif self.port is not None:
-            # For other providers, set port if specified
-            provider_kwargs["port"] = self.port
-            self.logger.verbose(f"Using specified port for provider: {self.port}")
+        # Explicitly set provider parameters
+        storage = "ephemeral" if self.ephemeral else self.storage
+        verbose = self.verbosity >= LogLevel.DEBUG
+        ephemeral = self.ephemeral
+        port = self.port if self.port is not None else 7777
+        host = self.host if self.host else "localhost"
+        image = self.image
+        shared_path = self.shared_path
+        noVNC_port = self.noVNC_port
-        # Set host if specified
-        if self.host:
-            provider_kwargs["host"] = self.host
-            self.logger.verbose(f"Using specified host for provider: {self.host}")
-        # Create VM provider instance with configured parameters
+        # Create VM provider instance with explicit parameters
         try:
-            self.config.vm_provider = VMProviderFactory.create_provider(
-                self.provider_type, **provider_kwargs
-            )
+            if self.provider_type == VMProviderType.LUMIER:
+                self.logger.info(f"Using VM image for Lumier provider: {image}")
+                if shared_path:
+                    self.logger.info(f"Using shared path for Lumier provider: {shared_path}")
+                if noVNC_port:
+                    self.logger.info(f"Using noVNC port for Lumier provider: {noVNC_port}")
+                self.config.vm_provider = VMProviderFactory.create_provider(
+                    self.provider_type,
+                    port=port,
+                    host=host,
+                    storage=storage,
+                    shared_path=shared_path,
+                    image=image,
+                    verbose=verbose,
+                    ephemeral=ephemeral,
+                    noVNC_port=noVNC_port,
+                )
+            elif self.provider_type == VMProviderType.LUME:
+                self.config.vm_provider = VMProviderFactory.create_provider(
+                    self.provider_type,
+                    port=port,
+                    host=host,
+                    storage=storage,
+                    verbose=verbose,
+                    ephemeral=ephemeral,
+                )
+            elif self.provider_type == VMProviderType.CLOUD:
+                self.config.vm_provider = VMProviderFactory.create_provider(
+                    self.provider_type,
+                    port=port,
+                    host=host,
+                    storage=storage,
+                    verbose=verbose,
+                )
+            else:
+                raise ValueError(f"Unsupported provider type: {self.provider_type}")
             self._provider_context = await self.config.vm_provider.__aenter__()
             self.logger.verbose("VM provider context initialized successfully")
         except ImportError as ie:


@@ -46,7 +46,7 @@ class LumierProvider(BaseVMProvider):
         self,
         port: Optional[int] = 7777,
         host: str = "localhost",
-        storage: Optional[str] = None,
+        storage: Optional[str] = None,  # Can be a path or 'ephemeral'
         shared_path: Optional[str] = None,
         image: str = "macos-sequoia-cua:latest",  # VM image to use
         verbose: bool = False,

@@ -512,6 +512,24 @@ class ImageContainerRegistry: @unchecked Sendable {
             return false
         }
+        // Check if we have a reassembled image
+        let reassembledCachePath = getImageCacheDirectory(manifestId: manifestId)
+            .appendingPathComponent("disk.img.reassembled")
+        if FileManager.default.fileExists(atPath: reassembledCachePath.path) {
+            Logger.info("Found reassembled disk image in cache validation")
+            // If we have a reassembled image, we only need to make sure the manifest matches
+            guard let cachedManifest = loadCachedManifest(manifestId: manifestId),
+                cachedManifest.layers == manifest.layers
+            else {
+                return false
+            }
+            // We have a reassembled image and the manifest matches
+            return true
+        }
+        // If no reassembled image, check layer files
         // First check if manifest exists and matches
         guard let cachedManifest = loadCachedManifest(manifestId: manifestId),
             cachedManifest.layers == manifest.layers
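
This validation change is the heart of the lookup speedup: a warm cache is now accepted after one file-existence check plus a manifest comparison, rather than a per-layer scan. A minimal self-contained sketch of the pattern; the helper name isCacheUsable and its parameters are illustrative rather than the registry's actual API, and the sha256_ file naming is taken from the cleanup filter later in this diff:

    import Foundation

    // Sketch of the fast-path validation above (names are illustrative).
    func isCacheUsable(cacheDir: URL, cachedLayers: [String]?, expectedLayers: [String]) -> Bool {
        let reassembled = cacheDir.appendingPathComponent("disk.img.reassembled")
        if FileManager.default.fileExists(atPath: reassembled.path) {
            // Fast path: one stat plus a manifest comparison.
            return cachedLayers == expectedLayers
        }
        // Slow path: the manifest must match and every layer file must be present.
        guard cachedLayers == expectedLayers else { return false }
        return expectedLayers.allSatisfy { digest in
            let file = "sha256_" + digest.replacingOccurrences(of: "sha256:", with: "")
            return FileManager.default.fileExists(atPath: cacheDir.appendingPathComponent(file).path)
        }
    }
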
@@ -612,6 +630,52 @@ class ImageContainerRegistry: @unchecked Sendable {
                 let metadata = try? JSONDecoder().decode(ImageMetadata.self, from: metadataData)
             {
                 if metadata.image == image {
+                    // Before removing, check if there's a reassembled image we should preserve
+                    let reassembledPath = itemPath.appendingPathComponent("disk.img.reassembled")
+                    let nvramPath = itemPath.appendingPathComponent("nvram.bin")
+                    let configPath = itemPath.appendingPathComponent("config.json")
+                    // Preserve reassembled image if it exists
+                    if FileManager.default.fileExists(atPath: reassembledPath.path) {
+                        Logger.info(
+                            "Preserving reassembled disk image during cleanup",
+                            metadata: ["manifest_id": item])
+                        // Ensure the current cache directory exists
+                        let currentCacheDir = getImageCacheDirectory(manifestId: currentManifestId)
+                        try FileManager.default.createDirectory(
+                            at: currentCacheDir, withIntermediateDirectories: true)
+                        // Move reassembled image to current cache directory
+                        let currentReassembledPath = currentCacheDir.appendingPathComponent(
+                            "disk.img.reassembled")
+                        if !FileManager.default.fileExists(atPath: currentReassembledPath.path) {
+                            try FileManager.default.copyItem(
+                                at: reassembledPath, to: currentReassembledPath)
+                        }
+                        // Also preserve nvram if it exists
+                        if FileManager.default.fileExists(atPath: nvramPath.path) {
+                            let currentNvramPath = currentCacheDir.appendingPathComponent(
+                                "nvram.bin")
+                            if !FileManager.default.fileExists(atPath: currentNvramPath.path) {
+                                try FileManager.default.copyItem(
+                                    at: nvramPath, to: currentNvramPath)
+                            }
+                        }
+                        // Also preserve config if it exists
+                        if FileManager.default.fileExists(atPath: configPath.path) {
+                            let currentConfigPath = currentCacheDir.appendingPathComponent(
+                                "config.json")
+                            if !FileManager.default.fileExists(atPath: currentConfigPath.path) {
+                                try FileManager.default.copyItem(
+                                    at: configPath, to: currentConfigPath)
+                            }
+                        }
+                    }
+                    // Now remove the old directory
                     try FileManager.default.removeItem(at: itemPath)
                     Logger.info(
                         "Removed old version of image",
@@ -652,10 +716,12 @@ class ImageContainerRegistry: @unchecked Sendable {
         // Use provided name or derive from image
         let vmName = name ?? image.split(separator: ":").first.map(String.init) ?? ""
         // Determine if locationName is a direct path or a named storage location
         let vmDir: VMDirectory
-        if let locationName = locationName, locationName.contains("/") || locationName.contains("\\") {
+        if let locationName = locationName,
+            locationName.contains("/") || locationName.contains("\\")
+        {
             // Direct path
             vmDir = try home.getVMDirectoryFromPath(vmName, storagePath: locationName)
         } else {
@@ -1417,9 +1483,85 @@ class ImageContainerRegistry: @unchecked Sendable {
         let outputURL = destination.appendingPathComponent("disk.img")
         var expectedTotalSize: UInt64? = nil  // Use optional to handle missing config
+        // Define the path for the reassembled cache image
+        let cacheDir = getImageCacheDirectory(manifestId: manifestId)
+        let reassembledCachePath = cacheDir.appendingPathComponent("disk.img.reassembled")
+        let nvramCachePath = cacheDir.appendingPathComponent("nvram.bin")
+        // First check if we already have a reassembled image in the cache
+        if FileManager.default.fileExists(atPath: reassembledCachePath.path) {
+            Logger.info("Found reassembled disk image in cache, using it directly")
+            // Copy reassembled disk image
+            try FileManager.default.copyItem(at: reassembledCachePath, to: outputURL)
+            // Copy nvram if it exists
+            if FileManager.default.fileExists(atPath: nvramCachePath.path) {
+                try FileManager.default.copyItem(
+                    at: nvramCachePath,
+                    to: destination.appendingPathComponent("nvram.bin")
+                )
+                Logger.info("Using cached nvram.bin file")
+            } else {
+                // Look for nvram in layer cache if needed
+                let nvramLayers = manifest.layers.filter {
+                    $0.mediaType == "application/octet-stream"
+                }
+                if let nvramLayer = nvramLayers.first {
+                    let cachedNvram = getCachedLayerPath(
+                        manifestId: manifestId, digest: nvramLayer.digest)
+                    if FileManager.default.fileExists(atPath: cachedNvram.path) {
+                        try FileManager.default.copyItem(
+                            at: cachedNvram,
+                            to: destination.appendingPathComponent("nvram.bin")
+                        )
+                        // Also save it to the dedicated nvram location for future use
+                        try FileManager.default.copyItem(at: cachedNvram, to: nvramCachePath)
+                        Logger.info("Recovered nvram.bin from layer cache")
+                    }
+                }
+            }
+            // Copy config if it exists
+            let configCachePath = cacheDir.appendingPathComponent("config.json")
+            if FileManager.default.fileExists(atPath: configCachePath.path) {
+                try FileManager.default.copyItem(
+                    at: configCachePath,
+                    to: destination.appendingPathComponent("config.json")
+                )
+                Logger.info("Using cached config.json file")
+            } else {
+                // Look for config in layer cache if needed
+                let configLayers = manifest.layers.filter {
+                    $0.mediaType == "application/vnd.oci.image.config.v1+json"
+                }
+                if let configLayer = configLayers.first {
+                    let cachedConfig = getCachedLayerPath(
+                        manifestId: manifestId, digest: configLayer.digest)
+                    if FileManager.default.fileExists(atPath: cachedConfig.path) {
+                        try FileManager.default.copyItem(
+                            at: cachedConfig,
+                            to: destination.appendingPathComponent("config.json")
+                        )
+                        // Also save it to the dedicated config location for future use
+                        try FileManager.default.copyItem(at: cachedConfig, to: configCachePath)
+                        Logger.info("Recovered config.json from layer cache")
+                    }
+                }
+            }
+            Logger.info("Cache copy complete using reassembled image")
+            return
+        }
+        // If we don't have a reassembled image, proceed with legacy part handling
+        Logger.info("No reassembled image found, using part-based reassembly")
         // Instantiate collector
         let diskPartsCollector = DiskPartsCollector()
         var lz4LayerCount = 0  // Count lz4 layers found
+        var hasNvram = false
+        var configPath: URL? = nil
         // First identify disk parts and non-disk files
         for layer in manifest.layers {
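
The pull path now forks on a single artifact: when disk.img.reassembled exists, populating the VM directory is three plain file copies and the decompression pipeline never runs. A stripped-down sketch of that fork; populateVM is a hypothetical wrapper and the part-based fallback is elided:

    import Foundation

    func populateVM(from cacheDir: URL, to destination: URL) throws {
        let reassembled = cacheDir.appendingPathComponent("disk.img.reassembled")
        guard FileManager.default.fileExists(atPath: reassembled.path) else {
            // Slow path: legacy part-based reassembly (as in the rest of this hunk).
            return
        }
        // Fast path: plain copies replace part lookup, decompression, and concatenation.
        try FileManager.default.copyItem(
            at: reassembled, to: destination.appendingPathComponent("disk.img"))
        for name in ["nvram.bin", "config.json"] {
            let cached = cacheDir.appendingPathComponent(name)
            if FileManager.default.fileExists(atPath: cached.path) {
                try FileManager.default.copyItem(
                    at: cached, to: destination.appendingPathComponent(name))
            }
        }
    }
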
@@ -1447,9 +1589,13 @@ class ImageContainerRegistry: @unchecked Sendable {
                 switch layer.mediaType {
                 case "application/vnd.oci.image.config.v1+json":
                     fileName = "config.json"
+                    configPath = cachedLayer
                 case "application/octet-stream":
                     // Assume nvram if config layer exists, otherwise assume single disk image
                     fileName = manifest.config != nil ? "nvram.bin" : "disk.img"
+                    if fileName == "nvram.bin" {
+                        hasNvram = true
+                    }
                 case "application/vnd.oci.image.layer.v1.tar",
                     "application/octet-stream+gzip":
                     // Assume disk image for these types as well if encountered in cache scenario
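
The classification above is driven entirely by mediaType, with application/octet-stream disambiguated by whether the manifest carries a config layer. Restated as a standalone function for clarity (a simplification; the lz4 disk-part branch is omitted):

    // Sketch of the layer-name resolution used in the switch above.
    func fileName(forMediaType mediaType: String, manifestHasConfig: Bool) -> String? {
        switch mediaType {
        case "application/vnd.oci.image.config.v1+json":
            return "config.json"
        case "application/octet-stream":
            // nvram only when a separate config layer exists; otherwise a single disk image.
            return manifestHasConfig ? "nvram.bin" : "disk.img"
        case "application/vnd.oci.image.layer.v1.tar",
            "application/octet-stream+gzip":
            return "disk.img"
        default:
            return nil
        }
    }
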
@@ -1700,6 +1846,89 @@ class ImageContainerRegistry: @unchecked Sendable {
             try chmodProcess.run()
             chmodProcess.waitUntilExit()
         }
+        // After successful reassembly, store the reassembled image in the cache
+        if cachingEnabled {
+            Logger.info("Saving reassembled disk image to cache for future use")
+            // Copy the reassembled disk image to the cache
+            try FileManager.default.copyItem(at: outputURL, to: reassembledCachePath)
+            // Clean up disk parts after successful reassembly
+            Logger.info("Cleaning up disk part files from cache")
+            // Use an array to track unique file paths to avoid trying to delete the same file multiple times
+            var processedPaths: [String] = []
+            for (_, partURL) in diskPartSources {
+                let path = partURL.path
+                // Skip if we've already processed this exact path
+                if processedPaths.contains(path) {
+                    Logger.info("Skipping duplicate part file: \(partURL.lastPathComponent)")
+                    continue
+                }
+                // Add to processed array
+                processedPaths.append(path)
+                // Check if file exists before attempting to delete
+                if FileManager.default.fileExists(atPath: path) {
+                    do {
+                        try FileManager.default.removeItem(at: partURL)
+                        Logger.info("Removed disk part: \(partURL.lastPathComponent)")
+                    } catch {
+                        Logger.info(
+                            "Failed to remove disk part: \(partURL.lastPathComponent) - \(error.localizedDescription)"
+                        )
+                    }
+                } else {
+                    Logger.info("Disk part already removed: \(partURL.lastPathComponent)")
+                }
+            }
+            // Also save nvram if we have it
+            if hasNvram {
+                let srcNvram = destination.appendingPathComponent("nvram.bin")
+                if FileManager.default.fileExists(atPath: srcNvram.path) {
+                    try? FileManager.default.copyItem(at: srcNvram, to: nvramCachePath)
+                }
+            }
+            // Save config.json in the cache for future use if it exists
+            if let configPath = configPath {
+                let cacheConfigPath = cacheDir.appendingPathComponent("config.json")
+                try? FileManager.default.copyItem(at: configPath, to: cacheConfigPath)
+            }
+            // Perform a final cleanup to catch any leftover part files
+            Logger.info("Performing final cleanup of any remaining part files")
+            do {
+                let cacheContents = try FileManager.default.contentsOfDirectory(
+                    at: cacheDir, includingPropertiesForKeys: nil)
+                for item in cacheContents {
+                    let fileName = item.lastPathComponent
+                    // Only remove sha256_ files that aren't the reassembled image, nvram or config
+                    if fileName.starts(with: "sha256_") && fileName != "disk.img.reassembled"
+                        && fileName != "nvram.bin" && fileName != "config.json"
+                        && fileName != "manifest.json" && fileName != "metadata.json"
+                    {
+                        do {
+                            try FileManager.default.removeItem(at: item)
+                            Logger.info(
+                                "Removed leftover file during final cleanup: \(fileName)")
+                        } catch {
+                            Logger.info(
+                                "Failed to remove leftover file: \(fileName) - \(error.localizedDescription)"
+                            )
+                        }
+                    }
+                }
+            } catch {
+                Logger.info("Error during final cleanup: \(error.localizedDescription)")
+            }
+        }
         }
         Logger.info("Cache copy complete")
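
Once the reassembled image is cached, the sha256_ part files are dead weight, and the final sweep keeps only a small allowlist. The filter, restated as a self-contained sketch (sweepLayerParts is a hypothetical name; the kept file names come from the diff above):

    import Foundation

    func sweepLayerParts(in cacheDir: URL) {
        // Artifacts the fast path still needs; anything else matching sha256_ goes.
        let keep: Set<String> = [
            "disk.img.reassembled", "nvram.bin", "config.json",
            "manifest.json", "metadata.json",
        ]
        let contents = (try? FileManager.default.contentsOfDirectory(
            at: cacheDir, includingPropertiesForKeys: nil)) ?? []
        for item in contents {
            let name = item.lastPathComponent
            guard name.hasPrefix("sha256_"), !keep.contains(name) else { continue }
            try? FileManager.default.removeItem(at: item)
        }
    }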