Added support for large file reading (#306)

This commit is contained in:
Dillon DuPont
2025-06-27 10:38:45 -04:00
parent e61b92e0b9
commit 6a6ff87cdb
7 changed files with 189 additions and 16 deletions

View File

@@ -44,11 +44,6 @@ class BaseFileHandler(ABC):
"""Write text content to a file."""
pass
@abstractmethod
async def read_bytes(self, path: str) -> Dict[str, Any]:
    """Read the binary contents of a file. Sent over the websocket as a base64 string.

    Args:
        path: Path to the file

    NOTE(review): this appears to be the pre-change signature; the same class
    also declares ``read_bytes(path, offset, length)`` later on, and the later
    definition is the one that takes effect. Confirm this one should remain.
    """
    pass
@abstractmethod
async def write_bytes(self, path: str, content_b64: str) -> Dict[str, Any]:
"""Write binary content to a file. Sent over the websocket as a base64 string."""
@@ -69,6 +64,22 @@ class BaseFileHandler(ABC):
"""Delete a directory."""
pass
@abstractmethod
async def read_bytes(
    self,
    path: str,
    offset: int = 0,
    length: Optional[int] = None,
) -> Dict[str, Any]:
    """Return a file's binary contents, or a byte range of them.

    The payload is sent over the websocket as a base64 string.

    Args:
        path: Location of the file to read.
        offset: Byte position at which reading starts; defaults to 0.
        length: How many bytes to read; ``None`` (the default) reads the
            entire file.
    """
@abstractmethod
async def get_file_size(self, path: str) -> Dict[str, Any]:
    """Return the size, in bytes, of the file at ``path``.

    Args:
        path: Location of the file to measure.
    """
class BaseAutomationHandler(ABC):
"""Abstract base class for OS-specific automation handlers.

View File

@@ -7,7 +7,7 @@ Includes:
"""
from pathlib import Path
from typing import Dict, Any
from typing import Dict, Any, Optional
from .base import BaseFileHandler
import base64
@@ -54,9 +54,27 @@ class GenericFileHandler(BaseFileHandler):
except Exception as e:
return {"success": False, "error": str(e)}
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> Dict[str, Any]:
    """Read a file, or a byte range of it, and return it base64-encoded.

    Args:
        path: Path to the file; resolved with ``resolve_path`` before opening.
        offset: Byte offset to start reading from. Values <= 0 read from the
            start of the file.
        length: Maximum number of bytes to read, or None to read through to
            the end of the file.

    Returns:
        ``{"success": True, "content_b64": <base64 text>}`` on success, or
        ``{"success": False, "error": <message>}`` if any step raises.
    """
    try:
        with open(resolve_path(path), 'rb') as f:
            # Only seek when asked to; offset 0 is already the open position.
            if offset > 0:
                f.seek(offset)
            content = f.read() if length is None else f.read(length)
        return {"success": True, "content_b64": base64.b64encode(content).decode('utf-8')}
    except Exception as e:
        # Failures are reported in the response dict rather than raised.
        return {"success": False, "error": str(e)}
async def get_file_size(self, path: str) -> Dict[str, Any]:
    """Report the size in bytes of the file at ``path``.

    Returns:
        ``{"success": True, "size": <st_size>}`` when the stat call works,
        otherwise ``{"success": False, "error": <message>}``.
    """
    try:
        byte_count = resolve_path(path).stat().st_size
    except Exception as e:
        return {"success": False, "error": str(e)}
    return {"success": True, "size": byte_count}

View File

@@ -172,6 +172,7 @@ async def websocket_endpoint(websocket: WebSocket):
"write_text": manager.file_handler.write_text,
"read_bytes": manager.file_handler.read_bytes,
"write_bytes": manager.file_handler.write_bytes,
"get_file_size": manager.file_handler.get_file_size,
"delete_file": manager.file_handler.delete_file,
"create_dir": manager.file_handler.create_dir,
"delete_dir": manager.file_handler.delete_dir,

View File

@@ -208,8 +208,14 @@ class BaseComputerInterface(ABC):
pass
@abstractmethod
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
    """Read file binary contents with optional seeking support.

    Args:
        path: Path to the file
        offset: Byte offset to start reading from (default: 0)
        length: Number of bytes to read (default: None for entire file)

    Returns:
        The bytes that were read.
    """
    pass
@abstractmethod
@@ -232,6 +238,11 @@ class BaseComputerInterface(ABC):
"""Delete directory."""
pass
@abstractmethod
async def get_file_size(self, path: str) -> int:
    """Return the size, in bytes, of the file at ``path``.

    Args:
        path: Location of the file to measure.
    """
@abstractmethod
async def run_command(self, command: str) -> CommandResult:
"""Run shell command and return structured result.

View File

@@ -588,13 +588,57 @@ class LinuxComputerInterface(BaseComputerInterface):
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to write file"))
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
    """Read a file's binary contents, optionally restricted to a byte range.

    Whole-file reads of files larger than 5 MB, and explicit ``length``
    requests larger than 5 MB, are fetched through ``_read_bytes_chunked``
    instead of a single ``read_bytes`` command.

    Args:
        path: Path to the file.
        offset: Byte offset to start reading from (default: 0).
        length: Number of bytes to read (default: None, meaning read through
            to the end of the file).

    Returns:
        The bytes obtained by decoding the ``content_b64`` payload(s).

    Raises:
        RuntimeError: If the size lookup or a read command reports failure.
    """
    chunk_threshold = 5 * 1024 * 1024  # 5MB threshold

    if length is None:
        # The size is needed to decide whether a single command is acceptable.
        file_size = await self.get_file_size(path)
        if file_size > chunk_threshold:
            return await self._read_bytes_chunked(path, offset, file_size - max(offset, 0))
    elif length > chunk_threshold:
        # An explicit oversized range must not go out as one command either;
        # clamp it to what the file holds so no requests run past the end.
        file_size = await self.get_file_size(path)
        available = file_size - max(offset, 0)
        return await self._read_bytes_chunked(path, offset, min(length, available))

    result = await self._send_command("read_bytes", {
        "path": path,
        "offset": offset,
        "length": length,
    })
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to read file"))
    return decode_base64_image(result.get("content_b64", ""))
async def get_file_size(self, path: str) -> int:
    """Ask the remote side for the size of ``path`` in bytes.

    Returns:
        The ``size`` field of the response, or 0 when the field is absent.

    Raises:
        RuntimeError: If the response does not report success.
    """
    response = await self._send_command("get_file_size", {"path": path})
    if response.get("success", False):
        return response.get("size", 0)
    raise RuntimeError(response.get("error", "Failed to get file size"))
async def _read_bytes_chunked(self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024) -> bytes:
    """Read a byte range through a sequence of bounded ``read_bytes`` commands.

    Args:
        path: Path to the file.
        offset: Byte offset of the first byte to read.
        total_length: Number of bytes to read; values <= 0 yield ``b""``.
        chunk_size: Upper bound on the bytes requested per command
            (default: 1024 * 1024).

    Returns:
        The received chunks concatenated in order. This can be shorter than
        ``total_length`` if a request comes back empty before the range is
        exhausted.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        RuntimeError: If any chunk request reports failure.
    """
    if chunk_size <= 0:
        # A non-positive chunk size would never shrink `remaining` below.
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    current_offset = offset
    remaining = total_length
    while remaining > 0:
        result = await self._send_command("read_bytes", {
            "path": path,
            "offset": current_offset,
            "length": min(chunk_size, remaining),
        })
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to read file chunk"))
        chunk_data = decode_base64_image(result.get("content_b64", ""))
        if not chunk_data:
            # Nothing came back, so further requests would be wasted round-trips.
            break
        chunks.append(chunk_data)
        # Advance by what was actually received so a short reply cannot
        # cause bytes to be skipped.
        received = len(chunk_data)
        current_offset += received
        remaining -= received
    return b''.join(chunks)
async def write_bytes(self, path: str, content: bytes) -> None:
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
if not result.get("success", False):

View File

@@ -595,13 +595,57 @@ class MacOSComputerInterface(BaseComputerInterface):
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to write file"))
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
    """Read a file's binary contents, optionally restricted to a byte range.

    Whole-file reads of files larger than 5 MB, and explicit ``length``
    requests larger than 5 MB, are fetched through ``_read_bytes_chunked``
    instead of a single ``read_bytes`` command.

    Args:
        path: Path to the file.
        offset: Byte offset to start reading from (default: 0).
        length: Number of bytes to read (default: None, meaning read through
            to the end of the file).

    Returns:
        The bytes obtained by decoding the ``content_b64`` payload(s).

    Raises:
        RuntimeError: If the size lookup or a read command reports failure.
    """
    chunk_threshold = 5 * 1024 * 1024  # 5MB threshold

    if length is None:
        # The size is needed to decide whether a single command is acceptable.
        file_size = await self.get_file_size(path)
        if file_size > chunk_threshold:
            return await self._read_bytes_chunked(path, offset, file_size - max(offset, 0))
    elif length > chunk_threshold:
        # An explicit oversized range must not go out as one command either;
        # clamp it to what the file holds so no requests run past the end.
        file_size = await self.get_file_size(path)
        available = file_size - max(offset, 0)
        return await self._read_bytes_chunked(path, offset, min(length, available))

    result = await self._send_command("read_bytes", {
        "path": path,
        "offset": offset,
        "length": length,
    })
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to read file"))
    return decode_base64_image(result.get("content_b64", ""))
async def get_file_size(self, path: str) -> int:
    """Ask the remote side for the size of ``path`` in bytes.

    Returns:
        The ``size`` field of the response, or 0 when the field is absent.

    Raises:
        RuntimeError: If the response does not report success.
    """
    response = await self._send_command("get_file_size", {"path": path})
    if response.get("success", False):
        return response.get("size", 0)
    raise RuntimeError(response.get("error", "Failed to get file size"))
async def _read_bytes_chunked(self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024) -> bytes:
    """Read a byte range through a sequence of bounded ``read_bytes`` commands.

    Args:
        path: Path to the file.
        offset: Byte offset of the first byte to read.
        total_length: Number of bytes to read; values <= 0 yield ``b""``.
        chunk_size: Upper bound on the bytes requested per command
            (default: 1024 * 1024).

    Returns:
        The received chunks concatenated in order. This can be shorter than
        ``total_length`` if a request comes back empty before the range is
        exhausted.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        RuntimeError: If any chunk request reports failure.
    """
    if chunk_size <= 0:
        # A non-positive chunk size would never shrink `remaining` below.
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    current_offset = offset
    remaining = total_length
    while remaining > 0:
        result = await self._send_command("read_bytes", {
            "path": path,
            "offset": current_offset,
            "length": min(chunk_size, remaining),
        })
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to read file chunk"))
        chunk_data = decode_base64_image(result.get("content_b64", ""))
        if not chunk_data:
            # Nothing came back, so further requests would be wasted round-trips.
            break
        chunks.append(chunk_data)
        # Advance by what was actually received so a short reply cannot
        # cause bytes to be skipped.
        received = len(chunk_data)
        current_offset += received
        remaining -= received
    return b''.join(chunks)
async def write_bytes(self, path: str, content: bytes) -> None:
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
if not result.get("success", False):

View File

@@ -587,13 +587,57 @@ class WindowsComputerInterface(BaseComputerInterface):
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to write file"))
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
    """Read a file's binary contents, optionally restricted to a byte range.

    Whole-file reads of files larger than 5 MB, and explicit ``length``
    requests larger than 5 MB, are fetched through ``_read_bytes_chunked``
    instead of a single ``read_bytes`` command.

    Args:
        path: Path to the file.
        offset: Byte offset to start reading from (default: 0).
        length: Number of bytes to read (default: None, meaning read through
            to the end of the file).

    Returns:
        The bytes obtained by decoding the ``content_b64`` payload(s).

    Raises:
        RuntimeError: If the size lookup or a read command reports failure.
    """
    chunk_threshold = 5 * 1024 * 1024  # 5MB threshold

    if length is None:
        # The size is needed to decide whether a single command is acceptable.
        file_size = await self.get_file_size(path)
        if file_size > chunk_threshold:
            return await self._read_bytes_chunked(path, offset, file_size - max(offset, 0))
    elif length > chunk_threshold:
        # An explicit oversized range must not go out as one command either;
        # clamp it to what the file holds so no requests run past the end.
        file_size = await self.get_file_size(path)
        available = file_size - max(offset, 0)
        return await self._read_bytes_chunked(path, offset, min(length, available))

    result = await self._send_command("read_bytes", {
        "path": path,
        "offset": offset,
        "length": length,
    })
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to read file"))
    return decode_base64_image(result.get("content_b64", ""))
async def get_file_size(self, path: str) -> int:
    """Ask the remote side for the size of ``path`` in bytes.

    Returns:
        The ``size`` field of the response, or 0 when the field is absent.

    Raises:
        RuntimeError: If the response does not report success.
    """
    response = await self._send_command("get_file_size", {"path": path})
    if response.get("success", False):
        return response.get("size", 0)
    raise RuntimeError(response.get("error", "Failed to get file size"))
async def _read_bytes_chunked(self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024) -> bytes:
    """Read a byte range through a sequence of bounded ``read_bytes`` commands.

    Args:
        path: Path to the file.
        offset: Byte offset of the first byte to read.
        total_length: Number of bytes to read; values <= 0 yield ``b""``.
        chunk_size: Upper bound on the bytes requested per command
            (default: 1024 * 1024).

    Returns:
        The received chunks concatenated in order. This can be shorter than
        ``total_length`` if a request comes back empty before the range is
        exhausted.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        RuntimeError: If any chunk request reports failure.
    """
    if chunk_size <= 0:
        # A non-positive chunk size would never shrink `remaining` below.
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    current_offset = offset
    remaining = total_length
    while remaining > 0:
        result = await self._send_command("read_bytes", {
            "path": path,
            "offset": current_offset,
            "length": min(chunk_size, remaining),
        })
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to read file chunk"))
        chunk_data = decode_base64_image(result.get("content_b64", ""))
        if not chunk_data:
            # Nothing came back, so further requests would be wasted round-trips.
            break
        chunks.append(chunk_data)
        # Advance by what was actually received so a short reply cannot
        # cause bytes to be skipped.
        received = len(chunk_data)
        current_offset += received
        remaining -= received
    return b''.join(chunks)
async def write_bytes(self, path: str, content: bytes) -> None:
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
if not result.get("success", False):