diff --git a/README.md b/README.md index c07df910..7655e888 100644 --- a/README.md +++ b/README.md @@ -249,7 +249,8 @@ For complete examples, see [computer_examples.py](./examples/computer_examples.p ```python # Shell Actions -await computer.interface.run_command(cmd) # Run shell command +result = await computer.interface.run_command(cmd) # Run shell command +# result.stdout, result.stderr, result.returncode # Mouse Actions await computer.interface.left_click(x, y) # Left click at coordinates diff --git a/libs/python/agent/agent/providers/anthropic/tools/bash.py b/libs/python/agent/agent/providers/anthropic/tools/bash.py index babbacfd..479e1127 100644 --- a/libs/python/agent/agent/providers/anthropic/tools/bash.py +++ b/libs/python/agent/agent/providers/anthropic/tools/bash.py @@ -50,8 +50,8 @@ class BashTool(BaseBashTool, BaseAnthropicTool): try: async with asyncio.timeout(self._timeout): - stdout, stderr = await self.computer.interface.run_command(command) - return CLIResult(output=stdout or "", error=stderr or "") + result = await self.computer.interface.run_command(command) + return CLIResult(output=result.stdout or "", error=result.stderr or "") except asyncio.TimeoutError as e: raise ToolError(f"Command timed out after {self._timeout} seconds") from e except Exception as e: diff --git a/libs/python/agent/agent/providers/anthropic/tools/edit.py b/libs/python/agent/agent/providers/anthropic/tools/edit.py index e4da1f85..1114b586 100644 --- a/libs/python/agent/agent/providers/anthropic/tools/edit.py +++ b/libs/python/agent/agent/providers/anthropic/tools/edit.py @@ -95,13 +95,13 @@ class EditTool(BaseEditTool, BaseAnthropicTool): result = await self.computer.interface.run_command( f'[ -e "{str(path)}" ] && echo "exists" || echo "not exists"' ) - exists = result[0].strip() == "exists" + exists = result.stdout.strip() == "exists" if exists: result = await self.computer.interface.run_command( f'[ -d "{str(path)}" ] && echo "dir" || echo "file"' ) - is_dir = result[0].strip() == "dir" + is_dir = result.stdout.strip() == "dir" else: is_dir = False @@ -126,7 +126,7 @@ class EditTool(BaseEditTool, BaseAnthropicTool): result = await self.computer.interface.run_command( f'[ -d "{str(path)}" ] && echo "dir" || echo "file"' ) - is_dir = result[0].strip() == "dir" + is_dir = result.stdout.strip() == "dir" if is_dir: if view_range: @@ -136,7 +136,7 @@ class EditTool(BaseEditTool, BaseAnthropicTool): # List directory contents using ls result = await self.computer.interface.run_command(f'ls -la "{str(path)}"') - contents = result[0] + contents = result.stdout if contents: stdout = f"Here's the files and directories in {path}:\n{contents}\n" else: @@ -272,9 +272,9 @@ class EditTool(BaseEditTool, BaseAnthropicTool): """Read the content of a file using cat command.""" try: result = await self.computer.interface.run_command(f'cat "{str(path)}"') - if result[1]: # If there's stderr output - raise ToolError(f"Error reading file: {result[1]}") - return result[0] + if result.stderr: # If there's stderr output + raise ToolError(f"Error reading file: {result.stderr}") + return result.stdout except Exception as e: raise ToolError(f"Failed to read {path}: {str(e)}") @@ -291,8 +291,8 @@ class EditTool(BaseEditTool, BaseAnthropicTool): {content} EOFCUA""" result = await self.computer.interface.run_command(cmd) - if result[1]: # If there's stderr output - raise ToolError(f"Error writing file: {result[1]}") + if result.stderr: # If there's stderr output + raise ToolError(f"Error writing file: {result.stderr}") except Exception as e: raise ToolError(f"Failed to write to {path}: {str(e)}") diff --git a/libs/python/agent/pyproject.toml b/libs/python/agent/pyproject.toml index a87d72b6..fd25a0e8 100644 --- a/libs/python/agent/pyproject.toml +++ b/libs/python/agent/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "pydantic>=2.6.4", "rich>=13.7.1", "python-dotenv>=1.0.1", - "cua-computer>=0.2.0,<0.3.0", + "cua-computer>=0.3.0,<0.4.0", "cua-core>=0.1.0,<0.2.0", "certifi>=2024.2.2" ] diff --git a/libs/python/computer/computer/interface/base.py b/libs/python/computer/computer/interface/base.py index 09cc46f2..a069e8c8 100644 --- a/libs/python/computer/computer/interface/base.py +++ b/libs/python/computer/computer/interface/base.py @@ -3,8 +3,7 @@ from abc import ABC, abstractmethod from typing import Optional, Dict, Any, Tuple, List from ..logger import Logger, LogLevel -from .models import MouseButton - +from .models import MouseButton, CommandResult class BaseComputerInterface(ABC): """Base class for computer control interfaces.""" @@ -234,8 +233,31 @@ class BaseComputerInterface(ABC): pass @abstractmethod - async def run_command(self, command: str) -> Tuple[str, str]: - """Run shell command.""" + async def run_command(self, command: str) -> CommandResult: + """Run shell command and return structured result. + + Executes a shell command using subprocess.run with shell=True and check=False. + The command is run in the target environment and captures both stdout and stderr. + + Args: + command (str): The shell command to execute + + Returns: + CommandResult: A structured result containing: + - stdout (str): Standard output from the command + - stderr (str): Standard error from the command + - returncode (int): Exit code from the command (0 indicates success) + + Raises: + RuntimeError: If the command execution fails at the system level + + Example: + result = await interface.run_command("ls -la") + if result.returncode == 0: + print(f"Output: {result.stdout}") + else: + print(f"Error: {result.stderr}, Exit code: {result.returncode}") + """ pass # Accessibility Actions diff --git a/libs/python/computer/computer/interface/linux.py b/libs/python/computer/computer/interface/linux.py index 23b542b0..b87118a2 100644 --- a/libs/python/computer/computer/interface/linux.py +++ b/libs/python/computer/computer/interface/linux.py @@ -9,8 +9,7 @@ import websockets from ..logger import Logger, LogLevel from .base import BaseComputerInterface from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image -from .models import Key, KeyType, MouseButton - +from .models import Key, KeyType, MouseButton, CommandResult class LinuxComputerInterface(BaseComputerInterface): """Interface for Linux.""" @@ -616,11 +615,15 @@ class LinuxComputerInterface(BaseComputerInterface): if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to delete directory")) - async def run_command(self, command: str) -> Tuple[str, str]: + async def run_command(self, command: str) -> CommandResult: result = await self._send_command("run_command", {"command": command}) if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to run command")) - return result.get("stdout", ""), result.get("stderr", "") + return CommandResult( + stdout=result.get("stdout", ""), + stderr=result.get("stderr", ""), + returncode=result.get("return_code", 0) + ) # Accessibility Actions async def get_accessibility_tree(self) -> Dict[str, Any]: diff --git a/libs/python/computer/computer/interface/macos.py b/libs/python/computer/computer/interface/macos.py index a8821d60..cd31e74e 100644 --- a/libs/python/computer/computer/interface/macos.py +++ b/libs/python/computer/computer/interface/macos.py @@ -9,8 +9,7 @@ import websockets from ..logger import Logger, LogLevel from .base import BaseComputerInterface from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image -from .models import Key, KeyType, MouseButton - +from .models import Key, KeyType, MouseButton, CommandResult class MacOSComputerInterface(BaseComputerInterface): """Interface for macOS.""" @@ -623,11 +622,15 @@ class MacOSComputerInterface(BaseComputerInterface): if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to delete directory")) - async def run_command(self, command: str) -> Tuple[str, str]: + async def run_command(self, command: str) -> CommandResult: result = await self._send_command("run_command", {"command": command}) if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to run command")) - return result.get("stdout", ""), result.get("stderr", "") + return CommandResult( + stdout=result.get("stdout", ""), + stderr=result.get("stderr", ""), + returncode=result.get("return_code", 0) + ) # Accessibility Actions async def get_accessibility_tree(self) -> Dict[str, Any]: diff --git a/libs/python/computer/computer/interface/models.py b/libs/python/computer/computer/interface/models.py index 515b5f2b..223ac321 100644 --- a/libs/python/computer/computer/interface/models.py +++ b/libs/python/computer/computer/interface/models.py @@ -1,5 +1,17 @@ from enum import Enum from typing import Dict, List, Any, TypedDict, Union, Literal +from dataclasses import dataclass + +@dataclass +class CommandResult: + stdout: str + stderr: str + returncode: int + + def __init__(self, stdout: str, stderr: str, returncode: int): + self.stdout = stdout + self.stderr = stderr + self.returncode = returncode # Navigation key literals NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'up', 'down'] diff --git a/libs/python/computer/computer/interface/windows.py b/libs/python/computer/computer/interface/windows.py index b88c9138..7f471e7a 100644 --- a/libs/python/computer/computer/interface/windows.py +++ b/libs/python/computer/computer/interface/windows.py @@ -9,8 +9,7 @@ import websockets from ..logger import Logger, LogLevel from .base import BaseComputerInterface from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image -from .models import Key, KeyType, MouseButton - +from .models import Key, KeyType, MouseButton, CommandResult class WindowsComputerInterface(BaseComputerInterface): """Interface for Windows.""" @@ -615,11 +614,15 @@ class WindowsComputerInterface(BaseComputerInterface): if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to delete directory")) - async def run_command(self, command: str) -> Tuple[str, str]: + async def run_command(self, command: str) -> CommandResult: result = await self._send_command("run_command", {"command": command}) if not result.get("success", False): raise RuntimeError(result.get("error", "Failed to run command")) - return result.get("stdout", ""), result.get("stderr", "") + return CommandResult( + stdout=result.get("stdout", ""), + stderr=result.get("stderr", ""), + returncode=result.get("return_code", 0) + ) # Accessibility Actions async def get_accessibility_tree(self) -> Dict[str, Any]: diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml index 4326eb98..2e564fa9 100644 --- a/libs/python/computer/pyproject.toml +++ b/libs/python/computer/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer" -version = "0.2.0" +version = "0.3.0" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ diff --git a/tests/files.py b/tests/files.py index 388b7656..236ef9e2 100644 --- a/tests/files.py +++ b/tests/files.py @@ -28,24 +28,24 @@ for path in pythonpath.split(":"): sys.path.insert(0, path) # Insert at beginning to prioritize print(f"Added to sys.path: {path}") -from computer.computer import Computer +from computer import Computer, VMProviderType @pytest.fixture(scope="session") async def computer(): """Shared Computer instance for all test cases.""" - # # Create a remote Linux computer with C/ua - # computer = Computer( - # os_type="linux", - # api_key=os.getenv("CUA_API_KEY"), - # name=str(os.getenv("CUA_CONTAINER_NAME")), - # provider_type=VMProviderType.CLOUD, - # ) + # Create a remote Linux computer with C/ua + computer = Computer( + os_type="linux", + api_key=os.getenv("CUA_API_KEY"), + name=str(os.getenv("CUA_CONTAINER_NAME")), + provider_type=VMProviderType.CLOUD, + ) # Create a local macOS computer with C/ua # computer = Computer() # Connect to host computer - computer = Computer(use_host_computer_server=True) + # computer = Computer(use_host_computer_server=True) try: await computer.run() diff --git a/tests/shell_bash.py b/tests/shell_bash.py new file mode 100644 index 00000000..af34ff0e --- /dev/null +++ b/tests/shell_bash.py @@ -0,0 +1,86 @@ +""" +Shell Command Tests (Bash) +Tests for the run_command method of the Computer interface using bash commands. +Required environment variables: +- CUA_API_KEY: API key for C/ua cloud provider +- CUA_CONTAINER_NAME: Name of the container to use +""" + +import os +import asyncio +import pytest +from pathlib import Path +import sys +import traceback + +# Load environment variables from .env file +project_root = Path(__file__).parent.parent +env_file = project_root / ".env" +print(f"Loading environment from: {env_file}") +from dotenv import load_dotenv + +load_dotenv(env_file) + +# Add paths to sys.path if needed +pythonpath = os.environ.get("PYTHONPATH", "") +for path in pythonpath.split(":"): + if path and path not in sys.path: + sys.path.insert(0, path) # Insert at beginning to prioritize + print(f"Added to sys.path: {path}") + +from computer import Computer, VMProviderType + +@pytest.fixture(scope="session") +async def computer(): + """Shared Computer instance for all test cases.""" + # Create a remote Linux computer with C/ua + computer = Computer( + os_type="linux", + api_key=os.getenv("CUA_API_KEY"), + name=str(os.getenv("CUA_CONTAINER_NAME")), + provider_type=VMProviderType.CLOUD, + ) + + try: + await computer.run() + yield computer + finally: + await computer.disconnect() + + +# Sample test cases +@pytest.mark.asyncio(loop_scope="session") +async def test_bash_echo_command(computer): + """Test basic echo command with bash.""" + result = await computer.interface.run_command("echo 'Hello World'") + + assert result.stdout.strip() == "Hello World" + assert result.stderr == "" + assert result.returncode == 0 + + +@pytest.mark.asyncio(loop_scope="session") +async def test_bash_ls_command(computer): + """Test ls command to list directory contents.""" + result = await computer.interface.run_command("ls -la /tmp") + + assert result.returncode == 0 + assert result.stderr == "" + assert "total" in result.stdout # ls -la typically starts with "total" + assert "." in result.stdout # Current directory entry + assert ".." in result.stdout # Parent directory entry + + +@pytest.mark.asyncio(loop_scope="session") +async def test_bash_command_with_error(computer): + """Test command that produces an error.""" + result = await computer.interface.run_command("ls /nonexistent_directory_12345") + + assert result.returncode != 0 + assert result.stdout == "" + assert "No such file or directory" in result.stderr or "cannot access" in result.stderr + + +if __name__ == "__main__": + # Run tests directly + pytest.main([__file__, "-v"]) diff --git a/tests/shell_cmd.py b/tests/shell_cmd.py new file mode 100644 index 00000000..a210e453 --- /dev/null +++ b/tests/shell_cmd.py @@ -0,0 +1,87 @@ +""" +Shell Command Tests (CMD) +Tests for the run_command method of the Computer interface using cmd.exe commands. +Required environment variables: +- CUA_API_KEY: API key for C/ua cloud provider +- CUA_CONTAINER_NAME: Name of the container to use +""" + +import os +import asyncio +import pytest +from pathlib import Path +import sys +import traceback + +# Load environment variables from .env file +project_root = Path(__file__).parent.parent +env_file = project_root / ".env" +print(f"Loading environment from: {env_file}") +from dotenv import load_dotenv + +load_dotenv(env_file) + +# Add paths to sys.path if needed +pythonpath = os.environ.get("PYTHONPATH", "") +for path in pythonpath.split(":"): + if path and path not in sys.path: + sys.path.insert(0, path) # Insert at beginning to prioritize + print(f"Added to sys.path: {path}") + +from computer import Computer, VMProviderType + +@pytest.fixture(scope="session") +async def computer(): + """Shared Computer instance for all test cases.""" + # Create a remote Windows computer with C/ua + computer = Computer( + os_type="windows", + api_key=os.getenv("CUA_API_KEY"), + name=str(os.getenv("CUA_CONTAINER_NAME")), + provider_type=VMProviderType.CLOUD, + ) + + try: + await computer.run() + yield computer + finally: + await computer.disconnect() + + +# Sample test cases +@pytest.mark.asyncio(loop_scope="session") +async def test_cmd_echo_command(computer): + """Test basic echo command with cmd.exe.""" + result = await computer.interface.run_command("echo Hello World") + + assert result.stdout.strip() == "Hello World" + assert result.stderr == "" + assert result.returncode == 0 + + +@pytest.mark.asyncio(loop_scope="session") +async def test_cmd_dir_command(computer): + """Test dir command to list directory contents.""" + result = await computer.interface.run_command("dir C:\\") + + assert result.returncode == 0 + assert result.stderr == "" + assert "Directory of C:\\" in result.stdout + assert "bytes" in result.stdout.lower() # dir typically shows file sizes + + +@pytest.mark.asyncio(loop_scope="session") +async def test_cmd_command_with_error(computer): + """Test command that produces an error.""" + result = await computer.interface.run_command("dir C:\\nonexistent_directory_12345") + + assert result.returncode != 0 + assert result.stdout == "" + assert ("File Not Found" in result.stderr or + "cannot find the path" in result.stderr or + "The system cannot find" in result.stderr) + + +if __name__ == "__main__": + # Run tests directly + pytest.main([__file__, "-v"]) diff --git a/tests/venv.py b/tests/venv.py index 7097c2fd..522a4727 100644 --- a/tests/venv.py +++ b/tests/venv.py @@ -29,24 +29,23 @@ for path in pythonpath.split(":"): sys.path.insert(0, path) # Insert at beginning to prioritize print(f"Added to sys.path: {path}") -from computer.computer import Computer -from computer.providers.base import VMProviderType +from computer import Computer, VMProviderType from computer.helpers import sandboxed, set_default_computer @pytest.fixture(scope="session") async def computer(): """Shared Computer instance for all test cases.""" - # # Create a remote Linux computer with C/ua - # computer = Computer( - # os_type="linux", - # api_key=os.getenv("CUA_API_KEY"), - # name=str(os.getenv("CUA_CONTAINER_NAME")), - # provider_type=VMProviderType.CLOUD, - # ) + # Create a remote Linux computer with C/ua + computer = Computer( + os_type="linux", + api_key=os.getenv("CUA_API_KEY"), + name=str(os.getenv("CUA_CONTAINER_NAME")), + provider_type=VMProviderType.CLOUD, + ) - # Create a local macOS computer with C/ua - computer = Computer() + # # Create a local macOS computer with C/ua + # computer = Computer() try: await computer.run()