From 0a14a5225ba8b1d62fc50568c6adf994bacbff34 Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 17:46:47 +0530 Subject: [PATCH 1/8] feat: add helper utilities to execute commands and get OS type --- .../computer_server/handlers/factory.py | 67 +++++++-------- .../computer-server/computer_server/main.py | 12 +-- .../computer_server/utils/__init__.py | 4 +- .../computer_server/utils/helpers.py | 81 +++++++++++++++++++ 4 files changed, 113 insertions(+), 51 deletions(-) create mode 100644 libs/python/computer-server/computer_server/utils/helpers.py diff --git a/libs/python/computer-server/computer_server/handlers/factory.py b/libs/python/computer-server/computer_server/handlers/factory.py index 56cfe6c2..26a0792d 100644 --- a/libs/python/computer-server/computer_server/handlers/factory.py +++ b/libs/python/computer-server/computer_server/handlers/factory.py @@ -1,9 +1,8 @@ -import platform -import subprocess -from typing import Tuple, Type +from typing import Tuple from computer_server.diorama.base import BaseDioramaHandler +from ..utils.helpers import get_current_os from .base import ( BaseAccessibilityHandler, BaseAutomationHandler, @@ -12,15 +11,23 @@ from .base import ( BaseWindowHandler, ) -# Conditionally import platform-specific handlers -system = platform.system().lower() -if system == "darwin": +OS_TYPE = get_current_os() + +if OS_TYPE == "android": + from .android import ( + AndroidAccessibilityHandler, + AndroidAutomationHandler, + AndroidDesktopHandler, + AndroidFileHandler, + AndroidWindowHandler, + ) +elif OS_TYPE == "darwin": from computer_server.diorama.macos import MacOSDioramaHandler from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler -elif system == "linux": +elif OS_TYPE == "linux": from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler -elif system == "windows": +elif OS_TYPE == "windows": from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler from .generic import 
GenericDesktopHandler, GenericFileHandler, GenericWindowHandler @@ -29,31 +36,6 @@ from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHan class HandlerFactory: """Factory for creating OS-specific handlers.""" - @staticmethod - def _get_current_os() -> str: - """Determine the current OS. - - Returns: - str: The OS type ('darwin' for macOS, 'linux' for Linux, or 'windows' for Windows) - - Raises: - RuntimeError: If unable to determine the current OS - """ - try: - # Use platform.system() as primary method - system = platform.system().lower() - if system in ["darwin", "linux", "windows"]: - return system - - # Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only) - result = subprocess.run(["uname", "-s"], capture_output=True, text=True) - if result.returncode == 0: - return result.stdout.strip().lower() - - raise RuntimeError(f"Unsupported OS: {system}") - except Exception as e: - raise RuntimeError(f"Failed to determine current OS: {str(e)}") - @staticmethod def create_handlers() -> Tuple[ BaseAccessibilityHandler, @@ -73,9 +55,16 @@ class HandlerFactory: NotImplementedError: If the current OS is not supported RuntimeError: If unable to determine the current OS """ - os_type = HandlerFactory._get_current_os() - - if os_type == "darwin": + if OS_TYPE == "android": + return ( + AndroidAccessibilityHandler(), + AndroidAutomationHandler(), + BaseDioramaHandler(), + AndroidFileHandler(), + AndroidDesktopHandler(), + AndroidWindowHandler(), + ) + elif OS_TYPE == "darwin": return ( MacOSAccessibilityHandler(), MacOSAutomationHandler(), @@ -84,7 +73,7 @@ class HandlerFactory: GenericDesktopHandler(), GenericWindowHandler(), ) - elif os_type == "linux": + elif OS_TYPE == "linux": return ( LinuxAccessibilityHandler(), LinuxAutomationHandler(), @@ -93,7 +82,7 @@ class HandlerFactory: GenericDesktopHandler(), GenericWindowHandler(), ) - elif os_type == "windows": + elif OS_TYPE == "windows": return ( 
WindowsAccessibilityHandler(), WindowsAutomationHandler(), @@ -103,4 +92,4 @@ class HandlerFactory: GenericWindowHandler(), ) else: - raise NotImplementedError(f"OS '{os_type}' is not supported") + raise NotImplementedError(f"OS '{OS_TYPE}' is not supported") diff --git a/libs/python/computer-server/computer_server/main.py b/libs/python/computer-server/computer_server/main.py index 3ae97ebc..f29ed94e 100644 --- a/libs/python/computer-server/computer_server/main.py +++ b/libs/python/computer-server/computer_server/main.py @@ -24,7 +24,7 @@ from fastapi import ( from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse -from .handlers.factory import HandlerFactory +from .handlers.factory import OS_TYPE, HandlerFactory # Authentication session TTL (in seconds). Override via env var CUA_AUTH_TTL_SECONDS. Default: 60s AUTH_SESSION_TTL_SECONDS: int = int(os.environ.get("CUA_AUTH_TTL_SECONDS", "60")) @@ -261,19 +261,11 @@ auth_manager = AuthenticationManager() @app.get("/status") async def status(): - sys = platform.system().lower() - # get os type - if "darwin" in sys or sys == "macos" or sys == "mac": - os_type = "macos" - elif "windows" in sys: - os_type = "windows" - else: - os_type = "linux" # get computer-server features features = [] if HAS_AGENT: features.append("agent") - return {"status": "ok", "os_type": os_type, "features": features} + return {"status": "ok", "os_type": OS_TYPE, "features": features} @app.websocket("/ws", name="websocket_endpoint") diff --git a/libs/python/computer-server/computer_server/utils/__init__.py b/libs/python/computer-server/computer_server/utils/__init__.py index 995e1f60..3cf2481d 100644 --- a/libs/python/computer-server/computer_server/utils/__init__.py +++ b/libs/python/computer-server/computer_server/utils/__init__.py @@ -1,3 +1,3 @@ -from . import wallpaper +from . 
import helpers, wallpaper -__all__ = ["wallpaper"] +__all__ = ["helpers", "wallpaper"] diff --git a/libs/python/computer-server/computer_server/utils/helpers.py b/libs/python/computer-server/computer_server/utils/helpers.py new file mode 100644 index 00000000..64c80a0e --- /dev/null +++ b/libs/python/computer-server/computer_server/utils/helpers.py @@ -0,0 +1,81 @@ +import asyncio +import os +import platform +import subprocess +from typing import overload + + +def get_current_os() -> str: + """Determine the current OS. + + Returns: + str: The OS type ('android', 'darwin' for macOS, 'linux' for Linux, or 'windows' for Windows) + + Raises: + RuntimeError: If unable to determine the current OS + """ + try: + if os.environ.get("IS_CUA_ANDROID") == "true": + # Verify emulator is actually running by checking adb devices + try: + result = subprocess.run( + ["adb", "devices"], capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0 and "emulator-5554" in result.stdout: + return "android" + else: + raise RuntimeError( + "IS_CUA_ANDROID is set but no emulator found. " + "Ensure Android emulator is running and accessible via adb." + ) + except subprocess.TimeoutExpired: + raise RuntimeError( + "IS_CUA_ANDROID is set but adb command timed out. " + "Emulator may be starting up or unresponsive." + ) + + system = platform.system().lower() + if system in ["darwin", "linux", "windows"]: + return system + + # Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only) + result = subprocess.run(["uname", "-s"], capture_output=True, text=True) + if result.returncode == 0: + return result.stdout.strip().lower() + + raise RuntimeError(f"Unsupported OS: {system}") + except Exception as e: + raise RuntimeError(f"Failed to determine current OS: {str(e)}") + + +class CommandExecuctor: + def __init__(self, *base_cmd: str) -> None: + """Initialize with a base command. + + Args: + base_cmd: The base command and its initial arguments. 
+ """ + self.__base_cmd = list(base_cmd) + + @overload + async def run(self, *args: str, timeout: int = 10) -> tuple[bool, bytes]: ... + @overload + async def run(self, *args: str, decode: bool = True, timeout: int = 10) -> tuple[bool, str]: ... + + async def run( + self, *args: str, decode: bool = False, timeout: int = 10 + ) -> tuple[bool, bytes | str]: + cmd = self.__base_cmd + list(args) + try: + result = await asyncio.create_subprocess_exec( + *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(result.communicate(), timeout=timeout) + output = stdout or stderr + if decode: + output = output.decode("utf-8") + return result.returncode == 0, output + except asyncio.TimeoutError: + return False, f"Command timed out after {timeout}s".encode("utf-8") + except Exception as e: + return False, str(e).encode("utf-8") From 85f90e8ea73e9282e74137dac5b1af427aacb8c0 Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 17:48:05 +0530 Subject: [PATCH 2/8] feat: add android handlers --- .../computer_server/handlers/android.py | 744 ++++++++++++++++++ 1 file changed, 744 insertions(+) create mode 100644 libs/python/computer-server/computer_server/handlers/android.py diff --git a/libs/python/computer-server/computer_server/handlers/android.py b/libs/python/computer-server/computer_server/handlers/android.py new file mode 100644 index 00000000..f6d0b201 --- /dev/null +++ b/libs/python/computer-server/computer_server/handlers/android.py @@ -0,0 +1,744 @@ +import asyncio +import base64 +from typing import Any, Dict, List, Optional, Tuple + +from ..utils.helpers import CommandExecuctor +from .base import ( + BaseAccessibilityHandler, + BaseAutomationHandler, + BaseDesktopHandler, + BaseFileHandler, + BaseWindowHandler, +) + +# Map common key names to Android keycodes +ANDROID_KEY_MAP = { + "return": "66", + "enter": "66", + "backspace": "67", + "delete": "67", + "tab": "61", + "escape": "111", 
+ "esc": "111", + "home": "3", + "back": "4", + "space": "62", + "up": "19", + "down": "20", + "left": "21", + "right": "22", +} + +adb_exec = CommandExecuctor("adb", "-s", "emulator-5554") + + +class AndroidAccessibilityHandler(BaseAccessibilityHandler): + """Android accessibility handler using UI Automator.""" + + async def get_accessibility_tree(self) -> Dict[str, Any]: + """Get the accessibility tree using uiautomator dump.""" + raise NotImplementedError("get_accessibility_tree not yet implemented for Android") + + async def find_element( + self, role: Optional[str] = None, title: Optional[str] = None, value: Optional[str] = None + ) -> Dict[str, Any]: + """Find an element in the UI hierarchy.""" + raise NotImplementedError("find_element not yet implemented for Android") + + +class AndroidAutomationHandler(BaseAutomationHandler): + """Android automation handler using ADB input commands.""" + + # Mouse Actions + async def mouse_down( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: + """Simulate mouse down (touch down) at position. + Note: Android doesn't support separate touch down/up via ADB. + This is a simulated implementation.""" + if x is None or y is None: + raise ValueError("x and y coordinates are required for mouse_down on Android") + # Android doesn't support separate touch down/up through ADB + # We simulate by doing a very short tap + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(x), + str(y), + str(x), + str(y), + "100", + decode=True, + ) + if success: + return {} + else: + raise RuntimeError(f"Mouse down failed: {output}") + + async def mouse_up( + self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left" + ) -> Dict[str, Any]: + """Simulate mouse up (touch up) at position. + Note: Android doesn't support separate touch down/up via ADB. 
+ This is a simulated implementation.""" + # Android doesn't support separate touch down/up through ADB + # This is essentially a no-op as mouse_down already completes the touch + return {} + + async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + """Perform a tap at the specified position.""" + if x is None or y is None: + raise ValueError("x and y coordinates are required for left_click on Android") + + success, output = await adb_exec.run("shell", "input", "tap", str(x), str(y), decode=True) + if success: + return {} + else: + raise RuntimeError(f"Tap failed: {output}") + + async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]: + """Simulate right click (long press) at position.""" + if x is None or y is None: + raise ValueError("x and y coordinates are required for right_click on Android") + + # Long press: swipe with long duration simulates touch and hold + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(x), + str(y), + str(x), + str(y), + "1000", + decode=True, + ) + if success: + return {} + else: + raise RuntimeError(f"Long press failed: {output}") + + async def double_click( + self, x: Optional[int] = None, y: Optional[int] = None + ) -> Dict[str, Any]: + """Perform a double tap at the specified position.""" + if x is None or y is None: + raise ValueError("x and y coordinates are required for double_click on Android") + + # Perform two taps in quick succession + for _ in range(2): + success, output = await adb_exec.run( + "shell", "input", "tap", str(x), str(y), decode=True + ) + if not success: + raise RuntimeError(f"Double tap failed: {output}") + await asyncio.sleep(0.1) # Short delay between taps + + return {} + + async def move_cursor(self, x: int, y: int) -> Dict[str, Any]: + """Move cursor - not supported on touch devices.""" + raise NotImplementedError("move_cursor not supported on Android (touch-based interface)") + + async def 
drag_to( + self, x: int, y: int, button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: + """Drag from current position to target coordinates. + Note: Android doesn't track cursor position. This requires the last tap position.""" + # Since Android doesn't track cursor position, we can't implement drag_to properly + # without knowing the start position. Use drag() with explicit path instead. + raise NotImplementedError( + "drag_to not well supported on Android (no cursor tracking). " + "Use drag(path) with explicit start and end coordinates instead." + ) + + async def drag( + self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 + ) -> Dict[str, Any]: + """Drag along a path of coordinates.""" + if len(path) < 2: + raise ValueError("Path must contain at least 2 coordinates for drag") + + # Use first and last points for swipe gesture + start_x, start_y = path[0] + end_x, end_y = path[-1] + + # Convert duration to milliseconds + duration_ms = int(duration * 1000) + + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(start_x), + str(start_y), + str(end_x), + str(end_y), + str(duration_ms), + ) + if success: + return {} + else: + raise RuntimeError(f"Drag failed: {output}") + + # Keyboard Actions + async def key_down(self, key: str) -> Dict[str, Any]: + """Press and hold key - limited support on Android. + Note: Android doesn't support separate key down/up via ADB.""" + # Android doesn't support key hold through ADB input + # We simulate by sending the keyevent once + return await self.press_key(key) + + async def key_up(self, key: str) -> Dict[str, Any]: + """Release key - limited support on Android. 
+ Note: Android doesn't support separate key down/up via ADB.""" + # Android doesn't support separate key up through ADB + # This is essentially a no-op + return {} + + async def type_text(self, text: str) -> Dict[str, Any]: + """Type text using Android input method.""" + # Escape special characters for ADB shell + # Replace spaces with %s (Android's escape for space) + escaped_text = text.replace(" ", "%s").replace("'", "\\'").replace('"', '\\"') + + success, output = await adb_exec.run("shell", "input", "text", escaped_text, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Type text failed: {output}") + + async def press_key(self, key: str) -> Dict[str, Any]: + """Press a key using keyevent.""" + keycode = ANDROID_KEY_MAP.get(key.lower(), key) + success, output = await adb_exec.run("shell", "input", "keyevent", keycode, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Press key failed: {output}") + + async def hotkey(self, keys: List[str]) -> Dict[str, Any]: + """Press key combination - sends keys in sequence on Android.""" + # Android doesn't support simultaneous key presses via ADB + # We send keys sequentially + for key in keys: + await self.press_key(key) + await asyncio.sleep(0.05) # Small delay between keys + return {} + + # Scrolling Actions + async def scroll(self, x: int, y: int) -> Dict[str, Any]: + """Scroll by x and y amounts.""" + # Get screen size to calculate swipe positions + screen_size = await self.get_screen_size() + width, height = screen_size["width"], screen_size["height"] + + # Use center of screen as starting point + center_x, center_y = width // 2, height // 2 + + # Calculate end points (negative y means scroll down, positive means scroll up) + end_x = center_x + x + end_y = center_y - y # Inverted because swipe up scrolls content down + + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(center_x), + str(center_y), + str(end_x), + str(end_y), + "300", + decode=True, + ) 
+ if success: + return {} + else: + raise RuntimeError(f"Scroll failed: {output}") + + async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]: + """Scroll down by specified number of clicks.""" + # Get screen size + screen_size = await self.get_screen_size() + width, height = screen_size["width"], screen_size["height"] + + # Swipe up to scroll content down + center_x = width // 2 + start_y = int(height * 0.7) + end_y = int(height * 0.3) + + for _ in range(clicks): + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(center_x), + str(start_y), + str(center_x), + str(end_y), + "300", + decode=True, + ) + if not success: + raise RuntimeError(f"Scroll down failed: {output}") + await asyncio.sleep(0.1) # Small delay between scrolls + + return {} + + async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]: + """Scroll up by specified number of clicks.""" + # Get screen size + screen_size = await self.get_screen_size() + width, height = screen_size["width"], screen_size["height"] + + # Swipe down to scroll content up + center_x = width // 2 + start_y = int(height * 0.3) + end_y = int(height * 0.7) + + for _ in range(clicks): + success, output = await adb_exec.run( + "shell", + "input", + "swipe", + str(center_x), + str(start_y), + str(center_x), + str(end_y), + "300", + decode=True, + ) + if not success: + raise RuntimeError(f"Scroll up failed: {output}") + await asyncio.sleep(0.1) # Small delay between scrolls + + return {} + + # Screen Actions + async def screenshot(self) -> Dict[str, Any]: + """Take a screenshot and return base64 encoded image.""" + success, output = await adb_exec.run("shell", "screencap", "-p") + if success and output: + image_b64 = base64.b64encode(output).decode("utf-8") + return {"image_data": image_b64} + else: + raise RuntimeError(f"Screenshot failed: {output.decode('utf-8')}") + + async def get_screen_size(self) -> Dict[str, Any]: + """Get the screen size of the Android device.""" + success, output = await 
adb_exec.run("shell", "wm", "size", decode=True) + if success and "x" in output: + # Parse "Physical size: 1080x1920" + size_str = output.split(":")[-1].strip() + width, height = map(int, size_str.split("x")) + return {"width": width, "height": height} + else: + raise RuntimeError(f"Failed to get screen size: {output}") + + async def get_cursor_position(self) -> Dict[str, Any]: + """Get cursor position - not supported on touch devices.""" + raise NotImplementedError( + "get_cursor_position not supported on Android (touch-based interface)" + ) + + # Clipboard Actions + async def copy_to_clipboard(self) -> Dict[str, Any]: + """Get clipboard content.""" + # Android 10+ supports clipboard via cmd + success, output = await adb_exec.run("shell", "cmd", "clipboard", "get-text", decode=True) + if success: + return {"text": output.strip()} + else: + raise RuntimeError(f"Failed to get clipboard: {output}") + + async def set_clipboard(self, text: str) -> Dict[str, Any]: + """Set clipboard content.""" + # Android 10+ supports clipboard via cmd + success, output = await adb_exec.run( + "shell", "cmd", "clipboard", "set-text", text, decode=True + ) + if success: + return {} + else: + raise RuntimeError(f"Failed to set clipboard: {output}") + + # Other + async def run_command(self, command: str) -> Dict[str, Any]: + """Run a shell command on Android device.""" + success, output = await adb_exec.run("shell", command, decode=True) + return {"output": output, "success": success} + + +class AndroidFileHandler(BaseFileHandler): + """Android file handler using ADB shell commands.""" + + async def file_exists(self, path: str) -> Dict[str, Any]: + """Check if a file exists.""" + success, output = await adb_exec.run( + "shell", f"test -f '{path}' && echo 'yes' || echo 'no'", decode=True + ) + exists = success and output.strip() == "yes" + return {"exists": exists} + + async def directory_exists(self, path: str) -> Dict[str, Any]: + """Check if a directory exists.""" + success, output = 
await adb_exec.run( + "shell", f"test -d '{path}' && echo 'yes' || echo 'no'", decode=True + ) + exists = success and output.strip() == "yes" + return {"exists": exists} + + async def list_dir(self, path: str) -> Dict[str, Any]: + """List directory contents.""" + success, output = await adb_exec.run("shell", "ls", "-la", path, decode=True) + if success: + # Parse ls -la output + lines = output.strip().split("\n") + entries = [] + for line in lines[1:]: # Skip "total" line + if line: + parts = line.split() + if len(parts) >= 9: + name = " ".join(parts[8:]) + if name not in [".", ".."]: + entries.append( + { + "name": name, + "is_dir": parts[0].startswith("d"), + "size": int(parts[4]) if parts[4].isdigit() else 0, + } + ) + return {"entries": entries} + else: + raise RuntimeError(f"Failed to list directory: {output}") + + async def read_text(self, path: str) -> Dict[str, Any]: + """Read text file contents.""" + success, output = await adb_exec.run("shell", "cat", path, decode=True) + if success: + return {"content": output} + else: + raise RuntimeError(f"Failed to read file: {output}") + + async def write_text(self, path: str, content: str) -> Dict[str, Any]: + """Write text to file.""" + # Escape single quotes in content + escaped_content = content.replace("'", "'\"'\"'") + success, output = await adb_exec.run( + "shell", f"printf '%s' '{escaped_content}' > '{path}'", decode=True + ) + if success: + return {} + else: + raise RuntimeError(f"Failed to write file: {output}") + + async def write_bytes(self, path: str, content_b64: str) -> Dict[str, Any]: + """Write binary content to file.""" + # Decode base64 and write to temp file, then push to device + import os + import tempfile + + content_bytes = base64.b64decode(content_b64) + + # Create temp file + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(content_bytes) + tmp_path = tmp.name + + try: + # Push file to device + success, output = await adb_exec.run("push", tmp_path, path, decode=True) + if 
success: + return {} + else: + raise RuntimeError(f"Failed to write bytes: {output}") + finally: + os.unlink(tmp_path) + + async def delete_file(self, path: str) -> Dict[str, Any]: + """Delete a file.""" + success, output = await adb_exec.run("shell", "rm", "-f", path, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to delete file: {output}") + + async def create_dir(self, path: str) -> Dict[str, Any]: + """Create a directory.""" + success, output = await adb_exec.run("shell", "mkdir", "-p", path, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to create directory: {output}") + + async def delete_dir(self, path: str) -> Dict[str, Any]: + """Delete a directory.""" + success, output = await adb_exec.run("shell", "rm", "-rf", path, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to delete directory: {output}") + + async def read_bytes( + self, path: str, offset: int = 0, length: Optional[int] = None + ) -> Dict[str, Any]: + """Read binary file contents.""" + # Pull file from device and read bytes + import os + import tempfile + + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp_path = tmp.name + + try: + # Pull file from device + success, output = await adb_exec.run("pull", path, tmp_path, decode=True) + if not success: + raise RuntimeError(f"Failed to pull file: {output}") + + # Read bytes from temp file + with open(tmp_path, "rb") as f: + f.seek(offset) + if length is not None: + content_bytes = f.read(length) + else: + content_bytes = f.read() + + content_b64 = base64.b64encode(content_bytes).decode("utf-8") + return {"content": content_b64} + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + async def get_file_size(self, path: str) -> Dict[str, Any]: + """Get file size in bytes.""" + success, output = await adb_exec.run("shell", f"wc -c < '{path}'", decode=True) + if success: + try: + size = int(output.strip()) + return {"size": size} + except 
ValueError: + raise RuntimeError(f"Failed to parse file size: {output}") + else: + raise RuntimeError(f"Failed to get file size: {output}") + + +class AndroidWindowHandler(BaseWindowHandler): + """Android window/app handler using activity manager.""" + + async def open(self, target: str) -> Dict[str, Any]: + """Open a URL or file with default app.""" + # Use ACTION_VIEW intent to open URL or file + success, output = await adb_exec.run( + "shell", "am", "start", "-a", "android.intent.action.VIEW", "-d", target, decode=True + ) + if success: + return {} + else: + raise RuntimeError(f"Failed to open target: {output}") + + async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]: + """Launch an Android app by package name or activity.""" + # If app contains '/', it's package/activity, otherwise just package + if "/" in app: + cmd = ["shell", "am", "start", "-n", app] + else: + # Launch main activity for package + cmd = ["shell", "monkey", "-p", app, "-c", "android.intent.category.LAUNCHER", "1"] + + if args: + # Add extras if provided + for arg in args: + if "=" in arg: + key, value = arg.split("=", 1) + cmd.extend(["--es", key, value]) + + success, output = await adb_exec.run(*cmd, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to launch app: {output}") + + async def get_current_window_id(self) -> Dict[str, Any]: + """Get the currently focused activity.""" + import logging + + logger = logging.getLogger(__name__) + + success, output = await adb_exec.run("shell", "dumpsys", "window", decode=True) + if success: + # Parse mCurrentFocus line + for line in output.split("\n"): + if "mCurrentFocus" in line: + logger.info(f"Found mCurrentFocus line: {line}") + # Example: mCurrentFocus=Window{abc123 u0 com.android.launcher3/com.android.launcher3.Launcher} + import re + + match = re.search(r"([a-zA-Z0-9._]+/[a-zA-Z0-9._$]+)\}", line) + if match: + window_id = match.group(1) + logger.info(f"Extracted window_id: 
{window_id}") + return {"window_id": window_id} + else: + logger.warning(f"Regex did not match line: {line}") + logger.warning("No mCurrentFocus line found in dumpsys output") + return {"window_id": "unknown"} + else: + raise RuntimeError(f"Failed to get current window: {output}") + + async def get_application_windows(self, app: str) -> Dict[str, Any]: + """Get activities for an app.""" + # List all activities in the package + success, output = await adb_exec.run("shell", "dumpsys", "package", app, decode=True) + if success: + activities = [] + in_activity_section = False + for line in output.split("\n"): + if "Activity Resolver Table:" in line: + in_activity_section = True + elif in_activity_section and app in line: + import re + + match = re.search(r"([a-z0-9.]+/[a-z0-9.]+)", line) + if match: + activities.append(match.group(1)) + + return {"windows": activities} + else: + raise RuntimeError(f"Failed to get application windows: {output}") + + async def get_window_name(self, window_id: str) -> Dict[str, Any]: + """Get the name of an activity.""" + # window_id is in format package/activity + if "/" in str(window_id): + activity_name = str(window_id).split("/")[-1] + return {"name": activity_name} + else: + return {"name": str(window_id)} + + async def get_window_size(self, window_id: str | int) -> Dict[str, Any]: + """Get window size (returns screen size on Android).""" + # Android apps are typically fullscreen, return screen size + success, output = await adb_exec.run("shell", "wm", "size", decode=True) + if success and "x" in output: + size_str = output.split(":")[-1].strip() + width, height = map(int, size_str.split("x")) + return {"width": width, "height": height} + else: + raise RuntimeError(f"Failed to get window size: {output}") + + async def activate_window(self, window_id: str | int) -> Dict[str, Any]: + """Bring an app to foreground.""" + # window_id should be package/activity format + window_str = str(window_id) + success, output = await 
adb_exec.run("shell", "am", "start", "-n", window_str, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to activate window: {output}") + + async def close_window(self, window_id: str | int) -> Dict[str, Any]: + """Force stop an app.""" + # Extract package name from window_id (package/activity format) + window_str = str(window_id) + package = window_str.split("/")[0] if "/" in window_str else window_str + + success, output = await adb_exec.run("shell", "am", "force-stop", package, decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to close window: {output}") + + async def get_window_position(self, window_id: str | int) -> Dict[str, Any]: + """Get window position - not supported on Android.""" + raise NotImplementedError( + "get_window_position not supported on Android (no windowing system)" + ) + + async def set_window_size( + self, window_id: str | int, width: int, height: int + ) -> Dict[str, Any]: + """Set window size - not supported on Android.""" + raise NotImplementedError("set_window_size not supported on Android (apps are fullscreen)") + + async def set_window_position(self, window_id: str | int, x: int, y: int) -> Dict[str, Any]: + """Set window position - not supported on Android.""" + raise NotImplementedError( + "set_window_position not supported on Android (no windowing system)" + ) + + async def maximize_window(self, window_id: str | int) -> Dict[str, Any]: + """Maximize window - not supported on Android.""" + raise NotImplementedError( + "maximize_window not supported on Android (apps always fullscreen)" + ) + + async def minimize_window(self, window_id: str | int) -> Dict[str, Any]: + """Minimize window (send to background).""" + # Press HOME key to minimize current app + success, output = await adb_exec.run("shell", "input", "keyevent", "3", decode=True) + if success: + return {} + else: + raise RuntimeError(f"Failed to minimize window: {output}") + + +class 
AndroidDesktopHandler(BaseDesktopHandler): + """Android desktop handler - minimal implementation.""" + + async def get_desktop_environment(self) -> Dict[str, Any]: + """Get desktop environment name.""" + return {"desktop_environment": "android"} + + async def set_wallpaper(self, path: str): + """ + Set the wallpaper using our custom helper APK. + + Args: + path: Absolute path to image on device (e.g. /sdcard/Pictures/wall.jpg) + """ + # Copy file to /data/local/tmp where all apps can read it + # (/sdcard uses FUSE with restrictive permissions that chmod can't change) + import os + + temp_path = f"/data/local/tmp/wallpaper_{os.path.basename(path)}" + + # Copy to temp location with world-readable permissions + copy_success, _ = await adb_exec.run("shell", "cp", path, temp_path, decode=True) + if not copy_success: + raise RuntimeError(f"Failed to copy file to temp location: {path}") + + # Make temp file readable + await adb_exec.run("shell", "chmod", "644", temp_path, decode=True) + + package = "com.example.cua.wallpaper" + component = f"{package}/.SetWallpaperActivity" + + success, output = await adb_exec.run( + "shell", + "am", + "start", + "-n", + component, + "-a", + "com.example.cua.wallpaper.SET_WALLPAPER", + "-e", + "path", + temp_path, + "-e", + "target", + "home", + decode=True, + ) + + if success: + # Give it a moment to set the wallpaper + await asyncio.sleep(1) + # Clean up temp file + await adb_exec.run("shell", "rm", temp_path, decode=True) + return {} + + # Clean up on failure too + await adb_exec.run("shell", "rm", temp_path, decode=True) + raise RuntimeError(f"Failed to set wallpaper: {output}") From 6e13ce5cf18d064f52a0f90387db6eb7e06d7c2a Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 17:49:07 +0530 Subject: [PATCH 3/8] fix: adapt linux automation handler to base automation handler structure --- .../computer_server/handlers/linux.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git 
a/libs/python/computer-server/computer_server/handlers/linux.py b/libs/python/computer-server/computer_server/handlers/linux.py index f536440c..bdafb156 100644 --- a/libs/python/computer-server/computer_server/handlers/linux.py +++ b/libs/python/computer-server/computer_server/handlers/linux.py @@ -279,28 +279,6 @@ class LinuxAutomationHandler(BaseAutomationHandler): return {"success": False, "error": str(e)} async def drag( - self, start_x: int, start_y: int, end_x: int, end_y: int, button: str = "left" - ) -> Dict[str, Any]: - """Drag from start coordinates to end coordinates. - - Args: - start_x: The starting x coordinate. - start_y: The starting y coordinate. - end_x: The ending x coordinate. - end_y: The ending y coordinate. - button: The mouse button to use for dragging. - - Returns: - Dict[str, Any]: A dictionary with success status and error message if failed. - """ - try: - pyautogui.moveTo(start_x, start_y) - pyautogui.dragTo(end_x, end_y, duration=0.5, button=button) - return {"success": True} - except Exception as e: - return {"success": False, "error": str(e)} - - async def drag_path( self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5 ) -> Dict[str, Any]: """Drag along a path defined by a list of coordinates. 
From b8f52a826cc659308e926882dd88907aabc39e28 Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 18:00:02 +0530 Subject: [PATCH 4/8] feat: add wallpaper manager APK source --- .../wallpaper-manager/.gitignore | 15 ++++ .../wallpaper-manager/app/build.gradle | 33 ++++++++ .../app/src/main/AndroidManifest.xml | 33 ++++++++ .../cua/wallpaper/SetWallpaperActivity.java | 80 +++++++++++++++++++ .../wallpaper-manager/build.gradle | 20 +++++ .../gradle/wrapper/gradle-wrapper.properties | 6 ++ libs/android-docker/wallpaper-manager/gradlew | 75 +++++++++++++++++ .../wallpaper-manager/settings.gradle | 2 + 8 files changed, 264 insertions(+) create mode 100644 libs/android-docker/wallpaper-manager/.gitignore create mode 100644 libs/android-docker/wallpaper-manager/app/build.gradle create mode 100644 libs/android-docker/wallpaper-manager/app/src/main/AndroidManifest.xml create mode 100644 libs/android-docker/wallpaper-manager/app/src/main/java/com/example/cua/wallpaper/SetWallpaperActivity.java create mode 100644 libs/android-docker/wallpaper-manager/build.gradle create mode 100644 libs/android-docker/wallpaper-manager/gradle/wrapper/gradle-wrapper.properties create mode 100644 libs/android-docker/wallpaper-manager/gradlew create mode 100644 libs/android-docker/wallpaper-manager/settings.gradle diff --git a/libs/android-docker/wallpaper-manager/.gitignore b/libs/android-docker/wallpaper-manager/.gitignore new file mode 100644 index 00000000..2b3ae9ee --- /dev/null +++ b/libs/android-docker/wallpaper-manager/.gitignore @@ -0,0 +1,15 @@ +# Gradle +.gradle/ +build/ +gradle/wrapper/gradle-wrapper.jar + +# Android Studio +.idea/ +*.iml +local.properties + +# Build outputs +*.apk +*.aab +*.dex +*.class diff --git a/libs/android-docker/wallpaper-manager/app/build.gradle b/libs/android-docker/wallpaper-manager/app/build.gradle new file mode 100644 index 00000000..d0f67d63 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/app/build.gradle @@ -0,0 
+1,33 @@ +apply plugin: 'com.android.application' + +android { + compileSdkVersion 30 + + defaultConfig { + applicationId "com.example.cua.wallpaper" + minSdkVersion 21 + targetSdkVersion 30 + versionCode 1 + versionName "1.0" + } + + buildTypes { + debug { + debuggable true + } + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), + 'proguard-rules.pro' + } + } + + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +dependencies { + // No special dependencies needed +} diff --git a/libs/android-docker/wallpaper-manager/app/src/main/AndroidManifest.xml b/libs/android-docker/wallpaper-manager/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000..a2c7c981 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/app/src/main/AndroidManifest.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/android-docker/wallpaper-manager/app/src/main/java/com/example/cua/wallpaper/SetWallpaperActivity.java b/libs/android-docker/wallpaper-manager/app/src/main/java/com/example/cua/wallpaper/SetWallpaperActivity.java new file mode 100644 index 00000000..bfe00319 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/app/src/main/java/com/example/cua/wallpaper/SetWallpaperActivity.java @@ -0,0 +1,80 @@ +package com.example.cua.wallpaper; + +import android.app.Activity; +import android.app.WallpaperManager; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.os.Build; +import android.os.Bundle; +import android.util.Log; + +import java.io.File; + +public class SetWallpaperActivity extends Activity { + + private static final String TAG = "CuaWallpaperManager"; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + // Expected extras: + // "path" -> absolute path to image on device (e.g. 
/sdcard/Pictures/wall.jpg) + // "target" -> optional: "home", "lock", or "both" (default: "home") + String path = getIntent().getStringExtra("path"); + String target = getIntent().getStringExtra("target"); + if (target == null) { + target = "home"; + } + + if (path == null || path.trim().isEmpty()) { + Log.e(TAG, "No path provided"); + finish(); + return; + } + + try { + File file = new File(path); + if (!file.exists()) { + Log.e(TAG, "File does not exist: " + path); + finish(); + return; + } + + Bitmap bitmap = BitmapFactory.decodeFile(file.getAbsolutePath()); + if (bitmap == null) { + Log.e(TAG, "Failed to decode image at: " + path); + finish(); + return; + } + + WallpaperManager wm = WallpaperManager.getInstance(this); + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + int which; + switch (target.toLowerCase()) { + case "lock": + which = WallpaperManager.FLAG_LOCK; + break; + case "both": + which = WallpaperManager.FLAG_SYSTEM | WallpaperManager.FLAG_LOCK; + break; + case "home": + default: + which = WallpaperManager.FLAG_SYSTEM; + break; + } + wm.setBitmap(bitmap, null, true, which); + } else { + // Pre-N, no flags API; this sets the home screen wallpaper + wm.setBitmap(bitmap); + } + + Log.i(TAG, "Wallpaper set successfully from: " + path + " target=" + target); + } catch (Exception e) { + Log.e(TAG, "Error setting wallpaper", e); + } finally { + finish(); + } + } +} diff --git a/libs/android-docker/wallpaper-manager/build.gradle b/libs/android-docker/wallpaper-manager/build.gradle new file mode 100644 index 00000000..599620d1 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/build.gradle @@ -0,0 +1,20 @@ +buildscript { + repositories { + google() + mavenCentral() + } + dependencies { + classpath "com.android.tools.build:gradle:7.4.2" + } +} + +allprojects { + repositories { + google() + mavenCentral() + } +} + +task clean(type: Delete) { + delete rootProject.buildDir +} diff --git 
a/libs/android-docker/wallpaper-manager/gradle/wrapper/gradle-wrapper.properties b/libs/android-docker/wallpaper-manager/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..f398c33c --- /dev/null +++ b/libs/android-docker/wallpaper-manager/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip +networkTimeout=10000 +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/libs/android-docker/wallpaper-manager/gradlew b/libs/android-docker/wallpaper-manager/gradlew new file mode 100644 index 00000000..f7b82cc7 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/gradlew @@ -0,0 +1,75 @@ +#!/bin/sh + +############################################################################## +# Gradle start up script for UN*X +############################################################################## + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname -s )" in + CYGWIN* ) cygwin=true ;; + Darwin* ) darwin=true ;; + MSYS* | MINGW* ) msys=true ;; + NONSTOP* ) nonstop=true ;; +esac + +# Determine the script directory +DIRNAME=$(cd "$(dirname "$0")" && pwd) +APP_HOME=$DIRNAME +APP_BASE_NAME=$(basename "$0") + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. 
+if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/libs/android-docker/wallpaper-manager/settings.gradle b/libs/android-docker/wallpaper-manager/settings.gradle new file mode 100644 index 00000000..2d4a61a5 --- /dev/null +++ b/libs/android-docker/wallpaper-manager/settings.gradle @@ -0,0 +1,2 @@ +rootProject.name = "CuaWallpaperManager" +include ':app' From e07b7855915ab83012e18992de930dd7244fafbc Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 18:02:41 +0530 Subject: [PATCH 5/8] feat: add entrypoint script and multi-stage docker files --- libs/android-docker/Dockerfile | 75 +++++++++++++++++++++++++++ libs/android-docker/dev.Dockerfile | 83 ++++++++++++++++++++++++++++++ libs/android-docker/entry.sh | 56 ++++++++++++++++++++ 3 files changed, 214 insertions(+) create mode 100644 libs/android-docker/Dockerfile create mode 100644 libs/android-docker/dev.Dockerfile 
create mode 100644 libs/android-docker/entry.sh diff --git a/libs/android-docker/Dockerfile b/libs/android-docker/Dockerfile new file mode 100644 index 00000000..0ee8735f --- /dev/null +++ b/libs/android-docker/Dockerfile @@ -0,0 +1,75 @@ +# ============================================================================ +# Stage 1: Build wallpaper-manager APK +# ============================================================================ +FROM eclipse-temurin:17-jdk AS builder + +RUN apt-get update && apt-get install -y wget unzip && \ + mkdir -p /opt/android-sdk/cmdline-tools && \ + cd /opt/android-sdk/cmdline-tools && \ + wget -q https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip && \ + unzip commandlinetools-linux-9477386_latest.zip && \ + rm commandlinetools-linux-9477386_latest.zip && \ + mv cmdline-tools latest + +ENV ANDROID_HOME=/opt/android-sdk +ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/platform-tools:${PATH}" + +RUN yes | sdkmanager --licenses && \ + sdkmanager "platforms;android-30" "build-tools;30.0.3" + +COPY wallpaper-manager /build/wallpaper-manager +WORKDIR /build/wallpaper-manager + +RUN curl -fsSL -o gradle/wrapper/gradle-wrapper.jar \ + https://raw.githubusercontent.com/gradle/gradle/v7.6.0/gradle/wrapper/gradle-wrapper.jar && \ + chmod +x gradlew + +RUN ./gradlew assembleDebug --no-daemon + +# ============================================================================ +# Stage 2: Runtime image with Android emulator & Computer server +# ============================================================================ +FROM budtmo/docker-android:emulator_11.0 + +USER root + +# Set environment variable to identify this as CUA Android container +ENV IS_CUA_ANDROID=true + +# Copy wallpaper-manager APK from builder stage +COPY --from=builder /build/wallpaper-manager/app/build/outputs/apk/debug/app-debug.apk /opt/apks/wallpaper-manager.apk + +RUN apt-get update && \ + apt-get install -y \ + python3 \ + 
python3-pip \ + python3-venv \ + python3-dev \ + python3-tk \ + build-essential \ + curl \ + && rm -rf /var/lib/apt/lists/* + +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +RUN /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \ + /opt/venv/bin/pip install --no-cache-dir cua-computer-server + +COPY entry.sh /usr/local/bin/entry.sh +RUN chmod +x /usr/local/bin/entry.sh + +# Make venv accessible to androidusr +RUN chown -R 1300:1301 /opt/venv + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ + CMD adb devices | grep -q "emulator" && curl -f http://localhost:8000/status || exit 1 + +# Switch back to androidusr user (as per base image) +USER 1300:1301 + +WORKDIR /home/androidusr + +ENTRYPOINT ["/usr/local/bin/entry.sh"] diff --git a/libs/android-docker/dev.Dockerfile b/libs/android-docker/dev.Dockerfile new file mode 100644 index 00000000..7164f9a8 --- /dev/null +++ b/libs/android-docker/dev.Dockerfile @@ -0,0 +1,83 @@ +# ============================================================================ +# Development Dockerfile - builds from libs/ directory with local sources +# Build command: docker build -f android-docker/dev.Dockerfile -t android-cua:dev . 
+# ============================================================================ + +# ============================================================================ +# Stage 1: Build wallpaper-manager APK +# ============================================================================ +FROM eclipse-temurin:17-jdk AS builder + +RUN apt-get update && apt-get install -y wget unzip && \ + mkdir -p /opt/android-sdk/cmdline-tools && \ + cd /opt/android-sdk/cmdline-tools && \ + wget -q https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip && \ + unzip commandlinetools-linux-9477386_latest.zip && \ + rm commandlinetools-linux-9477386_latest.zip && \ + mv cmdline-tools latest + +ENV ANDROID_HOME=/opt/android-sdk +ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/platform-tools:${PATH}" + +RUN yes | sdkmanager --licenses && \ + sdkmanager "platforms;android-30" "build-tools;30.0.3" + +COPY android-docker/wallpaper-manager /build/wallpaper-manager +WORKDIR /build/wallpaper-manager + +RUN curl -fsSL -o gradle/wrapper/gradle-wrapper.jar \ + https://raw.githubusercontent.com/gradle/gradle/v7.6.0/gradle/wrapper/gradle-wrapper.jar && \ + chmod +x gradlew + +RUN ./gradlew assembleDebug --no-daemon + +# ============================================================================ +# Stage 2: Runtime image with Android emulator & Computer server +# ============================================================================ +FROM budtmo/docker-android:emulator_11.0 + +USER root + +# Set environment variable to identify this as CUA Android container +ENV IS_CUA_ANDROID=true + +# Copy wallpaper-manager APK from builder stage +COPY --from=builder /build/wallpaper-manager/app/build/outputs/apk/debug/app-debug.apk /opt/apks/wallpaper-manager.apk + +RUN apt-get update && \ + apt-get install -y \ + python3 \ + python3-pip \ + python3-venv \ + python3-dev \ + python3-tk \ + build-essential \ + curl \ + && rm -rf /var/lib/apt/lists/* + +RUN python3 -m 
venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Install computer-server from local source for development +COPY python/computer-server /tmp/computer-server +WORKDIR /tmp/computer-server +RUN /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \ + /opt/venv/bin/pip install --no-cache-dir -e . + +COPY android-docker/entry.sh /usr/local/bin/entry.sh +RUN chmod +x /usr/local/bin/entry.sh + +# Make venv accessible to androidusr +RUN chown -R 1300:1301 /opt/venv + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ + CMD adb devices | grep -q "emulator" && curl -f http://localhost:8000/status || exit 1 + +# Switch back to androidusr user (as per base image) +USER 1300:1301 + +WORKDIR /home/androidusr + +ENTRYPOINT ["/usr/local/bin/entry.sh"] diff --git a/libs/android-docker/entry.sh b/libs/android-docker/entry.sh new file mode 100644 index 00000000..15c1ec0f --- /dev/null +++ b/libs/android-docker/entry.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -Eeuo pipefail + +info () { printf "%b%s%b" "\E[1;34m❯ \E[1;36m" "${1:-}" "\E[0m\n"; } +error () { printf "%b%s%b" "\E[1;31m❯ " "ERROR: ${1:-}" "\E[0m\n" >&2; } +warn () { printf "%b%s%b" "\E[1;31m❯ " "Warning: ${1:-}" "\E[0m\n" >&2; } + +# Start the original docker-android entrypoint in background +# This handles emulator startup, VNC, noVNC, etc. +info "Starting \"${EMULATOR_DEVICE}\" emulator..." +/home/androidusr/docker-android/mixins/scripts/run.sh & + +# Wait for ADB device to appear and boot to complete +info "Waiting for emulator to be ready..." +counter=0 +timeout=300 +while [ $counter -lt $timeout ]; do + if adb devices 2>/dev/null | grep -q "emulator"; then + # Check if boot is complete + boot_completed=$(adb shell getprop sys.boot_completed 2>&1 | tr -d '\r\n' | grep -o "1" || echo "0") + if [ "$boot_completed" = "1" ]; then + info "✓ Emulator \"${EMULATOR_DEVICE}\" is ready!"
+ break + fi + fi + sleep 2 + counter=$((counter + 2)) + + # Show progress every 10 seconds + if [ $((counter % 10)) -eq 0 ]; then + info " Still waiting... ($counter/$timeout seconds)" + fi +done + +if [ $counter -ge $timeout ]; then + error "✗ Emulator \"${EMULATOR_DEVICE}\" failed to start within $timeout seconds" + exit 1 +fi + +sleep 5 + +if adb shell pm list packages | grep -q "com.example.cua.wallpaper"; then + info "✓ Wallpaper Manager already installed" +else + info "Installing wallpaper-manager.apk..." + # Run the install as the 'if' condition: under 'set -e' a bare failing command aborts the script before $? can be tested + if adb install -r /opt/apks/wallpaper-manager.apk; then + info "✓ Wallpaper Manager installed successfully" + else + warn "✗ Failed to install Wallpaper Manager APK" + fi +fi + +info "Starting Computer Server..." +source /opt/venv/bin/activate +DISPLAY= python -m computer_server --host 0.0.0.0 --port 8000 --log-level info From 138e615bbdb09d9655dbd28bb0c9e3342ef2da1d Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 18:03:42 +0530 Subject: [PATCH 6/8] docs: add readme doc with usage and examples --- libs/android-docker/README.md | 80 +++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 libs/android-docker/README.md diff --git a/libs/android-docker/README.md b/libs/android-docker/README.md new file mode 100644 index 00000000..ea871fd8 --- /dev/null +++ b/libs/android-docker/README.md @@ -0,0 +1,80 @@ +# Android Docker + +Docker image that runs an Android emulator with CUA Computer Server integration, enabling programmatic control of Android devices via HTTP API.
+ +## Features + +- **Android 11 Emulator** - Based on budtmo/docker-android +- **CUA Computer Server** - HTTP API for automation, file operations, window management +- **Custom Wallpaper Manager APK** - Programmatically set wallpapers without user interaction +- **VNC Access** - View and interact with the Android screen via web browser + +## What's Inside + +- **wallpaper-manager/** - Custom Android APK that uses WallpaperManager API to set wallpapers +- **entry.sh** - Container startup script that launches emulator and server +- **Dockerfile** - Production build (installs cua-computer-server from PyPI) +- **dev.Dockerfile** - Development build (uses local source code) + +## Quick Start + +### Production Build + +```bash +cd android-docker +docker build -t cua-android . +docker run -d -p 6080:6080 -p 8000:8000 \ + -e EMULATOR_DEVICE="Samsung Galaxy S10" \ + -e WEB_VNC=true \ + --device /dev/kvm \ + --name android-container \ + cua-android +``` + +### Development Build + +```bash +cd .. # Go to libs/ directory +docker build -f android-docker/dev.Dockerfile -t cua-android:dev . 
+docker run -d -p 6080:6080 -p 8000:8000 \ + -e EMULATOR_DEVICE="Samsung Galaxy S10" \ + -e WEB_VNC=true \ + --device /dev/kvm \ + --name android-container \ + cua-android:dev +``` + +## Access Points + +- **VNC Web UI**: http://localhost:6080 +- **Computer Server API**: http://localhost:8000 +- **API Documentation**: http://localhost:8000/docs + +## API Examples + +```bash +# Get screen size +curl -X POST http://localhost:8000/cmd \ + -H "Content-Type: application/json" \ + -d '{"command": "get_screen_size", "params": {}}' + +# Set the home-screen wallpaper (file permissions are handled automatically) +curl -X POST http://localhost:8000/cmd \ + -H "Content-Type: application/json" \ + -d '{"command": "set_wallpaper", "params": {"path": "/sdcard/image.jpg"}}' + +# Launch app +curl -X POST http://localhost:8000/cmd \ + -H "Content-Type: application/json" \ + -d '{"command": "launch", "params": {"app": "com.android.settings"}}' +``` + +## Custom Wallpaper Solution + +Android doesn't provide native ADB commands for setting wallpapers. We solved this by: + +1. **Building a custom APK** (`wallpaper-manager`) that uses Android's WallpaperManager API +2. **Multi-stage Docker build** - APK is compiled during image build +3. **Auto-installation** - APK installs automatically on container startup +4. **Permission handling** - Files are copied to `/data/local/tmp` where all apps have read access +5. 
**Seamless integration** - `set_wallpaper()` API handles everything automatically From 276ad0609b8a7040f41e106f87f3753206a75d30 Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Tue, 2 Dec 2025 18:23:06 +0530 Subject: [PATCH 7/8] fix: typo in command executor class --- .../computer-server/computer_server/handlers/android.py | 4 ++-- libs/python/computer-server/computer_server/utils/helpers.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/python/computer-server/computer_server/handlers/android.py b/libs/python/computer-server/computer_server/handlers/android.py index f6d0b201..1bd9adc8 100644 --- a/libs/python/computer-server/computer_server/handlers/android.py +++ b/libs/python/computer-server/computer_server/handlers/android.py @@ -2,7 +2,7 @@ import asyncio import base64 from typing import Any, Dict, List, Optional, Tuple -from ..utils.helpers import CommandExecuctor +from ..utils.helpers import CommandExecutor from .base import ( BaseAccessibilityHandler, BaseAutomationHandler, @@ -29,7 +29,7 @@ ANDROID_KEY_MAP = { "right": "22", } -adb_exec = CommandExecuctor("adb", "-s", "emulator-5554") +adb_exec = CommandExecutor("adb", "-s", "emulator-5554") class AndroidAccessibilityHandler(BaseAccessibilityHandler): diff --git a/libs/python/computer-server/computer_server/utils/helpers.py b/libs/python/computer-server/computer_server/utils/helpers.py index 64c80a0e..d95aa840 100644 --- a/libs/python/computer-server/computer_server/utils/helpers.py +++ b/libs/python/computer-server/computer_server/utils/helpers.py @@ -48,7 +48,7 @@ def get_current_os() -> str: raise RuntimeError(f"Failed to determine current OS: {str(e)}") -class CommandExecuctor: +class CommandExecutor: def __init__(self, *base_cmd: str) -> None: """Initialize with a base command. 
From ba34052344be4baa7073fe280e77904e2b333c14 Mon Sep 17 00:00:00 2001 From: "synacktra.work@gmail.com" Date: Sun, 21 Dec 2025 01:54:14 +0530 Subject: [PATCH 8/8] chore(android): rename "cua-android" to "cua-droid" --- libs/android-docker/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/android-docker/README.md b/libs/android-docker/README.md index ea871fd8..f085c3e7 100644 --- a/libs/android-docker/README.md +++ b/libs/android-docker/README.md @@ -22,26 +22,26 @@ Docker image that runs an Android emulator with CUA Computer Server integration, ```bash cd android-docker -docker build -t cua-android . +docker build -t trycua/cua-droid . docker run -d -p 6080:6080 -p 8000:8000 \ -e EMULATOR_DEVICE="Samsung Galaxy S10" \ -e WEB_VNC=true \ --device /dev/kvm \ --name android-container \ - cua-android + trycua/cua-droid ``` ### Development Build ```bash cd .. # Go to libs/ directory -docker build -f android-docker/dev.Dockerfile -t cua-android:dev . +docker build -f android-docker/dev.Dockerfile -t trycua/cua-droid:dev . docker run -d -p 6080:6080 -p 8000:8000 \ -e EMULATOR_DEVICE="Samsung Galaxy S10" \ -e WEB_VNC=true \ --device /dev/kvm \ --name android-container \ - cua-android:dev + trycua/cua-droid:dev ``` ## Access Points