Merge pull request #632 from synacktraa/feat/android-docker

Add Android Docker Support with Computer Server Integration
This commit is contained in:
Harsh Verma
2025-12-21 02:02:12 +05:30
committed by GitHub
18 changed files with 1415 additions and 73 deletions

View File

@@ -0,0 +1,75 @@
# ============================================================================
# Stage 1: Build wallpaper-manager APK
# ============================================================================
FROM eclipse-temurin:17-jdk AS builder

# Tools needed by this stage: wget/unzip for the Android command-line tools,
# curl for the Gradle wrapper jar. They are installed explicitly so the build
# does not depend on what the base image happens to ship.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Unpack the command-line tools as cmdline-tools/latest so that the PATH entry
# below (cmdline-tools/latest/bin) resolves to sdkmanager.
WORKDIR /opt/android-sdk/cmdline-tools
RUN wget -q https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip && \
    unzip -q commandlinetools-linux-9477386_latest.zip && \
    rm commandlinetools-linux-9477386_latest.zip && \
    mv cmdline-tools latest

ENV ANDROID_HOME=/opt/android-sdk
ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/platform-tools:${PATH}"

# Accept the SDK licenses non-interactively, then install the platform and
# build-tools versions the APK is compiled against.
RUN yes | sdkmanager --licenses && \
    sdkmanager "platforms;android-30" "build-tools;30.0.3"

COPY wallpaper-manager /build/wallpaper-manager
WORKDIR /build/wallpaper-manager

# The wrapper jar is not part of the copied sources, so it is fetched here.
# NOTE(review): the download is not checksum-verified - consider pinning a sha256.
RUN curl -fsSL -o gradle/wrapper/gradle-wrapper.jar \
        https://raw.githubusercontent.com/gradle/gradle/v7.6.0/gradle/wrapper/gradle-wrapper.jar && \
    chmod +x gradlew

RUN ./gradlew assembleDebug --no-daemon

# ============================================================================
# Stage 2: Runtime image with Android emulator & Computer server
# ============================================================================
FROM budtmo/docker-android:emulator_11.0

USER root

# Set environment variable to identify this as CUA Android container
ENV IS_CUA_ANDROID=true

# Copy wallpaper-manager APK from builder stage
COPY --from=builder /build/wallpaper-manager/app/build/outputs/apk/debug/app-debug.apk /opt/apks/wallpaper-manager.apk

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        python3 \
        python3-dev \
        python3-pip \
        python3-tk \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

ENV PATH="/opt/venv/bin:$PATH"

# Create the venv, install the server and hand ownership to 1300:1301 in ONE
# layer: a separate `RUN chown -R` would copy every venv file into a second
# layer and roughly double the space the venv takes in the image.
# NOTE(review): cua-computer-server is unpinned; pin a version for reproducible builds.
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \
    /opt/venv/bin/pip install --no-cache-dir cua-computer-server && \
    chown -R 1300:1301 /opt/venv

COPY entry.sh /usr/local/bin/entry.sh
RUN chmod +x /usr/local/bin/entry.sh

EXPOSE 8000

# Healthy only when adb lists an emulator AND the server answers on /status.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD adb devices | grep -q "emulator" && curl -fsS http://localhost:8000/status || exit 1

# Switch back to androidusr user (as per base image)
USER 1300:1301
WORKDIR /home/androidusr

ENTRYPOINT ["/usr/local/bin/entry.sh"]

View File

@@ -0,0 +1,80 @@
# Android Docker
Docker image that runs an Android emulator with CUA Computer Server integration, enabling programmatic control of Android devices via HTTP API.
## Features
- **Android 11 Emulator** - Based on budtmo/docker-android
- **CUA Computer Server** - HTTP API for automation, file operations, window management
- **Custom Wallpaper Manager APK** - Programmatically set wallpapers without user interaction
- **VNC Access** - View and interact with the Android screen via web browser
## What's Inside
- **wallpaper-manager/** - Custom Android APK that uses WallpaperManager API to set wallpapers
- **entry.sh** - Container startup script that launches emulator and server
- **Dockerfile** - Production build (installs cua-computer-server from PyPI)
- **dev.Dockerfile** - Development build (uses local source code)
## Quick Start
### Production Build
```bash
cd android-docker
docker build -t trycua/cua-droid .
docker run -d -p 6080:6080 -p 8000:8000 \
-e EMULATOR_DEVICE="Samsung Galaxy S10" \
-e WEB_VNC=true \
--device /dev/kvm \
--name android-container \
trycua/cua-droid
```
### Development Build
```bash
cd ..  # From android-docker/, go up to libs/ so the build context includes both android-docker/ and python/computer-server
docker build -f android-docker/dev.Dockerfile -t trycua/cua-droid:dev .
docker run -d -p 6080:6080 -p 8000:8000 \
-e EMULATOR_DEVICE="Samsung Galaxy S10" \
-e WEB_VNC=true \
--device /dev/kvm \
--name android-container \
trycua/cua-droid:dev
```
## Access Points
- **VNC Web UI**: http://localhost:6080
- **Computer Server API**: http://localhost:8000
- **API Documentation**: http://localhost:8000/docs
## API Examples
```bash
# Get screen size
curl -X POST http://localhost:8000/cmd \
-H "Content-Type: application/json" \
-d '{"command": "get_screen_size", "params": {}}'
# Set wallpaper (automatically handles permissions)
curl -X POST http://localhost:8000/cmd \
-H "Content-Type: application/json" \
-d '{"command": "set_wallpaper", "params": {"path": "/sdcard/image.jpg", "target": "home"}}'
# Launch app
curl -X POST http://localhost:8000/cmd \
-H "Content-Type: application/json" \
-d '{"command": "launch", "params": {"app": "com.android.settings"}}'
```
## Custom Wallpaper Solution
Android doesn't provide native ADB commands for setting wallpapers. We solved this by:
1. **Building a custom APK** (`wallpaper-manager`) that uses Android's WallpaperManager API
2. **Multi-stage Docker build** - APK is compiled during image build
3. **Auto-installation** - APK installs automatically on container startup
4. **Permission handling** - Files are copied to `/data/local/tmp` where all apps have read access
5. **Seamless integration** - `set_wallpaper()` API handles everything automatically

View File

@@ -0,0 +1,83 @@
# ============================================================================
# Development Dockerfile - builds from libs/ directory with local sources
# Build command: docker build -f android-docker/dev.Dockerfile -t trycua/cua-droid:dev .
# ============================================================================

# ============================================================================
# Stage 1: Build wallpaper-manager APK
# ============================================================================
FROM eclipse-temurin:17-jdk AS builder

# Tools needed by this stage: wget/unzip for the Android command-line tools,
# curl for the Gradle wrapper jar. They are installed explicitly so the build
# does not depend on what the base image happens to ship.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Unpack the command-line tools as cmdline-tools/latest so that the PATH entry
# below (cmdline-tools/latest/bin) resolves to sdkmanager.
WORKDIR /opt/android-sdk/cmdline-tools
RUN wget -q https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip && \
    unzip -q commandlinetools-linux-9477386_latest.zip && \
    rm commandlinetools-linux-9477386_latest.zip && \
    mv cmdline-tools latest

ENV ANDROID_HOME=/opt/android-sdk
ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/platform-tools:${PATH}"

# Accept the SDK licenses non-interactively, then install the platform and
# build-tools versions the APK is compiled against.
RUN yes | sdkmanager --licenses && \
    sdkmanager "platforms;android-30" "build-tools;30.0.3"

# Paths are relative to the libs/ build context (see build command above).
COPY android-docker/wallpaper-manager /build/wallpaper-manager
WORKDIR /build/wallpaper-manager

# The wrapper jar is not part of the copied sources, so it is fetched here.
# NOTE(review): the download is not checksum-verified - consider pinning a sha256.
RUN curl -fsSL -o gradle/wrapper/gradle-wrapper.jar \
        https://raw.githubusercontent.com/gradle/gradle/v7.6.0/gradle/wrapper/gradle-wrapper.jar && \
    chmod +x gradlew

RUN ./gradlew assembleDebug --no-daemon

# ============================================================================
# Stage 2: Runtime image with Android emulator & Computer server
# ============================================================================
FROM budtmo/docker-android:emulator_11.0

USER root

# Set environment variable to identify this as CUA Android container
ENV IS_CUA_ANDROID=true

# Copy wallpaper-manager APK from builder stage
COPY --from=builder /build/wallpaper-manager/app/build/outputs/apk/debug/app-debug.apk /opt/apks/wallpaper-manager.apk

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        python3 \
        python3-dev \
        python3-pip \
        python3-tk \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

ENV PATH="/opt/venv/bin:$PATH"

# Create the venv before copying sources so this layer stays cached when only
# the Python code changes.
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/pip install --no-cache-dir --upgrade pip

# Install computer-server from local source for development
COPY python/computer-server /tmp/computer-server
WORKDIR /tmp/computer-server

# Editable install; ownership is fixed in the same layer that writes the
# packages so the installed dependencies are not duplicated by a later
# `RUN chown -R` layer.
RUN /opt/venv/bin/pip install --no-cache-dir -e . && \
    chown -R 1300:1301 /opt/venv

COPY android-docker/entry.sh /usr/local/bin/entry.sh
RUN chmod +x /usr/local/bin/entry.sh

EXPOSE 8000

# Healthy only when adb lists an emulator AND the server answers on /status.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD adb devices | grep -q "emulator" && curl -fsS http://localhost:8000/status || exit 1

# Switch back to androidusr user (as per base image)
USER 1300:1301
WORKDIR /home/androidusr

ENTRYPOINT ["/usr/local/bin/entry.sh"]

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Container entrypoint: starts the docker-android emulator stack in the
# background, waits for the emulator to finish booting, makes sure the
# wallpaper-manager APK is installed, then replaces itself with the
# Computer Server process.
set -Eeuo pipefail

info () { printf "%b%s%b" "\E[1;34m \E[1;36m" "${1:-}" "\E[0m\n"; }
error () { printf "%b%s%b" "\E[1;31m " "ERROR: ${1:-}" "\E[0m\n" >&2; }
warn () { printf "%b%s%b" "\E[1;31m " "Warning: ${1:-}" "\E[0m\n" >&2; }

readonly WALLPAPER_PACKAGE="com.example.cua.wallpaper"
readonly WALLPAPER_APK="/opt/apks/wallpaper-manager.apk"

# EMULATOR_DEVICE is only used for log messages here; default it to empty so
# an unset variable does not abort the script under `set -u`.
device_name="${EMULATOR_DEVICE:-}"

# Start the original docker-android entrypoint in background
# This handles emulator startup, VNC, noVNC, etc.
info "Starting \"${device_name}\" emulator..."
/home/androidusr/docker-android/mixins/scripts/run.sh &

# Wait for ADB device to appear and boot to complete
info "Waiting for emulator to be ready..."
counter=0
timeout=300
while [ "$counter" -lt "$timeout" ]; do
    # Capture adb output first instead of piping into `grep -q`: with pipefail,
    # grep exiting early can kill adb with SIGPIPE and turn a match into a failure.
    devices=$(adb devices 2>/dev/null || true)
    if grep -q "emulator" <<<"$devices"; then
        # Check if boot is complete (property is exactly "1" once booted)
        boot_completed=$(adb shell getprop sys.boot_completed 2>/dev/null | tr -d '\r\n' || true)
        if [ "$boot_completed" = "1" ]; then
            info "✓ Emulator \"${device_name}\" is ready!"
            break
        fi
    fi

    sleep 2
    counter=$((counter + 2))

    # Show progress every 10 seconds
    if [ $((counter % 10)) -eq 0 ]; then
        info " Still waiting... ($counter/$timeout seconds)"
    fi
done

# The loop only reaches the timeout value when it never hit `break`.
if [ "$counter" -ge "$timeout" ]; then
    error "✗ Emulator \"${device_name}\" failed to start within $timeout seconds"
    exit 1
fi

# Short pause between boot completion and the first package-manager calls.
sleep 5

installed_packages=$(adb shell pm list packages 2>/dev/null || true)
if grep -q "$WALLPAPER_PACKAGE" <<<"$installed_packages"; then
    info "✓ Wallpaper Manager already installed"
else
    info "Installing wallpaper-manager.apk..."
    # Test the command directly: under `set -e` a separate `$?` check would
    # never run, because a failing install would already have aborted the script.
    if adb install -r "$WALLPAPER_APK"; then
        info "✓ Wallpaper Manager installed successfully"
    else
        warn "✗ Failed to install Wallpaper Manager APK"
    fi
fi

info "Starting Computer Server..."
source /opt/venv/bin/activate

# Run the server with an empty DISPLAY, and exec it so it replaces this shell
# as the container's main process and receives stop signals directly.
export DISPLAY=
exec python -m computer_server --host 0.0.0.0 --port 8000 --log-level info

View File

@@ -0,0 +1,15 @@
# Gradle
.gradle/
build/
gradle/wrapper/gradle-wrapper.jar
# Android Studio
.idea/
*.iml
local.properties
# Build outputs
*.apk
*.aab
*.dex
*.class

View File

@@ -0,0 +1,33 @@
// Module build script for the wallpaper-manager helper app.
apply plugin: 'com.android.application'
android {
// Compiled against API 30.
compileSdkVersion 30
defaultConfig {
// Package id of the installed helper app.
applicationId "com.example.cua.wallpaper"
minSdkVersion 21
targetSdkVersion 30
versionCode 1
versionName "1.0"
}
buildTypes {
debug {
debuggable true
}
release {
// Code shrinking is switched off for release builds.
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'),
'proguard-rules.pro'
}
}
compileOptions {
// Java 8 language level for both source and generated bytecode.
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
}
dependencies {
// No special dependencies needed
}

View File

@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the wallpaper helper app: one translucent activity, started through a custom intent action. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.example.cua.wallpaper">
<!-- Allows setting wallpapers -->
<uses-permission android:name="android.permission.SET_WALLPAPER" />
<!-- Needed for reading from /sdcard/* -->
<!-- The permission is only declared up to SDK 32 (maxSdkVersion). -->
<uses-permission
android:name="android.permission.READ_EXTERNAL_STORAGE"
android:maxSdkVersion="32" />
<application
android:allowBackup="false"
android:label="CUA Wallpaper Manager"
android:supportsRtl="true"
android:theme="@android:style/Theme.Translucent.NoTitleBar">
<!-- No launcher icon; this is a helper-only activity -->
<!-- Exported so it can be started from outside the app via the custom action declared below. -->
<activity
android:name=".SetWallpaperActivity"
android:exported="true"
android:theme="@android:style/Theme.Translucent.NoTitleBar">
<intent-filter>
<!-- Custom action to call from ADB / Python -->
<action android:name="com.example.cua.wallpaper.SET_WALLPAPER" />
<category android:name="android.intent.category.DEFAULT" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@@ -0,0 +1,80 @@
package com.example.cua.wallpaper;
import android.app.Activity;
import android.app.WallpaperManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Build;
import android.os.Bundle;
import android.util.Log;
import java.io.File;
/**
 * UI-less helper activity that sets the device wallpaper from an image file.
 *
 * <p>Expected intent extras:
 * <ul>
 *   <li>{@code "path"} - absolute path to an image on the device
 *       (e.g. /sdcard/Pictures/wall.jpg); required.</li>
 *   <li>{@code "target"} - optional: {@code "home"}, {@code "lock"} or
 *       {@code "both"} (default {@code "home"}); unknown values fall back to
 *       the home screen.</li>
 * </ul>
 *
 * <p>The activity always finishes itself at the end of {@code onCreate};
 * success and failure are reported only through logcat under {@link #TAG}.
 */
public class SetWallpaperActivity extends Activity {
    private static final String TAG = "CuaWallpaperManager";

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        try {
            applyWallpaperFromIntent();
        } catch (Exception e) {
            Log.e(TAG, "Error setting wallpaper", e);
        } finally {
            // Single exit point: every path (success, validation failure,
            // exception) closes the activity exactly once.
            finish();
        }
    }

    /**
     * Reads the extras from the launching intent, decodes the image and applies
     * it. Validation failures are logged and make the method return early.
     *
     * @throws java.io.IOException if the wallpaper manager fails to store the bitmap
     */
    private void applyWallpaperFromIntent() throws java.io.IOException {
        String path = getIntent().getStringExtra("path");
        String target = getIntent().getStringExtra("target");
        if (target == null) {
            target = "home";
        }

        if (path == null || path.trim().isEmpty()) {
            Log.e(TAG, "No path provided");
            return;
        }

        File file = new File(path);
        if (!file.exists()) {
            Log.e(TAG, "File does not exist: " + path);
            return;
        }

        // decodeFile returns null when the file cannot be decoded as an image.
        Bitmap bitmap = BitmapFactory.decodeFile(file.getAbsolutePath());
        if (bitmap == null) {
            Log.e(TAG, "Failed to decode image at: " + path);
            return;
        }

        WallpaperManager wm = WallpaperManager.getInstance(this);
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) {
            wm.setBitmap(bitmap, null, true, resolveWhich(target));
        } else {
            // Pre-N, no flags API; this sets the home screen wallpaper
            wm.setBitmap(bitmap);
        }
        Log.i(TAG, "Wallpaper set successfully from: " + path + " target=" + target);
    }

    /**
     * Maps the textual target onto WallpaperManager flags.
     *
     * @param target "home", "lock" or "both" (case-insensitive); anything else
     *               is treated as "home"
     * @return the {@code which} bitmask for {@code WallpaperManager.setBitmap}
     */
    private static int resolveWhich(String target) {
        // Locale.ROOT keeps the comparison independent of the device language.
        switch (target.toLowerCase(java.util.Locale.ROOT)) {
            case "lock":
                return WallpaperManager.FLAG_LOCK;
            case "both":
                return WallpaperManager.FLAG_SYSTEM | WallpaperManager.FLAG_LOCK;
            case "home":
            default:
                return WallpaperManager.FLAG_SYSTEM;
        }
    }
}

View File

@@ -0,0 +1,20 @@
// Root build script: declares where the Android Gradle plugin and all module
// dependencies are resolved from.
buildscript {
repositories {
google()
mavenCentral()
}
dependencies {
// Android Gradle plugin used by the :app module.
classpath "com.android.tools.build:gradle:7.4.2"
}
}
allprojects {
repositories {
google()
mavenCentral()
}
}
// `clean` deletes the root project's build directory.
task clean(type: Delete) {
delete rootProject.buildDir
}

View File

@@ -0,0 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@@ -0,0 +1,75 @@
#!/bin/sh
##############################################################################
# Gradle start up script for UN*X
##############################################################################
# Locates a Java executable, builds the JVM command line for the Gradle
# wrapper jar under gradle/wrapper/, and exec's it with the caller's arguments.
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
# NOTE(review): MAX_FD is assigned here but not read again in this script.
MAX_FD=maximum
# Print a message to stderr.
warn () {
echo "$*"
} >&2
# Print a message (padded with blank lines) to stderr and exit with status 1.
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
# NOTE(review): these flags are set below but not referenced again in this script.
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname -s )" in
CYGWIN* ) cygwin=true ;;
Darwin* ) darwin=true ;;
MSYS* | MINGW* ) msys=true ;;
NONSTOP* ) nonstop=true ;;
esac
# Determine the script directory
DIRNAME=$(cd "$(dirname "$0")" && pwd)
APP_HOME=$DIRNAME
APP_BASE_NAME=$(basename "$0")
# The wrapper jar is expected next to this script under gradle/wrapper/.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
# No JAVA_HOME: fall back to whatever `java` is on PATH.
JAVACMD=java
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Escape application args
# Emits each argument wrapped in single quotes (embedded single quotes become
# '\'') followed by " \", so the result can be re-parsed by `eval` below.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# Replace this shell with the JVM running the wrapper's main class.
exec "$JAVACMD" "$@"

View File

@@ -0,0 +1,2 @@
rootProject.name = "CuaWallpaperManager"
include ':app'

View File

@@ -0,0 +1,744 @@
import asyncio
import base64
import shlex
from typing import Any, Dict, List, Optional, Tuple

from ..utils.helpers import CommandExecutor
from .base import (
    BaseAccessibilityHandler,
    BaseAutomationHandler,
    BaseDesktopHandler,
    BaseFileHandler,
    BaseWindowHandler,
)
# Map common key names to Android keycodes (the numeric values accepted by
# `input keyevent`). Lookups are done on the lower-cased key name; names that
# are not listed here are passed through unchanged by the caller.
ANDROID_KEY_MAP = {
    # Text editing
    "return": "66",
    "enter": "66",
    "backspace": "67",
    "delete": "67",  # kept as KEYCODE_DEL (backspace) for backward compatibility
    "forward_delete": "112",
    "tab": "61",
    "space": "62",
    "escape": "111",
    "esc": "111",
    # System navigation
    "home": "3",
    "back": "4",
    "menu": "82",
    "app_switch": "187",
    "power": "26",
    "volume_up": "24",
    "volume_down": "25",
    # Cursor / paging
    "up": "19",
    "down": "20",
    "left": "21",
    "right": "22",
    "pageup": "92",
    "pagedown": "93",
    "end": "123",
}
# Module-wide executor shared by every handler below. It is constructed with
# `adb -s emulator-5554`, so all commands target that one emulator serial
# (presumably used as a fixed prefix for each run() call - confirm in CommandExecutor).
adb_exec = CommandExecutor("adb", "-s", "emulator-5554")
class AndroidAccessibilityHandler(BaseAccessibilityHandler):
    """Accessibility handler placeholder for Android.

    Neither operation is implemented yet; both raise NotImplementedError.
    """

    async def get_accessibility_tree(self) -> Dict[str, Any]:
        """Return the UI hierarchy. Not implemented for Android yet.

        Raises:
            NotImplementedError: always.
        """
        message = "get_accessibility_tree not yet implemented for Android"
        raise NotImplementedError(message)

    async def find_element(
        self,
        role: Optional[str] = None,
        title: Optional[str] = None,
        value: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Look up a UI element by role/title/value. Not implemented for Android yet.

        Raises:
            NotImplementedError: always.
        """
        message = "find_element not yet implemented for Android"
        raise NotImplementedError(message)
class AndroidAutomationHandler(BaseAutomationHandler):
    """Android automation handler using ADB input commands.

    Pointer actions are mapped onto `input tap` / `input swipe`, keyboard
    actions onto `input text` / `input keyevent`. Arguments of `adb shell` are
    re-parsed by the shell on the device, so every free-form string (typed
    text, clipboard text, key names) is quoted with ``shlex.quote`` before it
    is sent.

    Unless stated otherwise, methods return an empty dict on success and raise
    ``RuntimeError`` when adb reports a failure.
    """

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    async def _adb(failure: str, *args: str) -> str:
        """Run an adb command with decoded output.

        Args:
            failure: Prefix of the error message used when the command fails.
            *args: Arguments forwarded to ``adb_exec.run``.

        Returns:
            The decoded command output.

        Raises:
            RuntimeError: ``"<failure>: <output>"`` if the command did not succeed.
        """
        success, output = await adb_exec.run(*args, decode=True)
        if not success:
            raise RuntimeError(f"{failure}: {output}")
        return output

    async def _swipe(
        self, failure: str, x1: int, y1: int, x2: int, y2: int, duration_ms: int
    ) -> None:
        """Send `input swipe` from (x1, y1) to (x2, y2) lasting ``duration_ms``."""
        await self._adb(
            failure,
            "shell",
            "input",
            "swipe",
            str(x1),
            str(y1),
            str(x2),
            str(y2),
            str(duration_ms),
        )

    async def _scroll_vertical(
        self, failure: str, clicks: int, start_fraction: float, end_fraction: float
    ) -> None:
        """Swipe ``clicks`` times along the vertical centre line of the screen.

        The swipe runs between the given fractions of the screen height.
        """
        screen_size = await self.get_screen_size()
        center_x = screen_size["width"] // 2
        start_y = int(screen_size["height"] * start_fraction)
        end_y = int(screen_size["height"] * end_fraction)
        for _ in range(clicks):
            await self._swipe(failure, center_x, start_y, center_x, end_y, 300)
            await asyncio.sleep(0.1)  # Small delay between scrolls

    # ------------------------------------------------------------------
    # Mouse Actions
    # ------------------------------------------------------------------
    async def mouse_down(
        self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left"
    ) -> Dict[str, Any]:
        """Simulate mouse down (touch down) at position.

        Note: Android doesn't support separate touch down/up via ADB, so this
        sends a complete 100 ms touch at (x, y). ``button`` is ignored.

        Raises:
            ValueError: if ``x`` or ``y`` is missing.
        """
        if x is None or y is None:
            raise ValueError("x and y coordinates are required for mouse_down on Android")
        # A zero-distance swipe is a touch held for the given duration.
        await self._swipe("Mouse down failed", x, y, x, y, 100)
        return {}

    async def mouse_up(
        self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left"
    ) -> Dict[str, Any]:
        """Simulate mouse up (touch up) at position.

        No-op: ``mouse_down`` already completes the touch.
        """
        return {}

    async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]:
        """Perform a tap at the specified position.

        Raises:
            ValueError: if ``x`` or ``y`` is missing.
        """
        if x is None or y is None:
            raise ValueError("x and y coordinates are required for left_click on Android")
        await self._adb("Tap failed", "shell", "input", "tap", str(x), str(y))
        return {}

    async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> Dict[str, Any]:
        """Simulate right click (long press) at position.

        Raises:
            ValueError: if ``x`` or ``y`` is missing.
        """
        if x is None or y is None:
            raise ValueError("x and y coordinates are required for right_click on Android")
        # Long press: swipe with long duration simulates touch and hold
        await self._swipe("Long press failed", x, y, x, y, 1000)
        return {}

    async def double_click(
        self, x: Optional[int] = None, y: Optional[int] = None
    ) -> Dict[str, Any]:
        """Perform a double tap at the specified position.

        Raises:
            ValueError: if ``x`` or ``y`` is missing.
        """
        if x is None or y is None:
            raise ValueError("x and y coordinates are required for double_click on Android")
        # Perform two taps in quick succession
        for _ in range(2):
            await self._adb("Double tap failed", "shell", "input", "tap", str(x), str(y))
            await asyncio.sleep(0.1)  # Short delay between taps
        return {}

    async def move_cursor(self, x: int, y: int) -> Dict[str, Any]:
        """Move cursor - not supported on touch devices."""
        raise NotImplementedError("move_cursor not supported on Android (touch-based interface)")

    async def drag_to(
        self, x: int, y: int, button: str = "left", duration: float = 0.5
    ) -> Dict[str, Any]:
        """Drag from current position to target coordinates.

        Not supported: there is no tracked cursor position to start from.
        Use ``drag`` with an explicit path instead.
        """
        raise NotImplementedError(
            "drag_to not well supported on Android (no cursor tracking). "
            "Use drag(path) with explicit start and end coordinates instead."
        )

    async def drag(
        self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5
    ) -> Dict[str, Any]:
        """Drag along a path of coordinates.

        Only the first and last points are used; intermediate points are
        ignored because the gesture is sent as a single swipe.

        Args:
            path: At least two (x, y) points.
            button: Ignored on Android.
            duration: Gesture duration in seconds.

        Raises:
            ValueError: if ``path`` has fewer than two points.
        """
        if len(path) < 2:
            raise ValueError("Path must contain at least 2 coordinates for drag")
        start_x, start_y = path[0]
        end_x, end_y = path[-1]
        # Convert duration to milliseconds
        await self._swipe("Drag failed", start_x, start_y, end_x, end_y, int(duration * 1000))
        return {}

    # ------------------------------------------------------------------
    # Keyboard Actions
    # ------------------------------------------------------------------
    async def key_down(self, key: str) -> Dict[str, Any]:
        """Press and hold key - limited support on Android.

        Android doesn't support key hold through ADB input, so the key event
        is sent once.
        """
        return await self.press_key(key)

    async def key_up(self, key: str) -> Dict[str, Any]:
        """Release key - no-op, see ``key_down``."""
        return {}

    async def type_text(self, text: str) -> Dict[str, Any]:
        """Type text using Android input method.

        Spaces are sent as ``%s`` (the escape `input text` uses for a space),
        and the whole argument is shell-quoted so characters such as
        ``& ; | $ ( ) < >`` and quotes reach `input` literally instead of being
        interpreted by the device shell. Empty text is a no-op.
        """
        if not text:
            return {}
        encoded = shlex.quote(text.replace(" ", "%s"))
        await self._adb("Type text failed", "shell", "input", "text", encoded)
        return {}

    async def press_key(self, key: str) -> Dict[str, Any]:
        """Press a key using keyevent.

        ``key`` is looked up case-insensitively in ``ANDROID_KEY_MAP``; unknown
        names are passed to `input keyevent` as given.
        """
        keycode = ANDROID_KEY_MAP.get(key.lower(), key)
        await self._adb("Press key failed", "shell", "input", "keyevent", shlex.quote(keycode))
        return {}

    async def hotkey(self, keys: List[str]) -> Dict[str, Any]:
        """Press key combination - sends keys in sequence on Android.

        Android doesn't support simultaneous key presses via ADB, so the keys
        are sent one after another.
        """
        for key in keys:
            await self.press_key(key)
            await asyncio.sleep(0.05)  # Small delay between keys
        return {}

    # ------------------------------------------------------------------
    # Scrolling Actions
    # ------------------------------------------------------------------
    async def scroll(self, x: int, y: int) -> Dict[str, Any]:
        """Scroll by x and y amounts.

        Implemented as one swipe that starts at the screen centre; the end
        point is offset by ``+x`` horizontally and ``-y`` vertically.
        """
        screen_size = await self.get_screen_size()
        center_x = screen_size["width"] // 2
        center_y = screen_size["height"] // 2
        # y is inverted because swiping up scrolls the content down.
        await self._swipe("Scroll failed", center_x, center_y, center_x + x, center_y - y, 300)
        return {}

    async def scroll_down(self, clicks: int = 1) -> Dict[str, Any]:
        """Scroll down by specified number of clicks (one upward swipe each)."""
        # Swipe up to scroll content down
        await self._scroll_vertical("Scroll down failed", clicks, 0.7, 0.3)
        return {}

    async def scroll_up(self, clicks: int = 1) -> Dict[str, Any]:
        """Scroll up by specified number of clicks (one downward swipe each)."""
        # Swipe down to scroll content up
        await self._scroll_vertical("Scroll up failed", clicks, 0.3, 0.7)
        return {}

    # ------------------------------------------------------------------
    # Screen Actions
    # ------------------------------------------------------------------
    async def screenshot(self) -> Dict[str, Any]:
        """Take a screenshot and return base64 encoded image.

        Returns:
            ``{"image_data": <base64 string>}``.

        Raises:
            RuntimeError: if the capture fails or returns no data.
        """
        success, output = await adb_exec.run("shell", "screencap", "-p")
        if success and output:
            return {"image_data": base64.b64encode(output).decode("utf-8")}
        # The output may be partial binary data; never let decoding the error
        # detail raise and mask the real failure.
        detail = output.decode("utf-8", errors="replace") if isinstance(output, bytes) else output
        raise RuntimeError(f"Screenshot failed: {detail}")

    async def get_screen_size(self) -> Dict[str, Any]:
        """Get the screen size of the Android device.

        Parses the ``<width>x<height>`` pairs printed by `wm size` and uses
        the last one (when several lines are printed, e.g. physical and
        override size, the last line wins).

        Raises:
            RuntimeError: if the command fails or prints no size.
        """
        import re

        success, output = await adb_exec.run("shell", "wm", "size", decode=True)
        sizes = re.findall(r"(\d+)x(\d+)", output) if success and output else []
        if not sizes:
            raise RuntimeError(f"Failed to get screen size: {output}")
        width, height = map(int, sizes[-1])
        return {"width": width, "height": height}

    async def get_cursor_position(self) -> Dict[str, Any]:
        """Get cursor position - not supported on touch devices."""
        raise NotImplementedError(
            "get_cursor_position not supported on Android (touch-based interface)"
        )

    # ------------------------------------------------------------------
    # Clipboard Actions
    # ------------------------------------------------------------------
    async def copy_to_clipboard(self) -> Dict[str, Any]:
        """Get clipboard content via `cmd clipboard get-text`.

        Returns:
            ``{"text": <clipboard text, stripped>}``.
        """
        output = await self._adb("Failed to get clipboard", "shell", "cmd", "clipboard", "get-text")
        return {"text": output.strip()}

    async def set_clipboard(self, text: str) -> Dict[str, Any]:
        """Set clipboard content via `cmd clipboard set-text`.

        The text is shell-quoted so spaces and shell metacharacters are stored
        literally.
        """
        await self._adb(
            "Failed to set clipboard", "shell", "cmd", "clipboard", "set-text", shlex.quote(text)
        )
        return {}

    # ------------------------------------------------------------------
    # Other
    # ------------------------------------------------------------------
    async def run_command(self, command: str) -> Dict[str, Any]:
        """Run a shell command on Android device.

        The command string is passed to the device shell as-is (it is meant to
        be shell syntax), and failures are reported in the result instead of
        being raised.

        Returns:
            ``{"output": <decoded output>, "success": <bool>}``.
        """
        success, output = await adb_exec.run("shell", command, decode=True)
        return {"output": output, "success": success}
class AndroidFileHandler(BaseFileHandler):
    """Android file handler using ADB shell commands.

    Text operations run on the device through `adb shell`; binary transfer
    uses `adb push` / `adb pull` with a local temporary file. Every path that
    is handed to the device shell is quoted with ``shlex.quote`` so paths with
    spaces, quotes or shell metacharacters are treated as a single literal
    argument (this matters most for `rm -rf`).
    """

    @staticmethod
    async def _shell(failure: str, *args: str) -> str:
        """Run `adb shell <args>` with decoded output.

        Raises:
            RuntimeError: ``"<failure>: <output>"`` if the command did not succeed.
        """
        success, output = await adb_exec.run("shell", *args, decode=True)
        if not success:
            raise RuntimeError(f"{failure}: {output}")
        return output

    @staticmethod
    def _parse_ls_entry(line: str) -> Optional[Dict[str, Any]]:
        """Parse one `ls -la` line into ``{"name", "is_dir", "size"}``.

        Two column layouts are accepted:
          * ``perms links owner group size YYYY-MM-DD HH:MM name`` (name is field 7)
          * ``perms links owner group size Mon DD HH:MM name``     (name is field 8)

        Returns ``None`` for the "total" line, for ``.`` / ``..`` and for lines
        that have too few fields.
        """
        parts = line.split()
        if len(parts) < 8 or parts[0] == "total":
            return None
        date_field = parts[5]
        iso_date = len(date_field) == 10 and date_field[4] == "-" and date_field[7] == "-"
        name_index = 7 if iso_date else 8
        if len(parts) <= name_index:
            return None
        # Split only up to the name so spaces inside the name are preserved.
        name = line.split(None, name_index)[name_index].rstrip()
        if parts[0].startswith("l"):
            # Symlinks are printed as "name -> target"; keep only the name.
            name = name.split(" -> ", 1)[0]
        if name in (".", ".."):
            return None
        return {
            "name": name,
            "is_dir": parts[0].startswith("d"),
            "size": int(parts[4]) if parts[4].isdigit() else 0,
        }

    async def file_exists(self, path: str) -> Dict[str, Any]:
        """Check if a regular file exists. Returns ``{"exists": bool}``."""
        success, output = await adb_exec.run(
            "shell", f"test -f {shlex.quote(path)} && echo 'yes' || echo 'no'", decode=True
        )
        exists = success and output.strip() == "yes"
        return {"exists": exists}

    async def directory_exists(self, path: str) -> Dict[str, Any]:
        """Check if a directory exists. Returns ``{"exists": bool}``."""
        success, output = await adb_exec.run(
            "shell", f"test -d {shlex.quote(path)} && echo 'yes' || echo 'no'", decode=True
        )
        exists = success and output.strip() == "yes"
        return {"exists": exists}

    async def list_dir(self, path: str) -> Dict[str, Any]:
        """List directory contents.

        Returns:
            ``{"entries": [{"name": str, "is_dir": bool, "size": int}, ...]}``
            without the ``.`` and ``..`` entries.

        Raises:
            RuntimeError: if `ls` fails.
        """
        output = await self._shell("Failed to list directory", "ls", "-la", shlex.quote(path))
        parsed = (self._parse_ls_entry(line) for line in output.strip().split("\n"))
        return {"entries": [entry for entry in parsed if entry is not None]}

    async def read_text(self, path: str) -> Dict[str, Any]:
        """Read text file contents. Returns ``{"content": str}``."""
        output = await self._shell("Failed to read file", "cat", shlex.quote(path))
        return {"content": output}

    async def write_text(self, path: str, content: str) -> Dict[str, Any]:
        """Write text to file (overwrites existing content).

        ``printf '%s'`` is used so the content is written verbatim, without a
        trailing newline and without interpreting backslash escapes.
        """
        await self._shell(
            "Failed to write file",
            f"printf '%s' {shlex.quote(content)} > {shlex.quote(path)}",
        )
        return {}

    async def write_bytes(self, path: str, content_b64: str) -> Dict[str, Any]:
        """Write binary content to file.

        Args:
            path: Destination path on the device.
            content_b64: Base64-encoded file content.
        """
        import os
        import tempfile

        content_bytes = base64.b64decode(content_b64)
        # Stage the bytes in a local temp file, then push it to the device.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(content_bytes)
            tmp_path = tmp.name
        try:
            # `adb push` receives the paths directly (no device shell), so no quoting here.
            success, output = await adb_exec.run("push", tmp_path, path, decode=True)
            if not success:
                raise RuntimeError(f"Failed to write bytes: {output}")
            return {}
        finally:
            os.unlink(tmp_path)

    async def delete_file(self, path: str) -> Dict[str, Any]:
        """Delete a file (`rm -f`)."""
        await self._shell("Failed to delete file", "rm", "-f", shlex.quote(path))
        return {}

    async def create_dir(self, path: str) -> Dict[str, Any]:
        """Create a directory, including missing parents (`mkdir -p`)."""
        await self._shell("Failed to create directory", "mkdir", "-p", shlex.quote(path))
        return {}

    async def delete_dir(self, path: str) -> Dict[str, Any]:
        """Delete a directory recursively (`rm -rf`)."""
        await self._shell("Failed to delete directory", "rm", "-rf", shlex.quote(path))
        return {}

    async def read_bytes(
        self, path: str, offset: int = 0, length: Optional[int] = None
    ) -> Dict[str, Any]:
        """Read binary file contents.

        The whole file is pulled to a local temp file first; ``offset`` and
        ``length`` are applied when reading that local copy.

        Returns:
            ``{"content": <base64 string>}``.

        Raises:
            RuntimeError: if the file cannot be pulled from the device.
        """
        import os
        import tempfile

        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp_path = tmp.name
        try:
            # `adb pull` receives the paths directly (no device shell), so no quoting here.
            success, output = await adb_exec.run("pull", path, tmp_path, decode=True)
            if not success:
                raise RuntimeError(f"Failed to pull file: {output}")
            with open(tmp_path, "rb") as f:
                f.seek(offset)
                content_bytes = f.read() if length is None else f.read(length)
            return {"content": base64.b64encode(content_bytes).decode("utf-8")}
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    async def get_file_size(self, path: str) -> Dict[str, Any]:
        """Get file size in bytes. Returns ``{"size": int}``.

        Raises:
            RuntimeError: if the command fails or its output is not a number.
        """
        output = await self._shell("Failed to get file size", f"wc -c < {shlex.quote(path)}")
        try:
            return {"size": int(output.strip())}
        except ValueError:
            raise RuntimeError(f"Failed to parse file size: {output}")
class AndroidWindowHandler(BaseWindowHandler):
"""Android window/app handler using activity manager."""
async def open(self, target: str) -> Dict[str, Any]:
    """Open a URL or file with default app.

    Sends an ``android.intent.action.VIEW`` intent with ``target`` as data.
    The target is shell-quoted because `adb shell` arguments are re-parsed by
    the device shell; an unquoted URL containing ``&`` or ``;`` would be cut
    off there.

    Raises:
        RuntimeError: if `am start` fails.
    """
    success, output = await adb_exec.run(
        "shell",
        "am",
        "start",
        "-a",
        "android.intent.action.VIEW",
        "-d",
        shlex.quote(target),
        decode=True,
    )
    if not success:
        raise RuntimeError(f"Failed to open target: {output}")
    return {}
async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
"""Launch an Android app by package name or activity."""
# If app contains '/', it's package/activity, otherwise just package
if "/" in app:
cmd = ["shell", "am", "start", "-n", app]
else:
# Launch main activity for package
cmd = ["shell", "monkey", "-p", app, "-c", "android.intent.category.LAUNCHER", "1"]
if args:
# Add extras if provided
for arg in args:
if "=" in arg:
key, value = arg.split("=", 1)
cmd.extend(["--es", key, value])
success, output = await adb_exec.run(*cmd, decode=True)
if success:
return {}
else:
raise RuntimeError(f"Failed to launch app: {output}")
async def get_current_window_id(self) -> Dict[str, Any]:
"""Get the currently focused activity."""
import logging
logger = logging.getLogger(__name__)
success, output = await adb_exec.run("shell", "dumpsys", "window", decode=True)
if success:
# Parse mCurrentFocus line
for line in output.split("\n"):
if "mCurrentFocus" in line:
logger.info(f"Found mCurrentFocus line: {line}")
# Example: mCurrentFocus=Window{abc123 u0 com.android.launcher3/com.android.launcher3.Launcher}
import re
match = re.search(r"([a-zA-Z0-9._]+/[a-zA-Z0-9._$]+)\}", line)
if match:
window_id = match.group(1)
logger.info(f"Extracted window_id: {window_id}")
return {"window_id": window_id}
else:
logger.warning(f"Regex did not match line: {line}")
logger.warning("No mCurrentFocus line found in dumpsys output")
return {"window_id": "unknown"}
else:
raise RuntimeError(f"Failed to get current window: {output}")
async def get_application_windows(self, app: str) -> Dict[str, Any]:
"""Get activities for an app."""
# List all activities in the package
success, output = await adb_exec.run("shell", "dumpsys", "package", app, decode=True)
if success:
activities = []
in_activity_section = False
for line in output.split("\n"):
if "Activity Resolver Table:" in line:
in_activity_section = True
elif in_activity_section and app in line:
import re
match = re.search(r"([a-z0-9.]+/[a-z0-9.]+)", line)
if match:
activities.append(match.group(1))
return {"windows": activities}
else:
raise RuntimeError(f"Failed to get application windows: {output}")
async def get_window_name(self, window_id: str) -> Dict[str, Any]:
"""Get the name of an activity."""
# window_id is in format package/activity
if "/" in str(window_id):
activity_name = str(window_id).split("/")[-1]
return {"name": activity_name}
else:
return {"name": str(window_id)}
async def get_window_size(self, window_id: str | int) -> Dict[str, Any]:
"""Get window size (returns screen size on Android)."""
# Android apps are typically fullscreen, return screen size
success, output = await adb_exec.run("shell", "wm", "size", decode=True)
if success and "x" in output:
size_str = output.split(":")[-1].strip()
width, height = map(int, size_str.split("x"))
return {"width": width, "height": height}
else:
raise RuntimeError(f"Failed to get window size: {output}")
async def activate_window(self, window_id: str | int) -> Dict[str, Any]:
"""Bring an app to foreground."""
# window_id should be package/activity format
window_str = str(window_id)
success, output = await adb_exec.run("shell", "am", "start", "-n", window_str, decode=True)
if success:
return {}
else:
raise RuntimeError(f"Failed to activate window: {output}")
async def close_window(self, window_id: str | int) -> Dict[str, Any]:
"""Force stop an app."""
# Extract package name from window_id (package/activity format)
window_str = str(window_id)
package = window_str.split("/")[0] if "/" in window_str else window_str
success, output = await adb_exec.run("shell", "am", "force-stop", package, decode=True)
if success:
return {}
else:
raise RuntimeError(f"Failed to close window: {output}")
async def get_window_position(self, window_id: str | int) -> Dict[str, Any]:
"""Get window position - not supported on Android."""
raise NotImplementedError(
"get_window_position not supported on Android (no windowing system)"
)
async def set_window_size(
self, window_id: str | int, width: int, height: int
) -> Dict[str, Any]:
"""Set window size - not supported on Android."""
raise NotImplementedError("set_window_size not supported on Android (apps are fullscreen)")
async def set_window_position(self, window_id: str | int, x: int, y: int) -> Dict[str, Any]:
"""Set window position - not supported on Android."""
raise NotImplementedError(
"set_window_position not supported on Android (no windowing system)"
)
async def maximize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Maximize window - not supported on Android."""
raise NotImplementedError(
"maximize_window not supported on Android (apps always fullscreen)"
)
async def minimize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Minimize window (send to background)."""
# Press HOME key to minimize current app
success, output = await adb_exec.run("shell", "input", "keyevent", "3", decode=True)
if success:
return {}
else:
raise RuntimeError(f"Failed to minimize window: {output}")
class AndroidDesktopHandler(BaseDesktopHandler):
    """Android desktop handler - minimal implementation."""

    async def get_desktop_environment(self) -> Dict[str, Any]:
        """Get desktop environment name.

        Returns:
            ``{"desktop_environment": "android"}``.
        """
        return {"desktop_environment": "android"}

    async def set_wallpaper(self, path: str) -> Dict[str, Any]:
        """Set the wallpaper using our custom helper APK.

        The image is first copied to ``/data/local/tmp`` so the helper app can
        read it, then ``SetWallpaperActivity`` is started with the copy's path
        as an extra. The copy is removed afterwards whether or not the
        activity could be started.

        Args:
            path: Absolute path to image on device (e.g. /sdcard/Pictures/wall.jpg)

        Returns:
            An empty dict on success.

        Raises:
            RuntimeError: If the copy or the activity start reports failure.
        """
        # Device paths are POSIX regardless of the host OS, hence posixpath.
        import posixpath
        import shlex

        # Copy file to /data/local/tmp where all apps can read it
        # (/sdcard uses FUSE with restrictive permissions that chmod can't change)
        temp_path = f"/data/local/tmp/wallpaper_{posixpath.basename(path)}"

        # NOTE(review): assumes adb_exec wraps the adb CLI (defined outside this
        # class); `adb shell` hands one joined command line to the device
        # shell, so paths with spaces or metacharacters must be quoted.
        quoted_temp = shlex.quote(temp_path)

        copy_success, _ = await adb_exec.run(
            "shell", "cp", shlex.quote(path), quoted_temp, decode=True
        )
        if not copy_success:
            raise RuntimeError(f"Failed to copy file to temp location: {path}")

        try:
            # Make temp file readable (best effort, result intentionally unused).
            await adb_exec.run("shell", "chmod", "644", quoted_temp, decode=True)

            package = "com.example.cua.wallpaper"
            component = f"{package}/.SetWallpaperActivity"

            success, output = await adb_exec.run(
                "shell",
                "am",
                "start",
                "-n",
                component,
                "-a",
                "com.example.cua.wallpaper.SET_WALLPAPER",
                "-e",
                "path",
                quoted_temp,
                "-e",
                "target",
                "home",
                decode=True,
            )
            if not success:
                raise RuntimeError(f"Failed to set wallpaper: {output}")

            # Give it a moment to set the wallpaper before the copy is removed
            await asyncio.sleep(1)
            return {}
        finally:
            # Single cleanup point for success, failure and cancellation.
            await adb_exec.run("shell", "rm", quoted_temp, decode=True)

View File

@@ -1,9 +1,8 @@
import platform
import subprocess
from typing import Tuple, Type
from typing import Tuple
from computer_server.diorama.base import BaseDioramaHandler
from ..utils.helpers import get_current_os
from .base import (
BaseAccessibilityHandler,
BaseAutomationHandler,
@@ -12,15 +11,23 @@ from .base import (
BaseWindowHandler,
)
# Conditionally import platform-specific handlers
system = platform.system().lower()
if system == "darwin":
OS_TYPE = get_current_os()
if OS_TYPE == "android":
from .android import (
AndroidAccessibilityHandler,
AndroidAutomationHandler,
AndroidDesktopHandler,
AndroidFileHandler,
AndroidWindowHandler,
)
elif OS_TYPE == "darwin":
from computer_server.diorama.macos import MacOSDioramaHandler
from .macos import MacOSAccessibilityHandler, MacOSAutomationHandler
elif system == "linux":
elif OS_TYPE == "linux":
from .linux import LinuxAccessibilityHandler, LinuxAutomationHandler
elif system == "windows":
elif OS_TYPE == "windows":
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
@@ -29,31 +36,6 @@ from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHan
class HandlerFactory:
"""Factory for creating OS-specific handlers."""
@staticmethod
def _get_current_os() -> str:
"""Determine the current OS.
Returns:
str: The OS type ('darwin' for macOS, 'linux' for Linux, or 'windows' for Windows)
Raises:
RuntimeError: If unable to determine the current OS
"""
try:
# Use platform.system() as primary method
system = platform.system().lower()
if system in ["darwin", "linux", "windows"]:
return system
# Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only)
result = subprocess.run(["uname", "-s"], capture_output=True, text=True)
if result.returncode == 0:
return result.stdout.strip().lower()
raise RuntimeError(f"Unsupported OS: {system}")
except Exception as e:
raise RuntimeError(f"Failed to determine current OS: {str(e)}")
@staticmethod
def create_handlers() -> Tuple[
BaseAccessibilityHandler,
@@ -73,9 +55,16 @@ class HandlerFactory:
NotImplementedError: If the current OS is not supported
RuntimeError: If unable to determine the current OS
"""
os_type = HandlerFactory._get_current_os()
if os_type == "darwin":
if OS_TYPE == "android":
return (
AndroidAccessibilityHandler(),
AndroidAutomationHandler(),
BaseDioramaHandler(),
AndroidFileHandler(),
AndroidDesktopHandler(),
AndroidWindowHandler(),
)
elif OS_TYPE == "darwin":
return (
MacOSAccessibilityHandler(),
MacOSAutomationHandler(),
@@ -84,7 +73,7 @@ class HandlerFactory:
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "linux":
elif OS_TYPE == "linux":
return (
LinuxAccessibilityHandler(),
LinuxAutomationHandler(),
@@ -93,7 +82,7 @@ class HandlerFactory:
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "windows":
elif OS_TYPE == "windows":
return (
WindowsAccessibilityHandler(),
WindowsAutomationHandler(),
@@ -103,4 +92,4 @@ class HandlerFactory:
GenericWindowHandler(),
)
else:
raise NotImplementedError(f"OS '{os_type}' is not supported")
raise NotImplementedError(f"OS '{OS_TYPE}' is not supported")

View File

@@ -279,28 +279,6 @@ class LinuxAutomationHandler(BaseAutomationHandler):
return {"success": False, "error": str(e)}
async def drag(
    self, start_x: int, start_y: int, end_x: int, end_y: int, button: str = "left"
) -> Dict[str, Any]:
    """Move the pointer to a start point, then drag it to an end point.

    Args:
        start_x: X coordinate where the drag begins.
        start_y: Y coordinate where the drag begins.
        end_x: X coordinate where the drag ends.
        end_y: Y coordinate where the drag ends.
        button: Mouse button held during the drag.

    Returns:
        Dict[str, Any]: ``{"success": True}`` when both pyautogui calls
        complete, otherwise ``{"success": False, "error": <message>}``.
    """
    try:
        pyautogui.moveTo(start_x, start_y)
        pyautogui.dragTo(end_x, end_y, duration=0.5, button=button)
    except Exception as exc:
        # Failures are reported in the result instead of being raised.
        outcome = {"success": False, "error": str(exc)}
    else:
        outcome = {"success": True}
    return outcome
async def drag_path(
self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5
) -> Dict[str, Any]:
"""Drag along a path defined by a list of coordinates.

View File

@@ -25,7 +25,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from .browser import get_browser_manager
from .handlers.factory import HandlerFactory
from .handlers.factory import OS_TYPE, HandlerFactory
# Authentication session TTL (in seconds). Override via env var CUA_AUTH_TTL_SECONDS. Default: 60s
AUTH_SESSION_TTL_SECONDS: int = int(os.environ.get("CUA_AUTH_TTL_SECONDS", "60"))
@@ -262,19 +262,11 @@ auth_manager = AuthenticationManager()
@app.get("/status")
async def status():
sys = platform.system().lower()
# get os type
if "darwin" in sys or sys == "macos" or sys == "mac":
os_type = "macos"
elif "windows" in sys:
os_type = "windows"
else:
os_type = "linux"
# get computer-server features
features = []
if HAS_AGENT:
features.append("agent")
return {"status": "ok", "os_type": os_type, "features": features}
return {"status": "ok", "os_type": OS_TYPE, "features": features}
@app.websocket("/ws", name="websocket_endpoint")

View File

@@ -1,3 +1,3 @@
from . import wallpaper
from . import helpers, wallpaper
__all__ = ["wallpaper"]
__all__ = ["helpers", "wallpaper"]

View File

@@ -0,0 +1,81 @@
import asyncio
import os
import platform
import subprocess
from typing import overload
def get_current_os() -> str:
    """Determine the current OS.

    When the ``IS_CUA_ANDROID`` environment variable is ``"true"``, the result
    is ``'android'`` provided ``adb devices`` lists ``emulator-5554`` in the
    ``device`` state; an emulator that is listed but ``offline`` or
    ``unauthorized`` cannot be driven through adb and is treated as missing.
    Otherwise ``platform.system()`` is used, with ``uname -s`` as a fallback
    for values it does not recognise (Unix-like systems only).

    Returns:
        str: The OS type ('android', 'darwin' for macOS, 'linux' for Linux, or 'windows' for Windows)

    Raises:
        RuntimeError: If unable to determine the current OS
    """
    try:
        if os.environ.get("IS_CUA_ANDROID") == "true":
            # Verify emulator is actually running by checking adb devices
            try:
                result = subprocess.run(
                    ["adb", "devices"], capture_output=True, text=True, timeout=5
                )
            except subprocess.TimeoutExpired:
                raise RuntimeError(
                    "IS_CUA_ANDROID is set but adb command timed out. "
                    "Emulator may be starting up or unresponsive."
                )

            # Each device row is "<serial><whitespace><state>"; a plain
            # substring test would also accept "emulator-5554 offline".
            emulator_online = any(
                fields[:2] == ["emulator-5554", "device"]
                for fields in (line.split() for line in result.stdout.splitlines())
            )
            if result.returncode == 0 and emulator_online:
                return "android"
            raise RuntimeError(
                "IS_CUA_ANDROID is set but no emulator found. "
                "Ensure Android emulator is running and accessible via adb."
            )

        system = platform.system().lower()
        if system in ["darwin", "linux", "windows"]:
            return system

        # Fallback to uname if platform.system() doesn't return expected values (Unix-like systems only)
        result = subprocess.run(["uname", "-s"], capture_output=True, text=True)
        if result.returncode == 0:
            return result.stdout.strip().lower()

        raise RuntimeError(f"Unsupported OS: {system}")
    except Exception as e:
        # All failures (including the RuntimeErrors raised above and a missing
        # adb/uname binary) surface as one RuntimeError; keep the cause chained.
        raise RuntimeError(f"Failed to determine current OS: {str(e)}") from e
class CommandExecutor:
def __init__(self, *base_cmd: str) -> None:
"""Initialize with a base command.
Args:
base_cmd: The base command and its initial arguments.
"""
self.__base_cmd = list(base_cmd)
@overload
async def run(self, *args: str, timeout: int = 10) -> tuple[bool, bytes]: ...
@overload
async def run(self, *args: str, decode: bool = True, timeout: int = 10) -> tuple[bool, str]: ...
async def run(
self, *args: str, decode: bool = False, timeout: int = 10
) -> tuple[bool, bytes | str]:
cmd = self.__base_cmd + list(args)
try:
result = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await asyncio.wait_for(result.communicate(), timeout=timeout)
output = stdout or stderr
if decode:
output = output.decode("utf-8")
return result.returncode == 0, output
except asyncio.TimeoutError:
return False, f"Command timed out after {timeout}s".encode("utf-8")
except Exception as e:
return False, str(e).encode("utf-8")