diff --git a/libs/docker-xfce/.dockerignore b/libs/docker-xfce/.dockerignore new file mode 100644 index 00000000..d4352f88 --- /dev/null +++ b/libs/docker-xfce/.dockerignore @@ -0,0 +1,5 @@ +README.md +.git +.gitignore +*.md +LICENSE diff --git a/libs/docker-xfce/.gitignore b/libs/docker-xfce/.gitignore new file mode 100644 index 00000000..0a2449a1 --- /dev/null +++ b/libs/docker-xfce/.gitignore @@ -0,0 +1,4 @@ +storage/ +shared/ +*.log +.DS_Store diff --git a/libs/docker-xfce/Dockerfile b/libs/docker-xfce/Dockerfile new file mode 100644 index 00000000..f6411919 --- /dev/null +++ b/libs/docker-xfce/Dockerfile @@ -0,0 +1,131 @@ +# CUA Docker XFCE Container +# Vanilla XFCE desktop with noVNC and computer-server + +FROM ubuntu:22.04 + +# Avoid prompts from apt +ENV DEBIAN_FRONTEND=noninteractive + +# Set environment variables +ENV HOME=/home/cua +ENV DISPLAY=:1 +ENV VNC_PORT=5901 +ENV NOVNC_PORT=6901 +ENV API_PORT=8000 +ENV VNC_RESOLUTION=1024x768 +ENV VNC_COL_DEPTH=24 + +# Install system dependencies first (including sudo) +RUN apt-get update && apt-get install -y \ + # System utilities + sudo \ + # Desktop environment + xfce4 \ + xfce4-terminal \ + xfce4-goodies \ + dbus-x11 \ + # VNC server + tigervnc-standalone-server \ + tigervnc-common \ + # noVNC dependencies + python3 \ + python3-pip \ + python3-numpy \ + git \ + net-tools \ + netcat \ + supervisor \ + # Computer-server dependencies + python3-tk \ + python3-dev \ + gnome-screenshot \ + wmctrl \ + ffmpeg \ + socat \ + xclip \ + # Browser + wget \ + software-properties-common \ + # Build tools + build-essential \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + tk-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + libffi-dev \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Remove power manager to avoid popup in container +RUN apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true + +# Create user after sudo is installed +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo 
"cua:password" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues +RUN apt-get update && \ + add-apt-repository -y ppa:mozillateam/ppa && \ + echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ + apt-get update && \ + apt-get install -y firefox && \ + echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \ + update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \ + update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \ + rm -rf /var/lib/apt/lists/* + +# Install noVNC +RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ + git clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ + ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html + +# Pre-create cache directory with correct ownership before pip install +RUN mkdir -p /home/cua/.cache && \ + chown -R cua:cua /home/cua/.cache + +# Install computer-server +RUN pip3 install cua-computer-server + +# Fix any cache files created by pip +RUN chown -R cua:cua /home/cua/.cache + +# Copy startup scripts +COPY src/supervisor/ /etc/supervisor/conf.d/ +COPY src/scripts/ /usr/local/bin/ + +# Make scripts executable +RUN chmod +x /usr/local/bin/*.sh + +# Setup VNC +USER cua +WORKDIR /home/cua + +# Create VNC directory (no password needed with SecurityTypes None) +RUN mkdir -p $HOME/.vnc + +# Configure XFCE for first start +RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 
$HOME/.config/autostart + +# Copy XFCE config to disable browser launching and welcome screens +COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc +COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml + +# Disable panel plugins that might try to open browsers +RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ + chown -R cua:cua $HOME/.config + +# Create storage and shared directories, and Firefox cache directory +RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \ + chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla $HOME/.vnc + +USER root + +# Expose ports +EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT + +# Start services via supervisor +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] diff --git a/libs/docker-xfce/README.md b/libs/docker-xfce/README.md new file mode 100644 index 00000000..9ecdff00 --- /dev/null +++ b/libs/docker-xfce/README.md @@ -0,0 +1,261 @@ +# CUA Docker XFCE Container + +Vanilla XFCE desktop container for Computer-Using Agents (CUA) with noVNC and computer-server. This is a lightweight alternative to the Kasm-based container with minimal dependencies. 
+ +## Features + +- Ubuntu 22.04 (Jammy) with vanilla XFCE desktop environment +- TigerVNC server for remote desktop access +- noVNC for web-based VNC access (no client required) +- Pre-installed computer-server for remote computer control +- Python 3.10 (the Ubuntu 22.04 system `python3`) with necessary libraries +- Screen capture tools (gnome-screenshot, wmctrl, ffmpeg) +- Clipboard utilities (xclip, socat) +- Firefox browser with telemetry disabled + +## Architecture + +``` +┌─────────────────────────────────────────┐ +│ Docker Container (Ubuntu 22.04) │ +├─────────────────────────────────────────┤ +│ XFCE Desktop Environment │ +│ ├── Firefox │ +│ ├── XFCE Terminal │ +│ └── Desktop utilities │ +├─────────────────────────────────────────┤ +│ TigerVNC Server (Port 5901) │ +│ └── X11 Display :1 │ +├─────────────────────────────────────────┤ +│ noVNC Web Interface (Port 6901) │ +│ └── WebSocket proxy to VNC │ +├─────────────────────────────────────────┤ +│ CUA Computer Server (Port 8000) │ +│ └── WebSocket API for automation │ +└─────────────────────────────────────────┘ +``` + +## Building the Container + +```bash +docker build -t cua-docker-xfce:latest . 
+``` + +## Pushing to Registry + +```bash +# Tag for Docker Hub (replace 'trycua' with your Docker Hub username) +docker tag cua-docker-xfce:latest trycua/cua-docker-xfce:latest + +# Login to Docker Hub +docker login + +# Push to Docker Hub +docker push trycua/cua-docker-xfce:latest +``` + +## Running the Container Manually + +### Basic Usage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce:latest +``` + +### With Custom Resolution + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -e VNC_RESOLUTION=1280x720 \ + cua-docker-xfce:latest +``` + +### With Persistent Storage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -v $(pwd)/storage:/home/cua/storage \ + cua-docker-xfce:latest +``` + +## Accessing the Container + +- **noVNC Web Interface**: Open `http://localhost:6901` in your browser (no password required) +- **VNC Client**: Connect to `localhost:5901` (no password required) +- **Computer Server API**: Available at `http://localhost:8000` + +## Using with CUA Docker Provider + +This container is designed to work with the CUA Docker provider. 
Simply specify the docker-xfce image: + +```python +from computer import Computer + +# Create computer with docker-xfce container +computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # Use docker-xfce instead of Kasm + display="1024x768", + memory="4GB", + cpu="2" +) + +# Use the computer +async with computer: + # Take a screenshot + screenshot = await computer.interface.screenshot() + + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type_text("Hello from CUA!") + + # Run commands + result = await computer.interface.run_command("ls -la") + print(result.stdout) +``` + +### Switching between Kasm and docker-xfce + +The Docker provider automatically detects which image you're using: + +```python +# Use Kasm-based container (default for Linux) +computer_kasm = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-ubuntu:latest", # Kasm image +) + +# Use docker-xfce container (vanilla XFCE) +computer_xfce = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # docker-xfce image +) +``` + +Both provide the same API and functionality - the provider automatically configures the correct paths and settings based on the image. 
+ +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `VNC_RESOLUTION` | `1024x768` | Screen resolution | +| `VNC_COL_DEPTH` | `24` | Color depth | +| `VNC_PORT` | `5901` | VNC server port | +| `NOVNC_PORT` | `6901` | noVNC web interface port | +| `API_PORT` | `8000` | Computer-server API port | +| `DISPLAY` | `:1` | X11 display number | + +## Exposed Ports + +- **5901**: TigerVNC server +- **6901**: noVNC web interface +- **8000**: Computer-server WebSocket API + +## Volume Mount Points + +- `/home/cua/storage`: Persistent storage mount point +- `/home/cua/shared`: Shared folder mount point + +## User Credentials + +- **Username**: `cua` +- **Password**: `password` (for shell login only) +- **Sudo access**: Enabled without password +- **VNC access**: No password required + +## Creating Snapshots + +### Filesystem Snapshot +```bash +docker commit <container_name> cua-docker-xfce-snapshot:latest +``` + +### Running from Snapshot +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce-snapshot:latest +``` + +## Comparison with Kasm Container + +| Feature | Kasm Container | Docker XFCE Container | +|---------|---------------|----------------------| +| Base Image | KasmWeb Ubuntu | Vanilla Ubuntu | +| VNC Server | KasmVNC | TigerVNC | +| Dependencies | Higher | Lower | +| Configuration | Pre-configured | Minimal | +| Size | Larger | Smaller | +| Maintenance | Depends on Kasm | Independent | + +## Process Management + +The container uses `supervisord` to manage three main processes: + +1. **VNC Server** (Priority 10): TigerVNC with XFCE desktop +2. **noVNC** (Priority 20): WebSocket proxy for browser access +3. **Computer Server** (Priority 30): CUA automation API + +All processes are automatically restarted on failure. 
+ +## Troubleshooting + +### VNC server won't start +Remove stale X11 lock files: +```bash +docker exec <container_name> rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 +``` + +### noVNC shows black screen +Ensure VNC server is running: +```bash +docker exec <container_name> supervisorctl status vncserver +``` + +### Computer-server not responding +Check if X server is accessible: +```bash +docker exec <container_name> env DISPLAY=:1 xdpyinfo +``` + +### View logs +```bash +docker exec <container_name> tail -f /var/log/supervisor/supervisord.log +docker exec <container_name> supervisorctl status +``` + +## Integration with CUA System + +This container provides the same functionality as the Kasm container but with: +- **Reduced dependencies**: No reliance on KasmWeb infrastructure +- **Smaller image size**: Minimal base configuration +- **Full control**: Direct access to all components +- **Easy customization**: Simple to modify and extend + +The container integrates seamlessly with: +- CUA Computer library (via WebSocket API) +- Docker provider for lifecycle management +- Standard VNC clients for debugging +- Web browsers for visual monitoring + +## License + +MIT License - See LICENSE file for details diff --git a/libs/docker-xfce/src/scripts/resize-display.sh b/libs/docker-xfce/src/scripts/resize-display.sh new file mode 100644 index 00000000..ea663dce --- /dev/null +++ b/libs/docker-xfce/src/scripts/resize-display.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Dynamic display resolution script +# Usage: resize-display.sh [WIDTHxHEIGHT] (defaults to 1920x1080); exits non-zero if xrandr cannot apply it + +RESOLUTION=${1:-1920x1080} + +# Wait up to ~10 seconds for display :1 to answer xdpyinfo +for i in {1..10}; do + if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then + break + fi + sleep 1 +done + +# Try a named mode on output VNC-0 first, then fall back to resizing the framebuffer; on failure report on stderr and stop before the success message +DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ +DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ +{ echo "Failed to set resolution to $RESOLUTION" >&2; exit 1; } + +echo "Display resolution set to: $RESOLUTION" diff --git a/libs/docker-xfce/src/scripts/start-computer-server.sh 
b/libs/docker-xfce/src/scripts/start-computer-server.sh new file mode 100644 index 00000000..bc27a3db --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-computer-server.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Block until the X server on display :1 answers xdpyinfo +echo "Waiting for X server to start..." +while ! xdpyinfo -display :1 >/dev/null 2>&1; do + sleep 1 +done +echo "X server is ready" + +# Start computer-server; exec replaces this shell so supervisord signals reach the server process directly +export DISPLAY=:1 +exec python3 -m computer_server --port "${API_PORT:-8000}" diff --git a/libs/docker-xfce/src/scripts/start-novnc.sh b/libs/docker-xfce/src/scripts/start-novnc.sh new file mode 100644 index 00000000..07894acb --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-novnc.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +# Give VNC a moment to start (supervisor starts it with priority 10, this is priority 20) +echo "Waiting for VNC server to start..." +sleep 5 + +# Start noVNC; exec replaces this shell so supervisord manages the proxy process itself +cd /opt/noVNC +exec /opt/noVNC/utils/novnc_proxy \ + --vnc "localhost:${VNC_PORT:-5901}" \ + --listen "${NOVNC_PORT:-6901}" diff --git a/libs/docker-xfce/src/scripts/start-vnc.sh b/libs/docker-xfce/src/scripts/start-vnc.sh new file mode 100644 index 00000000..934e6d3c --- /dev/null +++ b/libs/docker-xfce/src/scripts/start-vnc.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Clean up any existing VNC lock files +rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 + +# Start VNC server without password authentication (fallback geometry matches the image default VNC_RESOLUTION=1024x768) +vncserver :1 \ + -geometry "${VNC_RESOLUTION:-1024x768}" \ + -depth "${VNC_COL_DEPTH:-24}" \ + -rfbport "${VNC_PORT:-5901}" \ + -localhost no \ + -SecurityTypes None \ + -AlwaysShared \ + -AcceptPointerEvents \ + -AcceptKeyEvents \ + -AcceptCutText \ + -SendCutText \ + -xstartup /usr/local/bin/xstartup.sh \ + --I-KNOW-THIS-IS-INSECURE + +# Keep the process running +tail -f /home/cua/.vnc/*.log diff --git a/libs/docker-xfce/src/scripts/xstartup.sh b/libs/docker-xfce/src/scripts/xstartup.sh new file mode 100644 index 00000000..49bb46a2 --- /dev/null +++ b/libs/docker-xfce/src/scripts/xstartup.sh @@ -0,0 
+1,21 @@ +#!/bin/bash +set -e + +# Start D-Bus +if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then + eval $(dbus-launch --sh-syntax --exit-with-session) +fi + +# Start XFCE +startxfce4 & + +# Wait for XFCE to start +sleep 2 + +# Disable screensaver and power management +xset s off +xset -dpms +xset s noblank + +# Wait for the session +wait diff --git a/libs/docker-xfce/src/supervisor/supervisord.conf b/libs/docker-xfce/src/supervisor/supervisord.conf new file mode 100644 index 00000000..fb367c4f --- /dev/null +++ b/libs/docker-xfce/src/supervisor/supervisord.conf @@ -0,0 +1,30 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor + +[program:vncserver] +command=/usr/local/bin/start-vnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/vncserver.log +stderr_logfile=/var/log/supervisor/vncserver.error.log +priority=10 + +[program:novnc] +command=/usr/local/bin/start-novnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/novnc.log +stderr_logfile=/var/log/supervisor/novnc.error.log +priority=20 + +[program:computer-server] +command=/usr/local/bin/start-computer-server.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/computer-server.log +stderr_logfile=/var/log/supervisor/computer-server.error.log +priority=30 diff --git a/libs/docker-xfce/src/xfce-config/helpers.rc b/libs/docker-xfce/src/xfce-config/helpers.rc new file mode 100644 index 00000000..b2270633 --- /dev/null +++ b/libs/docker-xfce/src/xfce-config/helpers.rc @@ -0,0 +1,2 @@ +# XFCE preferred applications - set Firefox as default browser +WebBrowser=firefox diff --git a/libs/docker-xfce/src/xfce-config/xfce4-session.xml b/libs/docker-xfce/src/xfce-config/xfce4-session.xml new file mode 100644 index 00000000..d7b834d9 --- /dev/null +++ b/libs/docker-xfce/src/xfce-config/xfce4-session.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..487edc28 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -36,7 +36,7 @@ class DockerProvider(BaseVMProvider): """ def __init__( - self, + self, port: Optional[int] = 8000, host: str = "localhost", storage: Optional[str] = None, @@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider): vnc_port: Optional[int] = 6901, ): """Initialize the Docker VM Provider. - + Args: port: Currently unused (VM provider port) host: Hostname for the API server (default: localhost) storage: Path for persistent VM storage shared_path: Path for shared folder between host and container image: Docker image to use (default: "trycua/cua-ubuntu:latest") + Supported images: + - "trycua/cua-ubuntu:latest" (Kasm-based) + - "trycua/cua-docker-xfce:latest" (vanilla XFCE) verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage vnc_port: Port for VNC interface (default: 6901) @@ -62,19 +65,35 @@ class DockerProvider(BaseVMProvider): self.api_port = 8000 self.vnc_port = vnc_port self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image self.verbose = verbose self._container_id = None self._running_containers = {} # Track running containers by name + + # Detect image type and configure user directory accordingly + self._detect_image_config() + def _detect_image_config(self): + """Set self._home_dir and self._image_type from the image name: "xfce" anywhere in the name (case-insensitive) selects docker-xfce, anything else selects Kasm.""" + # "docker-xfce" already contains "xfce", so one substring test covers both; a missing image name falls back to the Kasm defaults + if "xfce" in (self.image or "").lower(): + self._home_dir = "/home/cua" + self._image_type = "docker-xfce" + logger.info(f"Detected docker-xfce 
image: using {self._home_dir}") + else: + # Default to Kasm configuration + self._home_dir = "/home/kasm-user" + self._image_type = "kasm" + logger.info(f"Detected Kasm image: using {self._home_dir}") + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" @@ -277,12 +296,13 @@ class DockerProvider(BaseVMProvider): # Add volume mounts if storage is specified storage_path = storage or self.storage if storage_path and storage_path != "ephemeral": - # Mount storage directory - cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"]) - + # Mount storage directory using detected home directory + cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) + # Add shared path if specified if self.shared_path: - cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"]) + # Mount shared directory using detected home directory + cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) # Add environment variables cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password