Merge pull request #464 from trycua/feature/docker-xfce

Add docker-xfce provider with vanilla XFCE desktop
This commit is contained in:
f-trycua
2025-10-12 07:36:58 +02:00
committed by GitHub
13 changed files with 601 additions and 8 deletions
+5
View File
@@ -0,0 +1,5 @@
# Files kept out of the Docker build context: documentation, VCS metadata
# and the license text.
README.md
.git
.gitignore
*.md
LICENSE
+4
View File
@@ -0,0 +1,4 @@
# Local runtime data directories
storage/
shared/
# Log files and macOS Finder metadata
*.log
.DS_Store
+131
View File
@@ -0,0 +1,131 @@
# CUA Docker XFCE Container
# Vanilla XFCE desktop with noVNC and computer-server
FROM ubuntu:22.04

# Image-wide environment, declared in one instruction:
#   DEBIAN_FRONTEND               avoid prompts from apt
#   HOME / DISPLAY                home directory and X display of the desktop user
#   VNC_PORT / NOVNC_PORT / API_PORT   service ports
#   VNC_RESOLUTION / VNC_COL_DEPTH     default geometry and color depth
ENV DEBIAN_FRONTEND=noninteractive \
    HOME=/home/cua \
    DISPLAY=:1 \
    VNC_PORT=5901 \
    NOVNC_PORT=6901 \
    API_PORT=8000 \
    VNC_RESOLUTION=1024x768 \
    VNC_COL_DEPTH=24
# Install system dependencies first (including sudo).
# The XFCE power manager is removed in the SAME layer: it would otherwise pop
# up inside the container, and removing it in a later layer would hide the
# files without reclaiming their space in the image.
# Recommended packages are intentionally kept; the XFCE desktop relies on them.
RUN apt-get update && apt-get install -y \
    # System utilities
    sudo \
    # Desktop environment
    xfce4 \
    xfce4-terminal \
    xfce4-goodies \
    dbus-x11 \
    # VNC server
    tigervnc-standalone-server \
    tigervnc-common \
    # noVNC dependencies
    python3 \
    python3-pip \
    python3-numpy \
    git \
    net-tools \
    netcat \
    supervisor \
    # Computer-server dependencies
    python3-tk \
    python3-dev \
    gnome-screenshot \
    wmctrl \
    ffmpeg \
    socat \
    xclip \
    # Browser
    wget \
    software-properties-common \
    # Build tools
    build-essential \
    libncursesw5-dev \
    libssl-dev \
    libsqlite3-dev \
    tk-dev \
    libgdbm-dev \
    libc6-dev \
    libbz2-dev \
    libffi-dev \
    zlib1g-dev \
    # Best-effort removal: do not fail the build if the packages are absent
    && (apt-get remove -y xfce4-power-manager xfce4-power-manager-data || true) \
    && rm -rf /var/lib/apt/lists/*
# Create user after sudo is installed.
# Passwordless sudo is granted through a drop-in under /etc/sudoers.d instead
# of appending to /etc/sudoers: the main file stays untouched, the rule is easy
# to find or remove, and visudo validates its syntax at build time.
RUN useradd -m -s /bin/bash -G sudo cua && \
    echo "cua:password" | chpasswd && \
    echo "cua ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/cua && \
    chmod 0440 /etc/sudoers.d/cua && \
    visudo -cf /etc/sudoers.d/cua
# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues.
# The pin file and the preferences file are written with printf, one argument
# per line: unlike `echo '...\n...'`, this does not depend on the shell's echo
# interpreting backslash escapes.
RUN apt-get update && \
    add-apt-repository -y ppa:mozillateam/ppa && \
    # Prefer the PPA build over the distribution package
    printf '%s\n' \
        'Package: *' \
        'Pin: release o=LP-PPA-mozillateam' \
        'Pin-Priority: 1001' \
        > /etc/apt/preferences.d/mozilla-firefox && \
    apt-get update && \
    apt-get install -y firefox && \
    # Disable first-run pages and data reporting
    printf '%s\n' \
        'pref("datareporting.policy.firstRunURL", "");' \
        'pref("datareporting.policy.dataSubmissionEnabled", false);' \
        'pref("datareporting.healthreport.service.enabled", false);' \
        'pref("datareporting.healthreport.uploadEnabled", false);' \
        'pref("trailhead.firstrun.branches", "nofirstrun-empty");' \
        'pref("browser.aboutwelcome.enabled", false);' \
        > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \
    # Register Firefox as the system web browser alternative
    update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \
    update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \
    rm -rf /var/lib/apt/lists/*
# Install noVNC and its websockify proxy.
# Shallow clones (--depth 1) keep the full git history out of the image.
# NOTE(review): both clones track the default branch; consider pinning a
# release tag with --branch for reproducible builds.
RUN git clone --depth 1 https://github.com/novnc/noVNC.git /opt/noVNC && \
    git clone --depth 1 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \
    # Serve the VNC client page as the default document
    ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html
# Install computer-server.
# HOME points at /home/cua while this step runs as root, so anything written
# under $HOME/.cache would end up root-owned. pip's cache is disabled
# (--no-cache-dir), and the cache directory is created and handed to cua in
# the same layer in case the install leaves other files there.
RUN mkdir -p /home/cua/.cache && \
    pip3 install --no-cache-dir cua-computer-server && \
    chown -R cua:cua /home/cua/.cache
# Copy startup scripts
COPY src/supervisor/ /etc/supervisor/conf.d/
COPY src/scripts/ /usr/local/bin/
# Make scripts executable
RUN chmod +x /usr/local/bin/*.sh

# Build the per-user desktop state as cua so every file is created with the
# right owner (no follow-up chown needed).
USER cua
WORKDIR /home/cua

# One layer for all per-user directories:
#   .vnc                 VNC directory (no password needed with SecurityTypes None)
#   .config/...          XFCE configuration and autostart entries
#   storage, shared      mount points for host volumes
#   .cache/dconf, .mozilla/firefox   dconf and Firefox directories
RUN mkdir -p \
    "$HOME/.vnc" \
    "$HOME/.config/xfce4/xfconf/xfce-perchannel-xml" \
    "$HOME/.config/autostart" \
    "$HOME/storage" \
    "$HOME/shared" \
    "$HOME/.cache/dconf" \
    "$HOME/.mozilla/firefox"

# Copy XFCE config to disable browser launching and welcome screens
COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc
COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml

# Hide the xfce4-tips autostart entry. printf writes the two lines without
# relying on the shell's echo interpreting "\n".
RUN printf '%s\n' '[Desktop Entry]' 'Hidden=true' \
    > "$HOME/.config/autostart/xfce4-tips-autostart.desktop"

# supervisord is started as root (its config sets user=root) and runs each
# program as cua.
USER root

# Expose ports
EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT

# Start services via supervisor
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
+261
View File
@@ -0,0 +1,261 @@
# CUA Docker XFCE Container
Vanilla XFCE desktop container for Computer-Using Agents (CUA) with noVNC and computer-server. This is a lightweight alternative to the Kasm-based container with minimal dependencies.
## Features
- Ubuntu 22.04 (Jammy) with vanilla XFCE desktop environment
- TigerVNC server for remote desktop access
- noVNC for web-based VNC access (no client required)
- Pre-installed computer-server for remote computer control
- Python 3.10 (Ubuntu 22.04 system Python) with necessary libraries
- Screen capture tools (gnome-screenshot, wmctrl, ffmpeg)
- Clipboard and socket utilities (xclip, socat)
- Firefox browser with telemetry disabled
## Architecture
```
┌─────────────────────────────────────────┐
│ Docker Container (Ubuntu 22.04) │
├─────────────────────────────────────────┤
│ XFCE Desktop Environment │
│ ├── Firefox │
│ ├── XFCE Terminal │
│ └── Desktop utilities │
├─────────────────────────────────────────┤
│ TigerVNC Server (Port 5901) │
│ └── X11 Display :1 │
├─────────────────────────────────────────┤
│ noVNC Web Interface (Port 6901) │
│ └── WebSocket proxy to VNC │
├─────────────────────────────────────────┤
│ CUA Computer Server (Port 8000) │
│ └── WebSocket API for automation │
└─────────────────────────────────────────┘
```
## Building the Container
```bash
docker build -t cua-docker-xfce:latest .
```
## Pushing to Registry
```bash
# Tag for Docker Hub (replace 'trycua' with your Docker Hub username)
docker tag cua-docker-xfce:latest trycua/cua-docker-xfce:latest
# Login to Docker Hub
docker login
# Push to Docker Hub
docker push trycua/cua-docker-xfce:latest
```
## Running the Container Manually
### Basic Usage
```bash
docker run --rm -it \
--shm-size=512m \
-p 5901:5901 \
-p 6901:6901 \
-p 8000:8000 \
cua-docker-xfce:latest
```
### With Custom Resolution
```bash
docker run --rm -it \
--shm-size=512m \
-p 5901:5901 \
-p 6901:6901 \
-p 8000:8000 \
-e VNC_RESOLUTION=1280x720 \
cua-docker-xfce:latest
```
### With Persistent Storage
```bash
docker run --rm -it \
--shm-size=512m \
-p 5901:5901 \
-p 6901:6901 \
-p 8000:8000 \
-v $(pwd)/storage:/home/cua/storage \
cua-docker-xfce:latest
```
## Accessing the Container
- **noVNC Web Interface**: Open `http://localhost:6901` in your browser (no password required)
- **VNC Client**: Connect to `localhost:5901` (no password required)
- **Computer Server API**: Available at `http://localhost:8000`
## Using with CUA Docker Provider
This container is designed to work with the CUA Docker provider. Simply specify the docker-xfce image:
```python
from computer import Computer
# Create computer with docker-xfce container
computer = Computer(
os_type="linux",
provider_type="docker",
image="trycua/cua-docker-xfce:latest", # Use docker-xfce instead of Kasm
display="1024x768",
memory="4GB",
cpu="2"
)
# Use the computer
async with computer:
# Take a screenshot
screenshot = await computer.interface.screenshot()
# Click and type
await computer.interface.left_click(100, 100)
await computer.interface.type_text("Hello from CUA!")
# Run commands
result = await computer.interface.run_command("ls -la")
print(result.stdout)
```
### Switching between Kasm and docker-xfce
The Docker provider automatically detects which image you're using:
```python
# Use Kasm-based container (default for Linux)
computer_kasm = Computer(
os_type="linux",
provider_type="docker",
image="trycua/cua-ubuntu:latest", # Kasm image
)
# Use docker-xfce container (vanilla XFCE)
computer_xfce = Computer(
os_type="linux",
provider_type="docker",
image="trycua/cua-docker-xfce:latest", # docker-xfce image
)
```
Both provide the same API and functionality - the provider automatically configures the correct paths and settings based on the image.
## Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `VNC_RESOLUTION` | `1024x768` | Screen resolution |
| `VNC_COL_DEPTH` | `24` | Color depth |
| `VNC_PORT` | `5901` | VNC server port |
| `NOVNC_PORT` | `6901` | noVNC web interface port |
| `API_PORT` | `8000` | Computer-server API port |
| `DISPLAY` | `:1` | X11 display number |
## Exposed Ports
- **5901**: TigerVNC server
- **6901**: noVNC web interface
- **8000**: Computer-server WebSocket API
## Volume Mount Points
- `/home/cua/storage`: Persistent storage mount point
- `/home/cua/shared`: Shared folder mount point
## User Credentials
- **Username**: `cua`
- **Password**: `password` (for shell login only)
- **Sudo access**: Enabled without password
- **VNC access**: No password required
## Creating Snapshots
### Filesystem Snapshot
```bash
docker commit <container_id> cua-docker-xfce-snapshot:latest
```
### Running from Snapshot
```bash
docker run --rm -it \
--shm-size=512m \
-p 6901:6901 \
-p 8000:8000 \
cua-docker-xfce-snapshot:latest
```
## Comparison with Kasm Container
| Feature | Kasm Container | Docker XFCE Container |
|---------|---------------|----------------------|
| Base Image | KasmWeb Ubuntu | Vanilla Ubuntu |
| VNC Server | KasmVNC | TigerVNC |
| Dependencies | Higher | Lower |
| Configuration | Pre-configured | Minimal |
| Size | Larger | Smaller |
| Maintenance | Depends on Kasm | Independent |
## Process Management
The container uses `supervisord` to manage three main processes:
1. **VNC Server** (Priority 10): TigerVNC with XFCE desktop
2. **noVNC** (Priority 20): WebSocket proxy for browser access
3. **Computer Server** (Priority 30): CUA automation API
All processes are automatically restarted on failure.
## Troubleshooting
### VNC server won't start
Check if X11 lock files exist:
```bash
docker exec <container_id> rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1
```
### noVNC shows black screen
Ensure VNC server is running:
```bash
docker exec <container_id> supervisorctl status vncserver
```
### Computer-server not responding
Check if X server is accessible:
```bash
docker exec <container_id> env DISPLAY=:1 xdpyinfo
```
### View logs
```bash
docker exec <container_id> tail -f /var/log/supervisor/supervisord.log
docker exec <container_id> supervisorctl status
```
## Integration with CUA System
This container provides the same functionality as the Kasm container but with:
- **Reduced dependencies**: No reliance on KasmWeb infrastructure
- **Smaller image size**: Minimal base configuration
- **Full control**: Direct access to all components
- **Easy customization**: Simple to modify and extend
The container integrates seamlessly with:
- CUA Computer library (via WebSocket API)
- Docker provider for lifecycle management
- Standard VNC clients for debugging
- Web browsers for visual monitoring
## License
MIT License - See LICENSE file for details
@@ -0,0 +1,20 @@
#!/bin/bash
# Dynamic display resolution script
# Can be called to change the VNC display resolution.
#
# Usage:  <script> [WIDTHxHEIGHT]      (default: 1920x1080)
# Exit:   0 if one of the xrandr calls succeeded, 1 otherwise.

RESOLUTION=${1:-1920x1080}

# Wait (up to 10 attempts, one second apart) for display :1 to answer
for _ in {1..10}; do
    if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then
        break
    fi
    sleep 1
done

# Change resolution using xrandr: first try switching the mode of output
# VNC-0, then fall back to resizing the framebuffer. Success is reported only
# when one of the two calls actually succeeded.
if DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \
   DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null; then
    echo "Display resolution set to: $RESOLUTION"
else
    echo "Failed to set resolution to $RESOLUTION" >&2
    exit 1
fi
@@ -0,0 +1,13 @@
#!/bin/bash
# Launches computer-server once the X server on display :1 is reachable.
set -e

export DISPLAY=:1

# Wait for X server to be ready
echo "Waiting for X server to start..."
until xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; do
    sleep 1
done
echo "X server is ready"

# Start computer-server. exec replaces this shell with the Python process so
# signals sent to this program (e.g. by its supervisor) reach the server
# itself instead of a bash wrapper.
exec python3 -m computer_server --port "${API_PORT:-8000}"
@@ -0,0 +1,12 @@
#!/bin/bash
# Launches the noVNC proxy in front of the local VNC server.
set -e

VNC_PORT="${VNC_PORT:-5901}"
NOVNC_PORT="${NOVNC_PORT:-6901}"

# Wait until the VNC port accepts connections instead of sleeping a fixed
# time. The wait is bounded (30 attempts, one second apart); if the port never
# opens the proxy is started anyway, as before.
echo "Waiting for VNC server to start..."
for _ in $(seq 1 30); do
    if nc -z localhost "$VNC_PORT" >/dev/null 2>&1; then
        break
    fi
    sleep 1
done

# Start noVNC. exec replaces this shell so signals reach the proxy directly.
cd /opt/noVNC
exec /opt/noVNC/utils/novnc_proxy \
    --vnc "localhost:${VNC_PORT}" \
    --listen "${NOVNC_PORT}"
+23
View File
@@ -0,0 +1,23 @@
#!/bin/bash
# Brings up the VNC server on display :1 and then follows its log files.
set -e

# Clear entries for display :1 that a previous run may have left under /tmp
rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1

# Server options, collected in one place. Geometry, depth and port come from
# the environment with built-in fallbacks; authentication is disabled.
vnc_opts=(
    -geometry ${VNC_RESOLUTION:-1920x1080}
    -depth ${VNC_COL_DEPTH:-24}
    -rfbport ${VNC_PORT:-5901}
    -localhost no
    -SecurityTypes None
    -AlwaysShared
    -AcceptPointerEvents
    -AcceptKeyEvents
    -AcceptCutText
    -SendCutText
    -xstartup /usr/local/bin/xstartup.sh
    --I-KNOW-THIS-IS-INSECURE
)

vncserver :1 "${vnc_opts[@]}"

# Stay in the foreground by following the VNC logs
# NOTE(review): tail keeps running regardless of the server's state - confirm
# this is the intended liveness signal for the supervisor.
tail -f /home/cua/.vnc/*.log
+21
View File
@@ -0,0 +1,21 @@
#!/bin/bash
# X startup script for the VNC session: D-Bus, XFCE, then display tweaks.
set -e

# Start D-Bus unless a session bus is already advertised. The command
# substitution is quoted so the generated assignments reach eval intact.
if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then
    eval "$(dbus-launch --sh-syntax --exit-with-session)"
fi

# Start XFCE
startxfce4 &

# Wait for XFCE to start
sleep 2

# Disable screensaver and power management. These are best-effort tweaks:
# under `set -e` a failing xset would otherwise abort this script, so each
# call is allowed to fail.
xset s off || true
xset -dpms || true
xset s noblank || true

# Wait for the session
wait
@@ -0,0 +1,30 @@
; Supervisor configuration for the desktop container.
; supervisord itself runs in the foreground as root; every program below is
; started as the cua user and restarted automatically when it exits.
; Priorities order startup: VNC (10), then noVNC (20), then computer-server (30).
;
; Each program is a shell wrapper that spawns child processes, so stop and
; kill signals are delivered to the whole process group (stopasgroup /
; killasgroup). Otherwise a stop or restart can leave the children running.

[supervisord]
nodaemon=true
user=root
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
childlogdir=/var/log/supervisor

[program:vncserver]
command=/usr/local/bin/start-vnc.sh
user=cua
autorestart=true
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/vncserver.log
stderr_logfile=/var/log/supervisor/vncserver.error.log
priority=10

[program:novnc]
command=/usr/local/bin/start-novnc.sh
user=cua
autorestart=true
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/novnc.log
stderr_logfile=/var/log/supervisor/novnc.error.log
priority=20

[program:computer-server]
command=/usr/local/bin/start-computer-server.sh
user=cua
autorestart=true
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/computer-server.log
stderr_logfile=/var/log/supervisor/computer-server.error.log
priority=30
@@ -0,0 +1,2 @@
# XFCE preferred applications - set Firefox as default browser
# The Dockerfile copies this file to $HOME/.config/xfce4/helpers.rc.
WebBrowser=firefox
@@ -0,0 +1,51 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- xfce4-session channel settings; the Dockerfile copies this file into the
     cua user's xfce-perchannel-xml directory. -->
<channel name="xfce4-session" version="1.0">
<!-- General: session names; SaveOnExit is false. -->
<property name="general" type="empty">
<property name="FailsafeSessionName" type="string" value="Failsafe"/>
<property name="SessionName" type="string" value="Default"/>
<property name="SaveOnExit" type="bool" value="false"/>
</property>
<!-- Failsafe session: five clients (Count=5), each with a command, a
     priority and a per-screen flag: xfwm4, xfce4-panel, xfdesktop,
     xfsettingsd and xfce4-notifyd. -->
<property name="sessions" type="empty">
<property name="Failsafe" type="empty">
<property name="IsFailsafe" type="bool" value="true"/>
<property name="Count" type="int" value="5"/>
<property name="Client0_Command" type="array">
<value type="string" value="xfwm4"/>
</property>
<property name="Client0_Priority" type="int" value="15"/>
<property name="Client0_PerScreen" type="bool" value="false"/>
<property name="Client1_Command" type="array">
<value type="string" value="xfce4-panel"/>
</property>
<property name="Client1_Priority" type="int" value="25"/>
<property name="Client1_PerScreen" type="bool" value="false"/>
<property name="Client2_Command" type="array">
<value type="string" value="xfdesktop"/>
</property>
<property name="Client2_Priority" type="int" value="35"/>
<property name="Client2_PerScreen" type="bool" value="false"/>
<property name="Client3_Command" type="array">
<value type="string" value="xfsettingsd"/>
</property>
<property name="Client3_Priority" type="int" value="10"/>
<property name="Client3_PerScreen" type="bool" value="false"/>
<property name="Client4_Command" type="array">
<value type="string" value="xfce4-notifyd"/>
</property>
<property name="Client4_Priority" type="int" value="20"/>
<property name="Client4_PerScreen" type="bool" value="false"/>
</property>
</property>
<!-- Splash engine left empty (presumably disables the splash screen). -->
<property name="splash" type="empty">
<property name="Engine" type="string" value=""/>
</property>
<!-- LaunchGNOME is set to false. -->
<property name="compat" type="empty">
<property name="LaunchGNOME" type="bool" value="false"/>
</property>
<!-- Suspend, hibernate, hybrid-sleep and switch-user entries are set to false. -->
<property name="shutdown" type="empty">
<property name="ShowSuspend" type="bool" value="false"/>
<property name="ShowHibernate" type="bool" value="false"/>
<property name="ShowHybridSleep" type="bool" value="false"/>
<property name="ShowSwitchUser" type="bool" value="false"/>
</property>
</channel>
@@ -36,7 +36,7 @@ class DockerProvider(BaseVMProvider):
"""
def __init__(
self,
self,
port: Optional[int] = 8000,
host: str = "localhost",
storage: Optional[str] = None,
@@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider):
vnc_port: Optional[int] = 6901,
):
"""Initialize the Docker VM Provider.
Args:
port: Currently unused (VM provider port)
host: Hostname for the API server (default: localhost)
storage: Path for persistent VM storage
shared_path: Path for shared folder between host and container
image: Docker image to use (default: "trycua/cua-ubuntu:latest")
Supported images:
- "trycua/cua-ubuntu:latest" (Kasm-based)
- "trycua/cua-docker-xfce:latest" (vanilla XFCE)
verbose: Enable verbose logging
ephemeral: Use ephemeral (temporary) storage
vnc_port: Port for VNC interface (default: 6901)
@@ -62,19 +65,35 @@ class DockerProvider(BaseVMProvider):
self.api_port = 8000
self.vnc_port = vnc_port
self.ephemeral = ephemeral
# Handle ephemeral storage (temporary directory)
if ephemeral:
self.storage = "ephemeral"
else:
self.storage = storage
self.shared_path = shared_path
self.image = image
self.verbose = verbose
self._container_id = None
self._running_containers = {} # Track running containers by name
# Detect image type and configure user directory accordingly
self._detect_image_config()
def _detect_image_config(self):
    """Detect the image type and configure the in-container home directory.

    Sets ``self._home_dir`` and ``self._image_type`` from ``self.image``:

    - image names containing ``"xfce"`` (case-insensitive; this includes
      ``"docker-xfce"``) use ``/home/cua`` and type ``"docker-xfce"``;
    - every other image, including an unset one, falls back to the Kasm
      layout: ``/home/kasm-user`` and type ``"kasm"``.
    """
    # Normalize once; treat a missing image name as "not xfce" instead of
    # raising AttributeError on None.
    image_name = (self.image or "").lower()

    # "docker-xfce" always contains "xfce", so one substring test is enough.
    if "xfce" in image_name:
        self._home_dir = "/home/cua"
        self._image_type = "docker-xfce"
        logger.info(f"Detected docker-xfce image: using {self._home_dir}")
    else:
        # Default to Kasm configuration
        self._home_dir = "/home/kasm-user"
        self._image_type = "kasm"
        logger.info(f"Detected Kasm image: using {self._home_dir}")
@property
def provider_type(self) -> VMProviderType:
"""Return the provider type."""
@@ -277,12 +296,13 @@ class DockerProvider(BaseVMProvider):
# Add volume mounts if storage is specified
storage_path = storage or self.storage
if storage_path and storage_path != "ephemeral":
# Mount storage directory
cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"])
# Mount storage directory using detected home directory
cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"])
# Add shared path if specified
if self.shared_path:
cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"])
# Mount shared directory using detected home directory
cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"])
# Add environment variables
cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password