diff --git a/.github/workflows/docker-publish-kasm.yml b/.github/workflows/docker-publish-kasm.yml index 882d6869..d97dbf6f 100644 --- a/.github/workflows/docker-publish-kasm.yml +++ b/.github/workflows/docker-publish-kasm.yml @@ -9,141 +9,21 @@ on: paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" + - ".github/workflows/docker-reusable-publish.yml" pull_request: paths: - "libs/kasm/**" - ".github/workflows/docker-publish-kasm.yml" - - -env: - IMAGE_NAME: cua-ubuntu - DOCKER_HUB_ORG: trycua + - ".github/workflows/docker-reusable-publish.yml" jobs: - build-and-push: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - platform: - - linux/amd64 - - linux/arm64 - # todo unsupported base image - # - windows/amd64 - # - darwin/amd64 - # - darwin/arm64 - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Prepare platform tag - id: platform - run: | - # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) - PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') - echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: trycua - password: ${{ secrets.DOCKER_HUB_TOKEN }} - - - name: Extract metadata (PR) - if: github.event_name == 'pull_request' - id: meta-pr - uses: docker/metadata-action@v5 - with: - images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=raw,value=${{ github.sha }} - - - name: Extract metadata (main branch) - if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' - id: meta-main - uses: docker/metadata-action@v5 - with: - images: ${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=raw,value=latest - - - name: Extract metadata (semantic version tag) - if: startsWith(github.ref, 'refs/tags/docker-kasm-v') - id: meta-semver - uses: docker/metadata-action@v5 - with: - images: 
${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }} - tags: | - type=semver,pattern={{version}},prefix=docker-kasm-v - type=semver,pattern={{major}}.{{minor}},prefix=docker-kasm-v - type=semver,pattern={{major}},prefix=docker-kasm-v - type=raw,value=latest - - - name: Build and push Docker image (PR) - if: github.event_name == 'pull_request' - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-pr.outputs.tags }} - labels: ${{ steps.meta-pr.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Build and push Docker image (main branch) - if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-main.outputs.tags }} - labels: ${{ steps.meta-main.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Build and push Docker image (semantic version tag) - if: startsWith(github.ref, 'refs/tags/docker-kasm-v') - uses: docker/build-push-action@v5 - with: - context: ./libs/kasm - file: ./libs/kasm/Dockerfile - push: true - tags: ${{ steps.meta-semver.outputs.tags }} - labels: ${{ steps.meta-semver.outputs.labels }} - platforms: ${{ matrix.platform }} - cache-from: | - 
type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }} - type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=registry,ref=${{ env.DOCKER_HUB_ORG }}/${{ env.IMAGE_NAME }}:buildcache-${{ steps.platform.outputs.tag }},mode=max - - - name: Image digest - if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/docker-kasm-v') - run: | - if [ "${{ github.event_name }}" == "pull_request" ]; then - echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" - elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then - echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" - else - echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" - fi - - - name: print image tags - run: | - if [ "${{ github.event_name }}" == "pull_request" ]; then - echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" - elif [[ "${{ github.ref }}" == refs/tags/docker-kasm-v* ]]; then - echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" - else - echo "Image tags: ${{ steps.meta-main.outputs.tags }}" - fi + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-ubuntu + context_dir: libs/kasm + dockerfile_path: Dockerfile + tag_prefix: docker-kasm-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-publish-xfce.yml b/.github/workflows/docker-publish-xfce.yml new file mode 100644 index 00000000..fa64849e --- /dev/null +++ b/.github/workflows/docker-publish-xfce.yml @@ -0,0 +1,29 @@ +name: Build and Publish CUA XFCE Container + +on: + push: + branches: + - main + tags: + - "docker-xfce-v*.*.*" + paths: + - "libs/xfce/**" + - ".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + pull_request: + paths: + - "libs/xfce/**" + - 
".github/workflows/docker-publish-xfce.yml" + - ".github/workflows/docker-reusable-publish.yml" + +jobs: + publish: + uses: ./.github/workflows/docker-reusable-publish.yml + with: + image_name: cua-xfce + context_dir: libs/xfce + dockerfile_path: Dockerfile + tag_prefix: docker-xfce-v + docker_hub_org: trycua + secrets: + DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} diff --git a/.github/workflows/docker-reusable-publish.yml b/.github/workflows/docker-reusable-publish.yml new file mode 100644 index 00000000..3472883f --- /dev/null +++ b/.github/workflows/docker-reusable-publish.yml @@ -0,0 +1,155 @@ +name: Reusable Docker Publish Workflow + +on: + workflow_call: + inputs: + image_name: + description: "Name of the Docker image (e.g. cua-ubuntu, cua-xfce)" + required: true + type: string + context_dir: + description: "Directory containing the Dockerfile relative to workspace root (e.g. libs/kasm, libs/xfce)" + required: true + type: string + dockerfile_path: + description: "Path to Dockerfile relative to context_dir (e.g. Dockerfile)" + required: false + type: string + default: "Dockerfile" + tag_prefix: + description: "Prefix for semantic version tags (e.g. 
docker-kasm-v, docker-xfce-v)" + required: true + type: string + docker_hub_org: + description: "Docker Hub organization name" + required: false + type: string + default: "trycua" + secrets: + DOCKER_HUB_TOKEN: + required: true + +jobs: + build-and-push: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare platform tag + id: platform + run: | + # Convert platform (e.g., linux/amd64) to a valid tag suffix (e.g., linux-amd64) + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's/\//-/g') + echo "tag=${PLATFORM_TAG}" >> $GITHUB_OUTPUT + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ inputs.docker_hub_org }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Extract metadata (PR) + if: github.event_name == 'pull_request' + id: meta-pr + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=${{ github.sha }} + + - name: Extract metadata (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + id: meta-main + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=raw,value=latest + + - name: Extract metadata (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + id: meta-semver + uses: docker/metadata-action@v5 + with: + images: ${{ inputs.docker_hub_org }}/${{ inputs.image_name }} + tags: | + type=semver,pattern={{version}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}}.{{minor}},prefix=${{ inputs.tag_prefix }} + type=semver,pattern={{major}},prefix=${{ inputs.tag_prefix }} + type=raw,value=latest + + - name: Build and push Docker image (PR) + if: github.event_name == 
'pull_request' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-pr.outputs.tags }} + labels: ${{ steps.meta-pr.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (main branch) + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-main.outputs.tags }} + labels: ${{ steps.meta-main.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Build and push Docker image (semantic version tag) + if: startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + uses: docker/build-push-action@v5 + with: + context: ./${{ inputs.context_dir }} + file: ./${{ inputs.context_dir }}/${{ inputs.dockerfile_path }} + push: true + tags: ${{ steps.meta-semver.outputs.tags }} + labels: ${{ steps.meta-semver.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: | + type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }} + 
type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:latest + cache-to: type=registry,ref=${{ inputs.docker_hub_org }}/${{ inputs.image_name }}:buildcache-${{ steps.platform.outputs.tag }},mode=max + + - name: Image digest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || startsWith(github.ref, format('refs/tags/{0}', inputs.tag_prefix)) + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image pushed with digest ${{ steps.meta-pr.outputs.digest }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image pushed with digest ${{ steps.meta-semver.outputs.digest }}" + else + echo "Image pushed with digest ${{ steps.meta-main.outputs.digest }}" + fi + + - name: print image tags + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Image tags: ${{ steps.meta-pr.outputs.tags }}" + elif [[ "${{ github.ref }}" == refs/tags/${{ inputs.tag_prefix }}* ]]; then + echo "Image tags: ${{ steps.meta-semver.outputs.tags }}" + else + echo "Image tags: ${{ steps.meta-main.outputs.tags }}" + fi diff --git a/README.md b/README.md index cd3d9a3a..f7904c19 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ from computer import Computer async with Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) as computer: # Take screenshot diff --git a/docs/content/docs/agent-sdk/agent-loops.mdx b/docs/content/docs/agent-sdk/agent-loops.mdx index 6cd8daef..db1d8455 100644 --- a/docs/content/docs/agent-sdk/agent-loops.mdx +++ b/docs/content/docs/agent-sdk/agent-loops.mdx @@ -23,8 +23,8 @@ async def take_screenshot(): async with Computer( os_type="linux", provider_type="cloud", - name="m-linux-h3sj2qbz2a", - api_key="" + name="your-sandbox-name", + api_key="your-api-key" ) as computer: agent = ComputerAgent( diff --git a/docs/content/docs/computer-sdk/computers.mdx 
b/docs/content/docs/computer-sdk/computers.mdx index 2a653f46..1c3558da 100644 --- a/docs/content/docs/computer-sdk/computers.mdx +++ b/docs/content/docs/computer-sdk/computers.mdx @@ -9,9 +9,11 @@ Before we can automate apps using AI, we need to first connect to a Computer Ser Cua Computers are preconfigured virtual machines running the Computer Server. They can be either macOS, Linux, or Windows. They're found in either a cloud-native container, or on your host desktop. -## Cua Cloud Sandbox +## Cloud Sandbox -This is a Cloud Sandbox running the Computer Server. This is the easiest & safest way to get a cua computer, and can be done by going on the trycua.com website. +**Easiest & safest way to get started - works on any host OS** + +This is a Cloud Sandbox running the Computer Server. Get a container at [trycua.com](https://www.trycua.com/). @@ -21,7 +23,7 @@ This is a Cloud Sandbox running the Computer Server. This is the easiest & safes computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) @@ -35,7 +37,7 @@ This is a Cloud Sandbox running the Computer Server. This is the easiest & safes const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); @@ -45,16 +47,48 @@ This is a Cloud Sandbox running the Computer Server. This is the easiest & safes -## Cua Local Sandbox +## Linux on Docker -Cua provides local sandboxes using different providers depending on your host operating system: +**Run Linux desktop locally on macOS, Windows, or Linux hosts** - - +Cua provides two Docker images for running Linux desktops: + + + + + **Recommended for most use cases** - lightweight XFCE desktop with Firefox 1. Install Docker Desktop or Docker Engine - 2. Build or pull the CUA Ubuntu sandbox + 2. Pull the CUA XFCE image + + ```bash + docker pull --platform=linux/amd64 trycua/cua-xfce:latest + ``` + + 3. 
Connect with Computer + + ```python + from computer import Computer + + computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-xfce:latest", + name="my-xfce-container" + ) + + await computer.run() # Launch & connect to Docker sandbox + ``` + + + + + **Full-featured Ubuntu desktop** with additional applications + + 1. Install Docker Desktop or Docker Engine + + 2. Build or pull the CUA KASM image ```bash # Option 1: Pull from Docker Hub @@ -74,68 +108,70 @@ Cua provides local sandboxes using different providers depending on your host op os_type="linux", provider_type="docker", image="trycua/cua-ubuntu:latest", - name="my-cua-container" + name="my-kasm-container" ) await computer.run() # Launch & connect to Docker sandbox ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) - 2. Install pywinsandbox dependency - - ```bash - pip install -U git+git://github.com/karkason/pywinsandbox.git - ``` - - 3. Windows Sandbox will be automatically configured when you run the CLI - - ```python - from computer import Computer - - computer = Computer( - os_type="windows", - provider_type="winsandbox", - ephemeral=True # Windows Sandbox is always ephemeral - ) - - await computer.run() # Launch & connect to Windows Sandbox - ``` - - - - - 1. Install lume cli - - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" - ``` - - 2. Start a local cua sandbox - - ```bash - lume run macos-sequoia-cua:latest - ``` - - 3. Connect with Computer - - ```python - from computer import Computer - - computer = Computer( - os_type="macos", - provider_type="lume", - name="macos-sequoia-cua:latest" - ) - - await computer.run() # Launch & connect to the sandbox - ``` - - +## Windows Sandbox + +**Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + +1. Enable Windows Sandbox +2. 
Install pywinsandbox dependency + +```bash +pip install -U git+https://github.com/karkason/pywinsandbox.git +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="windows", + provider_type="winsandbox", + ephemeral=True # Windows Sandbox is always ephemeral +) + +await computer.run() # Launch & connect to Windows Sandbox +``` + +## macOS VM + +**macOS hosts only - requires Lume CLI** + +1. Install lume cli + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" +``` + +2. Start a local cua macOS VM + +```bash +lume run macos-sequoia-cua:latest +``` + +3. Connect with Computer + +```python +from computer import Computer + +computer = Computer( + os_type="macos", + provider_type="lume", + name="macos-sequoia-cua:latest" +) + +await computer.run() # Launch & connect to the sandbox +``` + ## Your host desktop You can also have agents control your desktop directly by running Computer Server without any containerization layer. Beware that AI models may perform risky actions. diff --git a/docs/content/docs/quickstart-cli.mdx b/docs/content/docs/quickstart-cli.mdx index a22907ce..7bf53773 100644 --- a/docs/content/docs/quickstart-cli.mdx +++ b/docs/content/docs/quickstart-cli.mdx @@ -23,39 +23,45 @@ cua combines Computer (interface) + Agent (AI) for automating desktop apps. The ## Set Up Your Computer Environment -Choose how you want to run your cua computer. **Cloud containers are recommended** for the easiest setup: +Choose how you want to run your cua computer. **Cloud Sandbox is recommended** for the easiest setup: + + + + + **Easiest & safest way to get started - works on any host OS** - - - - **Easiest & safest way to get started** - 1. Go to [trycua.com/signin](https://www.trycua.com/signin) 2. Navigate to **Dashboard > Containers > Create Instance** 3. Create a **Medium, Ubuntu 22** container 4. 
Note your container name and API key - + Your cloud container will be automatically configured and ready to use. - - - 1. Install lume cli + + + **Run Linux desktop locally on macOS, Windows, or Linux hosts** + + 1. Install Docker Desktop or Docker Engine + + 2. Pull the CUA XFCE container (lightweight desktop) ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + docker pull --platform=linux/amd64 trycua/cua-xfce:latest ``` - 2. Start a local cua container + Or use KASM for a full-featured desktop: ```bash - lume run macos-sequoia-cua:latest + docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest ``` - - - 1. Enable Windows Sandbox (requires Windows 10 Pro/Enterprise or Windows 11) + + + **Windows hosts only - requires Windows 10 Pro/Enterprise or Windows 11** + + 1. Enable Windows Sandbox 2. Install pywinsandbox dependency ```bash @@ -65,14 +71,20 @@ Choose how you want to run your cua computer. **Cloud containers are recommended 3. Windows Sandbox will be automatically configured when you run the CLI - - - 1. Install Docker Desktop or Docker Engine + - 2. Pull the CUA Ubuntu container + **macOS hosts only - requires Lume CLI** + + 1. Install lume cli ```bash - docker pull --platform=linux/amd64 trycua/cua-ubuntu:latest + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" + ``` + + 2. Start a local cua macOS VM + + ```bash + lume run macos-sequoia-cua:latest ``` @@ -300,7 +312,7 @@ python -m agent.cli omniparser+ollama_chat/llama3.2:latest If you haven't set up environment variables, the CLI will guide you through the setup: -1. **Container Name**: Enter your cua container name (or get one at [trycua.com](https://www.trycua.com/)) +1. **Sandbox Name**: Enter your cua sandbox name (or get one at [trycua.com](https://www.trycua.com/)) 2. **CUA API Key**: Enter your cua API key 3. 
**Provider API Key**: Enter your AI provider API key (OpenAI, Anthropic, etc.) diff --git a/docs/content/docs/quickstart-devs.mdx b/docs/content/docs/quickstart-devs.mdx index 5b7b28a3..8a9adea7 100644 --- a/docs/content/docs/quickstart-devs.mdx +++ b/docs/content/docs/quickstart-devs.mdx @@ -103,7 +103,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre computer = Computer( os_type="linux", provider_type="cloud", - name="your-container-name", + name="your-sandbox-name", api_key="your-api-key" ) await computer.run() # Connect to the sandbox @@ -189,7 +189,7 @@ Connect to your Cua computer and perform basic interactions, such as taking scre const computer = new Computer({ osType: OSType.LINUX, - name: "your-container-name", + name: "your-sandbox-name", apiKey: "your-api-key" }); await computer.run(); // Connect to the sandbox diff --git a/libs/python/computer/computer/providers/docker/provider.py b/libs/python/computer/computer/providers/docker/provider.py index 82ad411c..487edc28 100644 --- a/libs/python/computer/computer/providers/docker/provider.py +++ b/libs/python/computer/computer/providers/docker/provider.py @@ -36,7 +36,7 @@ class DockerProvider(BaseVMProvider): """ def __init__( - self, + self, port: Optional[int] = 8000, host: str = "localhost", storage: Optional[str] = None, @@ -47,13 +47,16 @@ class DockerProvider(BaseVMProvider): vnc_port: Optional[int] = 6901, ): """Initialize the Docker VM Provider. 
- + Args: port: Currently unused (VM provider port) host: Hostname for the API server (default: localhost) storage: Path for persistent VM storage shared_path: Path for shared folder between host and container image: Docker image to use (default: "trycua/cua-ubuntu:latest") + Supported images: + - "trycua/cua-ubuntu:latest" (Kasm-based) + - "trycua/cua-docker-xfce:latest" (vanilla XFCE) verbose: Enable verbose logging ephemeral: Use ephemeral (temporary) storage vnc_port: Port for VNC interface (default: 6901) @@ -62,19 +65,35 @@ class DockerProvider(BaseVMProvider): self.api_port = 8000 self.vnc_port = vnc_port self.ephemeral = ephemeral - + # Handle ephemeral storage (temporary directory) if ephemeral: self.storage = "ephemeral" else: self.storage = storage - + self.shared_path = shared_path self.image = image self.verbose = verbose self._container_id = None self._running_containers = {} # Track running containers by name + + # Detect image type and configure user directory accordingly + self._detect_image_config() + def _detect_image_config(self): + """Detect image type and configure paths accordingly.""" + # Detect if this is a docker-xfce image or Kasm image + if "docker-xfce" in self.image.lower() or "xfce" in self.image.lower(): + self._home_dir = "/home/cua" + self._image_type = "docker-xfce" + logger.info(f"Detected docker-xfce image: using {self._home_dir}") + else: + # Default to Kasm configuration + self._home_dir = "/home/kasm-user" + self._image_type = "kasm" + logger.info(f"Detected Kasm image: using {self._home_dir}") + @property def provider_type(self) -> VMProviderType: """Return the provider type.""" @@ -277,12 +296,13 @@ class DockerProvider(BaseVMProvider): # Add volume mounts if storage is specified storage_path = storage or self.storage if storage_path and storage_path != "ephemeral": - # Mount storage directory - cmd.extend(["-v", f"{storage_path}:/home/kasm-user/storage"]) - + # Mount storage directory using detected home directory + 
cmd.extend(["-v", f"{storage_path}:{self._home_dir}/storage"]) + # Add shared path if specified if self.shared_path: - cmd.extend(["-v", f"{self.shared_path}:/home/kasm-user/shared"]) + # Mount shared directory using detected home directory + cmd.extend(["-v", f"{self.shared_path}:{self._home_dir}/shared"]) # Add environment variables cmd.extend(["-e", "VNC_PW=password"]) # Set VNC password diff --git a/libs/python/computer/pyproject.toml b/libs/python/computer/pyproject.toml index 6b5b3381..3cf06f41 100644 --- a/libs/python/computer/pyproject.toml +++ b/libs/python/computer/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "pdm.backend" [project] name = "cua-computer" -version = "0.4.7" +version = "0.4.8" description = "Computer-Use Interface (CUI) framework powering Cua" readme = "README.md" authors = [ diff --git a/libs/xfce/.dockerignore b/libs/xfce/.dockerignore new file mode 100644 index 00000000..d4352f88 --- /dev/null +++ b/libs/xfce/.dockerignore @@ -0,0 +1,5 @@ +README.md +.git +.gitignore +*.md +LICENSE diff --git a/libs/xfce/.gitignore b/libs/xfce/.gitignore new file mode 100644 index 00000000..0a2449a1 --- /dev/null +++ b/libs/xfce/.gitignore @@ -0,0 +1,4 @@ +storage/ +shared/ +*.log +.DS_Store diff --git a/libs/xfce/Dockerfile b/libs/xfce/Dockerfile new file mode 100644 index 00000000..d44bdb95 --- /dev/null +++ b/libs/xfce/Dockerfile @@ -0,0 +1,141 @@ +# CUA Docker XFCE Container +# Vanilla XFCE desktop with noVNC and computer-server + +FROM ubuntu:22.04 + +# Avoid prompts from apt +ENV DEBIAN_FRONTEND=noninteractive + +# Set environment variables +ENV HOME=/home/cua +ENV DISPLAY=:1 +ENV VNC_PORT=5901 +ENV NOVNC_PORT=6901 +ENV API_PORT=8000 +ENV VNC_RESOLUTION=1024x768 +ENV VNC_COL_DEPTH=24 + +# Install system dependencies first (including sudo) +RUN apt-get update && apt-get install -y \ + # System utilities + sudo \ + # Desktop environment + xfce4 \ + xfce4-terminal \ + dbus-x11 \ + # VNC server + tigervnc-standalone-server \ + tigervnc-common \ + # 
noVNC dependencies + python3 \ + python3-pip \ + python3-numpy \ + git \ + net-tools \ + netcat \ + supervisor \ + # Computer-server dependencies + python3-tk \ + python3-dev \ + gnome-screenshot \ + wmctrl \ + ffmpeg \ + socat \ + xclip \ + # Browser + wget \ + software-properties-common \ + # Build tools + build-essential \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + tk-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + libffi-dev \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Remove screensavers and power manager to avoid popups and lock screens +RUN apt-get remove -y \ + xfce4-power-manager \ + xfce4-power-manager-data \ + xfce4-power-manager-plugins \ + xfce4-screensaver \ + light-locker \ + xscreensaver \ + xscreensaver-data || true + +# Create user after sudo is installed +RUN useradd -m -s /bin/bash -G sudo cua && \ + echo "cua:cua" | chpasswd && \ + echo "cua ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Install Firefox from Mozilla PPA (snap-free) - inline to avoid script issues +RUN apt-get update && \ + add-apt-repository -y ppa:mozillateam/ppa && \ + echo 'Package: *\nPin: release o=LP-PPA-mozillateam\nPin-Priority: 1001' > /etc/apt/preferences.d/mozilla-firefox && \ + apt-get update && \ + apt-get install -y firefox && \ + echo 'pref("datareporting.policy.firstRunURL", "");\npref("datareporting.policy.dataSubmissionEnabled", false);\npref("datareporting.healthreport.service.enabled", false);\npref("datareporting.healthreport.uploadEnabled", false);\npref("trailhead.firstrun.branches", "nofirstrun-empty");\npref("browser.aboutwelcome.enabled", false);' > /usr/lib/firefox/browser/defaults/preferences/firefox.js && \ + update-alternatives --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 100 && \ + update-alternatives --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 100 && \ + rm -rf /var/lib/apt/lists/* + +# Install noVNC +RUN git clone https://github.com/novnc/noVNC.git /opt/noVNC && \ + git 
clone https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ + ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html + +# Pre-create cache directory with correct ownership before pip install +RUN mkdir -p /home/cua/.cache && \ + chown -R cua:cua /home/cua/.cache + +# Install computer-server +RUN pip3 install cua-computer-server + +# Fix any cache files created by pip +RUN chown -R cua:cua /home/cua/.cache + +# Copy startup scripts +COPY src/supervisor/ /etc/supervisor/conf.d/ +COPY src/scripts/ /usr/local/bin/ + +# Make scripts executable +RUN chmod +x /usr/local/bin/*.sh + +# Setup VNC +USER cua +WORKDIR /home/cua + +# Create VNC directory (no password needed with SecurityTypes None) +RUN mkdir -p $HOME/.vnc + +# Configure XFCE for first start +RUN mkdir -p $HOME/.config/xfce4/xfconf/xfce-perchannel-xml $HOME/.config/xfce4 $HOME/.config/autostart + +# Copy XFCE config to disable browser launching and welcome screens +COPY --chown=cua:cua src/xfce-config/helpers.rc $HOME/.config/xfce4/helpers.rc +COPY --chown=cua:cua src/xfce-config/xfce4-session.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-session.xml +COPY --chown=cua:cua src/xfce-config/xfce4-power-manager.xml $HOME/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-power-manager.xml + +# Disable autostart for screensaver, lock screen, and power manager +RUN echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-tips-autostart.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-screensaver.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/light-locker.desktop && \ + echo "[Desktop Entry]\nHidden=true" > $HOME/.config/autostart/xfce4-power-manager.desktop && \ + chown -R cua:cua $HOME/.config + +# Create storage and shared directories, and Firefox cache directory +RUN mkdir -p $HOME/storage $HOME/shared $HOME/.cache/dconf $HOME/.mozilla/firefox && \ + chown -R cua:cua $HOME/storage $HOME/shared $HOME/.cache $HOME/.mozilla 
$HOME/.vnc + +USER root + +# Expose ports +EXPOSE $VNC_PORT $NOVNC_PORT $API_PORT + +# Start services via supervisor +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] diff --git a/libs/xfce/README.md b/libs/xfce/README.md new file mode 100644 index 00000000..9ecdff00 --- /dev/null +++ b/libs/xfce/README.md @@ -0,0 +1,261 @@ +# CUA Docker XFCE Container + +Vanilla XFCE desktop container for Computer-Using Agents (CUA) with noVNC and computer-server. This is a lightweight alternative to the Kasm-based container with minimal dependencies. + +## Features + +- Ubuntu 22.04 (Jammy) with vanilla XFCE desktop environment +- TigerVNC server for remote desktop access +- noVNC for web-based VNC access (no client required) +- Pre-installed computer-server for remote computer control +- Python 3.11 with necessary libraries +- Screen capture tools (gnome-screenshot, wmctrl, ffmpeg) +- Clipboard utilities (xclip, socat) +- Firefox browser with telemetry disabled + +## Architecture + +``` +┌─────────────────────────────────────────┐ +│ Docker Container (Ubuntu 22.04) │ +├─────────────────────────────────────────┤ +│ XFCE Desktop Environment │ +│ ├── Firefox │ +│ ├── XFCE Terminal │ +│ └── Desktop utilities │ +├─────────────────────────────────────────┤ +│ TigerVNC Server (Port 5901) │ +│ └── X11 Display :1 │ +├─────────────────────────────────────────┤ +│ noVNC Web Interface (Port 6901) │ +│ └── WebSocket proxy to VNC │ +├─────────────────────────────────────────┤ +│ CUA Computer Server (Port 8000) │ +│ └── WebSocket API for automation │ +└─────────────────────────────────────────┘ +``` + +## Building the Container + +```bash +docker build -t cua-docker-xfce:latest . 
+``` + +## Pushing to Registry + +```bash +# Tag for Docker Hub (replace 'trycua' with your Docker Hub username) +docker tag cua-docker-xfce:latest trycua/cua-docker-xfce:latest + +# Login to Docker Hub +docker login + +# Push to Docker Hub +docker push trycua/cua-docker-xfce:latest +``` + +## Running the Container Manually + +### Basic Usage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce:latest +``` + +### With Custom Resolution + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -e VNC_RESOLUTION=1280x720 \ + cua-docker-xfce:latest +``` + +### With Persistent Storage + +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 5901:5901 \ + -p 6901:6901 \ + -p 8000:8000 \ + -v $(pwd)/storage:/home/cua/storage \ + cua-docker-xfce:latest +``` + +## Accessing the Container + +- **noVNC Web Interface**: Open `http://localhost:6901` in your browser (no password required) +- **VNC Client**: Connect to `localhost:5901` (no password required) +- **Computer Server API**: Available at `http://localhost:8000` + +## Using with CUA Docker Provider + +This container is designed to work with the CUA Docker provider. 
Simply specify the docker-xfce image: + +```python +from computer import Computer + +# Create computer with docker-xfce container +computer = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # Use docker-xfce instead of Kasm + display="1024x768", + memory="4GB", + cpu="2" +) + +# Use the computer +async with computer: + # Take a screenshot + screenshot = await computer.interface.screenshot() + + # Click and type + await computer.interface.left_click(100, 100) + await computer.interface.type_text("Hello from CUA!") + + # Run commands + result = await computer.interface.run_command("ls -la") + print(result.stdout) +``` + +### Switching between Kasm and docker-xfce + +The Docker provider automatically detects which image you're using: + +```python +# Use Kasm-based container (default for Linux) +computer_kasm = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-ubuntu:latest", # Kasm image +) + +# Use docker-xfce container (vanilla XFCE) +computer_xfce = Computer( + os_type="linux", + provider_type="docker", + image="trycua/cua-docker-xfce:latest", # docker-xfce image +) +``` + +Both provide the same API and functionality - the provider automatically configures the correct paths and settings based on the image. 
+ +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `VNC_RESOLUTION` | `1024x768` | Screen resolution | +| `VNC_COL_DEPTH` | `24` | Color depth | +| `VNC_PORT` | `5901` | VNC server port | +| `NOVNC_PORT` | `6901` | noVNC web interface port | +| `API_PORT` | `8000` | Computer-server API port | +| `DISPLAY` | `:1` | X11 display number | + +## Exposed Ports + +- **5901**: TigerVNC server +- **6901**: noVNC web interface +- **8000**: Computer-server WebSocket API + +## Volume Mount Points + +- `/home/cua/storage`: Persistent storage mount point +- `/home/cua/shared`: Shared folder mount point + +## User Credentials + +- **Username**: `cua` +- **Password**: `password` (for shell login only) +- **Sudo access**: Enabled without password +- **VNC access**: No password required + +## Creating Snapshots + +### Filesystem Snapshot +```bash +docker commit <container_id> cua-docker-xfce-snapshot:latest +``` + +### Running from Snapshot +```bash +docker run --rm -it \ + --shm-size=512m \ + -p 6901:6901 \ + -p 8000:8000 \ + cua-docker-xfce-snapshot:latest +``` + +## Comparison with Kasm Container + +| Feature | Kasm Container | Docker XFCE Container | +|---------|---------------|----------------------| +| Base Image | KasmWeb Ubuntu | Vanilla Ubuntu | +| VNC Server | KasmVNC | TigerVNC | +| Dependencies | Higher | Lower | +| Configuration | Pre-configured | Minimal | +| Size | Larger | Smaller | +| Maintenance | Depends on Kasm | Independent | + +## Process Management + +The container uses `supervisord` to manage three main processes: + +1. **VNC Server** (Priority 10): TigerVNC with XFCE desktop +2. **noVNC** (Priority 20): WebSocket proxy for browser access +3. **Computer Server** (Priority 30): CUA automation API + +All processes are automatically restarted on failure. 
+ +## Troubleshooting + +### VNC server won't start +Remove any stale X11 lock files: +```bash +docker exec <container_id> rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 +``` + +### noVNC shows black screen +Ensure VNC server is running: +```bash +docker exec <container_id> supervisorctl status vncserver +``` + +### Computer-server not responding +Check if X server is accessible: +```bash +docker exec <container_id> env DISPLAY=:1 xdpyinfo +``` + +### View logs +```bash +docker exec <container_id> tail -f /var/log/supervisor/supervisord.log +docker exec <container_id> supervisorctl status +``` + +## Integration with CUA System + +This container provides the same functionality as the Kasm container but with: +- **Reduced dependencies**: No reliance on KasmWeb infrastructure +- **Smaller image size**: Minimal base configuration +- **Full control**: Direct access to all components +- **Easy customization**: Simple to modify and extend + +The container integrates seamlessly with: +- CUA Computer library (via WebSocket API) +- Docker provider for lifecycle management +- Standard VNC clients for debugging +- Web browsers for visual monitoring + +## License + +MIT License - See LICENSE file for details diff --git a/libs/xfce/src/scripts/resize-display.sh b/libs/xfce/src/scripts/resize-display.sh new file mode 100644 index 00000000..ea663dce --- /dev/null +++ b/libs/xfce/src/scripts/resize-display.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Dynamic display resolution script: changes the resolution of display :1 +# Usage: resize-display.sh [WIDTHxHEIGHT] (default: 1920x1080); exits 1 if the resolution cannot be set + +RESOLUTION=${1:-1920x1080} + +# Wait for display to be ready (up to 10 attempts, one second apart) +for i in {1..10}; do + if DISPLAY=:1 xdpyinfo >/dev/null 2>&1; then + break + fi + sleep 1 +done + +# Change resolution using xrandr: try the named mode on VNC-0 first, then fall back to --fb; report failure on stderr and stop +DISPLAY=:1 xrandr --output VNC-0 --mode "$RESOLUTION" 2>/dev/null || \ +DISPLAY=:1 xrandr --fb "$RESOLUTION" 2>/dev/null || \ +{ echo "Failed to set resolution to $RESOLUTION" >&2; exit 1; } + +echo "Display resolution set to: $RESOLUTION" diff --git a/libs/xfce/src/scripts/start-computer-server.sh b/libs/xfce/src/scripts/start-computer-server.sh new file mode 
100644 index 00000000..bc27a3db --- /dev/null +++ b/libs/xfce/src/scripts/start-computer-server.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +# Wait for X server to be ready (polls display :1 once per second) +echo "Waiting for X server to start..." +while ! xdpyinfo -display :1 >/dev/null 2>&1; do + sleep 1 +done +echo "X server is ready" + +# Start computer-server; exec replaces this wrapper shell so signals sent to this PID reach the server itself +export DISPLAY=:1 +exec python3 -m computer_server --port "${API_PORT:-8000}" diff --git a/libs/xfce/src/scripts/start-novnc.sh b/libs/xfce/src/scripts/start-novnc.sh new file mode 100644 index 00000000..07894acb --- /dev/null +++ b/libs/xfce/src/scripts/start-novnc.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +# Give VNC a moment to start (supervisor starts it with priority 10, this is priority 20) +echo "Waiting for VNC server to start..." +sleep 5 + +# Start noVNC; exec replaces this wrapper shell so signals sent to this PID reach the proxy itself +cd /opt/noVNC +exec /opt/noVNC/utils/novnc_proxy \ + --vnc "localhost:${VNC_PORT:-5901}" \ + --listen "${NOVNC_PORT:-6901}" diff --git a/libs/xfce/src/scripts/start-vnc.sh b/libs/xfce/src/scripts/start-vnc.sh new file mode 100644 index 00000000..934e6d3c --- /dev/null +++ b/libs/xfce/src/scripts/start-vnc.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Clean up any existing VNC lock files +rm -rf /tmp/.X1-lock /tmp/.X11-unix/X1 + +# Start VNC server without password authentication +vncserver :1 \ + -geometry ${VNC_RESOLUTION:-1920x1080} \ + -depth ${VNC_COL_DEPTH:-24} \ + -rfbport ${VNC_PORT:-5901} \ + -localhost no \ + -SecurityTypes None \ + -AlwaysShared \ + -AcceptPointerEvents \ + -AcceptKeyEvents \ + -AcceptCutText \ + -SendCutText \ + -xstartup /usr/local/bin/xstartup.sh \ + --I-KNOW-THIS-IS-INSECURE + +# Keep the process running +tail -f /home/cua/.vnc/*.log diff --git a/libs/xfce/src/scripts/xstartup.sh b/libs/xfce/src/scripts/xstartup.sh new file mode 100644 index 00000000..49bb46a2 --- /dev/null +++ b/libs/xfce/src/scripts/xstartup.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +# Start D-Bus +if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then + eval $(dbus-launch --sh-syntax 
--exit-with-session) +fi + +# Start XFCE +startxfce4 & + +# Wait for XFCE to start +sleep 2 + +# Disable screensaver and power management +xset s off +xset -dpms +xset s noblank + +# Wait for the session +wait diff --git a/libs/xfce/src/supervisor/supervisord.conf b/libs/xfce/src/supervisor/supervisord.conf new file mode 100644 index 00000000..fb367c4f --- /dev/null +++ b/libs/xfce/src/supervisor/supervisord.conf @@ -0,0 +1,30 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor + +[program:vncserver] +command=/usr/local/bin/start-vnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/vncserver.log +stderr_logfile=/var/log/supervisor/vncserver.error.log +priority=10 + +[program:novnc] +command=/usr/local/bin/start-novnc.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/novnc.log +stderr_logfile=/var/log/supervisor/novnc.error.log +priority=20 + +[program:computer-server] +command=/usr/local/bin/start-computer-server.sh +user=cua +autorestart=true +stdout_logfile=/var/log/supervisor/computer-server.log +stderr_logfile=/var/log/supervisor/computer-server.error.log +priority=30 diff --git a/libs/xfce/src/xfce-config/helpers.rc b/libs/xfce/src/xfce-config/helpers.rc new file mode 100644 index 00000000..b2270633 --- /dev/null +++ b/libs/xfce/src/xfce-config/helpers.rc @@ -0,0 +1,2 @@ +# XFCE preferred applications - set Firefox as default browser +WebBrowser=firefox diff --git a/libs/xfce/src/xfce-config/xfce4-power-manager.xml b/libs/xfce/src/xfce-config/xfce4-power-manager.xml new file mode 100644 index 00000000..56447c1e --- /dev/null +++ b/libs/xfce/src/xfce-config/xfce4-power-manager.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/libs/xfce/src/xfce-config/xfce4-session.xml b/libs/xfce/src/xfce-config/xfce4-session.xml new file mode 100644 index 00000000..5af36711 --- /dev/null +++ 
b/libs/xfce/src/xfce-config/xfce4-session.xml @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +