From cb23ba49ef6d64a568164f512eb38b9bc70bd8cc Mon Sep 17 00:00:00 2001 From: Morgan Dean Date: Wed, 11 Jun 2025 15:22:05 -0400 Subject: [PATCH] Fix styling, fix images --- docs/.prettierrc | 14 +++ docs/biome.json | 86 ------------------ docs/content/docs/libraries/agent/index.mdx | 74 +++++++++++---- .../docs/libraries/computer-server/index.mdx | 41 +++++++-- .../content/docs/libraries/computer/index.mdx | 71 ++++++++++++--- docs/content/docs/libraries/core/index.mdx | 51 +++++++++++ docs/content/docs/libraries/index.mdx | 2 +- .../docs/libraries/lume/API-Reference.mdx | 39 +++++--- .../docs/libraries/lume/Development.mdx | 4 +- docs/content/docs/libraries/lume/FAQ.mdx | 5 + docs/content/docs/libraries/lume/index.mdx | 53 ++++++++--- docs/content/docs/libraries/lumier/index.mdx | 48 ++++++++-- .../docs/libraries/mcp-server/index.mdx | 69 ++++++++++---- docs/content/docs/libraries/pylume/index.mdx | 42 +++++++-- docs/content/docs/libraries/som/index.mdx | 51 +++++++++-- .../libraries/agent => public/img}/agent.png | Bin .../agent => public/img}/agent_gradio_ui.png | Bin .../libraries/lume => public/img}/cli.png | Bin .../computer => public/img}/computer.png | Bin 19 files changed, 454 insertions(+), 196 deletions(-) create mode 100644 docs/.prettierrc delete mode 100644 docs/biome.json create mode 100644 docs/content/docs/libraries/core/index.mdx rename docs/{content/docs/libraries/agent => public/img}/agent.png (100%) rename docs/{content/docs/libraries/agent => public/img}/agent_gradio_ui.png (100%) rename docs/{content/docs/libraries/lume => public/img}/cli.png (100%) rename docs/{content/docs/libraries/computer => public/img}/computer.png (100%) diff --git a/docs/.prettierrc b/docs/.prettierrc new file mode 100644 index 00000000..4ab8c475 --- /dev/null +++ b/docs/.prettierrc @@ -0,0 +1,14 @@ +{ + "printWidth": 80, + "tabWidth": 2, + "useTabs": false, + "semi": true, + "singleQuote": true, + "quoteProps": "as-needed", + "jsxSingleQuote": false, + 
"trailingComma": "es5", + "bracketSpacing": true, + "bracketSameLine": false, + "arrowParens": "always", + "endOfLine": "lf" +} \ No newline at end of file diff --git a/docs/biome.json b/docs/biome.json deleted file mode 100644 index 1ce2e363..00000000 --- a/docs/biome.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", - "vcs": { - "enabled": false, - "clientKind": "git", - "useIgnoreFile": false - }, - "files": { - "ignoreUnknown": false, - "ignore": [ - ".next", - "build" - ] - }, - "formatter": { - "enabled": true, - "useEditorconfig": true, - "formatWithErrors": false, - "indentStyle": "space", - "indentWidth": 2, - "lineEnding": "lf", - "lineWidth": 80, - "attributePosition": "auto", - "bracketSpacing": true - }, - "organizeImports": { - "enabled": true - }, - "linter": { - "enabled": true, - "rules": { - "recommended": true, - "style": { - "useSelfClosingElements": "warn", - "noUnusedTemplateLiteral": "warn", - "noNonNullAssertion": "off" - }, - "a11y": { - "useMediaCaption": "off", - "useKeyWithClickEvents": "warn", - "useKeyWithMouseEvents": "warn", - "noSvgWithoutTitle": "off", - "useButtonType": "warn", - "noAutofocus": "off" - }, - "suspicious": { - "noArrayIndexKey": "off" - }, - "correctness": { - "noUnusedVariables": "warn", - "noUnusedFunctionParameters": "warn", - "noUnusedImports": "warn" - }, - "complexity": { - "useOptionalChain": "info" - }, - "nursery": { - "useSortedClasses": { - "level": "warn", - "fix": "safe", - "options": { - "attributes": [ - "className" - ], - "functions": [ - "cn" - ] - } - } - } - } - }, - "javascript": { - "formatter": { - "jsxQuoteStyle": "double", - "quoteProperties": "asNeeded", - "trailingCommas": "es5", - "semicolons": "always", - "arrowParentheses": "always", - "bracketSameLine": false, - "quoteStyle": "single", - "attributePosition": "auto", - "bracketSpacing": true - } - } -} \ No newline at end of file diff --git a/docs/content/docs/libraries/agent/index.mdx 
b/docs/content/docs/libraries/agent/index.mdx index 2bde81fe..69dbdeab 100644 --- a/docs/content/docs/libraries/agent/index.mdx +++ b/docs/content/docs/libraries/agent/index.mdx @@ -2,11 +2,40 @@ title: Agent --- -
-Python -macOS -Discord -PyPI +
+ + Python + + + macOS + + + Discord + + + PyPI +
**cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandbox created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen). @@ -14,7 +43,7 @@ title: Agent ### Get started with Agent
- +
## Install @@ -80,7 +109,7 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use The agent includes a Gradio-based user interface for easier interaction.
- +
To use it: @@ -119,14 +148,16 @@ Without these environment variables, the UI will show "No models available" for ### Using Local Models -You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio. +You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio. If you're using a different local model server: + - vLLM: `http://localhost:8000/v1` - LocalAI: `http://localhost:8080/v1` - Ollama with OpenAI compat API: `http://localhost:11434/v1` The Gradio UI provides: + - Selection of different agent loops (OpenAI, Anthropic, OMNI) - Model selection for each provider - Configuration of agent parameters @@ -137,6 +168,7 @@ The Gradio UI provides: The UI-TARS models are available in two forms: 1. **MLX UI-TARS models** (Default): These models run locally using MLXVLM provider + - `mlx-community/UI-TARS-1.5-7B-4bit` (default) - 4-bit quantized version - `mlx-community/UI-TARS-1.5-7B-6bit` - 6-bit quantized version for higher quality @@ -149,14 +181,15 @@ The UI-TARS models are available in two forms: ``` 2. 
**OpenAI-compatible UI-TARS**: For using the original ByteDance model + - If you want to use the original ByteDance UI-TARS model via an OpenAI-compatible API, follow the [deployment guide](https://github.com/bytedance/UI-TARS/blob/main/README_deploy.md) - This will give you a provider URL like `https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1` which you can use in the code or Gradio UI: - ```python + ```python agent = ComputerAgent( computer=macos_computer, loop=AgentLoop.UITARS, - model=LLM(provider=LLMProvider.OAICOMPAT, name="tgi", + model=LLM(provider=LLMProvider.OAICOMPAT, name="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1") ) ``` @@ -165,14 +198,15 @@ The UI-TARS models are available in two forms: The `cua-agent` package provides three agent loops variations, based on different CUA models providers and techniques: -| Agent Loop | Supported Models | Description | Set-Of-Marks | -|:-----------|:-----------------|:------------|:-------------| -| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required | -| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`
• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required | -| `AgentLoop.UITARS` | • `mlx-community/UI-TARS-1.5-7B-4bit` (default)
• `mlx-community/UI-TARS-1.5-7B-6bit`
• `ByteDance-Seed/UI-TARS-1.5-7B` (via openAI-compatible endpoint) | Uses UI-TARS models with MLXVLM (default) or OAICOMPAT providers | Not Required | -| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`
• `claude-3-7-sonnet-20250219`
• `gpt-4.5-preview`
• `gpt-4o`
• `gpt-4`
• `phi4`
• `phi4-mini`
• `gemma3`
• `...`
• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser | +| Agent Loop | Supported Models | Description | Set-Of-Marks | +| :-------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------- | :----------- | +| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required | +| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`
• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required | +| `AgentLoop.UITARS` | • `mlx-community/UI-TARS-1.5-7B-4bit` (default)
• `mlx-community/UI-TARS-1.5-7B-6bit`
• `ByteDance-Seed/UI-TARS-1.5-7B` (via OpenAI-compatible endpoint) | Uses UI-TARS models with MLXVLM (default) or OAICOMPAT providers | Not Required | +| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`
• `claude-3-7-sonnet-20250219`
• `gpt-4.5-preview`
• `gpt-4o`
• `gpt-4`
• `phi4`
• `phi4-mini`
• `gemma3`
• `...`
• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser | ## AgentResponse + The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops. ```python @@ -213,7 +247,7 @@ async for result in agent.run(task): **Note on Settings Persistence:** -* The Gradio UI automatically saves your configuration (Agent Loop, Model Choice, Custom Base URL, Save Trajectory state, Recent Images count) to a file named `.gradio_settings.json` in the project's root directory when you successfully run a task. -* This allows your preferences to persist between sessions. -* API keys entered into the custom provider field are **not** saved in this file for security reasons. Manage API keys using environment variables (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) or a `.env` file. -* It's recommended to add `.gradio_settings.json` to your `.gitignore` file. +- The Gradio UI automatically saves your configuration (Agent Loop, Model Choice, Custom Base URL, Save Trajectory state, Recent Images count) to a file named `.gradio_settings.json` in the project's root directory when you successfully run a task. +- This allows your preferences to persist between sessions. +- API keys entered into the custom provider field are **not** saved in this file for security reasons. Manage API keys using environment variables (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) or a `.env` file. +- It's recommended to add `.gradio_settings.json` to your `.gitignore` file. 
diff --git a/docs/content/docs/libraries/computer-server/index.mdx b/docs/content/docs/libraries/computer-server/index.mdx index a6c88d7e..ac158e12 100644 --- a/docs/content/docs/libraries/computer-server/index.mdx +++ b/docs/content/docs/libraries/computer-server/index.mdx @@ -2,11 +2,40 @@ title: Computer Server --- -
-Python -macOS -Discord -PyPI +
+ + Python + + + macOS + + + Discord + + + PyPI +
**Computer Server** is the server component for the Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). @@ -29,4 +58,4 @@ pip install cua-computer-server Refer to this notebook for a step-by-step guide on how to use the Computer-Use Server on the host system or VM: -- [Computer-Use Server](https://github.com/trycua/cua/tree/main/notebooks/samples/computer_server_nb.ipynb) \ No newline at end of file +- [Computer-Use Server](https://github.com/trycua/cua/tree/main/notebooks/samples/computer_server_nb.ipynb) diff --git a/docs/content/docs/libraries/computer/index.mdx b/docs/content/docs/libraries/computer/index.mdx index 6fb87bf8..d10cb255 100644 --- a/docs/content/docs/libraries/computer/index.mdx +++ b/docs/content/docs/libraries/computer/index.mdx @@ -2,11 +2,40 @@ title: Computer --- -
-Python -macOS -Discord -PyPI +
+ + Python + + + macOS + + + Discord + + + PyPI +
**cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments. @@ -14,7 +43,7 @@ title: Computer ### Get started with Computer
- +
```python @@ -23,11 +52,11 @@ from computer import Computer computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4") try: await computer.run() - + screenshot = await computer.interface.screenshot() with open("screenshot.png", "wb") as f: f.write(screenshot) - + await computer.interface.move_cursor(100, 100) await computer.interface.left_click() await computer.interface.right_click(300, 300) @@ -100,8 +129,12 @@ For examples, see [Computer UI Examples](https://github.com/trycua/cua/tree/main #### 3. Record Your Tasks
-View demonstration video - + View demonstration video +
Record yourself performing various computer tasks using the UI. @@ -109,8 +142,12 @@ Record yourself performing various computer tasks using the UI. #### 4. Save Your Demonstrations
-View demonstration video - + View demonstration video +
Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding"). @@ -122,11 +159,16 @@ Repeat steps 3 and 4 until you have a good amount of demonstrations covering dif #### 6. Upload to Huggingface
-View demonstration video - + View demonstration video +
Upload your dataset to Huggingface by: + - Naming it as `{your_username}/{dataset_name}` - Choosing public or private visibility - Optionally selecting specific tags to upload only tasks with certain tags @@ -135,4 +177,3 @@ Upload your dataset to Huggingface by: - Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset) - Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua) - diff --git a/docs/content/docs/libraries/core/index.mdx b/docs/content/docs/libraries/core/index.mdx new file mode 100644 index 00000000..cd94c4d2 --- /dev/null +++ b/docs/content/docs/libraries/core/index.mdx @@ -0,0 +1,51 @@ +--- +title: c/ua Core +--- + +
+ + Python + + + macOS + + + Discord + + + PyPI + +
+ +**Cua Core** provides essential shared functionality and utilities used across the Cua ecosystem: + +- Privacy-focused telemetry system for transparent usage analytics +- Common helper functions and utilities used by other Cua packages +- Core infrastructure components shared between modules + +## Installation + +```bash +pip install cua-core +``` diff --git a/docs/content/docs/libraries/index.mdx b/docs/content/docs/libraries/index.mdx index 4e050d9a..507bd8c5 100644 --- a/docs/content/docs/libraries/index.mdx +++ b/docs/content/docs/libraries/index.mdx @@ -5,4 +5,4 @@ description: Libraries ## Libraries -The CUA project provides several libraries for building Computer-Use AI agents. \ No newline at end of file +The CUA project provides several libraries for building Computer-Use AI agents. diff --git a/docs/content/docs/libraries/lume/API-Reference.mdx b/docs/content/docs/libraries/lume/API-Reference.mdx index 14b014dc..91a5538b 100644 --- a/docs/content/docs/libraries/lume/API-Reference.mdx +++ b/docs/content/docs/libraries/lume/API-Reference.mdx @@ -23,6 +23,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/vms ``` +
@@ -53,6 +54,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/vms/lume_vm/run ``` +
@@ -63,6 +65,7 @@ curl --connect-timeout 6000 \ --max-time 5000 \ http://localhost:7777/lume/vms ``` + ``` [ { @@ -83,6 +86,7 @@ curl --connect-timeout 6000 \ } ] ``` +
@@ -99,6 +103,7 @@ curl --connect-timeout 6000 \ --max-time 5000 \ http://localhost:7777/lume/vms/lume_vm?storage=ssd ``` + ``` { "name": "lume_vm", @@ -109,6 +114,7 @@ curl --connect-timeout 6000 \ "diskSize": "64GB" } ``` +
@@ -127,6 +133,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/vms/my-vm-name ``` +
@@ -145,6 +152,7 @@ curl --connect-timeout 6000 \ -X POST \ http://localhost:7777/lume/vms/my-vm-name/stop?storage=ssd ``` +
@@ -163,6 +171,7 @@ curl --connect-timeout 6000 \ -X DELETE \ http://localhost:7777/lume/vms/my-vm-name?storage=ssd ``` +
@@ -194,6 +203,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/pull ``` +
@@ -206,15 +216,15 @@ curl --connect-timeout 6000 \ -X POST \ -H "Content-Type: application/json" \ -d '{ - "name": "my-local-vm", + "name": "my-local-vm", "imageName": "my-image", "tags": ["latest", "v1"], - "organization": "my-org", + "organization": "my-org", "registry": "ghcr.io", "chunkSizeMb": 512, - "storage": null + "storage": null }' \ - http://localhost:7777/lume/vms/push + http://localhost:7777/lume/vms/push ``` **Response (202 Accepted):** @@ -224,12 +234,10 @@ curl --connect-timeout 6000 \ "message": "Push initiated in background", "name": "my-local-vm", "imageName": "my-image", - "tags": [ - "latest", - "v1" - ] + "tags": ["latest", "v1"] } ``` +
@@ -248,6 +256,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/vms/clone ``` +
@@ -258,6 +267,7 @@ curl --connect-timeout 6000 \ --max-time 5000 \ http://localhost:7777/lume/ipsw ``` +
@@ -272,12 +282,10 @@ curl --connect-timeout 6000 \ ```json { - "local": [ - "macos-sequoia-xcode:latest", - "macos-sequoia-vanilla:latest" - ] + "local": ["macos-sequoia-xcode:latest", "macos-sequoia-vanilla:latest"] } ``` +
@@ -289,6 +297,7 @@ curl --connect-timeout 6000 \ -X POST \ http://localhost:7777/lume/prune ``` +
@@ -307,6 +316,7 @@ curl --connect-timeout 6000 \ "cachingEnabled": true } ``` +
@@ -324,6 +334,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/config ``` +
@@ -349,6 +360,7 @@ curl --connect-timeout 6000 \ } ] ``` +
@@ -365,6 +377,7 @@ curl --connect-timeout 6000 \ }' \ http://localhost:7777/lume/config/locations ``` +
@@ -376,6 +389,7 @@ curl --connect-timeout 6000 \ -X DELETE \ http://localhost:7777/lume/config/locations/ssd ``` +
@@ -387,4 +401,5 @@ curl --connect-timeout 6000 \ -X POST \ http://localhost:7777/lume/config/locations/default/ssd ``` +
diff --git a/docs/content/docs/libraries/lume/Development.mdx b/docs/content/docs/libraries/lume/Development.mdx index 6c6edd70..ce50b8b9 100644 --- a/docs/content/docs/libraries/lume/Development.mdx +++ b/docs/content/docs/libraries/lume/Development.mdx @@ -1,6 +1,7 @@ --- title: Development Guide --- + # Development Guide This guide will help you set up your development environment and understand the process for contributing code to lume. @@ -8,6 +9,7 @@ This guide will help you set up your development environment and understand the ## Environment Setup Lume development requires: + - Swift 6 or higher - Xcode 15 or higher - macOS Sequoia 15.2 or higher @@ -16,7 +18,7 @@ Lume development requires: ## Setting Up the Repository Locally 1. **Fork the Repository**: Create your own fork of lume -2. **Clone the Repository**: +2. **Clone the Repository**: ```bash git clone https://github.com/trycua/lume.git cd lume diff --git a/docs/content/docs/libraries/lume/FAQ.mdx b/docs/content/docs/libraries/lume/FAQ.mdx index 890f1d68..e4152261 100644 --- a/docs/content/docs/libraries/lume/FAQ.mdx +++ b/docs/content/docs/libraries/lume/FAQ.mdx @@ -1,6 +1,7 @@ --- title: FAQs --- + # FAQs ### Where are the VMs stored? 
@@ -18,10 +19,12 @@ Lume follows the XDG Base Directory specification for the configuration file: - Configuration is stored in `$XDG_CONFIG_HOME/lume/config.yaml` (defaults to `~/.config/lume/config.yaml`) By default, other data is stored in: + - VM data: `~/.lume` - Cache files: `~/.lume/cache` The config file contains settings for: + - VM storage locations and the default location - Cache directory location - Whether caching is enabled @@ -89,6 +92,7 @@ lume delete ### How to Install macOS from an IPSW Image #### Create a new macOS VM using the latest supported IPSW image: + Run the following command to create a new macOS virtual machine using the latest available IPSW image: ```bash @@ -96,6 +100,7 @@ lume create --os macos --ipsw latest ``` #### Create a new macOS VM using a specific IPSW image: + To create a macOS virtual machine from an older or specific IPSW file, first download the desired IPSW (UniversalMac) from a trusted source. Then, use the downloaded IPSW path: diff --git a/docs/content/docs/libraries/lume/index.mdx b/docs/content/docs/libraries/lume/index.mdx index 982218b8..405b8e4b 100644 --- a/docs/content/docs/libraries/lume/index.mdx +++ b/docs/content/docs/libraries/lume/index.mdx @@ -2,22 +2,44 @@ title: Lume --- -
-Swift 6 -macOS -Discord + - **lume** is a lightweight Command Line Interface and local API server to create, run and manage macOS and Linux virtual machines (VMs) with near-native performance on Apple Silicon, using Apple's `Virtualization.Framework`. ### Run prebuilt macOS images in just 1 step
- lume cli + lume cli
- ```bash lume run macos-sequoia-vanilla:latest ``` @@ -30,6 +52,7 @@ If you're working on Lume in the context of the CUA monorepo, we recommend using # Open VS Code workspace from the root of the monorepo code .vscode/lume.code-workspace ``` + This workspace is preconfigured with Swift language support, build tasks, and debug configurations. ## Usage @@ -153,7 +176,7 @@ You can also download the `lume.pkg.tar.gz` archive from the [latest release](ht ## Prebuilt Images -Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages). +Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages). **Important Note (v0.2.0+):** Images are being re-uploaded with sparse file system optimizations enabled, resulting in significantly lower actual disk usage. Older images (without the `-sparse` suffix) are now **deprecated**. The last version of `lume` fully supporting the non-sparse images was `v0.1.x`. Starting from `v0.2.0`, lume will automatically pull images optimized with sparse file system support. @@ -161,17 +184,17 @@ These images come with an SSH server pre-configured and auto-login enabled. For the security of your VM, change the default password `lume` immediately after your first login. 
-| Image | Tag | Description | Logical Size | -|-------|------------|-------------|------| -| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 20GB | -| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 22GB | -| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 24GB | -| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | +| Image | Tag | Description | Logical Size | +| ----------------------- | ------------------- | ----------------------------------------------------------------------------------------------- | ------------ | +| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 20GB | +| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 22GB | +| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 24GB | +| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB | For additional disk space, resize the VM disk after pulling the image using the `lume set --disk-size ` command. Note that the actual disk space used by sparse images will be much lower than the logical size listed. ## Local API Server - + `lume` exposes a local HTTP API server that listens on `http://localhost:7777/lume`, enabling automated management of VMs. ```bash diff --git a/docs/content/docs/libraries/lumier/index.mdx b/docs/content/docs/libraries/lumier/index.mdx index 672ccf29..d9795739 100644 --- a/docs/content/docs/libraries/lumier/index.mdx +++ b/docs/content/docs/libraries/lumier/index.mdx @@ -2,19 +2,48 @@ title: Lumier --- -
-Swift 6 -macOS -Discord + macOS and Linux virtual machines in a Docker container.
- +
## What is Lumier? + **Lumier** is an interface for running macOS virtual machines with minimal setup. It uses Docker as a packaging system to deliver a pre-configured environment that connects to the `lume` virtualization service running on your host machine. With Lumier, you get: - A ready-to-use macOS or Linux virtual machine in minutes @@ -29,6 +58,7 @@ Before using Lumier, make sure you have: 1. **Docker for Apple Silicon** - download it [here](https://desktop.docker.com/mac/main/arm64/Docker.dmg) and follow the installation instructions. 2. **Lume** - This is the virtualization CLI that powers Lumier. Install it with this command: + ```bash /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)" ``` @@ -160,10 +190,10 @@ services: container_name: lumier-vm restart: unless-stopped ports: - - "8006:8006" # Port for VNC access + - '8006:8006' # Port for VNC access volumes: - - ./storage:/storage # VM persistent storage - - ./shared:/shared # Shared folder accessible in the VM + - ./storage:/storage # VM persistent storage + - ./shared:/shared # Shared folder accessible in the VM environment: - VM_NAME=lumier-vm - VERSION=ghcr.io/trycua/macos-sequoia-cua:latest @@ -239,6 +269,7 @@ When running Lumier, you'll need to configure a few things: - **Port forwarding** (`-p 8006:8006`): Makes the VM's VNC interface accessible in your browser. If port 8006 is already in use, you can use a different port like `-p 8007:8006`. - **Environment variables** (`-e`): Configure your VM settings: + - `VM_NAME`: A name for your virtual machine - `VERSION`: The macOS image to use - `CPU_CORES`: Number of CPU cores to allocate @@ -253,6 +284,7 @@ When running Lumier, you'll need to configure a few things: This project was inspired by [dockur/windows](https://github.com/dockur/windows) and [dockur/macos](https://github.com/dockur/macos), which pioneered the approach of running Windows and macOS VMs in Docker containers. 
Main differences with dockur/macos: + - Lumier is specifically designed for macOS virtualization - Lumier supports Apple Silicon (M1/M2/M3/M4) while dockur/macos only supports Intel - Lumier uses the Apple Virtualization Framework (Vz) through the `lume` CLI to create true virtual machines, while dockur relies on KVM. diff --git a/docs/content/docs/libraries/mcp-server/index.mdx b/docs/content/docs/libraries/mcp-server/index.mdx index 6e4e9e33..1e868e84 100644 --- a/docs/content/docs/libraries/mcp-server/index.mdx +++ b/docs/content/docs/libraries/mcp-server/index.mdx @@ -2,14 +2,44 @@ title: MCP Server --- -
-Swift 6 -macOS -Discord -Python + **cua-mcp-server** is a MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients. + ### Get started with Agent ## Prerequisites @@ -32,8 +62,9 @@ pip install cua-mcp-server ``` This will install: + - The MCP server -- CUA agent and computer dependencies +- CUA agent and computer dependencies - An executable `cua-mcp-server` script in your PATH ## Easy Setup Script @@ -45,6 +76,7 @@ curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/mcp-server/scr ``` This script will: + - Create the ~/.cua directory if it doesn't exist - Generate a startup script at ~/.cua/start_mcp_server.sh - Make the script executable @@ -53,7 +85,7 @@ This script will: You can then use the script in your MCP configuration like this: ```json -{ +{ "mcpServers": { "cua-agent": { "command": "/bin/bash", @@ -92,6 +124,7 @@ If you want to develop with the cua-mcp-server directly without installation, yo ``` This configuration: + - Uses the start_mcp_server.sh script which automatically sets up the Python path and runs the server module - Works with Claude Desktop, Cursor, or any other MCP client - Automatically uses your development code without requiring installation @@ -103,6 +136,7 @@ Just add this to your MCP client's configuration and it will use your local deve If you get a `/bin/bash: ~/cua/libs/mcp-server/scripts/start_mcp_server.sh: No such file or directory` error, try changing the path to the script to be absolute instead of relative. 
To see the logs: + ``` tail -n 20 -f ~/Library/Logs/Claude/mcp*.log ``` @@ -127,20 +161,21 @@ For more information on MCP with Cursor, see the [official Cursor MCP documentat ### First-time Usage Notes **API Keys**: Ensure you have valid API keys: - - Add your Anthropic API key, or other model provider API key in the Claude Desktop config (as shown above) - - Or set it as an environment variable in your shell profile + +- Add your Anthropic API key, or other model provider API key in the Claude Desktop config (as shown above) +- Or set it as an environment variable in your shell profile ## Configuration The server is configured using environment variables (can be set in the Claude Desktop config): -| Variable | Description | Default | -|----------|-------------|---------| -| `CUA_AGENT_LOOP` | Agent loop to use (OPENAI, ANTHROPIC, UITARS, OMNI) | OMNI | -| `CUA_MODEL_PROVIDER` | Model provider (ANTHROPIC, OPENAI, OLLAMA, OAICOMPAT) | ANTHROPIC | -| `CUA_MODEL_NAME` | Model name to use | None (provider default) | -| `CUA_PROVIDER_BASE_URL` | Base URL for provider API | None | -| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 | +| Variable | Description | Default | +| ----------------------- | ----------------------------------------------------- | ----------------------- | +| `CUA_AGENT_LOOP` | Agent loop to use (OPENAI, ANTHROPIC, UITARS, OMNI) | OMNI | +| `CUA_MODEL_PROVIDER` | Model provider (ANTHROPIC, OPENAI, OLLAMA, OAICOMPAT) | ANTHROPIC | +| `CUA_MODEL_NAME` | Model name to use | None (provider default) | +| `CUA_PROVIDER_BASE_URL` | Base URL for provider API | None | +| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 | ## Available Tools @@ -158,4 +193,4 @@ Once configured, you can simply ask Claude to perform computer tasks: - "Find all PDFs in my Downloads folder" - "Take a screenshot and highlight the error message" -Claude will automatically use your CUA agent to perform these tasks. 
\ No newline at end of file +Claude will automatically use your CUA agent to perform these tasks. diff --git a/docs/content/docs/libraries/pylume/index.mdx b/docs/content/docs/libraries/pylume/index.mdx index 0787dd79..b06fe174 100644 --- a/docs/content/docs/libraries/pylume/index.mdx +++ b/docs/content/docs/libraries/pylume/index.mdx @@ -2,21 +2,45 @@ title: PyLume --- -
-Python -macOS -Discord -PyPI + - **pylume** is a lightweight Python library based on [lume](https://github.com/trycua/lume) to create, run and manage macOS and Linux virtual machines (VMs) natively on Apple Silicon.
-lume-py + lume-py
- ```bash pip install pylume ``` @@ -27,7 +51,7 @@ Please refer to this [Notebook](https://github.com/trycua/cua/blob/main/notebook ## Prebuilt Images -Pre-built images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages). +Pre-built images are available on [ghcr.io/trycua](https://github.com/orgs/trycua/packages). These images come pre-configured with an SSH server and auto-login enabled. ## Contributing diff --git a/docs/content/docs/libraries/som/index.mdx b/docs/content/docs/libraries/som/index.mdx index 3a13316f..2468d135 100644 --- a/docs/content/docs/libraries/som/index.mdx +++ b/docs/content/docs/libraries/som/index.mdx @@ -2,11 +2,40 @@ title: Set-of-Mark --- -
-Python -macOS -Discord -PyPI + **Som** (Set-of-Mark) is a visual grounding component for the Computer-Use Agent (CUA) framework powering Cua, for detecting and analyzing UI elements in screenshots. Optimized for macOS Silicon with Metal Performance Shaders (MPS), it combines YOLO-based icon detection with EasyOCR text recognition to provide comprehensive UI element analysis. @@ -27,7 +56,6 @@ title: Set-of-Mark - Uses Metal Performance Shaders (MPS) - Multi-scale detection enabled - ~0.4s average detection time - - **Supported**: Any Python 3.11+ environment - Falls back to CPU if no GPU available - Single-scale detection on CPU @@ -74,7 +102,9 @@ for elem in result.elements: ### Detection Parameters #### Box Threshold (0.3) + Controls the confidence threshold for accepting detections: + ``` High Threshold (0.3): Low Threshold (0.01): +----------------+ +----------------+ @@ -86,12 +116,15 @@ High Threshold (0.3): Low Threshold (0.01): +----------------+ +----------------+ conf = 0.85 conf = 0.02 ``` + - Higher values (0.3) yield more precise but fewer detections - Lower values (0.01) catch more potential icons but increase false positives - Default is 0.3 for optimal precision/recall balance #### IOU Threshold (0.1) + Controls how overlapping detections are merged: + ``` IOU = Intersection Area / Union Area @@ -106,6 +139,7 @@ Low Overlap (Keep Both): High Overlap (Merge): +----------+ IOU ≈ 0.05 (Keep Both) IOU ≈ 0.7 (Merge) ``` + - Lower values (0.1) more aggressively remove overlapping boxes - Higher values (0.5) allow more overlapping detections - Default is 0.1 to handle densely packed UI elements @@ -113,6 +147,7 @@ IOU ≈ 0.05 (Keep Both) IOU ≈ 0.7 (Merge) ### OCR Configuration - **Engine**: EasyOCR + - Primary choice for all platforms - Fast initialization and processing - Built-in English language support @@ -129,6 +164,7 @@ IOU ≈ 0.05 (Keep Both) IOU ≈ 0.7 (Merge) ### Hardware Acceleration #### MPS (Metal Performance Shaders) + - Multi-scale detection 
(640px, 1280px, 1920px) - Test-time augmentation enabled - Half-precision (FP16) @@ -136,6 +172,7 @@ IOU ≈ 0.05 (Keep Both) IOU ≈ 0.7 (Merge) - Best for production use when available #### CPU + - Single-scale detection (1280px) - Full-precision (FP32) - Average detection time: ~1.3s @@ -160,11 +197,13 @@ examples/output/ ## Development ### Test Data + - Place test screenshots in `examples/test_data/` - Not tracked in git to keep repository size manageable - Default test image: `test_screen.png` (1920x1080) ### Running Tests + ```bash # Run benchmark with no OCR python examples/omniparser_examples.py examples/test_data/test_screen.png --runs 5 --ocr none diff --git a/docs/content/docs/libraries/agent/agent.png b/docs/public/img/agent.png similarity index 100% rename from docs/content/docs/libraries/agent/agent.png rename to docs/public/img/agent.png diff --git a/docs/content/docs/libraries/agent/agent_gradio_ui.png b/docs/public/img/agent_gradio_ui.png similarity index 100% rename from docs/content/docs/libraries/agent/agent_gradio_ui.png rename to docs/public/img/agent_gradio_ui.png diff --git a/docs/content/docs/libraries/lume/cli.png b/docs/public/img/cli.png similarity index 100% rename from docs/content/docs/libraries/lume/cli.png rename to docs/public/img/cli.png diff --git a/docs/content/docs/libraries/computer/computer.png b/docs/public/img/computer.png similarity index 100% rename from docs/content/docs/libraries/computer/computer.png rename to docs/public/img/computer.png