mirror of
https://github.com/trycua/computer.git
synced 2026-04-22 22:50:37 -05:00
Move text from README to Cua documentation
This commit is contained in:
@@ -22,7 +22,7 @@ agent = ComputerAgent(
|
||||
tools=[computer]
|
||||
)
|
||||
|
||||
prompt = "open github, navigate to trycua/cua"
|
||||
prompt = "Take a screenshot and tell me what you see"
|
||||
|
||||
async for result in agent.run(prompt):
|
||||
if result["output"][-1]["type"] == "message":
|
||||
@@ -31,6 +31,69 @@ async for result in agent.run(prompt):
|
||||
|
||||
For a list of supported models and configurations, see the [Supported Agents](./supported-agents/computer-use-agents) page.
|
||||
|
||||
### Response Format
|
||||
|
||||
```python
|
||||
{
|
||||
"output": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "I can see..."}]
|
||||
},
|
||||
{
|
||||
"type": "computer_call",
|
||||
"action": {"type": "screenshot"},
|
||||
"call_id": "call_123"
|
||||
},
|
||||
{
|
||||
"type": "computer_call_output",
|
||||
"call_id": "call_123",
|
||||
"output": {"image_url": "data:image/png;base64,..."}
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 150,
|
||||
"completion_tokens": 75,
|
||||
"total_tokens": 225,
|
||||
"response_cost": 0.01,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Use the following environment variables to configure the agent and its access to cloud computers and LLM providers:
|
||||
|
||||
```bash
|
||||
# Computer instance (cloud)
|
||||
export CUA_CONTAINER_NAME="your-container-name"
|
||||
export CUA_API_KEY="your-cua-api-key"
|
||||
|
||||
# LLM API keys
|
||||
export ANTHROPIC_API_KEY="your-anthropic-key"
|
||||
export OPENAI_API_KEY="your-openai-key"
|
||||
```
|
||||
|
||||
### Input and output
|
||||
|
||||
The input prompt passed to `ComputerAgent.run` can be either a string or a list of message dictionaries:
|
||||
|
||||
```python
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Take a screenshot and describe what you see"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I'll take a screenshot for you."
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
The output is an AsyncGenerator that yields response chunks.
|
||||
|
||||
### Parameters
|
||||
|
||||
The `ComputerAgent` constructor provides a wide range of options for customizing agent behavior, tool integration, callbacks, resource management, and more.
|
||||
@@ -79,4 +142,30 @@ agent = ComputerAgent(
|
||||
use_prompt_caching=True,
|
||||
max_trajectory_budget={"max_budget": 5.0, "raise_error": True}
|
||||
)
|
||||
```
|
||||
|
||||
### Streaming Responses
|
||||
|
||||
```python
|
||||
async for result in agent.run(messages, stream=True):
|
||||
# Process streaming chunks
|
||||
for item in result["output"]:
|
||||
if item["type"] == "message":
|
||||
print(item["content"][0]["text"], end="", flush=True)
|
||||
elif item["type"] == "computer_call":
|
||||
action = item["action"]
|
||||
print(f"\n[Action: {action['type']}]")
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
|
||||
```python
|
||||
try:
|
||||
async for result in agent.run(messages):
|
||||
# Process results
|
||||
pass
|
||||
except BudgetExceededException:
|
||||
print("Budget limit exceeded")
|
||||
except Exception as e:
|
||||
print(f"Agent error: {e}")
|
||||
```
|
||||
@@ -28,19 +28,23 @@ agent = ComputerAgent(
|
||||
## Budget Manager Shorthand
|
||||
|
||||
```python
|
||||
# Simple budget limit
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
max_trajectory_budget=5.0 # Auto-adds BudgetManagerCallback
|
||||
max_trajectory_budget=5.0 # $5 limit
|
||||
)
|
||||
```
|
||||
|
||||
**Or with options:**
|
||||
```python
|
||||
# Advanced budget configuration
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
max_trajectory_budget={"max_budget": 5.0, "raise_error": True}
|
||||
max_trajectory_budget={
|
||||
"max_budget": 10.0,
|
||||
"raise_error": True, # Raise error when exceeded
|
||||
"reset_after_each_run": False # Persistent across runs
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -4,26 +4,61 @@ title: Callbacks
|
||||
|
||||
Callbacks in the Agent SDK provide hooks into the agent's lifecycle, allowing for custom functionality to be executed at various stages of an agent's run. They enable extensibility by allowing developers to integrate their own logic for tasks such as logging, cost management, and data anonymization.
|
||||
|
||||
The callback lifecycle is described in [Agent Lifecycle](callbacks/agent-lifecycle).
|
||||
|
||||
## Usage
|
||||
|
||||
You can add preprocessing and postprocessing hooks using callbacks, or write your own by subclassing `AsyncCallbackHandler`:
|
||||
You can add preprocessing and postprocessing hooks using callbacks, or write your own by subclassing `AsyncCallbackHandler`.
|
||||
|
||||
### Built-in Callbacks
|
||||
|
||||
Built-in callbacks can be used as follows:
|
||||
|
||||
```python
|
||||
from agent.callbacks import ImageRetentionCallback, PIIAnonymizationCallback
|
||||
from agent.callbacks import (
|
||||
ImageRetentionCallback,
|
||||
TrajectorySaverCallback,
|
||||
BudgetManagerCallback,
|
||||
LoggingCallback
|
||||
)
|
||||
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
callbacks=[ImageRetentionCallback(only_n_most_recent_images=3)]
|
||||
callbacks=[
|
||||
ImageRetentionCallback(only_n_most_recent_images=3),
|
||||
TrajectorySaverCallback(trajectory_dir="trajectories"),
|
||||
BudgetManagerCallback(max_budget=10.0, raise_error=True),
|
||||
LoggingCallback(level=logging.INFO)
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
## Built-in Callbacks
|
||||
The following built-in callbacks are available:
|
||||
|
||||
- [BudgetManagerCallback](callbacks/cost-saving): Stops execution when the budget is exceeded
|
||||
- [LoggingCallback](callbacks/trajectories): Logs agent activities
|
||||
- **ImageRetentionCallback**: Keeps only the N most recent images in context
|
||||
- **TrajectorySaverCallback**: Saves conversation trajectories
|
||||
- [PII Anonymization](callbacks/pii-anonymization)
|
||||
|
||||
### Custom Callbacks
|
||||
|
||||
Create custom callbacks using knowledge of the callback lifecycle as described in [Agent Lifecycle](callbacks/agent-lifecycle).
|
||||
|
||||
```python
|
||||
from agent.callbacks.base import AsyncCallbackHandler
|
||||
|
||||
class CustomCallback(AsyncCallbackHandler):
|
||||
async def on_llm_start(self, messages):
|
||||
"""Preprocess messages before LLM call"""
|
||||
# Add custom preprocessing logic
|
||||
return messages
|
||||
|
||||
async def on_llm_end(self, messages):
|
||||
"""Postprocess messages after LLM call"""
|
||||
# Add custom postprocessing logic
|
||||
return messages
|
||||
|
||||
async def on_usage(self, usage):
|
||||
"""Track usage information"""
|
||||
print(f"Tokens used: {usage.total_tokens}")
|
||||
```
|
||||
|
||||
@@ -29,8 +29,8 @@ agent = ComputerAgent(
|
||||
```python
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer],
|
||||
trajectory_dir="trajectories" # Auto-adds TrajectorySaverCallback
|
||||
trajectory_dir="trajectories", # Auto-save trajectories
|
||||
tools=[computer]
|
||||
)
|
||||
```
|
||||
|
||||
@@ -46,6 +46,12 @@ The viewer provides:
|
||||
|
||||
## Trajectory Structure
|
||||
|
||||
Trajectories are saved with:
|
||||
- Complete conversation history
|
||||
- Usage statistics and costs
|
||||
- Timestamps and metadata
|
||||
- Screenshots and computer actions
|
||||
|
||||
Each trajectory contains:
|
||||
- **metadata.json**: Run info, timestamps, usage stats (`total_tokens`, `response_cost`)
|
||||
- **turn_000/**: Turn-by-turn conversation history (api calls, responses, computer calls, screenshots)
|
||||
|
||||
+21
-3
@@ -1,11 +1,29 @@
|
||||
---
|
||||
title: Sandboxed Tools
|
||||
slug: sandboxed-tools
|
||||
title: Custom Tools
|
||||
slug: custom-tools
|
||||
---
|
||||
|
||||
The Agent SDK supports defining custom Python tools that run securely in sandboxed environments on remote Cua Computers. This enables safe execution of user-defined functions, isolation of dependencies, and robust automation workflows.
|
||||
|
||||
## Example: Defining a Sandboxed Tool
|
||||
## Custom Tools
|
||||
|
||||
Define a custom tool for an agent:
|
||||
|
||||
```python
|
||||
def calculate(a: int, b: int) -> int:
|
||||
"""Calculate the sum of two integers"""
|
||||
return a + b
|
||||
|
||||
# Use with agent
|
||||
agent = ComputerAgent(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
tools=[computer, calculate]
|
||||
)
|
||||
```
|
||||
|
||||
## Sandboxed Tools
|
||||
|
||||
Define a sandboxed tool:
|
||||
|
||||
```python
|
||||
from computer.helpers import sandboxed
|
||||
@@ -4,11 +4,11 @@
|
||||
"pages": [
|
||||
"agent-loops",
|
||||
"supported-agents",
|
||||
"supported-model-providers",
|
||||
"chat-history",
|
||||
"callbacks",
|
||||
"sandboxed-tools",
|
||||
"custom-tools",
|
||||
"custom-computer-handlers",
|
||||
"local-models",
|
||||
"prompt-caching",
|
||||
"usage-tracking",
|
||||
"benchmarks",
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
---
|
||||
title: Supported Model Providers
|
||||
---
|
||||
|
||||
## Supported Models
|
||||
|
||||
### Anthropic Claude (Computer Use API)
|
||||
```python
|
||||
model="anthropic/claude-3-5-sonnet-20241022"
|
||||
model="anthropic/claude-3-7-sonnet-20250219"
|
||||
model="anthropic/claude-opus-4-20250514"
|
||||
model="anthropic/claude-sonnet-4-20250514"
|
||||
```
|
||||
|
||||
### OpenAI Computer Use Preview
|
||||
```python
|
||||
model="openai/computer-use-preview"
|
||||
```
|
||||
|
||||
### UI-TARS (Local or Huggingface Inference)
|
||||
```python
|
||||
model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B"
|
||||
model="ollama_chat/0000/ui-tars-1.5-7b"
|
||||
```
|
||||
|
||||
### Omniparser + Any LLM
|
||||
```python
|
||||
model="omniparser+ollama_chat/mistral-small3.2"
|
||||
model="omniparser+vertex_ai/gemini-pro"
|
||||
model="omniparser+anthropic/claude-3-5-sonnet-20241022"
|
||||
model="omniparser+openai/gpt-4o"
|
||||
```
|
||||
@@ -87,6 +87,16 @@ Choose how you want to run your cua computer. **Cloud containers are recommended
|
||||
<Tab value="Python">
|
||||
```bash
|
||||
pip install "cua-agent[all]" cua-computer
|
||||
|
||||
# or install specific providers
|
||||
pip install "cua-agent[openai]" # OpenAI computer-use-preview support
|
||||
pip install "cua-agent[anthropic]" # Anthropic Claude support
|
||||
pip install "cua-agent[omni]" # Omniparser + any LLM support
|
||||
pip install "cua-agent[uitars]" # UI-TARS
|
||||
pip install "cua-agent[uitars-mlx]" # UI-TARS + MLX support
|
||||
pip install "cua-agent[uitars-hf]" # UI-TARS + Huggingface support
|
||||
pip install "cua-agent[glm45v-hf]" # GLM-4.5V + Huggingface support
|
||||
pip install "cua-agent[ui]" # Gradio UI support
|
||||
```
|
||||
</Tab>
|
||||
<Tab value="TypeScript">
|
||||
|
||||
Reference in New Issue
Block a user