Merge pull request #95 from trycua/feature/agent/mcp-server

[Agent] Add MCP server
This commit is contained in:
f-trycua
2025-04-06 17:29:57 -07:00
committed by GitHub
12 changed files with 1639 additions and 7 deletions

View File

@@ -0,0 +1,68 @@
# Publishes the `cua-mcp-server` package.
#
# Triggers:
#   * push of a tag named `mcp-server-v<major>.<minor>.<patch>`
#   * manual run (workflow_dispatch) with an explicit version
#   * call from another workflow (workflow_call) with an explicit version
name: Publish MCP Server Package

on:
  push:
    tags:
      - 'mcp-server-v*'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to publish (without v prefix)'
        required: true
        default: '0.1.0'
  workflow_call:
    inputs:
      version:
        description: 'Version to publish'
        required: true
        type: string
    outputs:
      version:
        description: "The version that was published"
        value: ${{ jobs.determine-version.outputs.version }}

# Adding permissions at workflow level
permissions:
  contents: write

jobs:
  determine-version:
    runs-on: macos-latest
    outputs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - uses: actions/checkout@v4
      - name: Determine version
        id: get-version
        # Pass expression values through the environment instead of splicing
        # them into the script text: a crafted input or ref name would
        # otherwise be executed as shell code.
        env:
          INPUT_VERSION: ${{ inputs.version || github.event.inputs.version }}
          GIT_REF: ${{ github.ref }}
        run: |
          # An explicit version input (workflow_dispatch or workflow_call)
          # takes precedence. Do not branch on github.event_name: inside a
          # called workflow it reports the *caller's* event, so a caller
          # triggered by a push would be misrouted to the tag branch.
          if [ -n "$INPUT_VERSION" ]; then
            VERSION="$INPUT_VERSION"
          elif [[ "$GIT_REF" =~ ^refs/tags/mcp-server-v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
            # Extract version from tag (for package-specific tags)
            VERSION="${BASH_REMATCH[1]}"
          else
            echo "Invalid tag format for mcp-server"
            exit 1
          fi

          if [ -z "$VERSION" ]; then
            echo "Could not determine a version to publish"
            exit 1
          fi

          echo "VERSION=$VERSION"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

  publish:
    needs: determine-version
    uses: ./.github/workflows/reusable-publish.yml
    with:
      package_name: "mcp-server"
      package_dir: "libs/mcp-server"
      version: ${{ needs.determine-version.outputs.version }}
      is_lume_package: false
      base_package_name: "cua-mcp-server"
      make_latest: false
    secrets:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

View File

@@ -220,6 +220,38 @@ jobs:
echo "# Run the server" >> release_notes.md
echo "cua-computer-server" >> release_notes.md
echo '```' >> release_notes.md
elif [ "${{ inputs.package_name }}" = "mcp-server" ]; then
echo "## MCP Server for the Computer-Use Agent (CUA)" >> release_notes.md
echo "" >> release_notes.md
echo "This package provides MCP (Model Context Protocol) integration for CUA agents, allowing them to be used with Claude Desktop, Cursor, and other MCP clients." >> release_notes.md
echo "" >> release_notes.md
echo "## Dependencies" >> release_notes.md
echo "* cua-computer: ${COMPUTER_VERSION:-latest}" >> release_notes.md
echo "* cua-agent: ${AGENT_VERSION:-latest}" >> release_notes.md
echo "" >> release_notes.md
echo "## Usage" >> release_notes.md
echo '```bash' >> release_notes.md
echo "# Run the MCP server directly" >> release_notes.md
echo "cua-mcp-server" >> release_notes.md
echo '```' >> release_notes.md
echo "" >> release_notes.md
echo "## Claude Desktop Integration" >> release_notes.md
echo "Add to your Claude Desktop configuration (~/.config/claude-desktop/claude_desktop_config.json or OS-specific location):" >> release_notes.md
echo '```json' >> release_notes.md
echo '"mcpServers": {' >> release_notes.md
echo ' "cua-agent": {' >> release_notes.md
echo ' "command": "cua-mcp-server",' >> release_notes.md
echo ' "args": [],' >> release_notes.md
echo ' "env": {' >> release_notes.md
echo ' "CUA_AGENT_LOOP": "OMNI",' >> release_notes.md
echo ' "CUA_MODEL_PROVIDER": "ANTHROPIC",' >> release_notes.md
echo ' "CUA_MODEL_NAME": "claude-3-opus-20240229",' >> release_notes.md
echo ' "ANTHROPIC_API_KEY": "your-api-key",' >> release_notes.md
echo ' "PYTHONIOENCODING": "utf-8"' >> release_notes.md
echo ' }' >> release_notes.md
echo ' }' >> release_notes.md
echo '}' >> release_notes.md
echo '```' >> release_notes.md
fi
# Add installation section if not agent (which has its own installation section)

View File

@@ -33,13 +33,13 @@ async def run_agent_example():
loop=AgentLoop.OMNI,
# model=LLM(provider=LLMProvider.OPENAI), # No model name for Operator CUA
# model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
# model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3:4b-it-q4_K_M"),
# model=LLM(
# provider=LLMProvider.OAICOMPAT,
# name="qwen2.5-vl-7b-instruct",
# provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
# ),
model=LLM(
provider=LLMProvider.OAICOMPAT,
name="gemma-3-12b-it",
provider_base_url="http://localhost:1234/v1", # LM Studio local endpoint
),
save_trajectory=True,
only_n_most_recent_images=3,
verbosity=logging.DEBUG,

121
libs/mcp-server/README.md Normal file
View File

@@ -0,0 +1,121 @@
<div align="center">
<h1>
<div class="image-wrapper" style="display: inline-block;">
<picture>
<source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
<source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
<img alt="Shows my svg">
</picture>
</div>
[![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
[![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
[![PyPI](https://img.shields.io/pypi/v/cua-mcp-server?color=333333)](https://pypi.org/project/cua-mcp-server/)
</h1>
</div>
**cua-mcp-server** is an MCP server for the Computer-Use Agent (CUA), allowing you to run CUA through Claude Desktop or other MCP clients.
### Get started with Agent
## Installation
Install the package from PyPI:
```bash
pip install cua-mcp-server
```
This will install:
- The MCP server
- CUA agent and computer dependencies
- An executable `cua-mcp-server` script in your PATH
## Claude Desktop Integration
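To use with Claude Desktop, add an entry to your Claude Desktop configuration file, `claude_desktop_config.json` (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`; you can also open it from Claude Desktop via Settings → Developer → Edit Config):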
```json
{
  "mcpServers": {
    "cua-agent": {
      "command": "cua-mcp-server",
      "args": [],
      "env": {
        "CUA_AGENT_LOOP": "OMNI",
        "CUA_MODEL_PROVIDER": "ANTHROPIC",
        "CUA_MODEL_NAME": "claude-3-opus-20240229",
        "ANTHROPIC_API_KEY": "your-api-key",
        "PYTHONIOENCODING": "utf-8"
      }
    }
  }
}
```
For more information on MCP with Claude Desktop, see the [official MCP User Guide](https://modelcontextprotocol.io/quickstart/user).
## Cursor Integration
To use with Cursor, add an MCP configuration file in one of these locations:
- **Project-specific**: Create `.cursor/mcp.json` in your project directory
- **Global**: Create `~/.cursor/mcp.json` in your home directory
The configuration format is similar to Claude Desktop's:
```json
{
"mcpServers": {
"cua-agent": {
"command": "cua-mcp-server",
"args": [],
"env": {
"CUA_AGENT_LOOP": "OMNI",
"CUA_MODEL_PROVIDER": "ANTHROPIC",
"CUA_MODEL_NAME": "claude-3-7-sonnet-20250219",
"ANTHROPIC_API_KEY": "your-api-key",
"PYTHONPATH": "/path/to/your/cua/installation"
}
}
}
}
```
After configuration, you can simply tell Cursor's Agent to perform computer tasks by explicitly mentioning the CUA agent, such as "Use the computer control tools to open Safari."
For more information on MCP with Cursor, see the [official Cursor MCP documentation](https://docs.cursor.com/context/model-context-protocol).
### First-time Usage Notes
**API Keys**: Ensure you have valid API keys:
- Add your Anthropic API key (or another model provider's API key) to the Claude Desktop config (as shown above)
- Or set it as an environment variable in your shell profile
## Configuration
The server is configured using environment variables (can be set in the Claude Desktop config):
| Variable | Description | Default |
|----------|-------------|---------|
| `CUA_AGENT_LOOP` | Agent loop to use (OPENAI, ANTHROPIC, OMNI) | OMNI |
| `CUA_MODEL_PROVIDER` | Model provider (ANTHROPIC, OPENAI, OLLAMA, OAICOMPAT) | ANTHROPIC |
| `CUA_MODEL_NAME` | Model name to use | None (provider default) |
| `CUA_PROVIDER_BASE_URL` | Base URL for provider API | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
## Available Tools
The MCP server exposes the following tools to Claude:
1. `run_cua_task` - Run a single Computer-Use Agent task with the given instruction
2. `run_multi_cua_tasks` - Run multiple tasks in sequence
## Usage
Once configured, you can simply ask Claude to perform computer tasks:
- "Open Chrome and go to github.com"
- "Create a folder called 'Projects' on my desktop"
- "Find all PDFs in my Downloads folder"
- "Take a screenshot and highlight the error message"
Claude will automatically use your CUA agent to perform these tasks.

View File

@@ -0,0 +1,19 @@
"""MCP Server for Computer-Use Agent (CUA)."""
import os
import sys

# Name of the environment variable that opts in to the import-time
# diagnostics dump. The dump is disabled unless this is "1", "true" or "yes".
_DEBUG_ENV_VAR = "CUA_MCP_DEBUG"

# Fixed location of the diagnostics dump.
_DEBUG_LOG_PATH = "/tmp/mcp_server_debug.log"


def _write_debug_log() -> None:
    """Write interpreter diagnostics to ``_DEBUG_LOG_PATH`` when opted in.

    The dump records the Python executable, version, working directory,
    ``sys.path`` and the *names* of the environment variables. Values are
    deliberately omitted: the server is configured through variables such as
    ``ANTHROPIC_API_KEY``, which must not end up in a file under ``/tmp``.

    The dump is best-effort. Any ``OSError`` (missing ``/tmp``, read-only
    filesystem, refusal to follow a symlink) is ignored so that diagnostics
    can never prevent the package from being imported.
    """
    if os.environ.get(_DEBUG_ENV_VAR, "").lower() not in ("1", "true", "yes"):
        return

    # O_NOFOLLOW (where available) refuses to write through a symlink planted
    # at the fixed path; 0o600 keeps a newly created file private to the user.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_NOFOLLOW", 0)
    try:
        fd = os.open(_DEBUG_LOG_PATH, flags, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(f"Python executable: {sys.executable}\n")
            f.write(f"Python version: {sys.version}\n")
            f.write(f"Working directory: {os.getcwd()}\n")
            f.write("Python path:\n" + "\n".join(sys.path) + "\n")
            f.write("Environment variables (names only):\n")
            for key in sorted(os.environ):
                f.write(f"{key}\n")
    except OSError:
        # Deliberately ignored: see the docstring.
        pass


_write_debug_log()

from .server import server, main

__version__ = "0.1.0"
__all__ = ["server", "main"]

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env python
"""Entry point for the MCP server module."""
from .server import main

# Start the server only when this file is executed as the entry point, not
# when it is merely imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,193 @@
import asyncio
import logging
import os
import sys
import traceback
from typing import Any, Dict, List, Optional, Union
# Configure logging to output to stderr for debug visibility
logging.basicConfig(
    level=logging.DEBUG,  # Changed to DEBUG
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    stream=sys.stderr,
)
logger = logging.getLogger("mcp-server")

# More visible startup message
logger.debug("MCP Server module loading...")

# Import the MCP SDK. A missing package is fatal for this module: the error
# is logged, the traceback is printed to stderr and the process exits with
# status 1.
try:
    from mcp.server.fastmcp import Context, FastMCP

    logger.debug("Successfully imported FastMCP")
except ImportError as e:
    logger.error(f"Failed to import FastMCP: {e}")
    traceback.print_exc(file=sys.stderr)
    sys.exit(1)

# Import the CUA computer/agent packages with the same fail-fast handling as
# the MCP SDK import above.
try:
    from computer import Computer
    from agent import ComputerAgent, LLMProvider, LLM, AgentLoop

    logger.debug("Successfully imported Computer and Agent modules")
except ImportError as e:
    logger.error(f"Failed to import Computer/Agent modules: {e}")
    traceback.print_exc(file=sys.stderr)
    sys.exit(1)

# Global computer instance for reuse
# (created on the first `run_cua_task` call and shared by later calls)
global_computer = None
def get_env_bool(key: str, default: bool = False) -> bool:
    """Interpret the environment variable ``key`` as a boolean flag.

    Args:
        key: Name of the environment variable to read.
        default: Value whose string form is used when the variable is unset.

    Returns:
        True when the (case-insensitive) value is "true", "1" or "yes";
        False for any other value.
    """
    truthy_values = ("true", "1", "yes")
    raw_value = os.getenv(key, str(default))
    normalized = raw_value.lower()
    return normalized in truthy_values
async def _get_computer() -> "Computer":
    """Return the shared ``Computer``, starting it on first use.

    The instance is published to ``global_computer`` only after ``run()`` has
    succeeded. If startup raises, the global stays ``None`` so the next task
    retries initialization instead of reusing a computer that never started.
    """
    global global_computer
    if global_computer is None:
        computer = Computer(verbosity=logging.INFO)
        await computer.run()
        global_computer = computer
    return global_computer


def _agent_loop_from_env() -> "AgentLoop":
    """Map ``CUA_AGENT_LOOP`` (case-insensitive, default OMNI) to an AgentLoop.

    Unrecognized values fall back to ``AgentLoop.OMNI``, with a warning so a
    typo in the configuration is visible in the logs.
    """
    loop_str = os.getenv("CUA_AGENT_LOOP", "OMNI").strip().upper()
    if loop_str == "OPENAI":
        return AgentLoop.OPENAI
    if loop_str == "ANTHROPIC":
        return AgentLoop.ANTHROPIC
    if loop_str != "OMNI":
        logger.warning(f"Unknown CUA_AGENT_LOOP {loop_str!r}; falling back to OMNI")
    return AgentLoop.OMNI


def _provider_from_env() -> "LLMProvider":
    """Map ``CUA_MODEL_PROVIDER`` (case-insensitive, default ANTHROPIC) to an LLMProvider.

    Raises:
        ValueError: If the value does not name an attribute of ``LLMProvider``.
    """
    provider_str = os.getenv("CUA_MODEL_PROVIDER", "ANTHROPIC").strip().upper()
    provider = getattr(LLMProvider, provider_str, None)
    if provider is None:
        raise ValueError(f"Unsupported CUA_MODEL_PROVIDER: {provider_str!r}")
    return provider


def _max_images_from_env() -> int:
    """Read ``CUA_MAX_IMAGES`` (default 3) as an integer.

    Raises:
        ValueError: If the value is not a valid integer.
    """
    raw = os.getenv("CUA_MAX_IMAGES", "3")
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"CUA_MAX_IMAGES must be an integer, got {raw!r}") from None


def _format_agent_result(result: Dict[str, Any]) -> str:
    """Render one result yielded by ``agent.run`` as a text section.

    The section contains the response id, the text response (if any), the
    tools used (if any), one entry per "reasoning" / "computer_call" output,
    and a trailing separator line.
    """
    parts = [f"\n[Response ID: {result.get('id', 'unknown')}]\n"]

    # Extract the text response; non-string payloads are shown via str().
    if "text" in result:
        text_response = result.get("text", "")
        if not isinstance(text_response, str):
            text_response = str(text_response)
        parts.append(f"Response: {text_response}\n")

    if "tools" in result:
        tools_info = result.get("tools")
        logger.debug(f"Tools used: {tools_info}")
        parts.append(f"\nTools used: {tools_info}\n")

    # "output" may be absent or explicitly None; both mean "no outputs".
    for output in result.get("output") or []:
        output_type = output.get("type")
        if output_type == "reasoning":
            logger.debug(f"Reasoning: {output}")
            parts.append(f"\nReasoning: {output.get('content', '')}\n")
        elif output_type == "computer_call":
            logger.debug(f"Computer call: {output}")
            action = output.get("action", "")
            result_value = output.get("result", "")
            parts.append(f"\nComputer Action: {action}\nResult: {result_value}\n")

    # Separator between steps
    parts.append("\n" + "-" * 40 + "\n")
    return "".join(parts)


def serve() -> FastMCP:
    """Create and configure the MCP server.

    Registers two tools on a ``FastMCP`` instance named "cua-agent":
    ``run_cua_task`` and ``run_multi_cua_tasks``.

    Returns:
        The configured ``FastMCP`` server.
    """
    server = FastMCP("cua-agent")

    @server.tool()
    async def run_cua_task(ctx: Context, task: str) -> str:
        """
        Run a Computer-Use Agent (CUA) task and return the results.

        Args:
            ctx: The MCP context
            task: The instruction or task for the agent to perform

        Returns:
            A string containing the agent's response
        """
        try:
            logger.info(f"Starting CUA task: {task}")

            # Resolve the configuration first so that an invalid setting is
            # reported before a computer is started.
            loop = _agent_loop_from_env()
            provider = _provider_from_env()
            # Empty strings are treated like unset variables.
            model_name = os.getenv("CUA_MODEL_NAME") or None
            provider_base_url = os.getenv("CUA_PROVIDER_BASE_URL") or None
            max_images = _max_images_from_env()

            computer = await _get_computer()

            # Create agent with the specified configuration
            agent = ComputerAgent(
                computer=computer,
                loop=loop,
                model=LLM(
                    provider=provider,
                    name=model_name,
                    provider_base_url=provider_base_url,
                ),
                save_trajectory=False,
                only_n_most_recent_images=max_images,
                verbosity=logging.INFO,
            )

            # Collect all results
            sections = []
            async for result in agent.run(task):
                logger.info(f"Agent step complete: {result.get('id', 'unknown')}")
                sections.append(_format_agent_result(result))

            logger.info("CUA task completed successfully")
            return "".join(sections) or "Task completed with no text output."

        except Exception as e:
            # Errors are reported to the MCP client as text rather than raised;
            # the full traceback goes to the log only.
            error_msg = f"Error running CUA task: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            return f"Error during task execution: {str(e)}"

    @server.tool()
    async def run_multi_cua_tasks(ctx: Context, tasks: List[str]) -> str:
        """
        Run multiple CUA tasks in sequence and return the combined results.

        Args:
            ctx: The MCP context
            tasks: List of tasks to run in sequence

        Returns:
            Combined results from all tasks
        """
        results = []
        for i, task in enumerate(tasks):
            logger.info(f"Running task {i+1}/{len(tasks)}: {task}")
            # A failing task yields an error string, so later tasks still run.
            result = await run_cua_task(ctx, task)
            results.append(f"Task {i+1}: {task}\nResult: {result}\n")

        return "\n".join(results)

    return server
# Module-level server instance, built once at import time.
server = serve()


def main():
    """Start the module-level MCP server.

    Any exception raised while starting or running the server is logged, its
    traceback is printed to stderr, and the process exits with status 1.
    """
    logger.debug("Starting MCP server...")
    try:
        server.run()
    except Exception as exc:
        logger.error(f"Error starting server: {exc}")
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

1143
libs/mcp-server/pdm.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,40 @@
# Build configuration: the package is built with the PDM backend.
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
# Package metadata for the `cua-mcp-server` distribution.
[project]
name = "cua-mcp-server"
description = "MCP Server for Computer-Use Agent (CUA)"
readme = "README.md"
requires-python = ">=3.10,<3.13"
# NOTE(review): the package's __init__ also hard-codes __version__ = "0.1.0";
# presumably the two must be bumped together - confirm.
version = "0.1.0"
authors = [
{name = "TryCua", email = "gh@trycua.com"}
]
# Runtime dependencies: the MCP SDK plus the CUA agent and computer packages.
dependencies = [
"mcp>=1.6.0,<2.0.0",
"cua-agent>=0.1.0,<0.2.0",
"cua-computer>=0.1.0,<0.2.0",
]
# Console script: installs a `cua-mcp-server` command that calls
# `main` in `mcp_server.server`.
[project.scripts]
cua-mcp-server = "mcp_server.server:main"
[tool.pdm]
distribution = true
# Development-only tooling (formatter and linter).
[tool.pdm.dev-dependencies]
dev = [
"black>=23.9.1",
"ruff>=0.0.292",
]
# Formatter settings; line length matches the ruff setting below.
[tool.black]
line-length = 100
target-version = ["py310"]
# Linter settings: E, F, B and I rule sets, with auto-fix enabled.
[tool.ruff]
line-length = 100
target-version = "py310"
select = ["E", "F", "B", "I"]
fix = true

View File

@@ -56,6 +56,7 @@ cua-omniparser = { path = "libs/omniparser" }
cua-agent = { path = "libs/agent" }
pylume = { path = "libs/pylume" }
cua-computer-server = { path = "libs/computer-server" }
cua-mcp-server = { path = "libs/mcp-server" }
[tool.black]
line-length = 100

View File

@@ -104,13 +104,16 @@ install_package "libs/agent" "agent" "all"
# Install computer-server
install_package "libs/computer-server" "computer-server"
# Install mcp-server
install_package "libs/mcp-server" "mcp-server"
# Install development tools from root project
print_step "Installing development dependencies..."
pip install -e ".[dev,test,docs]"
# Create a .env file for VS Code to use the virtual environment
print_step "Creating .env file for VS Code..."
echo "PYTHONPATH=${PROJECT_ROOT}/libs/core:${PROJECT_ROOT}/libs/computer:${PROJECT_ROOT}/libs/agent:${PROJECT_ROOT}/libs/som:${PROJECT_ROOT}/libs/pylume:${PROJECT_ROOT}/libs/computer-server" > .env
echo "PYTHONPATH=${PROJECT_ROOT}/libs/core:${PROJECT_ROOT}/libs/computer:${PROJECT_ROOT}/libs/agent:${PROJECT_ROOT}/libs/som:${PROJECT_ROOT}/libs/pylume:${PROJECT_ROOT}/libs/computer-server:${PROJECT_ROOT}/libs/mcp-server" > .env
print_success "All packages installed successfully!"
print_step "Your virtual environment is ready. To activate it:"

View File

@@ -64,6 +64,11 @@ find . -type d -name ".pdm-build" -exec rm -rf {} +
find . -name ".pdm-python" -delete # .pdm-python is a file, not a directory
print_success "PDM-related files removed"
# Remove MCP-related files
print_step "Removing MCP-related files..."
find . -name "mcp_server.log" -delete
# The mcp_server package writes its import-time debug dump to a fixed path
# under /tmp rather than into the repository, so remove that file as well
# (rm -f succeeds even when the file does not exist).
rm -f /tmp/mcp_server_debug.log
print_success "MCP-related files removed"
# Remove .env file
print_step "Removing .env file..."
rm -f .env