mirror of
https://github.com/trycua/lume.git
synced 2026-05-12 15:19:17 -05:00
feat(cua-driver): add Claude Code computer-use compatibility mode (#1424)
* feat(cua-driver): add Claude Code computer-use compatibility mode * docs(cua-driver): avoid hard-wrapped compatibility prose * docs(cua-driver): clarify Claude Code vision mode needs MCP * fix(cua-driver): address CodeRabbit review findings * docs(cua-driver): sync generated reference docs * fix(docs): escape generated cua-driver mdx
This commit is contained in:
committed by
GitHub
parent
e4b047a43e
commit
e69d1cbc06
@@ -113,6 +113,9 @@ Cua Driver speaks MCP over stdio. Use `cua-driver mcp-config --client <name>` to
|
||||
# Claude Code (add --scope project|global as needed)
|
||||
claude mcp add --transport stdio cua-driver -- ~/.local/bin/cua-driver mcp
|
||||
|
||||
# Claude Code computer-use compatibility mode
|
||||
claude mcp add --transport stdio cua-computer-use -- ~/.local/bin/cua-driver mcp --claude-code-computer-use-compat
|
||||
|
||||
# Codex (OpenAI)
|
||||
codex mcp add cua-driver -- ~/.local/bin/cua-driver mcp
|
||||
|
||||
@@ -120,6 +123,10 @@ codex mcp add cua-driver -- ~/.local/bin/cua-driver mcp
|
||||
cua-driver mcp-config --client openclaw | sh
|
||||
```
|
||||
|
||||
The Claude Code compatibility mode keeps CuaDriver's normal MCP tools, but replaces `screenshot` with a window-only screenshot shim that requires `pid` and `window_id`. Use it when you want Claude Code's vision/computer-use-style flow to ground on CuaDriver window captures.
|
||||
|
||||
Use MCP for that Claude Code vision/computer-use-style path. Shelling out to `cua-driver screenshot` can capture a window, but it does not expose the `mcp__cua-computer-use__screenshot` tool name that Claude Code appears to use as the image-grounding cue.
|
||||
|
||||
### Clients configured via a config file
|
||||
|
||||
Cursor, OpenCode, and Hermes all configure MCP servers via files. Use `mcp-config` to print the exact snippet, then paste it into the right path:
|
||||
|
||||
@@ -14,6 +14,8 @@ import { Callout } from 'fumadocs-ui/components/callout';
|
||||
|
||||
## Claude Code
|
||||
|
||||
Standard MCP registration:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-driver -- cua-driver mcp
|
||||
```
|
||||
@@ -25,6 +27,22 @@ claude mcp list
|
||||
# cua-driver: cua-driver mcp (stdio) - ✓ Connected
|
||||
```
|
||||
|
||||
### Claude Code computer-use compatibility mode
|
||||
|
||||
Claude Code vision/computer-use-style flows appear to use the presence of a screenshot tool as a cue for image-grounded operation. If you want that behavior, register the compatibility server instead:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-computer-use -- cua-driver mcp --claude-code-computer-use-compat
|
||||
```
|
||||
|
||||
This mode still exposes the normal CuaDriver tools. The only changed tool is `screenshot`: it requires `pid` and `window_id`, captures that window only, and returns a window-local image coordinate frame. Start with `launch_app` or `list_windows`, then call `screenshot` with the target window.
|
||||
|
||||
For this Claude Code vision/computer-use-style path, use MCP rather than shelling out to the CLI. CLI screenshots can still capture windows, but they do not expose the `mcp__cua-computer-use__screenshot` tool name that Claude Code appears to use as the image-grounding cue.
|
||||
|
||||
<Callout type="info">
|
||||
This does not call Anthropic APIs or expose Anthropic's native computer-use API tool. It is a CuaDriver MCP compatibility mode for Claude Code.
|
||||
</Callout>
|
||||
|
||||
## GitHub Copilot CLI
|
||||
|
||||
Add to `~/.copilot/mcp-config.json`:
|
||||
|
||||
@@ -6,8 +6,8 @@ description: Command Line Interface reference for Cua Driver
|
||||
{/*
|
||||
AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY
|
||||
Generated by: npx tsx scripts/docs-generators/cua-driver.ts
|
||||
Source: libs/cua-driver/Sources/**/*.swift
|
||||
Version: 0.1.0
|
||||
Source: recursive Swift sources under libs/cua-driver/Sources
|
||||
Version: 0.1.1
|
||||
*/}
|
||||
|
||||
import { Callout } from 'fumadocs-ui/components/callout';
|
||||
@@ -16,7 +16,7 @@ import { VersionHeader } from '@/components/version-selector';
|
||||
<VersionHeader
|
||||
versions={[{"version":"0.1","href":"/cua-driver/reference/cli-reference","isCurrent":true}]}
|
||||
currentVersion="0.1"
|
||||
fullVersion="0.1.0"
|
||||
fullVersion="0.1.1"
|
||||
packageName="cua-driver"
|
||||
installCommand="curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh | bash"
|
||||
/>
|
||||
@@ -36,8 +36,8 @@ provided, the tool is called with no arguments.
|
||||
|
||||
Examples:
|
||||
cua-driver call list_apps
|
||||
cua-driver call launch_app '{"bundle_id":"com.apple.finder"}'
|
||||
echo '{"pid":844,"window_id":1234}' | cua-driver call get_window_state
|
||||
cua-driver call launch_app '{"bundle_id":"com.apple.finder"}'
|
||||
echo '{"pid":844,"window_id":1234}' | cua-driver call get_window_state
|
||||
|
||||
**Arguments:**
|
||||
|
||||
@@ -106,6 +106,12 @@ Print a tool's full description and JSON input schema.
|
||||
|
||||
Run the stdio MCP server.
|
||||
|
||||
**Flags:**
|
||||
|
||||
| Name | Description |
|
||||
| ---- | ----------- |
|
||||
| `--claude-code-computer-use-compat` | Expose normal CuaDriver tools, replacing only `screenshot` with a Claude Code-friendly window-only screenshot that establishes the vision coordinate frame. |
|
||||
|
||||
### cua-driver serve
|
||||
|
||||
Run cua-driver as a long-running daemon on a Unix domain socket.
|
||||
@@ -158,6 +164,12 @@ Print MCP server config or a client-specific install command.
|
||||
| ---- | ---- | ------- | ----------- |
|
||||
| `--client` | String | — | Client to print the install command for: claude \| codex \| cursor \| openclaw \| opencode \| hermes \| pi. Omit for the generic JSON snippet. |
|
||||
|
||||
**Flags:**
|
||||
|
||||
| Name | Description |
|
||||
| ---- | ----------- |
|
||||
| `--claude-code-computer-use-compat` | Print config for Claude Code's window-scoped screenshot compatibility mode registered as `cua-computer-use`. |
|
||||
|
||||
## Trajectory recording
|
||||
|
||||
### cua-driver recording
|
||||
|
||||
@@ -6,8 +6,8 @@ description: Reference for every MCP tool cua-driver exposes
|
||||
{/*
|
||||
AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY
|
||||
Generated by: npx tsx scripts/docs-generators/cua-driver.ts
|
||||
Source: libs/cua-driver/Sources/**/*.swift
|
||||
Version: 0.1.0
|
||||
Source: recursive Swift sources under libs/cua-driver/Sources
|
||||
Version: 0.1.1
|
||||
*/}
|
||||
|
||||
import { Callout } from 'fumadocs-ui/components/callout';
|
||||
@@ -25,7 +25,7 @@ Tool names are `snake_case`. Responses are MCP `CallTool.Result` envelopes: a te
|
||||
Report TCC permission status for Accessibility and Screen Recording.
|
||||
By default also raises the system permission dialogs for any missing
|
||||
grants — Apple's request APIs are no-ops when the grant is already
|
||||
active, so this is safe to call repeatedly. Pass {"prompt": false}
|
||||
active, so this is safe to call repeatedly. Pass {"prompt": false}
|
||||
for a purely read-only status check.
|
||||
|
||||
**Arguments:**
|
||||
@@ -208,18 +208,18 @@ at startup. Sibling to `set_config` / `cua-driver config`.
|
||||
|
||||
Current schema:
|
||||
|
||||
{
|
||||
{
|
||||
"schema_version": 1,
|
||||
"capture_mode": "vision" | "ax" | "som",
|
||||
"agent_cursor": {
|
||||
"agent_cursor": {
|
||||
"enabled": true,
|
||||
"motion": {
|
||||
"motion": {
|
||||
"start_handle": 0.3, "end_handle": 0.3,
|
||||
"arc_size": 0.25, "arc_flow": 0.0,
|
||||
"spring": 0.72
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
**Arguments:** none.
|
||||
|
||||
@@ -436,10 +436,10 @@ later to resolve a target.
|
||||
- `additional_arguments` (array of string, optional): Extra command-line arguments passed to the launched process. Passed directly as argv entries — no shell expansion. Example: ["--user-data-dir=/tmp/cua-session", "--no-first-run"] for an isolated Chrome session.
|
||||
- `bundle_id` (string, optional): App bundle identifier, e.g. com.apple.calculator.
|
||||
- `creates_new_application_instance` (boolean, optional): Force a brand-new process even if the app is already running. Useful for isolated browser sessions: pass creates_new_application_instance=true together with additional_arguments=["--user-data-dir=/tmp/session-a", "--no-first-run", "--no-default-browser-check"] to launch a sandboxed Chrome that cannot see the user's real profile, cookies, or extensions. Each session gets its own pid and window identity and can be controlled independently.
|
||||
- `electron_debugging_port` (integer, optional): Launch an Electron app with --remote-debugging-port=<N> so the `page` tool gets full renderer/DOM access. Use 9222 unless running multiple Electron apps. Ignored for non-Electron apps.
|
||||
- `electron_debugging_port` (integer, optional): Launch an Electron app with --remote-debugging-port=<N> so the `page` tool gets full renderer/DOM access. Use 9222 unless running multiple Electron apps. Ignored for non-Electron apps.
|
||||
- `name` (string, optional): App display name. Used only when bundle_id is absent.
|
||||
- `urls` (array of string, optional): Optional file:// or http(s):// URLs (or plain paths with ~ expansion) to hand to the launched app via application(_:open:). For Finder, pass a folder URL or path to open a backgrounded Finder window rooted at that folder — no activation. Apps that don't implement application(_:open:) launch normally and ignore these.
|
||||
- `webkit_inspector_port` (integer, optional): Launch a Tauri/WKWebView app with WEBKIT_INSPECTOR_SERVER=127.0.0.1:<N> so the `page` tool can reach its WebKit inspector. Use 9226 (reserved WebKit range: 9226–9228, distinct from Electron's 9222–9225). Requires developerExtrasEnabled=true in the WKWebView config (default in Tauri debug builds).
|
||||
- `webkit_inspector_port` (integer, optional): Launch a Tauri/WKWebView app with WEBKIT_INSPECTOR_SERVER=127.0.0.1:<N> so the `page` tool can reach its WebKit inspector. Use 9226 (reserved WebKit range: 9226–9228, distinct from Electron's 9222–9225). Requires developerExtrasEnabled=true in the WKWebView config (default in Tauri debug builds).
|
||||
|
||||
### list_apps
|
||||
|
||||
@@ -457,7 +457,7 @@ apps come from scanning /Applications, /Applications/Utilities,
|
||||
Use this for "is X installed?" as well as "is X running?". For
|
||||
per-window state — on-screen, on-current-Space, minimized,
|
||||
window titles — call list_windows instead. For just opening an
|
||||
app — running or not — call launch_app({bundle_id: ...}) directly;
|
||||
app — running or not — call launch_app({bundle_id: ...}) directly;
|
||||
list_apps is not a prerequisite.
|
||||
|
||||
**Arguments:** none.
|
||||
@@ -696,10 +696,9 @@ propagate modifier keys).
|
||||
### screenshot
|
||||
|
||||
Capture a screenshot using ScreenCaptureKit. Returns base64-encoded
|
||||
image data in the requested format (default png).
|
||||
image data for a single window in the requested format (default png).
|
||||
|
||||
Without `window_id`, captures the full main display. With `window_id`,
|
||||
captures just that window (get the id from `list_windows`).
|
||||
`window_id` is required. Get window ids from `list_windows`.
|
||||
|
||||
Requires the Screen Recording TCC grant — call `check_permissions`
|
||||
first if unsure.
|
||||
@@ -708,7 +707,11 @@ first if unsure.
|
||||
|
||||
- `format` (string, optional): Image format. Default: png.
|
||||
- `quality` (integer, optional): JPEG quality 1-95; ignored for png.
|
||||
- `window_id` (integer, optional): Optional CGWindowID / kCGWindowNumber to capture just that window.
|
||||
- `window_id` (integer, required): Required CGWindowID / kCGWindowNumber to capture.
|
||||
|
||||
```json
|
||||
{"window_id":10725}
|
||||
```
|
||||
|
||||
### scroll
|
||||
|
||||
@@ -843,13 +846,13 @@ optional; omitted fields keep their current value.
|
||||
string) to revert to the procedural arrow.
|
||||
|
||||
Example — brand-colored arrow:
|
||||
{"gradient_colors": ["#A855F7", "#6366F1"], "bloom_color": "#A855F7"}
|
||||
{"gradient_colors": ["#A855F7", "#6366F1"], "bloom_color": "#A855F7"}
|
||||
|
||||
Example — custom PNG cursor:
|
||||
{"image_path": "~/cursors/my-cursor.png"}
|
||||
{"image_path": "~/cursors/my-cursor.png"}
|
||||
|
||||
Example — revert to default:
|
||||
{"gradient_colors": [], "bloom_color": "", "image_path": ""}
|
||||
{"gradient_colors": [], "bloom_color": "", "image_path": ""}
|
||||
|
||||
**Arguments:**
|
||||
|
||||
@@ -931,7 +934,7 @@ Set a value on a UI element. Two modes depending on element role:
|
||||
- **AXPopUpButton / select dropdown**: finds the child option whose
|
||||
title or value matches `value` (case-insensitive) and AXPresses it
|
||||
directly — the native macOS popup menu is never opened, so focus
|
||||
is never stolen. Use this for HTML <select> elements in Safari or
|
||||
is never stolen. Use this for HTML <select> elements in Safari or
|
||||
any native NSPopUpButton. Pass the option's display label as `value`
|
||||
(e.g. "Blue", not "blue").
|
||||
|
||||
|
||||
@@ -143,4 +143,4 @@ async with Sandbox.ephemeral(Image.linux(), local=True) as sb: ...
|
||||
async with Sandbox.ephemeral(Image.macos(), local=True) as sb: ...
|
||||
```
|
||||
|
||||
See [Self-Hosted Sandboxes](/cua/guide/get-started/self-hosted-sandboxes) for local setup instructions.
|
||||
See [Set Up a Sandbox](/cua/guide/get-started/set-up-sandbox) for local setup instructions.
|
||||
|
||||
@@ -3,3 +3,21 @@
|
||||
Background computer-use driver for any agent. Speaks MCP over stdio; drives native macOS apps without stealing focus.
|
||||
|
||||
**[Documentation](https://cua.ai/docs/cua-driver)** - Installation, guides, and API reference.
|
||||
|
||||
## Claude Code computer-use compatibility
|
||||
|
||||
Standard Claude Code MCP registration:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-driver -- cua-driver mcp
|
||||
```
|
||||
|
||||
If you want Claude Code's vision/computer-use-style flow to ground on CuaDriver window screenshots, register the compatibility mode:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-computer-use -- cua-driver mcp --claude-code-computer-use-compat
|
||||
```
|
||||
|
||||
This keeps CuaDriver's normal MCP tools and changes only `screenshot`, which requires `pid` and `window_id` and captures that window only.
|
||||
|
||||
Use MCP for this Claude Code vision/computer-use-style path. CLI screenshots still work as CuaDriver calls, but they do not expose the `mcp__cua-computer-use__screenshot` tool name that Claude Code appears to use as the image-grounding cue.
|
||||
|
||||
@@ -85,6 +85,18 @@ also invoke it explicitly:
|
||||
/cua-driver
|
||||
```
|
||||
|
||||
## Claude Code MCP compatibility mode
|
||||
|
||||
For normal skill-driven use, prefer the CLI or the standard MCP server. If you want Claude Code's vision/computer-use-style flow to ground on CuaDriver screenshots, register the compatibility server:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-computer-use -- cua-driver mcp --claude-code-computer-use-compat
|
||||
```
|
||||
|
||||
This mode exposes the normal CuaDriver tools and changes only `screenshot`. The compatibility screenshot requires `pid` and `window_id`, captures that window only, and establishes a window-local pixel coordinate frame. It does not call Anthropic APIs or expose Anthropic's native computer-use API tool.
|
||||
|
||||
Use MCP for this Claude Code vision/computer-use-style path. CLI screenshots still work as CuaDriver calls, but they do not expose the `mcp__cua-computer-use__screenshot` tool name that Claude Code appears to use as the image-grounding cue.
|
||||
|
||||
## Files
|
||||
|
||||
- `SKILL.md` — the main skill body (~500 lines). Loaded on first
|
||||
|
||||
@@ -140,6 +140,18 @@ Every reference to `click(...)`, `get_window_state(...)` etc. in this
|
||||
skill means `cua-driver click '{...}'` — translate to MCP form only
|
||||
when MCP is requested.
|
||||
|
||||
### Claude Code computer-use compatibility mode
|
||||
|
||||
For normal Claude Code use, keep the default CLI or `cua-driver` MCP server path above. If the user explicitly wants Claude Code's vision/computer-use-style flow, they can register:
|
||||
|
||||
```bash
|
||||
claude mcp add --transport stdio cua-computer-use -- cua-driver mcp --claude-code-computer-use-compat
|
||||
```
|
||||
|
||||
Observation: Claude Code vision flows appear to treat a screenshot MCP tool as the image-grounding anchor. This compatibility mode keeps the normal CuaDriver tools and changes only `screenshot`. The compatibility `screenshot` requires `pid` and `window_id`, captures only that target window, and returns the window-local pixel coordinate frame. Start with `launch_app` or `list_windows`, then call `screenshot({pid, window_id})`; do not assume desktop coordinates or a full-screen capture.
|
||||
|
||||
Use MCP for this Claude Code vision/computer-use-style path. Do not shell out to `cua-driver screenshot` as a substitute: CLI screenshots still work as CuaDriver calls, but they do not expose the `mcp__cua-computer-use__screenshot` tool name that Claude Code appears to use as the image-grounding cue.
|
||||
|
||||
Intent → tool mapping. If you find yourself reaching for the right
|
||||
column, something has gone wrong — re-read "The no-foreground
|
||||
contract" above:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import AppKit
|
||||
import ArgumentParser
|
||||
import CuaDriverServer
|
||||
import Foundation
|
||||
@@ -171,6 +172,7 @@ struct CallCommand: AsyncParsableCommand {
|
||||
|
||||
let result: CallTool.Result
|
||||
do {
|
||||
await bootstrapAppKitForInProcessCallIfNeeded(toolName: toolName)
|
||||
// Route through `registry.call(...)` so the recording hook
|
||||
// (and any future cross-cutting wrapper) fires consistently
|
||||
// with the MCP and daemon paths. The in-process one-shot
|
||||
@@ -264,6 +266,35 @@ struct CallCommand: AsyncParsableCommand {
|
||||
}
|
||||
}
|
||||
|
||||
/// Switches the process to the `.accessory` activation policy before an
/// in-process `call` of one of the tools listed below. Any other tool name
/// returns immediately without touching `NSApplication`.
///
/// - Parameter toolName: Name of the tool that is about to be invoked.
private func bootstrapAppKitForInProcessCallIfNeeded(toolName: String) async {
    // Tool names for which the activation policy is set before dispatch.
    let toolsNeedingAppKit: Set<String> = [
        "check_permissions", "click", "double_click", "drag",
        "get_accessibility_tree", "get_cursor_position", "get_window_state",
        "hotkey", "launch_app", "list_apps", "list_windows", "move_cursor",
        "press_key", "right_click", "screenshot", "scroll", "set_value",
        "type_text", "type_text_chars", "zoom",
    ]

    if !toolsNeedingAppKit.contains(toolName) {
        return
    }

    // The activation policy is applied from the main actor; the Bool
    // result of `setActivationPolicy` is deliberately discarded.
    await MainActor.run {
        _ = NSApplication.shared.setActivationPolicy(.accessory)
    }
}
|
||||
|
||||
struct ListToolsCommand: AsyncParsableCommand {
|
||||
static let configuration = CommandConfiguration(
|
||||
commandName: "list-tools",
|
||||
|
||||
@@ -18,6 +18,8 @@ import MCP
|
||||
///
|
||||
/// Keys are dotted snake_case paths:
|
||||
/// - `schema_version`
|
||||
/// - `capture_mode`
|
||||
/// - `max_image_dimension`
|
||||
/// - `agent_cursor.enabled`
|
||||
/// - `agent_cursor.motion.{start_handle,end_handle,arc_size,arc_flow,spring}`
|
||||
struct ConfigCommand: AsyncParsableCommand {
|
||||
@@ -31,6 +33,8 @@ struct ConfigCommand: AsyncParsableCommand {
|
||||
|
||||
Examples:
|
||||
cua-driver config # print full config
|
||||
cua-driver config get capture_mode
|
||||
cua-driver config set capture_mode vision
|
||||
cua-driver config get agent_cursor.enabled
|
||||
cua-driver config set agent_cursor.enabled false
|
||||
cua-driver config set agent_cursor.motion.arc_size 0.4
|
||||
@@ -147,6 +151,10 @@ struct ConfigGetCommand: AsyncParsableCommand {
|
||||
switch key {
|
||||
case "schema_version":
|
||||
print(config.schemaVersion)
|
||||
case "capture_mode":
|
||||
print(config.captureMode.rawValue)
|
||||
case "max_image_dimension":
|
||||
print(config.maxImageDimension)
|
||||
case "agent_cursor.enabled":
|
||||
print(config.agentCursor.enabled)
|
||||
case "agent_cursor.motion.start_handle":
|
||||
|
||||
@@ -43,33 +43,62 @@ struct MCPConfigCommand: ParsableCommand {
|
||||
help: "Client to print the install command for: claude | codex | cursor | openclaw | opencode | hermes | pi. Omit for the generic JSON snippet.")
|
||||
var client: String?
|
||||
|
||||
@Flag(
|
||||
name: .long,
|
||||
help: "Print config for Claude Code's window-scoped screenshot compatibility mode registered as `cua-computer-use`."
|
||||
)
|
||||
var claudeCodeComputerUseCompat: Bool = false
|
||||
|
||||
func run() throws {
|
||||
let binary = resolvedBinaryPath()
|
||||
let shellBinary = shellEscape(binary)
|
||||
// Observed Claude Code behavior: the exact config key "computer-use"
|
||||
// is reserved, so external stdio registrations use a distinct key.
|
||||
let serverName = claudeCodeComputerUseCompat ? "cua-computer-use" : "cua-driver"
|
||||
let args = claudeCodeComputerUseCompat
|
||||
? "[\"mcp\", \"--claude-code-computer-use-compat\"]"
|
||||
: "[\"mcp\"]"
|
||||
let commandArgs = claudeCodeComputerUseCompat
|
||||
? "mcp --claude-code-computer-use-compat"
|
||||
: "mcp"
|
||||
switch client?.lowercased() {
|
||||
case nil, "":
|
||||
print(genericMcpServersSnippet(binary: binary, includeType: false))
|
||||
print(genericMcpServersSnippet(
|
||||
serverName: serverName,
|
||||
binary: binary,
|
||||
args: args,
|
||||
includeType: false
|
||||
))
|
||||
case "claude":
|
||||
print("claude mcp add --transport stdio cua-driver -- \(binary) mcp")
|
||||
print("claude mcp add --transport stdio \(serverName) -- \(shellBinary) \(commandArgs)")
|
||||
case "codex":
|
||||
print("codex mcp add cua-driver -- \(binary) mcp")
|
||||
print("codex mcp add \(serverName) -- \(shellBinary) \(commandArgs)")
|
||||
case "cursor":
|
||||
// Cursor has no CLI — emit JSON the user pastes into
|
||||
// ~/.cursor/mcp.json (global) or .cursor/mcp.json (project).
|
||||
print(genericMcpServersSnippet(binary: binary, includeType: true))
|
||||
print(genericMcpServersSnippet(
|
||||
serverName: serverName,
|
||||
binary: binary,
|
||||
args: args,
|
||||
includeType: true
|
||||
))
|
||||
case "openclaw":
|
||||
// OpenClaw has a CLI registry — set with a JSON arg.
|
||||
print("openclaw mcp set cua-driver '{\"command\":\"\(binary)\",\"args\":[\"mcp\"]}'")
|
||||
print("openclaw mcp set \(serverName) '{\"command\":\"\(binary)\",\"args\":\(args)}'")
|
||||
case "opencode":
|
||||
// OpenCode (sst/opencode) uses opencode.json with type:"local"
|
||||
// and command as a single merged array.
|
||||
let commandArray = claudeCodeComputerUseCompat
|
||||
? "[\"\(binary)\", \"mcp\", \"--claude-code-computer-use-compat\"]"
|
||||
: "[\"\(binary)\", \"mcp\"]"
|
||||
let snippet = """
|
||||
// paste under "mcp" in opencode.json (or opencode.jsonc):
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"mcp": {
|
||||
"cua-driver": {
|
||||
"\(serverName)": {
|
||||
"type": "local",
|
||||
"command": ["\(binary)", "mcp"],
|
||||
"command": \(commandArray),
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
@@ -83,9 +112,9 @@ struct MCPConfigCommand: ParsableCommand {
|
||||
# paste under mcp_servers in ~/.hermes/config.yaml,
|
||||
# then run /reload-mcp inside Hermes:
|
||||
mcp_servers:
|
||||
cua-driver:
|
||||
\(serverName):
|
||||
command: "\(binary)"
|
||||
args: ["mcp"]
|
||||
args: \(args)
|
||||
"""
|
||||
print(snippet)
|
||||
case "pi":
|
||||
@@ -117,14 +146,19 @@ struct MCPConfigCommand: ParsableCommand {
|
||||
}
|
||||
}
|
||||
|
||||
private func genericMcpServersSnippet(binary: String, includeType: Bool) -> String {
|
||||
private func genericMcpServersSnippet(
|
||||
serverName: String,
|
||||
binary: String,
|
||||
args: String,
|
||||
includeType: Bool
|
||||
) -> String {
|
||||
let typeLine = includeType ? ",\n \"type\": \"stdio\"" : ""
|
||||
return """
|
||||
{
|
||||
"mcpServers": {
|
||||
"cua-driver": {
|
||||
"\(serverName)": {
|
||||
"command": "\(binary)",
|
||||
"args": ["mcp"]\(typeLine)
|
||||
"args": \(args)\(typeLine)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -140,6 +174,10 @@ struct MCPConfigCommand: ParsableCommand {
|
||||
}
|
||||
return CommandLine.arguments.first ?? "cua-driver"
|
||||
}
|
||||
|
||||
/// Wraps `value` in single quotes for use as one POSIX-shell word.
///
/// Each embedded single quote is rewritten as `'"'"'` (close the quoted
/// run, emit a double-quoted `'`, reopen the quoted run).
///
/// - Parameter value: Raw text, e.g. a binary path.
/// - Returns: The single-quoted form of `value`.
private func shellEscape(_ value: String) -> String {
    let quoteBreakout = "'\"'\"'"
    let body = value.replacingOccurrences(of: "'", with: quoteBreakout)
    return "'" + body + "'"
}
|
||||
}
|
||||
|
||||
/// Top-level entry point. Before handing to ArgumentParser, rewrite
|
||||
@@ -307,6 +345,17 @@ struct MCPCommand: ParsableCommand {
|
||||
abstract: "Run the stdio MCP server."
|
||||
)
|
||||
|
||||
@Flag(
|
||||
name: .long,
|
||||
help: """
|
||||
Expose normal CuaDriver tools, replacing only `screenshot` with a \
|
||||
Claude Code-friendly window-only screenshot that establishes the \
|
||||
vision coordinate frame. This does not use Anthropic's native \
|
||||
computer_2025 API tool.
|
||||
"""
|
||||
)
|
||||
var claudeCodeComputerUseCompat: Bool = false
|
||||
|
||||
func run() throws {
|
||||
// MCP stdio runs for the lifetime of the host process, so we
|
||||
// bootstrap AppKit here — the agent cursor overlay (disabled
|
||||
@@ -342,7 +391,12 @@ struct MCPCommand: ParsableCommand {
|
||||
AgentCursor.shared.apply(config: config.agentCursor)
|
||||
}
|
||||
|
||||
let server = await CuaDriverMCPServer.make()
|
||||
let server = await CuaDriverMCPServer.make(
|
||||
serverName: claudeCodeComputerUseCompat ? "computer-use" : "cua-driver",
|
||||
registry: claudeCodeComputerUseCompat
|
||||
? .claudeCodeComputerUseCompat
|
||||
: .default
|
||||
)
|
||||
let transport = StdioTransport()
|
||||
try await server.start(transport: transport)
|
||||
await server.waitUntilCompleted()
|
||||
|
||||
@@ -94,7 +94,9 @@ enum CLIDocExtractor {
|
||||
discussion: nil,
|
||||
arguments: [],
|
||||
options: [],
|
||||
flags: [],
|
||||
flags: [
|
||||
FlagDoc(name: "claude-code-computer-use-compat", shortName: nil, help: "Expose normal CuaDriver tools, replacing only `screenshot` with a Claude Code-friendly window-only screenshot that establishes the vision coordinate frame.", defaultValue: false),
|
||||
],
|
||||
subcommands: []
|
||||
)
|
||||
}
|
||||
@@ -412,7 +414,9 @@ enum CLIDocExtractor {
|
||||
options: [
|
||||
OptionDoc(name: "client", shortName: nil, help: "Client to print the install command for: claude | codex | cursor | openclaw | opencode | hermes | pi. Omit for the generic JSON snippet.", type: "String", defaultValue: nil, isOptional: true),
|
||||
],
|
||||
flags: [],
|
||||
flags: [
|
||||
FlagDoc(name: "claude-code-computer-use-compat", shortName: nil, help: "Print config for Claude Code's window-scoped screenshot compatibility mode registered as `cua-computer-use`.", defaultValue: false),
|
||||
],
|
||||
subcommands: []
|
||||
)
|
||||
}
|
||||
|
||||
@@ -98,15 +98,21 @@ public enum MouseInput {
|
||||
/// screen points (top-left origin) and deliver them to `pid`.
|
||||
/// `modifiers` accepts the same names as `KeyboardInput`
|
||||
/// (`cmd` / `command`, `shift`, `option` / `alt`, `ctrl` /
|
||||
/// `control`, `fn`); unknown names are ignored. Events are
|
||||
/// posted via auth-signed `SLEventPostToPid` AND the public HID
|
||||
/// tap — see the file-level doc for the rationale.
|
||||
/// `control`, `fn`); unknown names are ignored.
|
||||
///
|
||||
/// When `useFrontmostHIDPath` is true (the default) and `pid` is
|
||||
/// frontmost, events are posted through the public HID tap, which
|
||||
/// can move the global cursor and is required for some viewport
|
||||
/// apps. When false, the function skips that path and uses only
|
||||
/// pid-routed delivery, preserving the system cursor for callers
|
||||
/// that rely on background-style dispatch.
|
||||
public static func click(
|
||||
at point: CGPoint,
|
||||
toPid pid: pid_t,
|
||||
button: Button,
|
||||
count: Int = 1,
|
||||
modifiers: [String] = []
|
||||
modifiers: [String] = [],
|
||||
useFrontmostHIDPath: Bool = true
|
||||
) throws {
|
||||
// When the target is frontmost, route via the public HID tap
|
||||
// (`CGEventPost(tap: .cghidEventTap)`) with a preceding
|
||||
@@ -123,7 +129,7 @@ public enum MouseInput {
|
||||
// (Chrome/Slack/etc); they just don't work on viewports.
|
||||
let targetIsFrontmost =
|
||||
NSRunningApplication(processIdentifier: pid)?.isActive ?? false
|
||||
if targetIsFrontmost {
|
||||
if useFrontmostHIDPath && targetIsFrontmost {
|
||||
try clickFrontmostViaHIDTap(
|
||||
at: point, button: button, count: count, modifiers: modifiers)
|
||||
return
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
import AppKit
|
||||
import CuaDriverCore
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// A window entry paired with a scale factor, passed between the
/// compatibility screenshot tool and its session actor.
private struct CompatWindowContext: Sendable {
    /// The window this context refers to.
    let window: WindowInfo

    /// Scale factor recorded alongside `window`.
    let scaleFactor: Double
}
|
||||
|
||||
/// Actor holding the window context last passed to `setActiveWindow`.
/// A single shared instance is exposed through `shared`.
private actor ClaudeCodeComputerUseCompatSession {
    static let shared = ClaudeCodeComputerUseCompatSession()

    /// Most recently stored context; `nil` until one is set (or after it is cleared).
    private var latestContext: CompatWindowContext?

    /// Replaces the stored context. Passing `nil` clears it.
    func setActiveWindow(_ context: CompatWindowContext?) {
        self.latestContext = context
    }

    /// Returns the stored context, if any.
    func currentActiveWindow() -> CompatWindowContext? {
        return latestContext
    }
}
|
||||
|
||||
public enum ClaudeCodeComputerUseCompatTools {
|
||||
private static let capture = WindowCapture()
|
||||
|
||||
public static let screenshot = ToolHandler(
|
||||
tool: Tool(
|
||||
name: "screenshot",
|
||||
description: """
|
||||
Capture a target window and return a JPEG image. Coordinates accepted
|
||||
by CuaDriver's pixel tools are pixels in this window screenshot's
|
||||
coordinate space.
|
||||
|
||||
This is the compatibility anchor for Claude Code vision flows:
|
||||
CuaDriver remains window-scoped, and all other tools are the
|
||||
normal CuaDriver tools.
|
||||
""",
|
||||
inputSchema: [
|
||||
"type": "object",
|
||||
"required": ["pid", "window_id"],
|
||||
"properties": [
|
||||
"pid": [
|
||||
"type": "integer",
|
||||
"description": "Target process ID from `list_windows` or `launch_app`.",
|
||||
],
|
||||
"window_id": [
|
||||
"type": "integer",
|
||||
"description": "Target CGWindowID from `list_windows` or `launch_app`.",
|
||||
],
|
||||
],
|
||||
"additionalProperties": false,
|
||||
],
|
||||
annotations: .init(
|
||||
readOnlyHint: true,
|
||||
destructiveHint: false,
|
||||
idempotentHint: false,
|
||||
openWorldHint: false
|
||||
)
|
||||
),
|
||||
invoke: { arguments in
|
||||
do {
|
||||
guard let rawPid = arguments?["pid"]?.intValue else {
|
||||
return errorResult("Missing required integer field `pid`.")
|
||||
}
|
||||
guard let pid = Int32(exactly: rawPid) else {
|
||||
return errorResult("pid \(rawPid) is outside the supported Int32 range.")
|
||||
}
|
||||
guard let rawWindowID = arguments?["window_id"]?.intValue else {
|
||||
return errorResult("Missing required integer field `window_id`.")
|
||||
}
|
||||
guard let windowID = UInt32(exactly: rawWindowID) else {
|
||||
return errorResult(
|
||||
"window_id \(rawWindowID) is outside the supported UInt32 range.")
|
||||
}
|
||||
guard let context = compatWindowContext(
|
||||
forPid: pid,
|
||||
windowID: windowID
|
||||
) else {
|
||||
return errorResult(
|
||||
"No visible layer-0 window \(rawWindowID) found for pid \(rawPid). Use `list_windows` to choose an on-screen target window."
|
||||
)
|
||||
}
|
||||
let shot = try await capture.captureWindow(
|
||||
windowID: windowID,
|
||||
format: .jpeg,
|
||||
quality: 85
|
||||
)
|
||||
await ClaudeCodeComputerUseCompatSession.shared.setActiveWindow(
|
||||
CompatWindowContext(
|
||||
window: context.window,
|
||||
scaleFactor: shot.scaleFactor
|
||||
)
|
||||
)
|
||||
let base64 = shot.imageData.base64EncodedString()
|
||||
return CallTool.Result(content: [
|
||||
.image(data: base64, mimeType: "image/jpeg", annotations: nil, _meta: nil),
|
||||
.text(
|
||||
text: "Captured window screenshot \(shot.width)x\(shot.height) for \(context.window.owner) [pid: \(context.window.pid), window_id: \(context.window.id)]. Use CuaDriver pixel tools with this window-local coordinate space.",
|
||||
annotations: nil,
|
||||
_meta: nil
|
||||
),
|
||||
])
|
||||
} catch CaptureError.permissionDenied {
|
||||
return errorResult(
|
||||
"Screen Recording permission is not granted for CuaDriver.")
|
||||
} catch {
|
||||
return errorResult("Screenshot failed: \(error)")
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
/// Every compatibility shim this type provides; currently only `screenshot`.
public static let all: [ToolHandler] = [screenshot]
|
||||
}
|
||||
|
||||
extension ToolRegistry {
    /// Registry used for the Claude Code computer-use compatibility mode.
    ///
    /// Contains every handler from `ToolRegistry.default` whose tool name is
    /// not claimed by a compatibility shim, followed by the shims themselves,
    /// so a shim takes the place of the same-named default tool.
    public static let claudeCodeComputerUseCompat: ToolRegistry = {
        let shims = ClaudeCodeComputerUseCompatTools.all
        let overriddenNames = Set(shims.map { $0.tool.name })

        var merged: [ToolHandler] = []
        for handler in ToolRegistry.default.handlers.values
        where !overriddenNames.contains(handler.tool.name) {
            merged.append(handler)
        }
        merged.append(contentsOf: shims)

        return ToolRegistry(handlers: merged)
    }()
}
|
||||
|
||||
/// Looks up the first visible window that belongs to `pid`, has the given
/// `windowID`, sits on layer 0, is on screen, and is larger than 1x1.
///
/// - Returns: A context for that window paired with `defaultScaleFactor()`,
///   or `nil` when no window satisfies every condition.
private func compatWindowContext(
    forPid pid: Int32,
    windowID: UInt32
) -> CompatWindowContext? {
    for candidate in WindowEnumerator.visibleWindows() {
        // Identity: owning process and window id must both match.
        guard candidate.pid == pid,
              UInt32(exactly: candidate.id) == windowID
        else { continue }

        // Only on-screen windows on layer 0 qualify.
        guard candidate.layer == 0, candidate.isOnScreen else { continue }

        // Skip windows no larger than one point in either dimension.
        guard candidate.bounds.width > 1, candidate.bounds.height > 1 else { continue }

        return CompatWindowContext(
            window: candidate,
            scaleFactor: defaultScaleFactor()
        )
    }
    return nil
}
|
||||
|
||||
/// Scale factor reported by `ScreenInfo.mainScreenSize()`, falling back to
/// `1.0` when that information is unavailable.
private func defaultScaleFactor() -> Double {
    guard let scale = ScreenInfo.mainScreenSize()?.scaleFactor else {
        return 1.0
    }
    return scale
}
|
||||
|
||||
/// Wraps `text` in a single-item tool result flagged as an error.
private func errorResult(_ text: String) -> CallTool.Result {
    let failure = CallTool.Result(
        content: [.text(text: text, annotations: nil, _meta: nil)],
        isError: true
    )
    return failure
}
|
||||
@@ -7,11 +7,12 @@ public enum CuaDriverMCPServer {
|
||||
/// The caller is responsible for calling ``Server/start(transport:initializeHook:)``
|
||||
/// and ``Server/waitUntilCompleted()``.
|
||||
public static func make(
|
||||
serverName: String = "cua-driver",
|
||||
version: String = CuaDriverCore.version,
|
||||
registry: ToolRegistry = .default
|
||||
) async -> Server {
|
||||
let server = Server(
|
||||
name: "cua-driver",
|
||||
name: serverName,
|
||||
version: version,
|
||||
capabilities: Server.Capabilities(tools: .init(listChanged: false))
|
||||
)
|
||||
|
||||
@@ -11,16 +11,16 @@ public enum ScreenshotTool {
|
||||
name: "screenshot",
|
||||
description: """
|
||||
Capture a screenshot using ScreenCaptureKit. Returns base64-encoded
|
||||
image data in the requested format (default png).
|
||||
image data for a single window in the requested format (default png).
|
||||
|
||||
Without `window_id`, captures the full main display. With `window_id`,
|
||||
captures just that window (get the id from `list_windows`).
|
||||
`window_id` is required. Get window ids from `list_windows`.
|
||||
|
||||
Requires the Screen Recording TCC grant — call `check_permissions`
|
||||
first if unsure.
|
||||
""",
|
||||
inputSchema: [
|
||||
"type": "object",
|
||||
"required": ["window_id"],
|
||||
"properties": [
|
||||
"format": [
|
||||
"type": "string",
|
||||
@@ -36,7 +36,7 @@ public enum ScreenshotTool {
|
||||
"window_id": [
|
||||
"type": "integer",
|
||||
"description":
|
||||
"Optional CGWindowID / kCGWindowNumber to capture just that window.",
|
||||
"Required CGWindowID / kCGWindowNumber to capture.",
|
||||
],
|
||||
],
|
||||
"additionalProperties": false,
|
||||
@@ -52,36 +52,42 @@ public enum ScreenshotTool {
|
||||
let format =
|
||||
ImageFormat(rawValue: arguments?["format"]?.stringValue ?? "png") ?? .png
|
||||
let quality = arguments?["quality"]?.intValue ?? 95
|
||||
let windowID = arguments?["window_id"]?.intValue
|
||||
guard let rawWindowID = arguments?["window_id"]?.intValue else {
|
||||
return CallTool.Result(
|
||||
content: [
|
||||
.text(
|
||||
text: "Missing required `window_id`. Use `list_windows` first, then call `screenshot` for one window.",
|
||||
annotations: nil,
|
||||
_meta: nil
|
||||
)
|
||||
],
|
||||
isError: true
|
||||
)
|
||||
}
|
||||
guard let windowID = UInt32(exactly: rawWindowID) else {
|
||||
return CallTool.Result(
|
||||
content: [
|
||||
.text(
|
||||
text: "Invalid `window_id` \(rawWindowID). Use `list_windows` first, then pass a valid UInt32 window id.",
|
||||
annotations: nil,
|
||||
_meta: nil
|
||||
)
|
||||
],
|
||||
isError: true
|
||||
)
|
||||
}
|
||||
|
||||
do {
|
||||
let shot: Screenshot
|
||||
if let windowID {
|
||||
shot = try await capture.captureWindow(
|
||||
windowID: UInt32(windowID),
|
||||
format: format,
|
||||
quality: quality
|
||||
)
|
||||
} else {
|
||||
shot = try await capture.captureMainDisplay(
|
||||
format: format,
|
||||
quality: quality
|
||||
)
|
||||
}
|
||||
let shot = try await capture.captureWindow(
|
||||
windowID: windowID,
|
||||
format: format,
|
||||
quality: quality
|
||||
)
|
||||
let base64 = shot.imageData.base64EncodedString()
|
||||
let mime = format == .png ? "image/png" : "image/jpeg"
|
||||
let visibleWindows = WindowEnumerator.visibleWindows().filter { $0.layer == 0 }
|
||||
var summaryLines: [String] = [
|
||||
"✅ Screenshot — \(shot.width)x\(shot.height) \(format.rawValue)"
|
||||
"✅ Window screenshot — \(shot.width)x\(shot.height) \(format.rawValue) [window_id: \(rawWindowID)]"
|
||||
]
|
||||
if !visibleWindows.isEmpty {
|
||||
summaryLines.append("\nOn-screen windows:")
|
||||
for w in visibleWindows {
|
||||
let title = w.name.isEmpty ? "(no title)" : "\"\(w.name)\""
|
||||
summaryLines.append("- \(w.owner) (pid \(w.pid)) \(title) [window_id: \(w.id)]")
|
||||
}
|
||||
summaryLines.append("→ Call get_window_state(pid, window_id) to inspect a window's UI.")
|
||||
}
|
||||
let summary = summaryLines.joined(separator: "\n")
|
||||
return CallTool.Result(
|
||||
content: [
|
||||
|
||||
@@ -232,6 +232,10 @@ Next steps:
|
||||
2. Verify the CLI: $BIN_LINK --version
|
||||
3. Wire into an MCP client:
|
||||
$BIN_LINK mcp-config | pbcopy
|
||||
Claude Code compatibility:
|
||||
$BIN_LINK mcp-config --client claude --claude-code-computer-use-compat
|
||||
Use MCP for Claude Code vision/computer-use-style flows; CLI screenshots
|
||||
do not expose the mcp__cua-computer-use__screenshot tool name cue.
|
||||
|
||||
Uninstall: $CUA_DRIVER_DIR/scripts/uninstall.sh
|
||||
|
||||
|
||||
@@ -300,6 +300,14 @@ Next steps:
|
||||
• Claude Code:
|
||||
claude mcp add --transport stdio cua-driver -- $BIN_LINK mcp
|
||||
|
||||
Claude Code computer-use compatibility mode:
|
||||
claude mcp add --transport stdio cua-computer-use -- $BIN_LINK mcp --claude-code-computer-use-compat
|
||||
Use this when you want Claude Code's vision/computer-use-style flow
|
||||
to ground on CuaDriver window screenshots. It keeps the normal
|
||||
CuaDriver tools and changes only the screenshot tool.
|
||||
Use MCP for this path; CLI screenshots do not expose the
|
||||
mcp__cua-computer-use__screenshot tool name cue.
|
||||
|
||||
• Codex (OpenAI):
|
||||
codex mcp add cua-driver -- $BIN_LINK mcp
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# - /Applications/CuaDriver.app bundle
|
||||
# - ~/.cua-driver/ (telemetry id + install marker)
|
||||
# - ~/Library/Application Support/Cua Driver/ (config.json)
|
||||
# - ~/Library/Caches/cua-driver/ (daemon/cache state)
|
||||
#
|
||||
# Does NOT revoke TCC grants (Accessibility + Screen Recording).
|
||||
#
|
||||
@@ -17,6 +18,7 @@ SYSTEM_BIN_LINK="/usr/local/bin/cua-driver"
|
||||
APP_BUNDLE="/Applications/CuaDriver.app"
|
||||
USER_DATA="$HOME/.cua-driver"
|
||||
CONFIG_DIR="$HOME/Library/Application Support/Cua Driver"
|
||||
CACHE_DIR="$HOME/Library/Caches/cua-driver"
|
||||
# Legacy — remove if present from older installs.
|
||||
LEGACY_UPDATE_SCRIPT="/usr/local/bin/cua-driver-update"
|
||||
LEGACY_UPDATER_PLIST="$HOME/Library/LaunchAgents/com.trycua.cua_driver_updater.plist"
|
||||
@@ -74,6 +76,14 @@ else
|
||||
log "no config at $CONFIG_DIR (skipping)"
|
||||
fi
|
||||
|
||||
# Cache / daemon state: delete the cache directory when it exists,
# otherwise just note that there was nothing to remove.
if [[ ! -d "$CACHE_DIR" ]]; then
    log "no cache at $CACHE_DIR (skipping)"
else
    rm -rf "$CACHE_DIR"
    log "removed $CACHE_DIR"
fi
|
||||
|
||||
# Agent skill symlinks (Claude Code + Codex). Only remove when the link
|
||||
# is ours — a dev user pointing the symlink at a working copy of the repo
|
||||
# keeps theirs untouched.
|
||||
@@ -91,6 +101,124 @@ for SKILL_LINK in \
|
||||
fi
|
||||
done
|
||||
|
||||
# Claude Code MCP registrations. `claude mcp remove` only touches the
# current project / user scopes, but ~/.claude.json may still hold stale
# project entries for other directories. The embedded Python script removes
# only servers named cua-driver / cua-computer-use, or servers whose
# command/args mention a cua-driver binary, so unrelated servers (for
# example one named "computer-use") are left alone.
CLAUDE_JSON="$HOME/.claude.json"
if [[ ! -f "$CLAUDE_JSON" ]] || ! command -v python3 >/dev/null 2>&1; then
    log "no Claude config cleanup via python3 (missing $CLAUDE_JSON or python3)"
else
    PY_OUTPUT="$(
        CLAUDE_JSON="$CLAUDE_JSON" python3 <<'PY'
import json
import os
import shutil
import sys
import tempfile
import time

path = os.environ["CLAUDE_JSON"]

# An unreadable or unparsable config is reported on stderr and treated as
# "nothing to do" (exit status 0).
try:
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
except Exception as exc:
    print(f"could not read Claude config {path}: {exc}", file=sys.stderr)
    raise SystemExit(0)

# "<scope>:<name>" labels for every registration that gets deleted.
removed = []


def text_parts(value):
    """Return the string pieces of a command/args value (str or list)."""
    if isinstance(value, str):
        return [value]
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, str)]


def invokes_cua_driver(server):
    """True when the server's command or args mention cua-driver."""
    if not isinstance(server, dict):
        return False
    words = text_parts(server.get("command")) + text_parts(server.get("args"))
    command_line = " ".join(words)
    return any(
        marker in command_line for marker in ("cua-driver", "CuaDriver.app")
    )


def should_remove(name, server):
    """Match by registration name first, then by what the entry runs."""
    if name in {"cua-driver", "cua-computer-use"}:
        return True
    return invokes_cua_driver(server)


def scrub_servers(servers, scope):
    """Delete matching entries from one mcpServers mapping, in place."""
    if not isinstance(servers, dict):
        return
    doomed = [
        name for name in list(servers.keys())
        if should_remove(name, servers[name])
    ]
    for name in doomed:
        del servers[name]
        removed.append(f"{scope}:{name}")


scrub_servers(data.get("mcpServers"), "user")

projects = data.get("projects")
for project in (projects.values() if isinstance(projects, dict) else ()):
    if isinstance(project, dict):
        scrub_servers(project.get("mcpServers"), "project")

# Nothing matched: leave the file untouched and print nothing.
if not removed:
    raise SystemExit(0)

# Keep a timestamped copy of the original before rewriting it.
backup = f"{path}.bak-cua-driver-uninstall-{int(time.time())}"
shutil.copy2(path, backup)

# Write to a temp file in the same directory, then swap it into place.
directory = os.path.dirname(path) or "."
fd, tmp_path = tempfile.mkstemp(
    prefix=".claude.json.",
    suffix=".tmp",
    dir=directory,
    text=True,
)
try:
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
        f.write("\n")
    os.replace(tmp_path, path)
except Exception:
    # Best-effort removal of the temp file before re-raising.
    try:
        os.unlink(tmp_path)
    except OSError:
        pass
    raise

print(f"removed Claude MCP registration(s): {', '.join(removed)}")
print(f"backed up Claude config to {backup}")
PY
    )"
    if [[ -z "$PY_OUTPUT" ]]; then
        log "no Claude MCP registrations for cua-driver found in $CLAUDE_JSON"
    else
        # Relay each line the script printed through the installer's logger.
        while IFS= read -r line; do
            log "$line"
        done <<< "$PY_OUTPUT"
    fi
fi
|
||||
|
||||
# Best-effort cleanup through the claude CLI for the active project. This
# reaches .mcp.json / current-working-directory scopes when they exist, and
# does nothing harmful if the entries are already gone.
if ! command -v claude >/dev/null 2>&1; then
    log "claude CLI not found (skipping Claude MCP CLI cleanup)"
else
    for SERVER in cua-driver cua-computer-use; do
        for SCOPE in local project user; do
            # A failed removal (entry absent in that scope) is silently ignored.
            if claude mcp remove "$SERVER" -s "$SCOPE" >/dev/null 2>&1; then
                log "removed Claude MCP server $SERVER from $SCOPE scope"
            fi
        done
    done
fi
|
||||
|
||||
cat << 'FINALUNMSG'
|
||||
|
||||
cua-driver uninstalled.
|
||||
|
||||
@@ -299,7 +299,7 @@ export function generateCLIReferenceMDX(docs: CLIDocumentation, releasedVersion:
|
||||
lines.push(`{/*
|
||||
AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY
|
||||
Generated by: npx tsx scripts/docs-generators/cua-driver.ts
|
||||
Source: libs/cua-driver/Sources/**/*.swift
|
||||
Source: recursive Swift sources under libs/cua-driver/Sources
|
||||
Version: ${releasedVersion}
|
||||
*/}`);
|
||||
lines.push('');
|
||||
@@ -320,7 +320,7 @@ export function generateCLIReferenceMDX(docs: CLIDocumentation, releasedVersion:
|
||||
lines.push('');
|
||||
|
||||
// Introduction
|
||||
lines.push(`${docs.abstract}`);
|
||||
lines.push(escapeMdxText(docs.abstract));
|
||||
lines.push('');
|
||||
|
||||
// Group commands by category
|
||||
@@ -394,11 +394,11 @@ export function generateCommandDoc(cmd: CommandDoc): string[] {
|
||||
|
||||
lines.push(`### cua-driver ${cmd.name}`);
|
||||
lines.push('');
|
||||
lines.push(cmd.abstract);
|
||||
lines.push(escapeMdxText(cmd.abstract));
|
||||
lines.push('');
|
||||
|
||||
if (cmd.discussion) {
|
||||
lines.push(cmd.discussion);
|
||||
lines.push(escapeMdxText(cmd.discussion));
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
@@ -451,11 +451,11 @@ export function generateCommandDoc(cmd: CommandDoc): string[] {
|
||||
for (const sub of cmd.subcommands) {
|
||||
lines.push(`#### cua-driver ${cmd.name} ${sub.name}`);
|
||||
lines.push('');
|
||||
lines.push(sub.abstract);
|
||||
lines.push(escapeMdxText(sub.abstract));
|
||||
lines.push('');
|
||||
|
||||
if (sub.discussion) {
|
||||
lines.push(sub.discussion);
|
||||
lines.push(escapeMdxText(sub.discussion));
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
@@ -505,11 +505,11 @@ export function generateCommandDoc(cmd: CommandDoc): string[] {
|
||||
for (const nested of sub.subcommands) {
|
||||
lines.push(`##### cua-driver ${cmd.name} ${sub.name} ${nested.name}`);
|
||||
lines.push('');
|
||||
lines.push(nested.abstract);
|
||||
lines.push(escapeMdxText(nested.abstract));
|
||||
lines.push('');
|
||||
|
||||
if (nested.discussion) {
|
||||
lines.push(nested.discussion);
|
||||
lines.push(escapeMdxText(nested.discussion));
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
@@ -577,7 +577,7 @@ export function generateMCPToolsMDX(docs: MCPDocumentation, releasedVersion: str
|
||||
lines.push(`{/*
|
||||
AUTO-GENERATED FILE - DO NOT EDIT DIRECTLY
|
||||
Generated by: npx tsx scripts/docs-generators/cua-driver.ts
|
||||
Source: libs/cua-driver/Sources/**/*.swift
|
||||
Source: recursive Swift sources under libs/cua-driver/Sources
|
||||
Version: ${releasedVersion}
|
||||
*/}`);
|
||||
lines.push('');
|
||||
@@ -613,7 +613,7 @@ export function generateMCPToolDoc(tool: MCPToolDoc): string[] {
|
||||
|
||||
lines.push(`### ${tool.name}`);
|
||||
lines.push('');
|
||||
lines.push(tool.description);
|
||||
lines.push(escapeMdxText(tool.description));
|
||||
lines.push('');
|
||||
|
||||
const properties = tool.input_schema.properties ?? {};
|
||||
@@ -631,7 +631,7 @@ export function generateMCPToolDoc(tool: MCPToolDoc): string[] {
|
||||
const isRequired = required.has(propName);
|
||||
const requiredLabel = isRequired ? 'required' : 'optional';
|
||||
const typeLabel = formatPropertyType(prop);
|
||||
lines.push(`- \`${propName}\` (${typeLabel}, ${requiredLabel}): ${prop.description ?? ''}`);
|
||||
lines.push(`- \`${propName}\` (${typeLabel}, ${requiredLabel}): ${escapeMdxText(prop.description ?? '')}`);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
@@ -659,7 +659,23 @@ export function generateMCPToolDoc(tool: MCPToolDoc): string[] {
|
||||
}
|
||||
|
||||
/**
 * Prepares a value for use inside a Markdown table cell in generated MDX.
 *
 * Newlines are collapsed to spaces, the text is passed through
 * `escapeMdxText`, and finally every `|` is backslash-escaped so it cannot
 * terminate the cell.
 *
 * @param value - Raw cell text.
 * @returns The text, safe to place between table pipes.
 */
function escapeTableCell(value: string): string {
  // The stale pre-change return that preceded this line made it unreachable.
  return escapeMdxText(value.replace(/\n/g, ' ')).replace(/\|/g, '\\|');
}
|
||||
|
||||
/**
 * Escapes characters that MDX would otherwise parse as JSX or expressions.
 *
 * Text inside single-backtick code spans is left untouched; everywhere else
 * `{`, `}`, `<` and `>` are replaced with HTML character references.
 *
 * @param value - Markdown text destined for an MDX file.
 * @returns The text with MDX-significant characters escaped outside code spans.
 */
function escapeMdxText(value: string): string {
  // Splitting on a capturing group keeps the code spans in the result:
  // even indexes are plain text, odd indexes are the captured `...` spans.
  return value
    .split(/(`[^`]*`)/g)
    .map((segment, index) => {
      if (index % 2 === 1) {
        return segment;
      }
      return segment
        .replace(/\{/g, '&#123;')
        .replace(/\}/g, '&#125;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
    })
    .join('');
}
|
||||
|
||||
function formatPropertyType(prop: MCPPropertyDoc): string {
|
||||
|
||||
Reference in New Issue
Block a user