Rename @cua/ to @trycua/, move ts example to ./examples, rename workflows for better organization

2026-01-05 21:09:58 -06:00 · 2025-06-30 10:14:52 -07:00
parent 8bfa8fbc92
commit b0666df203
30 changed files with 825 additions and 516 deletions
--- a/examples/computer-example-ts/src/helpers.ts
+++ b/examples/computer-example-ts/src/helpers.ts
@@ -0,0 +1,83 @@
+import type { Computer } from "@trycua/computer";
+import type OpenAI from "openai";
+
+/**
+ * Executes one OpenAI computer-use action against a CUA computer.
+ *
+ * `click`, `move` and `scroll` first move the cursor to the coordinates
+ * carried by the action. Action types without a case below are logged
+ * and otherwise ignored.
+ *
+ * @param computer - Computer whose `interface` performs the input.
+ * @param action - The `action` payload of a `computer_call` item.
+ * @returns The value of `computer.interface.screenshot()` for a
+ *   `screenshot` action; `undefined` for every other action type.
+ */
+export async function executeAction(
+	computer: Computer,
+	action: OpenAI.Responses.ResponseComputerToolCall["action"],
+) {
+	switch (action.type) {
+		case "click": {
+			const { x, y, button } = action;
+			console.log(`Executing click at (${x}, ${y}) with button '${button}'.`);
+			await computer.interface.moveCursor(x, y);
+			// Every button other than "right" is performed as a left click.
+			if (button === "right") await computer.interface.rightClick();
+			else await computer.interface.leftClick();
+			break;
+		}
+		case "move": {
+			// Pointer move with no click; same cursor call the click and scroll cases use.
+			const { x, y } = action;
+			console.log(`Moving cursor to (${x}, ${y}).`);
+			await computer.interface.moveCursor(x, y);
+			break;
+		}
+		case "type": {
+			const { text } = action;
+			console.log(`Typing text: ${text}`);
+			await computer.interface.typeText(text);
+			break;
+		}
+		case "scroll": {
+			const { x: locX, y: locY, scroll_x, scroll_y } = action;
+			console.log(
+				`Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).`,
+			);
+			await computer.interface.moveCursor(locX, locY);
+			await computer.interface.scroll(scroll_x, scroll_y);
+			break;
+		}
+		case "keypress": {
+			for (const key of action.keys) {
+				console.log(`Pressing key: ${key}.`);
+				// Map common key names to CUA equivalents; the comparison ignores case.
+				const lowered = key.toLowerCase();
+				if (lowered === "enter") {
+					await computer.interface.pressKey("return");
+				} else if (lowered === "space") {
+					await computer.interface.pressKey("space");
+				} else {
+					// Any other key is passed through unchanged.
+					await computer.interface.pressKey(key);
+				}
+			}
+			break;
+		}
+		case "wait": {
+			// Fixed 3-second pause.
+			console.log("Waiting for 3 seconds.");
+			await new Promise((resolve) => setTimeout(resolve, 3 * 1000));
+			break;
+		}
+		case "screenshot": {
+			console.log("Taking screenshot.");
+			// This is handled automatically in the main loop, but we can take an extra one if requested
+			return await computer.interface.screenshot();
+		}
+		default:
+			console.log(`Unrecognized action: ${action.type}`);
+			break;
+	}
+}
--- a/examples/computer-example-ts/src/index.ts
+++ b/examples/computer-example-ts/src/index.ts
@@ -0,0 +1,111 @@
+import { Computer, OSType } from "@trycua/computer";
+import OpenAI from "openai";
+import { executeAction } from "./helpers";
+
+import "dotenv/config";
+
+/*
+ * Example agent loop: an OpenAI computer-use model drives a CUA Linux
+ * computer until a response contains no `computer_call` item.
+ *
+ * Uses the OPENAI_KEY, CUA_KEY and CUA_CONTAINER_NAME environment variables
+ * (`dotenv/config` is imported above for its side effect).
+ */
+
+const COMPUTER_USE_PROMPT = "Open firefox and go to trycua.com";
+
+const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY });
+
+// Initialize the Computer Connection
+const computer = new Computer({
+	apiKey: process.env.CUA_KEY!,
+	name: process.env.CUA_CONTAINER_NAME!,
+	osType: OSType.LINUX,
+});
+
+// Request settings shared by every call to the Responses API below.
+const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
+	model: "computer-use-preview",
+	tools: [
+		{
+			type: "computer_use_preview",
+			display_width: 1024,
+			display_height: 768,
+			environment: "linux", // we're using a linux vm
+		},
+	],
+	truncation: "auto",
+};
+
+/** Resolves after `ms` milliseconds. */
+const sleep = (ms: number) =>
+	new Promise((resolve) => setTimeout(resolve, ms));
+
+/** Takes a screenshot of the computer and wraps it in a base64 PNG data URL. */
+async function captureScreenDataUrl() {
+	const image = await computer.interface.screenshot();
+	return `data:image/png;base64,${image.toString("base64")}`;
+}
+
+/** Returns the `computer_call` items of a response, in output order. */
+const computerCallsIn = (response: OpenAI.Responses.Response) =>
+	response.output.filter((item) => item.type === "computer_call");
+
+await computer.run();
+
+// First turn: what we want the ai to do, plus the current screenshot of the vm.
+const initialScreen = await captureScreenDataUrl();
+let res = await openai.responses.create({
+	...computerUseConfig,
+	input: [
+		{
+			role: "user",
+			content: [
+				{ type: "input_text", text: COMPUTER_USE_PROMPT },
+				{ type: "input_image", image_url: initialScreen, detail: "auto" },
+			],
+		},
+	],
+});
+
+// Loop until there are no more computer use actions.
+let pendingCalls = computerCallsIn(res);
+while (pendingCalls.length >= 1) {
+	// Only the first call of each response is acted on.
+	const call = pendingCalls[0];
+	console.log("Received action from OpenAI Responses API:", call.action);
+
+	const safetyChecks = call.pending_safety_checks;
+	if (safetyChecks.length > 0) {
+		console.log("Safety checks pending:", safetyChecks);
+	}
+
+	// Execute the action in the container
+	await executeAction(computer, call.action);
+	// Wait for changes to process within the container (1sec)
+	await sleep(1000);
+
+	// Send the new screenshot back as computer_call_output. Every pending
+	// safety check is acknowledged as-is; in a real implementation, you would
+	// want to get user confirmation first.
+	const screenAfterAction = await captureScreenDataUrl();
+	res = await openai.responses.create({
+		...computerUseConfig,
+		previous_response_id: res.id,
+		input: [
+			{
+				type: "computer_call_output",
+				call_id: call.call_id,
+				acknowledged_safety_checks: safetyChecks,
+				output: {
+					type: "computer_screenshot",
+					image_url: screenAfterAction,
+				},
+			},
+		],
+	});
+	pendingCalls = computerCallsIn(res);
+}
+console.log("No more computer calls. Loop complete.");
+
+process.exit();