mirror of
https://github.com/trycua/computer.git
synced 2026-01-04 12:30:08 -06:00
Create an example of using @cua/computer with a cloud container and OpenAI
This commit is contained in:
3
libs/typescript/examples/cua-cloud-openai/.env.example
Normal file
3
libs/typescript/examples/cua-cloud-openai/.env.example
Normal file
@@ -0,0 +1,3 @@
|
||||
OPENAI_KEY=
|
||||
CUA_KEY=
|
||||
CUA_CONTAINER_NAME=
|
||||
3
libs/typescript/examples/cua-cloud-openai/.gitignore
vendored
Normal file
3
libs/typescript/examples/cua-cloud-openai/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
node_modules
|
||||
.DS_Store
|
||||
.env
|
||||
7
libs/typescript/examples/cua-cloud-openai/.prettierrc
Normal file
7
libs/typescript/examples/cua-cloud-openai/.prettierrc
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"useTabs": false,
|
||||
"semi": true,
|
||||
"singleQuote": true,
|
||||
"trailingComma": "es5",
|
||||
"bracketSpacing": true
|
||||
}
|
||||
25
libs/typescript/examples/cua-cloud-openai/package.json
Normal file
25
libs/typescript/examples/cua-cloud-openai/package.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"name": "cua-cloud-openai",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "tsx watch src/index.ts",
|
||||
"start": "tsx src/index.ts"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"packageManager": "pnpm@10.12.3",
|
||||
"dependencies": {
|
||||
"@cua/computer": "link:../../computer",
|
||||
"dotenv": "^16.5.0",
|
||||
"openai": "^5.7.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.15.33",
|
||||
"tsx": "^4.20.3",
|
||||
"typescript": "^5.8.3"
|
||||
}
|
||||
}
|
||||
56
libs/typescript/examples/cua-cloud-openai/src/helpers.ts
Normal file
56
libs/typescript/examples/cua-cloud-openai/src/helpers.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { Computer } from "@cua/computer";
|
||||
import OpenAI from "openai";
|
||||
|
||||
/**
 * Key names (lower-cased) that are rewritten before being passed to
 * `computer.interface.pressKey`. Any key not listed here is forwarded as-is.
 */
const KEY_ALIASES: ReadonlyMap<string, string> = new Map([
  ['enter', 'return'],
  ['space', 'space'],
]);

/**
 * Executes a single OpenAI computer-use action on the given computer.
 *
 * Handled action types: `click`, `move`, `type`, `scroll`, `keypress`,
 * `wait` and `screenshot`. Any other action type is logged as unrecognized
 * and otherwise ignored.
 *
 * @param computer - Computer whose `interface` receives the mouse, keyboard
 *   and screenshot calls.
 * @param action - The `action` payload of a `computer_call` output item.
 * @param waitMs - How long a `wait` action pauses, in milliseconds
 *   (defaults to 3000).
 * @returns The result of `computer.interface.screenshot()` for a
 *   `screenshot` action; `undefined` for every other action.
 */
export async function executeAction(
  computer: Computer,
  action: OpenAI.Responses.ResponseComputerToolCall['action'],
  waitMs = 3000
) {
  // Each case gets its own block so its `const` bindings stay local to it
  // instead of sharing the single scope of the whole switch statement.
  switch (action.type) {
    case 'click': {
      const { x, y, button } = action;
      console.log(`Executing click at (${x}, ${y}) with button '${button}'.`);
      await computer.interface.moveCursor(x, y);
      // Only 'right' is special-cased; every other button value results in a
      // left click.
      if (button === 'right') await computer.interface.rightClick();
      else await computer.interface.leftClick();
      break;
    }
    case 'move': {
      const { x, y } = action;
      console.log(`Moving cursor to (${x}, ${y}).`);
      await computer.interface.moveCursor(x, y);
      break;
    }
    case 'type': {
      const { text } = action;
      console.log(`Typing text: ${text}`);
      await computer.interface.typeText(text);
      break;
    }
    case 'scroll': {
      const { x: locX, y: locY, scroll_x, scroll_y } = action;
      console.log(
        `Scrolling at (${locX}, ${locY}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y}).`
      );
      // Position the cursor first so the scroll applies at the requested point.
      await computer.interface.moveCursor(locX, locY);
      await computer.interface.scroll(scroll_x, scroll_y);
      break;
    }
    case 'keypress': {
      const { keys } = action;
      // NOTE(review): keys are pressed one after another, not held together,
      // so a combination such as CTRL+L is presumably not sent as a chord.
      // Confirm against the intended keypress semantics.
      for (const key of keys) {
        console.log(`Pressing key: ${key}.`);
        // Map common key names to CUA equivalents
        await computer.interface.pressKey(
          KEY_ALIASES.get(key.toLowerCase()) ?? key
        );
      }
      break;
    }
    case 'wait': {
      console.log(`Waiting for ${waitMs / 1000} seconds.`);
      await new Promise((resolve) => setTimeout(resolve, waitMs));
      break;
    }
    case 'screenshot': {
      console.log('Taking screenshot.');
      // This is handled automatically in the main loop, but we can take an
      // extra one if requested.
      return await computer.interface.screenshot();
    }
    default:
      console.log(`Unrecognized action: ${action.type}`);
      break;
  }
}
|
||||
104
libs/typescript/examples/cua-cloud-openai/src/index.ts
Normal file
104
libs/typescript/examples/cua-cloud-openai/src/index.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
import { Computer, OSType } from '@cua/computer';
|
||||
import OpenAI from 'openai';
|
||||
import { executeAction } from './helpers';
|
||||
|
||||
import 'dotenv/config';
|
||||
|
||||
/**
 * Returns the value of a required environment variable.
 *
 * @param name - Name of the variable to read from `process.env`.
 * @returns The variable's non-empty value.
 * @throws Error when the variable is unset or empty, so a missing setting is
 *   reported by name instead of being passed on as `undefined`.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY });

const COMPUTER_USE_PROMPT = 'Open firefox and go to trycua.com';

// Initialize the Computer Connection
const computer = new Computer({
  apiKey: requireEnv('CUA_KEY'),
  name: requireEnv('CUA_CONTAINER_NAME'),
  osType: OSType.LINUX,
});

await computer.run();

// Take the initial screenshot
const screenshot = await computer.interface.screenshot();
const screenshotBase64 = screenshot.toString('base64');

// Setup openai config for computer use; shared by every request below
const computerUseConfig: OpenAI.Responses.ResponseCreateParamsNonStreaming = {
  model: 'computer-use-preview',
  tools: [
    {
      type: 'computer_use_preview',
      display_width: 1024,
      display_height: 768,
      environment: 'linux', // we're using a linux vm
    },
  ],
  truncation: 'auto',
};

// Send initial screenshot to the openai computer use model
let res = await openai.responses.create({
  ...computerUseConfig,
  input: [
    {
      role: 'user',
      content: [
        // what we want the ai to do
        { type: 'input_text', text: COMPUTER_USE_PROMPT },
        // current screenshot of the vm
        {
          type: 'input_image',
          image_url: `data:image/png;base64,${screenshotBase64}`,
          detail: 'auto',
        },
      ],
    },
  ],
});

// Loop until there are no more computer use actions.
while (true) {
  // Only the first computer call of each response is executed.
  const call = res.output.find((o) => o.type === 'computer_call');
  if (call === undefined) {
    console.log('No more computer calls. Loop complete.');
    break;
  }

  const action = call.action;
  console.log('Received action from OpenAI Responses API:', action);

  if (call.pending_safety_checks.length > 0) {
    console.log('Safety checks pending:', call.pending_safety_checks);
    // In a real implementation, you would want to get user confirmation here
  }
  // Every pending safety check is acknowledged as-is (an empty list when
  // nothing is pending).
  const ackChecks = call.pending_safety_checks;

  // Execute the action in the container
  await executeAction(computer, action);
  // Wait for changes to process within the container (1sec)
  await new Promise((resolve) => setTimeout(resolve, 1000));

  // Capture new screenshot
  const newScreenshot = await computer.interface.screenshot();
  const newScreenshotBase64 = newScreenshot.toString('base64');

  // Send the screenshot back as computer_call_output, chained to the
  // previous response through previous_response_id
  res = await openai.responses.create({
    ...computerUseConfig,
    previous_response_id: res.id,
    input: [
      {
        type: 'computer_call_output',
        call_id: call.call_id,
        acknowledged_safety_checks: ackChecks,
        output: {
          type: 'computer_screenshot',
          image_url: `data:image/png;base64,${newScreenshotBase64}`,
        },
      },
    ],
  });
}

// Exit explicitly once the loop has finished.
process.exit();
|
||||
29
libs/typescript/examples/cua-cloud-openai/tsconfig.json
Normal file
29
libs/typescript/examples/cua-cloud-openai/tsconfig.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "esnext",
|
||||
"lib": [
|
||||
"es2023"
|
||||
],
|
||||
"moduleDetection": "force",
|
||||
"module": "preserve",
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"resolveJsonModule": true,
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"declaration": true,
|
||||
"emitDeclarationOnly": true,
|
||||
"esModuleInterop": true,
|
||||
"isolatedModules": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"skipLibCheck": true,
|
||||
"outDir": "build",
|
||||
},
|
||||
"include": [
|
||||
"src"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user