Fix issues with websocket response handling, export types, update readme -- it works!

This commit is contained in:
Morgan Dean
2025-06-20 10:16:37 -07:00
parent 613745da7f
commit 00919f0365
5 changed files with 127 additions and 132 deletions

View File

@@ -20,12 +20,10 @@ pnpm add @cua/computer
import { CloudComputer, OSType } from '@cua/computer';
// Create a new computer instance
const computer = new Computer({
display: '1024x768',
memory: '8GB',
cpu: '4',
osType: 'macos',
image: 'macos-sequoia-cua:latest'
const computer = new CloudComputer({
osType: OSType.LINUX,
name: 's-linux-vm_id',
apiKey: 'your-api-key'
});
// Start the computer
@@ -59,12 +57,12 @@ The library is organized into the following structure:
### Provider Implementations
- **LumeComputer**: Implementation for Lume API-based VMs
- **LumeComputer**: Implementation for Lume API-based VMs (Unmaintained)
- **CloudComputer**: Implementation for cloud-based VMs
### Utility Functions
- **Lume API Utilities**: Functions for interacting with the Lume API (lumeApiGet, lumeApiRun, lumeApiStop, etc.)
- **Lume API Utilities**: Functions for interacting with the Lume API (lumeApiGet, lumeApiRun, lumeApiStop, etc.) (Unmaintained)
- **Helper Functions**: Parsing utilities for display and memory strings
## Development
@@ -97,7 +95,6 @@ pnpm typecheck
**WARNING:** Some parts of this library, particularly the provider implementations (like Lume), were created as test/example implementations and are not maintained or expected to work in production environments. They serve as references for how providers might be implemented but should not be used in production.
## License
[MIT](./LICENSE) License 2025 [C/UA](https://github.com/trycua)

View File

@@ -12,9 +12,9 @@ const logger = pino({ name: "computer-cloud" });
* Cloud-specific computer implementation
*/
export class CloudComputer extends BaseComputer {
protected apiKey: string;
protected static vmProviderType: VMProviderType.CLOUD;
private interface?: BaseComputerInterface;
protected apiKey: string;
private iface?: BaseComputerInterface;
private initialized = false;
constructor(config: CloudComputerConfig) {
@@ -23,7 +23,8 @@ export class CloudComputer extends BaseComputer {
}
get ip() {
return `${this.name}.containers.cloud.trycua.com`;
return "192.168.64.9";
//return `${this.name}.containers.cloud.trycua.com`;
}
/**
@@ -43,7 +44,7 @@ export class CloudComputer extends BaseComputer {
logger.info(`Connecting to cloud VM at ${ipAddress}`);
// Create the interface with API key authentication
this.interface = InterfaceFactory.createInterfaceForOS(
this.iface = InterfaceFactory.createInterfaceForOS(
this.osType,
ipAddress,
this.apiKey,
@@ -52,7 +53,7 @@ export class CloudComputer extends BaseComputer {
// Wait for the interface to be ready
logger.info("Waiting for interface to be ready...");
await this.interface.waitForReady();
await this.iface.waitForReady();
this.initialized = true;
logger.info("Cloud computer ready");
@@ -68,9 +69,9 @@ export class CloudComputer extends BaseComputer {
async stop(): Promise<void> {
logger.info("Stopping cloud computer...");
if (this.interface) {
this.interface.disconnect();
this.interface = undefined;
if (this.iface) {
this.iface.disconnect();
this.iface = undefined;
}
this.initialized = false;
@@ -80,53 +81,11 @@ export class CloudComputer extends BaseComputer {
/**
* Get the computer interface
*/
getInterface(): BaseComputerInterface {
if (!this.interface) {
get interface(): BaseComputerInterface {
if (!this.iface) {
throw new Error("Computer not initialized. Call run() first.");
}
return this.interface;
}
/**
* Take a screenshot
*/
async screenshot(): Promise<Buffer> {
return this.getInterface().screenshot();
}
/**
* Click at coordinates
*/
async click(x?: number, y?: number): Promise<void> {
return this.getInterface().leftClick(x, y);
}
/**
* Type text
*/
async type(text: string): Promise<void> {
return this.getInterface().typeText(text);
}
/**
* Press a key
*/
async key(key: string): Promise<void> {
return this.getInterface().pressKey(key);
}
/**
* Press hotkey combination
*/
async hotkey(...keys: string[]): Promise<void> {
return this.getInterface().hotkey(...keys);
}
/**
* Run a command
*/
async runCommand(command: string): Promise<[string, string]> {
return this.getInterface().runCommand(command);
return this.iface;
}
/**

View File

@@ -2,3 +2,5 @@
export * from "./computer";
// TODO: decide which types to export and how to export them
//
export { OSType } from "./types";

View File

@@ -75,12 +75,12 @@ export abstract class BaseComputerInterface {
*/
protected get wsUri(): string {
const protocol = this.secure ? "wss" : "ws";
// Check if ipAddress already includes a port
if (this.ipAddress.includes(":")) {
return `${protocol}://${this.ipAddress}/ws`;
}
// Otherwise, append the default port
const port = this.secure ? "8443" : "8000";
return `${protocol}://${this.ipAddress}:${port}/ws`;
@@ -162,10 +162,10 @@ export abstract class BaseComputerInterface {
/**
* Send a command to the WebSocket server.
*/
public async sendCommand(command: {
action: string;
[key: string]: unknown;
}): Promise<{ [key: string]: unknown }> {
public async sendCommand(
command: string,
params: { [key: string]: unknown } = {}
): Promise<{ [key: string]: unknown }> {
// Create a new promise for this specific command
const commandPromise = new Promise<{ [key: string]: unknown }>(
(resolve, reject) => {
@@ -190,11 +190,12 @@ export abstract class BaseComputerInterface {
} catch (error) {
innerReject(error);
}
this.ws!.off("message", messageHandler);
this.ws.off("message", messageHandler);
};
this.ws!.on("message", messageHandler);
this.ws!.send(JSON.stringify(command));
this.ws.on("message", messageHandler);
const wsCommand = { command, params };
this.ws.send(JSON.stringify(wsCommand));
}
);
};

View File

@@ -13,7 +13,7 @@ export class MacOSComputerInterface extends BaseComputerInterface {
y?: number,
button: MouseButton = "left"
): Promise<void> {
await this.sendCommand({ action: "mouse_down", x, y, button });
await this.sendCommand("mouse_down", { x, y, button });
}
async mouseUp(
@@ -21,23 +21,23 @@ export class MacOSComputerInterface extends BaseComputerInterface {
y?: number,
button: MouseButton = "left"
): Promise<void> {
await this.sendCommand({ action: "mouse_up", x, y, button });
await this.sendCommand("mouse_up", { x, y, button });
}
async leftClick(x?: number, y?: number): Promise<void> {
await this.sendCommand({ action: "left_click", x, y });
await this.sendCommand("left_click", { x, y });
}
async rightClick(x?: number, y?: number): Promise<void> {
await this.sendCommand({ action: "right_click", x, y });
await this.sendCommand("right_click", { x, y });
}
async doubleClick(x?: number, y?: number): Promise<void> {
await this.sendCommand({ action: "double_click", x, y });
await this.sendCommand("double_click", { x, y });
}
async moveCursor(x: number, y: number): Promise<void> {
await this.sendCommand({ action: "move_cursor", x, y });
await this.sendCommand("move_cursor", { x, y });
}
async dragTo(
@@ -46,7 +46,7 @@ export class MacOSComputerInterface extends BaseComputerInterface {
button: MouseButton = "left",
duration = 0.5
): Promise<void> {
await this.sendCommand({ action: "drag_to", x, y, button, duration });
await this.sendCommand("drag_to", { x, y, button, duration });
}
async drag(
@@ -54,154 +54,190 @@ export class MacOSComputerInterface extends BaseComputerInterface {
button: MouseButton = "left",
duration = 0.5
): Promise<void> {
await this.sendCommand({ action: "drag", path, button, duration });
await this.sendCommand("drag", { path, button, duration });
}
// Keyboard Actions
async keyDown(key: string): Promise<void> {
await this.sendCommand({ action: "key_down", key });
await this.sendCommand("key_down", { key });
}
async keyUp(key: string): Promise<void> {
await this.sendCommand({ action: "key_up", key });
await this.sendCommand("key_up", { key });
}
async typeText(text: string): Promise<void> {
await this.sendCommand({ action: "type_text", text });
await this.sendCommand("type_text", { text });
}
async pressKey(key: string): Promise<void> {
await this.sendCommand({ action: "press_key", key });
await this.sendCommand("press_key", { key });
}
async hotkey(...keys: string[]): Promise<void> {
await this.sendCommand({ action: "hotkey", keys });
await this.sendCommand("hotkey", { keys });
}
// Scrolling Actions
async scroll(x: number, y: number): Promise<void> {
await this.sendCommand({ action: "scroll", x, y });
await this.sendCommand("scroll", { x, y });
}
async scrollDown(clicks = 1): Promise<void> {
await this.sendCommand({ action: "scroll_down", clicks });
await this.sendCommand("scroll_down", { clicks });
}
async scrollUp(clicks = 1): Promise<void> {
await this.sendCommand({ action: "scroll_up", clicks });
await this.sendCommand("scroll_up", { clicks });
}
// Screen Actions
async screenshot(): Promise<Buffer> {
const response = await this.sendCommand({ action: "screenshot" });
return Buffer.from(response.data as string, "base64");
const response = await this.sendCommand("screenshot");
if (!response.image_data) {
throw new Error("Failed to take screenshot");
}
return Buffer.from(response.image_data as string, "base64");
}
async getScreenSize(): Promise<ScreenSize> {
const response = await this.sendCommand({ action: "get_screen_size" });
return response.data as ScreenSize;
const response = await this.sendCommand("get_screen_size");
if (!response.success || !response.size) {
throw new Error("Failed to get screen size");
}
return response.size as ScreenSize;
}
async getCursorPosition(): Promise<CursorPosition> {
const response = await this.sendCommand({ action: "get_cursor_position" });
return response.data as CursorPosition;
const response = await this.sendCommand("get_cursor_position");
if (!response.success || !response.position) {
throw new Error("Failed to get cursor position");
}
return response.position as CursorPosition;
}
// Clipboard Actions
async copyToClipboard(): Promise<string> {
const response = await this.sendCommand({ action: "copy_to_clipboard" });
return response.data as string;
const response = await this.sendCommand("copy_to_clipboard");
if (!response.success || !response.content) {
throw new Error("Failed to get clipboard content");
}
return response.content as string;
}
async setClipboard(text: string): Promise<void> {
await this.sendCommand({ action: "set_clipboard", text });
await this.sendCommand("set_clipboard", { text });
}
// File System Actions
async fileExists(path: string): Promise<boolean> {
const response = await this.sendCommand({ action: "file_exists", path });
return response.data as boolean;
const response = await this.sendCommand("file_exists", { path });
return (response.exists as boolean) || false;
}
async directoryExists(path: string): Promise<boolean> {
const response = await this.sendCommand({
action: "directory_exists",
path,
});
return response.data as boolean;
const response = await this.sendCommand("directory_exists", { path });
return (response.exists as boolean) || false;
}
async listDir(path: string): Promise<string[]> {
const response = await this.sendCommand({ action: "list_dir", path });
return response.data as string[];
const response = await this.sendCommand("list_dir", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to list directory");
}
return (response.files as string[]) || [];
}
async readText(path: string): Promise<string> {
const response = await this.sendCommand({ action: "read_text", path });
return response.data as string;
const response = await this.sendCommand("read_text", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to read file");
}
return (response.content as string) || "";
}
async writeText(path: string, content: string): Promise<void> {
await this.sendCommand({ action: "write_text", path, content });
const response = await this.sendCommand("write_text", { path, content });
if (!response.success) {
throw new Error(response.error as string || "Failed to write file");
}
}
async readBytes(path: string): Promise<Buffer> {
const response = await this.sendCommand({ action: "read_bytes", path });
return Buffer.from(response.data as string, "base64");
const response = await this.sendCommand("read_bytes", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to read file");
}
return Buffer.from(response.content_b64 as string, "base64");
}
async writeBytes(path: string, content: Buffer): Promise<void> {
await this.sendCommand({
action: "write_bytes",
const response = await this.sendCommand("write_bytes", {
path,
content: content.toString("base64"),
content_b64: content.toString("base64"),
});
if (!response.success) {
throw new Error(response.error as string || "Failed to write file");
}
}
async deleteFile(path: string): Promise<void> {
await this.sendCommand({ action: "delete_file", path });
const response = await this.sendCommand("delete_file", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to delete file");
}
}
async createDir(path: string): Promise<void> {
await this.sendCommand({ action: "create_dir", path });
const response = await this.sendCommand("create_dir", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to create directory");
}
}
async deleteDir(path: string): Promise<void> {
await this.sendCommand({ action: "delete_dir", path });
const response = await this.sendCommand("delete_dir", { path });
if (!response.success) {
throw new Error(response.error as string || "Failed to delete directory");
}
}
async runCommand(command: string): Promise<[string, string]> {
const response = await this.sendCommand({ action: "run_command", command });
const data = response.data as { stdout: string; stderr: string };
return [data.stdout, data.stderr];
const response = await this.sendCommand("run_command", { command });
if (!response.success) {
throw new Error(response.error as string || "Failed to run command");
}
return [(response.stdout as string) || "", (response.stderr as string) || ""];
}
// Accessibility Actions
async getAccessibilityTree(): Promise<AccessibilityNode> {
const response = await this.sendCommand({
action: "get_accessibility_tree",
});
return response.data as AccessibilityNode;
const response = await this.sendCommand("get_accessibility_tree");
if (!response.success) {
throw new Error(response.error as string || "Failed to get accessibility tree");
}
return response as unknown as AccessibilityNode;
}
async toScreenCoordinates(x: number, y: number): Promise<[number, number]> {
const response = await this.sendCommand({
action: "to_screen_coordinates",
x,
y,
});
return response.data as [number, number];
const response = await this.sendCommand("to_screen_coordinates", { x, y });
if (!response.success || !response.coordinates) {
throw new Error("Failed to convert to screen coordinates");
}
return response.coordinates as [number, number];
}
async toScreenshotCoordinates(
x: number,
y: number
): Promise<[number, number]> {
const response = await this.sendCommand({
action: "to_screenshot_coordinates",
const response = await this.sendCommand("to_screenshot_coordinates", {
x,
y,
});
return response.data as [number, number];
if (!response.success || !response.coordinates) {
throw new Error("Failed to convert to screenshot coordinates");
}
return response.coordinates as [number, number];
}
}