mirror of
https://github.com/trycua/computer.git
synced 2026-01-05 21:09:58 -06:00
Add window management commands
This commit is contained in:
@@ -22,6 +22,51 @@ Execute shell commands and get detailed results:
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Window Management
|
||||
|
||||
Control application launching and windows:
|
||||
|
||||
<Tabs items={['Python', 'TypeScript']}>
|
||||
<Tab value="Python">
|
||||
```python
|
||||
# Launch applications
|
||||
await computer.interface.launch("xfce4-terminal")
|
||||
await computer.interface.launch("libreoffice --writer")
|
||||
await computer.interface.open("https://www.google.com")
|
||||
|
||||
# Window management
|
||||
windows = await computer.interface.get_application_windows("xfce4-terminal")
|
||||
window_id = windows[0]
|
||||
await computer.interface.activate_window(window_id)
|
||||
|
||||
window_id = await computer.interface.get_current_window_id() # get the current active window id
|
||||
await computer.interface.window_size(window_id)
|
||||
await computer.interface.get_window_title(window_id)
|
||||
await computer.interface.close_window(window_id)
|
||||
```
|
||||
|
||||
</Tab>
|
||||
<Tab value="TypeScript">
|
||||
```typescript
|
||||
// Launch applications
|
||||
await computer.interface.launch("xfce4-terminal");
|
||||
await computer.interface.launch("libreoffice --writer");
|
||||
await computer.interface.open("https://www.google.com");
|
||||
|
||||
// Window management
|
||||
const windows = await computer.interface.getApplicationWindows("xfce4-terminal");
|
||||
let windowId = windows[0];
|
||||
await computer.interface.activateWindow(windowId);
|
||||
|
||||
windowId = await computer.interface.getCurrentWindowId(); // current active window id
|
||||
await computer.interface.getWindowSize(windowId);
|
||||
await computer.interface.getWindowName(windowId);
|
||||
await computer.interface.closeWindow(windowId);
|
||||
```
|
||||
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Mouse Actions
|
||||
|
||||
Precise mouse control and interaction:
|
||||
|
||||
@@ -105,6 +105,55 @@ class BaseDesktopHandler(ABC):
|
||||
pass
|
||||
|
||||
|
||||
class BaseWindowHandler(ABC):
|
||||
"""Abstract class for OS-specific window management handlers.
|
||||
|
||||
Categories:
|
||||
- Window Management: Methods for application/window control
|
||||
"""
|
||||
|
||||
# Window Management
|
||||
@abstractmethod
|
||||
async def open(self, target: str) -> Dict[str, Any]:
|
||||
"""Open a file or URL with the default application."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
|
||||
"""Launch an application with optional arguments."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_current_window_id(self) -> Dict[str, Any]:
|
||||
"""Get the currently active window ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_application_windows(self, app: str) -> Dict[str, Any]:
|
||||
"""Get windows belonging to an application (by name or bundle)."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_window_name(self, window_id: str) -> Dict[str, Any]:
|
||||
"""Get the title/name of a window by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_window_size(self, window_id: str) -> Dict[str, Any]:
|
||||
"""Get the size of a window by ID as {width, height}."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def activate_window(self, window_id: str) -> Dict[str, Any]:
|
||||
"""Bring a window to the foreground by ID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def close_window(self, window_id: str) -> Dict[str, Any]:
|
||||
"""Close a window by ID."""
|
||||
pass
|
||||
|
||||
|
||||
class BaseAutomationHandler(ABC):
|
||||
"""Abstract base class for OS-specific automation handlers.
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from .base import (
|
||||
BaseAutomationHandler,
|
||||
BaseDesktopHandler,
|
||||
BaseFileHandler,
|
||||
BaseWindowHandler,
|
||||
)
|
||||
|
||||
# Conditionally import platform-specific handlers
|
||||
@@ -22,7 +23,7 @@ elif system == "linux":
|
||||
elif system == "windows":
|
||||
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
|
||||
|
||||
from .generic import GenericDesktopHandler, GenericFileHandler
|
||||
from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
|
||||
|
||||
|
||||
class HandlerFactory:
|
||||
@@ -61,6 +62,7 @@ class HandlerFactory:
|
||||
BaseDioramaHandler,
|
||||
BaseFileHandler,
|
||||
BaseDesktopHandler,
|
||||
BaseWindowHandler,
|
||||
]
|
||||
):
|
||||
"""Create and return appropriate handlers for the current OS.
|
||||
@@ -82,6 +84,7 @@ class HandlerFactory:
|
||||
MacOSDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
elif os_type == "linux":
|
||||
return (
|
||||
@@ -90,6 +93,7 @@ class HandlerFactory:
|
||||
BaseDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
elif os_type == "windows":
|
||||
return (
|
||||
@@ -98,6 +102,7 @@ class HandlerFactory:
|
||||
BaseDioramaHandler(),
|
||||
GenericFileHandler(),
|
||||
GenericDesktopHandler(),
|
||||
GenericWindowHandler(),
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(f"OS '{os_type}' is not supported")
|
||||
|
||||
@@ -8,11 +8,20 @@ Includes:
|
||||
"""
|
||||
|
||||
import base64
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ..utils import wallpaper
|
||||
from .base import BaseDesktopHandler, BaseFileHandler
|
||||
from .base import BaseDesktopHandler, BaseFileHandler, BaseWindowHandler
|
||||
|
||||
try:
|
||||
import pywinctl as pwc
|
||||
except Exception: # pragma: no cover
|
||||
pwc = None # type: ignore
|
||||
|
||||
|
||||
def resolve_path(path: str) -> Path:
|
||||
@@ -27,6 +36,9 @@ def resolve_path(path: str) -> Path:
|
||||
return Path(path).expanduser().resolve()
|
||||
|
||||
|
||||
# ===== Cross-platform Desktop command handlers =====
|
||||
|
||||
|
||||
class GenericDesktopHandler(BaseDesktopHandler):
|
||||
"""
|
||||
Generic desktop handler providing desktop-related operations.
|
||||
@@ -67,6 +79,125 @@ class GenericDesktopHandler(BaseDesktopHandler):
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# ===== Cross-platform window control command handlers =====
|
||||
|
||||
|
||||
class GenericWindowHandler(BaseWindowHandler):
    """
    Cross-platform window management using pywinctl where possible.

    Every public coroutine returns a dict with a boolean ``success`` key.
    Failures are reported through an ``error`` message in that dict rather
    than by raising.
    """

    async def open(self, target: str) -> Dict[str, Any]:
        """Open a URL in the web browser, or a path with the OS default handler.

        Args:
            target: An ``http://``/``https://`` URL, or a filesystem path that
                is normalised with ``resolve_path`` before being opened.

        Returns:
            ``{"success": True}`` when the opener was started (for URLs, the
            truthiness of ``webbrowser.open``), otherwise
            ``{"success": False, "error": ...}``.
        """
        try:
            if target.startswith(("http://", "https://")):
                return {"success": bool(webbrowser.open(target))}

            path = str(resolve_path(target))
            system_name = platform.system().lower()
            if system_name == "darwin":
                subprocess.Popen(["open", path])
            elif system_name == "linux":
                subprocess.Popen(["xdg-open", path])
            elif system_name == "windows":
                os.startfile(path)  # type: ignore[attr-defined]
            else:
                return {"success": False, "error": f"Unsupported OS: {system_name}"}
            # Success here only means the opener process was spawned.
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def launch(self, app: str, args: Optional[list[str]] = None) -> Dict[str, Any]:
        """Start an application without waiting for it to exit.

        Args:
            app: Executable name, or a full command line when ``args`` is empty.
            args: Optional argument list; when given, ``app`` is executed
                directly with these arguments and no shell is involved.

        Returns:
            ``{"success": True, "pid": <pid>}`` on success, otherwise
            ``{"success": False, "error": ...}``.
        """
        try:
            if args:
                proc = subprocess.Popen([app, *args])
            else:
                # allow shell command like "libreoffice --writer"
                # NOTE(review): `app` is interpreted by the shell here; callers
                # must not pass untrusted input through this path.
                proc = subprocess.Popen(app, shell=True)
            return {"success": True, "pid": proc.pid}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _get_window_by_id(self, window_id: int | str):
        """Return the pywinctl window whose handle matches ``window_id``.

        Handles are compared as strings so an id matches whether it arrives
        as an ``int`` or as its string form.

        Returns:
            The matching window object, or ``None`` when no window matches or
            the window list cannot be enumerated.

        Raises:
            RuntimeError: If pywinctl could not be imported.
        """
        if pwc is None:
            raise RuntimeError("pywinctl not available")
        wanted = str(window_id)
        try:
            # getAllWindowsDict() is keyed by application name, not by handle,
            # so the window objects themselves have to be scanned.
            candidates = pwc.getAllWindows()
        except Exception:
            return None
        for candidate in candidates:
            try:
                if str(candidate.getHandle()) == wanted:
                    return candidate
            except Exception:
                # Skip a window whose handle cannot be read and keep scanning.
                continue
        return None

    async def get_current_window_id(self) -> Dict[str, Any]:
        """Report the handle of the currently active window.

        Returns:
            ``{"success": True, "window_id": <handle>}``, or a failure dict
            when pywinctl is missing or there is no active window.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            win = pwc.getActiveWindow()
            if not win:
                return {"success": False, "error": "No active window"}
            return {"success": True, "window_id": win.getHandle()}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_application_windows(self, app: str) -> Dict[str, Any]:
        """List handles of windows whose title contains ``app``.

        The lookup is a case-insensitive "contains" match on window titles.

        Returns:
            ``{"success": True, "windows": [<handle>, ...]}`` or a failure dict.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            wins = pwc.getWindowsWithTitle(app, condition=pwc.Re.CONTAINS, flags=pwc.Re.IGNORECASE)
            ids = [w.getHandle() for w in wins]
            return {"success": True, "windows": ids}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_window_name(self, window_id: int | str) -> Dict[str, Any]:
        """Report the title of the window identified by ``window_id``.

        Returns:
            ``{"success": True, "name": <title>}`` or a failure dict.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            w = self._get_window_by_id(window_id)
            if not w:
                return {"success": False, "error": "Window not found"}
            return {"success": True, "name": w.title}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_window_size(self, window_id: int | str) -> Dict[str, Any]:
        """Report the size of the window identified by ``window_id``.

        Returns:
            ``{"success": True, "width": <int>, "height": <int>}`` or a
            failure dict.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            w = self._get_window_by_id(window_id)
            if not w:
                return {"success": False, "error": "Window not found"}
            width, height = w.size
            return {"success": True, "width": int(width), "height": int(height)}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def activate_window(self, window_id: int | str) -> Dict[str, Any]:
        """Activate the window identified by ``window_id``.

        Returns:
            ``{"success": <bool>}`` reflecting the result of ``activate()``,
            or a failure dict with an ``error`` message.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            w = self._get_window_by_id(window_id)
            if not w:
                return {"success": False, "error": "Window not found"}
            ok = w.activate()
            return {"success": bool(ok)}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def close_window(self, window_id: int | str) -> Dict[str, Any]:
        """Close the window identified by ``window_id``.

        Returns:
            ``{"success": <bool>}`` reflecting the result of ``close()``,
            or a failure dict with an ``error`` message.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            w = self._get_window_by_id(window_id)
            if not w:
                return {"success": False, "error": "Window not found"}
            ok = w.close()
            return {"success": bool(ok)}
        except Exception as e:
            return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# ===== Cross-platform file system command handlers =====
|
||||
|
||||
|
||||
class GenericFileHandler(BaseFileHandler):
|
||||
"""
|
||||
Generic file handler that provides file system operations for all operating systems.
|
||||
|
||||
@@ -75,7 +75,7 @@ except Exception:
|
||||
except Exception:
|
||||
package_version = "unknown"
|
||||
|
||||
accessibility_handler, automation_handler, diorama_handler, file_handler, desktop_handler = (
|
||||
accessibility_handler, automation_handler, diorama_handler, file_handler, desktop_handler, window_handler = (
|
||||
HandlerFactory.create_handlers()
|
||||
)
|
||||
handlers = {
|
||||
@@ -102,6 +102,15 @@ handlers = {
|
||||
# Desktop commands
|
||||
"get_desktop_environment": desktop_handler.get_desktop_environment,
|
||||
"set_wallpaper": desktop_handler.set_wallpaper,
|
||||
# Window management
|
||||
"open": window_handler.open,
|
||||
"launch": window_handler.launch,
|
||||
"get_current_window_id": window_handler.get_current_window_id,
|
||||
"get_application_windows": window_handler.get_application_windows,
|
||||
"get_window_name": window_handler.get_window_name,
|
||||
"get_window_size": window_handler.get_window_size,
|
||||
"activate_window": window_handler.activate_window,
|
||||
"close_window": window_handler.close_window,
|
||||
# Mouse commands
|
||||
"mouse_down": automation_handler.mouse_down,
|
||||
"mouse_up": automation_handler.mouse_up,
|
||||
|
||||
@@ -23,6 +23,7 @@ dependencies = [
|
||||
"aiohttp>=3.9.1",
|
||||
"pyperclip>=1.9.0",
|
||||
"websockets>=12.0",
|
||||
"pywinctl>=0.4.1",
|
||||
# OS-specific runtime deps
|
||||
"pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'",
|
||||
"pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'",
|
||||
|
||||
@@ -499,6 +499,62 @@ class GenericComputerInterface(BaseComputerInterface):
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to set wallpaper"))
|
||||
|
||||
# Window management
|
||||
async def open(self, target: str) -> None:
    """Ask the server to open a file path or URL.

    Args:
        target: File path or URL forwarded in the ``open`` command.

    Raises:
        RuntimeError: If the server response does not report success.
    """
    result = await self._send_command("open", {"target": target})
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to open target"))
|
||||
|
||||
async def launch(self, app: str, args: list[str] | None = None) -> int | None:
|
||||
payload: dict[str, object] = {"app": app}
|
||||
if args is not None:
|
||||
payload["args"] = args
|
||||
result = await self._send_command("launch", payload)
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to launch application"))
|
||||
return result.get("pid") # type: ignore[return-value]
|
||||
|
||||
async def get_current_window_id(self) -> int | str:
|
||||
result = await self._send_command("get_current_window_id")
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to get current window id"))
|
||||
return result["window_id"] # type: ignore[return-value]
|
||||
|
||||
async def get_application_windows(self, app: str) -> list[int | str]:
|
||||
result = await self._send_command("get_application_windows", {"app": app})
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to get application windows"))
|
||||
return list(result.get("windows", [])) # type: ignore[return-value]
|
||||
|
||||
async def get_window_name(self, window_id: int | str) -> str:
|
||||
result = await self._send_command("get_window_name", {"window_id": window_id})
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to get window name"))
|
||||
return result.get("name", "") # type: ignore[return-value]
|
||||
|
||||
async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
|
||||
result = await self._send_command("get_window_size", {"window_id": window_id})
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to get window size"))
|
||||
return int(result.get("width", 0)), int(result.get("height", 0))
|
||||
|
||||
async def activate_window(self, window_id: int | str) -> None:
|
||||
result = await self._send_command("activate_window", {"window_id": window_id})
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to activate window"))
|
||||
|
||||
async def close_window(self, window_id: int | str) -> None:
|
||||
result = await self._send_command("close_window", {"window_id": window_id})
|
||||
if not result.get("success", False):
|
||||
raise RuntimeError(result.get("error", "Failed to close window"))
|
||||
|
||||
# Convenience aliases
|
||||
async def get_window_title(self, window_id: int | str) -> str:
|
||||
return await self.get_window_name(window_id)
|
||||
|
||||
async def window_size(self, window_id: int | str) -> tuple[int, int]:
|
||||
return await self.get_window_size(window_id)
|
||||
|
||||
# Command execution
|
||||
async def run_command(self, command: str) -> CommandResult:
|
||||
result = await self._send_command("run_command", {"command": command})
|
||||
|
||||
@@ -314,6 +314,16 @@ export abstract class BaseComputerInterface {
|
||||
abstract getScreenSize(): Promise<ScreenSize>;
|
||||
abstract getCursorPosition(): Promise<CursorPosition>;
|
||||
|
||||
// Window Management
|
||||
/** Open a file path or URL. */
abstract open(target: string): Promise<void>;

/** Launch an application; resolves to a pid when one is available. */
abstract launch(app: string, args?: string[]): Promise<number | undefined>;

/** Get the id of the current window. */
abstract getCurrentWindowId(): Promise<number | string>;

/** Get the ids of the windows belonging to an application. */
abstract getApplicationWindows(app: string): Promise<Array<number | string>>;

/** Get the name of a window by id. */
abstract getWindowName(windowId: number | string): Promise<string>;

/** Get the size of a window by id as a two-number tuple. */
abstract getWindowSize(windowId: number | string): Promise<[number, number]>;

/** Activate a window by id. */
abstract activateWindow(windowId: number | string): Promise<void>;

/** Close a window by id. */
abstract closeWindow(windowId: number | string): Promise<void>;
|
||||
|
||||
// Desktop Actions
|
||||
abstract getDesktopEnvironment(): Promise<string>;
|
||||
abstract setWallpaper(path: string): Promise<void>;
|
||||
|
||||
@@ -212,6 +212,76 @@ export class MacOSComputerInterface extends BaseComputerInterface {
|
||||
return response.position as CursorPosition;
|
||||
}
|
||||
|
||||
// Window Management
|
||||
/**
 * Open a file path or URL with the default handler.
 *
 * @param target File path or URL sent with the `open` command.
 * @throws Error when the response does not report success.
 */
async open(target: string): Promise<void> {
  const response = await this.sendCommand('open', { target });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to open target');
  }
}
|
||||
|
||||
/**
 * Launch an application (string may include args).
 *
 * @param app Application name or command line.
 * @param args Optional argument list; only sent when provided.
 * @returns The pid from the response, or `undefined` when it is absent or falsy.
 * @throws Error when the response does not report success.
 */
async launch(app: string, args?: string[]): Promise<number | undefined> {
  const response = await this.sendCommand('launch', args ? { app, args } : { app });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to launch application');
  }
  return (response.pid as number) || undefined;
}
|
||||
|
||||
/**
 * Get the current active window id.
 *
 * @returns The `window_id` value from the response.
 * @throws Error when the response does not report success or carries no `window_id`.
 */
async getCurrentWindowId(): Promise<number | string> {
  const response = await this.sendCommand('get_current_window_id');
  if (!response.success || response.window_id === undefined) {
    throw new Error((response.error as string) || 'Failed to get current window id');
  }
  return response.window_id as number | string;
}
|
||||
|
||||
/**
 * Get windows belonging to an application (by name).
 *
 * @param app Application name sent with the command.
 * @returns The `windows` array from the response, or an empty array when it is absent.
 * @throws Error when the response does not report success.
 */
async getApplicationWindows(app: string): Promise<Array<number | string>> {
  const response = await this.sendCommand('get_application_windows', { app });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to get application windows');
  }
  return (response.windows as Array<number | string>) || [];
}
|
||||
|
||||
/**
 * Get window title/name by id.
 *
 * @param windowId Identifier of the window, sent as `window_id`.
 * @returns The `name` value from the response, or `''` when it is absent.
 * @throws Error when the response does not report success.
 */
async getWindowName(windowId: number | string): Promise<string> {
  const response = await this.sendCommand('get_window_name', { window_id: windowId });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to get window name');
  }
  return (response.name as string) || '';
}
|
||||
|
||||
/**
 * Get window size as [width, height].
 *
 * @param windowId Identifier of the window, sent as `window_id`.
 * @returns `[width, height]`; a missing or non-numeric dimension becomes `0`.
 * @throws Error when the response does not report success.
 */
async getWindowSize(windowId: number | string): Promise<[number, number]> {
  const response = await this.sendCommand('get_window_size', { window_id: windowId });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to get window size');
  }
  return [Number(response.width) || 0, Number(response.height) || 0];
}
|
||||
|
||||
/**
 * Activate a window by id.
 *
 * @param windowId Identifier of the window, sent as `window_id`.
 * @throws Error when the response does not report success.
 */
async activateWindow(windowId: number | string): Promise<void> {
  const response = await this.sendCommand('activate_window', { window_id: windowId });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to activate window');
  }
}
|
||||
|
||||
/**
 * Close a window by id.
 *
 * @param windowId Identifier of the window, sent as `window_id`.
 * @throws Error when the response does not report success.
 */
async closeWindow(windowId: number | string): Promise<void> {
  const response = await this.sendCommand('close_window', { window_id: windowId });
  if (!response.success) {
    throw new Error((response.error as string) || 'Failed to close window');
  }
}
|
||||
|
||||
// Desktop Actions
|
||||
/**
|
||||
* Get the current desktop environment string (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows').
|
||||
|
||||
Reference in New Issue
Block a user