diff --git a/docs/content/docs/computer-sdk/commands.mdx b/docs/content/docs/computer-sdk/commands.mdx
index 4d36baa4..349d67d8 100644
--- a/docs/content/docs/computer-sdk/commands.mdx
+++ b/docs/content/docs/computer-sdk/commands.mdx
@@ -22,6 +22,51 @@ Execute shell commands and get detailed results:
+## Window Management
+
+Launch applications, open files or URLs, and inspect or control application windows:
+
+<Tabs items={['Python', 'TypeScript']}>
+  <Tab value="Python">
+ ```python
+ # Launch applications
+ await computer.interface.launch("xfce4-terminal")
+ await computer.interface.launch("libreoffice --writer")
+ await computer.interface.open("https://www.google.com")
+
+ # Window management
+ windows = await computer.interface.get_application_windows("xfce4-terminal")
+ window_id = windows[0]
+ await computer.interface.activate_window(window_id)
+
+ window_id = await computer.interface.get_current_window_id() # get the current active window id
+ await computer.interface.window_size(window_id)
+ await computer.interface.get_window_title(window_id)
+ await computer.interface.close_window(window_id)
+ ```
+  </Tab>
+
+  <Tab value="TypeScript">
+ ```typescript
+ // Launch applications
+ await computer.interface.launch("xfce4-terminal");
+ await computer.interface.launch("libreoffice --writer");
+ await computer.interface.open("https://www.google.com");
+
+ // Window management
+ const windows = await computer.interface.getApplicationWindows("xfce4-terminal");
+ let windowId = windows[0];
+ await computer.interface.activateWindow(windowId);
+
+ windowId = await computer.interface.getCurrentWindowId(); // current active window id
+ await computer.interface.getWindowSize(windowId);
+ await computer.interface.getWindowName(windowId);
+ await computer.interface.closeWindow(windowId);
+ ```
+  </Tab>
+</Tabs>
+
+
## Mouse Actions
Precise mouse control and interaction:
diff --git a/libs/python/computer-server/computer_server/handlers/base.py b/libs/python/computer-server/computer_server/handlers/base.py
index 73250d44..a49f232c 100644
--- a/libs/python/computer-server/computer_server/handlers/base.py
+++ b/libs/python/computer-server/computer_server/handlers/base.py
@@ -105,6 +105,55 @@ class BaseDesktopHandler(ABC):
pass
+class BaseWindowHandler(ABC):
+ """Abstract class for OS-specific window management handlers.
+
+ Categories:
+ - Window Management: Methods for application/window control
+ """
+
+ # Window Management
+ @abstractmethod
+ async def open(self, target: str) -> Dict[str, Any]:
+ """Open a file or URL with the default application."""
+ pass
+
+ @abstractmethod
+ async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
+ """Launch an application with optional arguments."""
+ pass
+
+ @abstractmethod
+ async def get_current_window_id(self) -> Dict[str, Any]:
+ """Get the currently active window ID."""
+ pass
+
+ @abstractmethod
+ async def get_application_windows(self, app: str) -> Dict[str, Any]:
+ """Get windows belonging to an application (by name or bundle)."""
+ pass
+
+ @abstractmethod
+ async def get_window_name(self, window_id: str) -> Dict[str, Any]:
+ """Get the title/name of a window by ID."""
+ pass
+
+ @abstractmethod
+ async def get_window_size(self, window_id: str) -> Dict[str, Any]:
+ """Get the size of a window by ID as {width, height}."""
+ pass
+
+ @abstractmethod
+ async def activate_window(self, window_id: str) -> Dict[str, Any]:
+ """Bring a window to the foreground by ID."""
+ pass
+
+ @abstractmethod
+ async def close_window(self, window_id: str) -> Dict[str, Any]:
+ """Close a window by ID."""
+ pass
+
+
class BaseAutomationHandler(ABC):
"""Abstract base class for OS-specific automation handlers.
diff --git a/libs/python/computer-server/computer_server/handlers/factory.py b/libs/python/computer-server/computer_server/handlers/factory.py
index 77d88e5f..aadcac15 100644
--- a/libs/python/computer-server/computer_server/handlers/factory.py
+++ b/libs/python/computer-server/computer_server/handlers/factory.py
@@ -9,6 +9,7 @@ from .base import (
BaseAutomationHandler,
BaseDesktopHandler,
BaseFileHandler,
+ BaseWindowHandler,
)
# Conditionally import platform-specific handlers
@@ -22,7 +23,7 @@ elif system == "linux":
elif system == "windows":
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
-from .generic import GenericDesktopHandler, GenericFileHandler
+from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
class HandlerFactory:
@@ -61,6 +62,7 @@ class HandlerFactory:
BaseDioramaHandler,
BaseFileHandler,
BaseDesktopHandler,
+ BaseWindowHandler,
]
):
"""Create and return appropriate handlers for the current OS.
@@ -82,6 +84,7 @@ class HandlerFactory:
MacOSDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
+ GenericWindowHandler(),
)
elif os_type == "linux":
return (
@@ -90,6 +93,7 @@ class HandlerFactory:
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
+ GenericWindowHandler(),
)
elif os_type == "windows":
return (
@@ -98,6 +102,7 @@ class HandlerFactory:
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
+ GenericWindowHandler(),
)
else:
raise NotImplementedError(f"OS '{os_type}' is not supported")
diff --git a/libs/python/computer-server/computer_server/handlers/generic.py b/libs/python/computer-server/computer_server/handlers/generic.py
index c7348312..e39a693c 100644
--- a/libs/python/computer-server/computer_server/handlers/generic.py
+++ b/libs/python/computer-server/computer_server/handlers/generic.py
@@ -8,11 +8,20 @@ Includes:
"""
import base64
+import os
+import platform
+import subprocess
+import webbrowser
from pathlib import Path
from typing import Any, Dict, Optional
from ..utils import wallpaper
-from .base import BaseDesktopHandler, BaseFileHandler
+from .base import BaseDesktopHandler, BaseFileHandler, BaseWindowHandler
+
+try:
+ import pywinctl as pwc
+except Exception: # pragma: no cover
+ pwc = None # type: ignore
def resolve_path(path: str) -> Path:
@@ -27,6 +36,9 @@ def resolve_path(path: str) -> Path:
return Path(path).expanduser().resolve()
+# ===== Cross-platform Desktop command handlers =====
+
+
class GenericDesktopHandler(BaseDesktopHandler):
"""
Generic desktop handler providing desktop-related operations.
@@ -67,6 +79,125 @@ class GenericDesktopHandler(BaseDesktopHandler):
return {"success": False, "error": str(e)}
+# ===== Cross-platform window control command handlers =====
+
+
+class GenericWindowHandler(BaseWindowHandler):
+ """
+ Cross-platform window management using pywinctl where possible.
+ """
+
+ async def open(self, target: str) -> Dict[str, Any]:
+ try:
+ if target.startswith("http://") or target.startswith("https://"):
+ ok = webbrowser.open(target)
+ return {"success": bool(ok)}
+ path = str(resolve_path(target))
+ sys = platform.system().lower()
+ if sys == "darwin":
+ subprocess.Popen(["open", path])
+ elif sys == "linux":
+ subprocess.Popen(["xdg-open", path])
+ elif sys == "windows":
+ os.startfile(path) # type: ignore[attr-defined]
+ else:
+ return {"success": False, "error": f"Unsupported OS: {sys}"}
+ return {"success": True}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def launch(self, app: str, args: Optional[list[str]] = None) -> Dict[str, Any]:
+ try:
+ if args:
+ proc = subprocess.Popen([app, *args])
+ else:
+ # allow shell command like "libreoffice --writer"
+ proc = subprocess.Popen(app, shell=True)
+ return {"success": True, "pid": proc.pid}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ def _get_window_by_id(self, window_id: int | str):
+ if pwc is None:
+ raise RuntimeError("pywinctl not available")
+ try:
+ windows = pwc.getAllWindowsDict()
+ return windows.get(window_id) or windows.get(int(window_id))
+ except Exception:
+ return None
+
+ async def get_current_window_id(self) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ win = pwc.getActiveWindow()
+ if not win:
+ return {"success": False, "error": "No active window"}
+ return {"success": True, "window_id": win.getHandle()}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def get_application_windows(self, app: str) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ wins = pwc.getWindowsWithTitle(app, condition=pwc.Re.CONTAINS, flags=pwc.Re.IGNORECASE)
+ ids = [w.getHandle() for w in wins]
+ return {"success": True, "windows": ids}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def get_window_name(self, window_id: int | str) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ w = self._get_window_by_id(window_id)
+ if not w:
+ return {"success": False, "error": "Window not found"}
+ return {"success": True, "name": w.title}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def get_window_size(self, window_id: int | str) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ w = self._get_window_by_id(window_id)
+ if not w:
+ return {"success": False, "error": "Window not found"}
+ width, height = w.size
+ return {"success": True, "width": int(width), "height": int(height)}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def activate_window(self, window_id: int | str) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ w = self._get_window_by_id(window_id)
+ if not w:
+ return {"success": False, "error": "Window not found"}
+ ok = w.activate()
+ return {"success": bool(ok)}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+ async def close_window(self, window_id: int | str) -> Dict[str, Any]:
+ try:
+ if pwc is None:
+ return {"success": False, "error": "pywinctl not available"}
+ w = self._get_window_by_id(window_id)
+ if not w:
+ return {"success": False, "error": "Window not found"}
+ ok = w.close()
+ return {"success": bool(ok)}
+ except Exception as e:
+ return {"success": False, "error": str(e)}
+
+
+# ===== Cross-platform file system command handlers =====
+
+
class GenericFileHandler(BaseFileHandler):
"""
Generic file handler that provides file system operations for all operating systems.
diff --git a/libs/python/computer-server/computer_server/main.py b/libs/python/computer-server/computer_server/main.py
index 005c7ddb..fc9b6354 100644
--- a/libs/python/computer-server/computer_server/main.py
+++ b/libs/python/computer-server/computer_server/main.py
@@ -75,7 +75,7 @@ except Exception:
except Exception:
package_version = "unknown"
-accessibility_handler, automation_handler, diorama_handler, file_handler, desktop_handler = (
+accessibility_handler, automation_handler, diorama_handler, file_handler, desktop_handler, window_handler = (
HandlerFactory.create_handlers()
)
handlers = {
@@ -102,6 +102,15 @@ handlers = {
# Desktop commands
"get_desktop_environment": desktop_handler.get_desktop_environment,
"set_wallpaper": desktop_handler.set_wallpaper,
+ # Window management
+ "open": window_handler.open,
+ "launch": window_handler.launch,
+ "get_current_window_id": window_handler.get_current_window_id,
+ "get_application_windows": window_handler.get_application_windows,
+ "get_window_name": window_handler.get_window_name,
+ "get_window_size": window_handler.get_window_size,
+ "activate_window": window_handler.activate_window,
+ "close_window": window_handler.close_window,
# Mouse commands
"mouse_down": automation_handler.mouse_down,
"mouse_up": automation_handler.mouse_up,
diff --git a/libs/python/computer-server/pyproject.toml b/libs/python/computer-server/pyproject.toml
index a8ecfb23..20fff950 100644
--- a/libs/python/computer-server/pyproject.toml
+++ b/libs/python/computer-server/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
"aiohttp>=3.9.1",
"pyperclip>=1.9.0",
"websockets>=12.0",
+ "pywinctl>=0.4.1",
# OS-specific runtime deps
"pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'",
"pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'",
diff --git a/libs/python/computer/computer/interface/generic.py b/libs/python/computer/computer/interface/generic.py
index 7cf47461..80b24199 100644
--- a/libs/python/computer/computer/interface/generic.py
+++ b/libs/python/computer/computer/interface/generic.py
@@ -499,6 +499,62 @@ class GenericComputerInterface(BaseComputerInterface):
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to set wallpaper"))
+ # Window management
+ async def open(self, target: str) -> None:
+ result = await self._send_command("open", {"target": target})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to open target"))
+
+ async def launch(self, app: str, args: list[str] | None = None) -> int | None:
+ payload: dict[str, object] = {"app": app}
+ if args is not None:
+ payload["args"] = args
+ result = await self._send_command("launch", payload)
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to launch application"))
+ return result.get("pid") # type: ignore[return-value]
+
+ async def get_current_window_id(self) -> int | str:
+ result = await self._send_command("get_current_window_id")
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to get current window id"))
+ return result["window_id"] # type: ignore[return-value]
+
+ async def get_application_windows(self, app: str) -> list[int | str]:
+ result = await self._send_command("get_application_windows", {"app": app})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to get application windows"))
+ return list(result.get("windows", [])) # type: ignore[return-value]
+
+ async def get_window_name(self, window_id: int | str) -> str:
+ result = await self._send_command("get_window_name", {"window_id": window_id})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to get window name"))
+ return result.get("name", "") # type: ignore[return-value]
+
+ async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
+ result = await self._send_command("get_window_size", {"window_id": window_id})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to get window size"))
+ return int(result.get("width", 0)), int(result.get("height", 0))
+
+ async def activate_window(self, window_id: int | str) -> None:
+ result = await self._send_command("activate_window", {"window_id": window_id})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to activate window"))
+
+ async def close_window(self, window_id: int | str) -> None:
+ result = await self._send_command("close_window", {"window_id": window_id})
+ if not result.get("success", False):
+ raise RuntimeError(result.get("error", "Failed to close window"))
+
+ # Convenience aliases
+ async def get_window_title(self, window_id: int | str) -> str:
+ return await self.get_window_name(window_id)
+
+ async def window_size(self, window_id: int | str) -> tuple[int, int]:
+ return await self.get_window_size(window_id)
+
# Command execution
async def run_command(self, command: str) -> CommandResult:
result = await self._send_command("run_command", {"command": command})
diff --git a/libs/typescript/computer/src/interface/base.ts b/libs/typescript/computer/src/interface/base.ts
index e41c4416..333060fa 100644
--- a/libs/typescript/computer/src/interface/base.ts
+++ b/libs/typescript/computer/src/interface/base.ts
@@ -314,6 +314,16 @@ export abstract class BaseComputerInterface {
abstract getScreenSize(): Promise;
abstract getCursorPosition(): Promise;
+ // Window Management
+ /** Open a file path or URL with the default handler. */
+ abstract open(target: string): Promise<void>;
+ /** Launch an application; resolves to its pid when one is reported. */
+ abstract launch(app: string, args?: string[]): Promise<number | undefined>;
+ /** Get the id of the currently active window. */
+ abstract getCurrentWindowId(): Promise<number | string>;
+ /** Get the ids of the windows belonging to an application (by name). */
+ abstract getApplicationWindows(app: string): Promise<Array<number | string>>;
+ /** Get a window's title/name by id. */
+ abstract getWindowName(windowId: number | string): Promise<string>;
+ /** Get a window's size as [width, height]. */
+ abstract getWindowSize(windowId: number | string): Promise<[number, number]>;
+ /** Bring a window to the foreground by id. */
+ abstract activateWindow(windowId: number | string): Promise<void>;
+ /** Close a window by id. */
+ abstract closeWindow(windowId: number | string): Promise<void>;
+
// Desktop Actions
abstract getDesktopEnvironment(): Promise;
abstract setWallpaper(path: string): Promise;
diff --git a/libs/typescript/computer/src/interface/macos.ts b/libs/typescript/computer/src/interface/macos.ts
index 86522042..8b317687 100644
--- a/libs/typescript/computer/src/interface/macos.ts
+++ b/libs/typescript/computer/src/interface/macos.ts
@@ -212,6 +212,76 @@ export class MacOSComputerInterface extends BaseComputerInterface {
return response.position as CursorPosition;
}
+ // Window Management
+ /**
+  * Open a file path or URL with the default handler.
+  *
+  * @param target File path or URL to open.
+  * @throws Error when the response does not report success.
+  */
+ async open(target: string): Promise<void> {
+   const response = await this.sendCommand('open', { target });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to open target');
+   }
+ }
+
+ /**
+  * Launch an application (string may include args).
+  *
+  * @param app Application to start.
+  * @param args Optional argument list; omitted from the command when not given.
+  * @returns The pid from the response, or `undefined` when it is missing or falsy.
+  * @throws Error when the response does not report success.
+  */
+ async launch(app: string, args?: string[]): Promise<number | undefined> {
+   const response = await this.sendCommand('launch', args ? { app, args } : { app });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to launch application');
+   }
+   return (response.pid as number) || undefined;
+ }
+
+ /**
+  * Get the current active window id.
+  *
+  * @returns The `window_id` field of the response.
+  * @throws Error when the response does not report success or carries no `window_id`.
+  */
+ async getCurrentWindowId(): Promise<number | string> {
+   const response = await this.sendCommand('get_current_window_id');
+   if (!response.success || response.window_id === undefined) {
+     throw new Error((response.error as string) || 'Failed to get current window id');
+   }
+   return response.window_id as number | string;
+ }
+
+ /**
+  * Get windows belonging to an application (by name).
+  *
+  * @param app Application name used to select windows.
+  * @returns The window ids from the response, or an empty array when none are reported.
+  * @throws Error when the response does not report success.
+  */
+ async getApplicationWindows(app: string): Promise<Array<number | string>> {
+   const response = await this.sendCommand('get_application_windows', { app });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to get application windows');
+   }
+   return (response.windows as Array<number | string>) || [];
+ }
+
+ /**
+  * Get window title/name by id.
+  *
+  * @param windowId Id of the window to query.
+  * @returns The `name` field of the response, or `''` when it is missing.
+  * @throws Error when the response does not report success.
+  */
+ async getWindowName(windowId: number | string): Promise<string> {
+   const response = await this.sendCommand('get_window_name', { window_id: windowId });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to get window name');
+   }
+   return (response.name as string) || '';
+ }
+
+ /**
+  * Get window size as [width, height].
+  *
+  * @param windowId Id of the window to measure.
+  * @returns `[width, height]`; a dimension that is missing or not numeric becomes 0.
+  * @throws Error when the response does not report success.
+  */
+ async getWindowSize(windowId: number | string): Promise<[number, number]> {
+   const response = await this.sendCommand('get_window_size', { window_id: windowId });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to get window size');
+   }
+   return [Number(response.width) || 0, Number(response.height) || 0];
+ }
+
+ /**
+  * Activate a window by id.
+  *
+  * @param windowId Id of the window to bring to the foreground.
+  * @throws Error when the response does not report success.
+  */
+ async activateWindow(windowId: number | string): Promise<void> {
+   const response = await this.sendCommand('activate_window', { window_id: windowId });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to activate window');
+   }
+ }
+
+ /**
+  * Close a window by id.
+  *
+  * @param windowId Id of the window to close.
+  * @throws Error when the response does not report success.
+  */
+ async closeWindow(windowId: number | string): Promise<void> {
+   const response = await this.sendCommand('close_window', { window_id: windowId });
+   if (!response.success) {
+     throw new Error((response.error as string) || 'Failed to close window');
+   }
+ }
+
// Desktop Actions
/**
* Get the current desktop environment string (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows').