Merge pull request #516 from trycua/feat/add-desktop-commands

Add desktop and window management commands to computer interface
This commit is contained in:
ddupont
2025-10-29 16:20:12 -04:00
committed by GitHub
13 changed files with 3684 additions and 11 deletions

View File

@@ -18,7 +18,7 @@ Execute shell commands and get detailed results:
# Run shell command
result = await computer.interface.run_command(cmd) # result.stdout, result.stderr, result.returncode
```
</Tab>
<Tab value="TypeScript">
@@ -30,6 +30,63 @@ Execute shell commands and get detailed results:
</Tab>
</Tabs>
## Window Management
Control application launching and windows:
<Tabs items={['Python', 'TypeScript']}>
<Tab value="Python">
```python
# Launch applications
await computer.interface.launch("xfce4-terminal")
await computer.interface.launch("libreoffice --writer")
await computer.interface.open("https://www.google.com")
# Window management
windows = await computer.interface.get_application_windows("xfce4-terminal")
window_id = windows[0]
await computer.interface.activate_window(window_id)
window_id = await computer.interface.get_current_window_id() # get the current active window id
await computer.interface.window_size(window_id)
await computer.interface.get_window_title(window_id)
await computer.interface.get_window_position(window_id)
await computer.interface.set_window_size(window_id, 1200, 800)
await computer.interface.set_window_position(window_id, 100, 100)
await computer.interface.maximize_window(window_id)
await computer.interface.minimize_window(window_id)
await computer.interface.close_window(window_id)
```
</Tab>
<Tab value="TypeScript">
```typescript
// Launch applications
await computer.interface.launch("xfce4-terminal");
await computer.interface.launch("libreoffice --writer");
await computer.interface.open("https://www.google.com");
// Window management
const windows = await computer.interface.getApplicationWindows("xfce4-terminal");
let windowId = windows[0];
await computer.interface.activateWindow(windowId);
windowId = await computer.interface.getCurrentWindowId(); // current active window id
await computer.interface.getWindowSize(windowId);
await computer.interface.getWindowName(windowId);
await computer.interface.getWindowPosition(windowId);
await computer.interface.setWindowSize(windowId, 1200, 800);
await computer.interface.setWindowPosition(windowId, 100, 100);
await computer.interface.maximizeWindow(windowId);
await computer.interface.minimizeWindow(windowId);
await computer.interface.closeWindow(windowId);
```
</Tab>
</Tabs>
## Mouse Actions
Precise mouse control and interaction:
@@ -162,6 +219,35 @@ Screen capture and display information:
</Tab>
</Tabs>
## Desktop Actions
Control desktop environment features like wallpaper:
<Tabs items={['Python', 'TypeScript']}>
<Tab value="Python">
```python
# Get current desktop environment (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows')
env = await computer.interface.get_desktop_environment()
print(env) # "xfce4"
# Set desktop wallpaper to an image file accessible on the VM
await computer.interface.set_wallpaper("/home/cua/shared/wallpaper.png")
```
</Tab>
<Tab value="TypeScript">
```typescript
// Get current desktop environment
const env = await computer.interface.getDesktopEnvironment();
console.log(env); // "xfce4"
// Set desktop wallpaper to an image file accessible on the VM
await computer.interface.setWallpaper('/home/cua/shared/wallpaper.png');
```
</Tab>
</Tabs>
## Clipboard Actions
System clipboard management:

View File

@@ -85,6 +85,102 @@ class BaseFileHandler(ABC):
pass
class BaseDesktopHandler(ABC):
    """Interface that every OS-specific desktop handler has to implement.

    Categories:
    - Desktop environment: Method for querying the desktop environment name
    - Wallpaper Actions: Methods for wallpaper operations
    """

    # Desktop environment

    @abstractmethod
    async def get_desktop_environment(self) -> Dict[str, Any]:
        """Report the name of the desktop environment currently in use."""

    # Wallpaper Actions

    @abstractmethod
    async def set_wallpaper(self, path: str) -> Dict[str, Any]:
        """Use the file located at ``path`` as the desktop wallpaper."""
class BaseWindowHandler(ABC):
"""Abstract class for OS-specific window management handlers.
Categories:
- Window Management: Methods for application/window control
"""
# Window Management
@abstractmethod
async def open(self, target: str) -> Dict[str, Any]:
"""Open a file or URL with the default application."""
pass
@abstractmethod
async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
"""Launch an application with optional arguments."""
pass
@abstractmethod
async def get_current_window_id(self) -> Dict[str, Any]:
"""Get the currently active window ID."""
pass
@abstractmethod
async def get_application_windows(self, app: str) -> Dict[str, Any]:
"""Get windows belonging to an application (by name or bundle)."""
pass
@abstractmethod
async def get_window_name(self, window_id: str) -> Dict[str, Any]:
"""Get the title/name of a window by ID."""
pass
@abstractmethod
async def get_window_size(self, window_id: str | int) -> Dict[str, Any]:
"""Get the size of a window by ID as {width, height}."""
pass
@abstractmethod
async def activate_window(self, window_id: str | int) -> Dict[str, Any]:
"""Bring a window to the foreground by ID."""
pass
@abstractmethod
async def close_window(self, window_id: str | int) -> Dict[str, Any]:
"""Close a window by ID."""
pass
@abstractmethod
async def get_window_position(self, window_id: str | int) -> Dict[str, Any]:
"""Get the top-left position of a window as {x, y}."""
pass
@abstractmethod
async def set_window_size(
self, window_id: str | int, width: int, height: int
) -> Dict[str, Any]:
"""Set the size of a window by ID."""
pass
@abstractmethod
async def set_window_position(self, window_id: str | int, x: int, y: int) -> Dict[str, Any]:
"""Set the position of a window by ID."""
pass
@abstractmethod
async def maximize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Maximize a window by ID."""
pass
@abstractmethod
async def minimize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Minimize a window by ID."""
pass
class BaseAutomationHandler(ABC):
"""Abstract base class for OS-specific automation handlers.

View File

@@ -4,7 +4,13 @@ from typing import Tuple, Type
from computer_server.diorama.base import BaseDioramaHandler
from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
from .base import (
BaseAccessibilityHandler,
BaseAutomationHandler,
BaseDesktopHandler,
BaseFileHandler,
BaseWindowHandler,
)
# Conditionally import platform-specific handlers
system = platform.system().lower()
@@ -17,7 +23,7 @@ elif system == "linux":
elif system == "windows":
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler
from .generic import GenericFileHandler
from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler
class HandlerFactory:
@@ -49,9 +55,14 @@ class HandlerFactory:
raise RuntimeError(f"Failed to determine current OS: {str(e)}")
@staticmethod
def create_handlers() -> (
Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]
):
def create_handlers() -> Tuple[
BaseAccessibilityHandler,
BaseAutomationHandler,
BaseDioramaHandler,
BaseFileHandler,
BaseDesktopHandler,
BaseWindowHandler,
]:
"""Create and return appropriate handlers for the current OS.
Returns:
@@ -70,6 +81,8 @@ class HandlerFactory:
MacOSAutomationHandler(),
MacOSDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "linux":
return (
@@ -77,6 +90,8 @@ class HandlerFactory:
LinuxAutomationHandler(),
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "windows":
return (
@@ -84,6 +99,8 @@ class HandlerFactory:
WindowsAutomationHandler(),
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
else:
raise NotImplementedError(f"OS '{os_type}' is not supported")

View File

@@ -2,15 +2,26 @@
Generic handlers for all OSes.
Includes:
- DesktopHandler
- WindowHandler
- FileHandler
"""
import base64
import os
import platform
import subprocess
import webbrowser
from pathlib import Path
from typing import Any, Dict, Optional
from .base import BaseFileHandler
from ..utils import wallpaper
from .base import BaseDesktopHandler, BaseFileHandler, BaseWindowHandler
try:
import pywinctl as pwc
except Exception: # pragma: no cover
pwc = None # type: ignore
def resolve_path(path: str) -> Path:
@@ -25,6 +36,233 @@ def resolve_path(path: str) -> Path:
return Path(path).expanduser().resolve()
# ===== Cross-platform Desktop command handlers =====
class GenericDesktopHandler(BaseDesktopHandler):
    """
    Desktop operations shared by all operating systems.

    Both methods delegate to the ``wallpaper`` utility module and convert any
    exception into a ``{"success": False, "error": ...}`` response.

    Implements:
    - get_desktop_environment: detect current desktop environment
    - set_wallpaper: set desktop wallpaper path
    """

    async def get_desktop_environment(self) -> Dict[str, Any]:
        """
        Detect which desktop environment is running.

        Returns:
            Dict containing 'success' boolean and either 'environment' string or 'error' string
        """
        try:
            detected = wallpaper.get_desktop_environment()
        except Exception as exc:
            return {"success": False, "error": str(exc)}
        return {"success": True, "environment": detected}

    async def set_wallpaper(self, path: str) -> Dict[str, Any]:
        """
        Apply the image at ``path`` as the desktop wallpaper.

        Args:
            path: The file path to set as wallpaper; it is passed through
                ``resolve_path`` before use

        Returns:
            Dict containing 'success' boolean and optionally 'error' string
        """
        try:
            applied = wallpaper.set_wallpaper(str(resolve_path(path)))
        except Exception as exc:
            return {"success": False, "error": str(exc)}
        return {"success": bool(applied)}
# ===== Cross-platform window control command handlers =====
class GenericWindowHandler(BaseWindowHandler):
    """
    Cross-platform window management using pywinctl where possible.

    Every coroutine returns a dict carrying a ``success`` boolean. Exceptions
    raised while handling a command are caught and reported through an
    ``error`` string instead of propagating to the caller.
    """

    async def open(self, target: str) -> Dict[str, Any]:
        """Open a URL in the web browser, or a path with the OS default handler.

        Args:
            target: An ``http://``/``https://`` URL, or a filesystem path
                (passed through ``resolve_path``).

        Returns:
            ``{"success": bool}``, plus ``"error"`` on failure.
        """
        try:
            if target.startswith(("http://", "https://")):
                return {"success": bool(webbrowser.open(target))}

            path = str(resolve_path(target))
            # Deliberately not called ``sys`` so the stdlib module name is not shadowed.
            system = platform.system().lower()
            if system == "darwin":
                subprocess.Popen(["open", path])
            elif system == "linux":
                subprocess.Popen(["xdg-open", path])
            elif system == "windows":
                os.startfile(path)  # type: ignore[attr-defined]
            else:
                return {"success": False, "error": f"Unsupported OS: {system}"}
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def launch(self, app: str, args: Optional[list[str]] = None) -> Dict[str, Any]:
        """Start an application and report the PID of the spawned process.

        Args:
            app: Executable name, or a whole command line when ``args`` is
                empty or omitted.
            args: Optional argument list; when non-empty the program is run
                directly as ``[app, *args]`` without a shell.

        Returns:
            ``{"success": True, "pid": int}`` or ``{"success": False, "error": str}``.
        """
        try:
            if args:
                proc = subprocess.Popen([app, *args])
            else:
                # allow shell command like "libreoffice --writer"
                # NOTE(review): with shell=True the string is interpreted by the
                # shell, so ``app`` must come from a trusted caller.
                proc = subprocess.Popen(app, shell=True)
            return {"success": True, "pid": proc.pid}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _get_window_by_id(self, window_id: int | str) -> Optional[Any]:
        """Return the pywinctl window whose handle matches ``window_id``.

        Handles are compared as strings so that ``123`` and ``"123"`` match.

        Returns:
            The matching window, or ``None`` if there is no match or the
            enumeration itself fails.

        Raises:
            RuntimeError: If pywinctl could not be imported.
        """
        if pwc is None:
            raise RuntimeError("pywinctl not available")
        # Find by native handle among Window objects; getAllWindowsDict keys are titles
        try:
            for w in pwc.getAllWindows():
                if str(w.getHandle()) == str(window_id):
                    return w
            return None
        except Exception:
            return None

    def _with_window(self, window_id: int | str, action: Any) -> Dict[str, Any]:
        """Look up ``window_id`` and return ``action(window)``.

        Centralises the checks shared by every per-window command: pywinctl
        availability, window lookup, and conversion of exceptions into an
        error response.

        Args:
            window_id: Handle of the window to operate on.
            action: Callable that receives the pywinctl window and returns
                the response dict for the command.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            window = self._get_window_by_id(window_id)
            if not window:
                return {"success": False, "error": "Window not found"}
            return action(window)
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_current_window_id(self) -> Dict[str, Any]:
        """Return the handle of the active window as ``window_id``."""
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            win = pwc.getActiveWindow()
            if not win:
                return {"success": False, "error": "No active window"}
            return {"success": True, "window_id": win.getHandle()}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_application_windows(self, app: str) -> Dict[str, Any]:
        """Return handles of windows whose title contains ``app`` (case-insensitive).

        NOTE(review): matching is done on the window *title*, not on the
        owning process name - confirm this is what callers expect.
        """
        try:
            if pwc is None:
                return {"success": False, "error": "pywinctl not available"}
            wins = pwc.getWindowsWithTitle(app, condition=pwc.Re.CONTAINS, flags=pwc.Re.IGNORECASE)
            ids = [w.getHandle() for w in wins]
            return {"success": True, "windows": ids}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_window_name(self, window_id: int | str) -> Dict[str, Any]:
        """Return the window title as ``name``."""
        return self._with_window(window_id, lambda w: {"success": True, "name": w.title})

    async def get_window_size(self, window_id: int | str) -> Dict[str, Any]:
        """Return the window size as integer ``width`` and ``height``."""

        def describe(w: Any) -> Dict[str, Any]:
            # Read ``size`` once so width and height come from the same snapshot.
            width, height = w.size
            return {"success": True, "width": int(width), "height": int(height)}

        return self._with_window(window_id, describe)

    async def get_window_position(self, window_id: int | str) -> Dict[str, Any]:
        """Return the window position as integer ``x`` and ``y``."""

        def describe(w: Any) -> Dict[str, Any]:
            # Read ``position`` once so x and y come from the same snapshot.
            x, y = w.position
            return {"success": True, "x": int(x), "y": int(y)}

        return self._with_window(window_id, describe)

    async def set_window_size(
        self, window_id: int | str, width: int, height: int
    ) -> Dict[str, Any]:
        """Resize the window; ``success`` mirrors the result of ``resizeTo``."""
        return self._with_window(
            window_id, lambda w: {"success": bool(w.resizeTo(int(width), int(height)))}
        )

    async def set_window_position(self, window_id: int | str, x: int, y: int) -> Dict[str, Any]:
        """Move the window; ``success`` mirrors the result of ``moveTo``."""
        return self._with_window(window_id, lambda w: {"success": bool(w.moveTo(int(x), int(y)))})

    async def maximize_window(self, window_id: int | str) -> Dict[str, Any]:
        """Maximize the window; ``success`` mirrors the result of ``maximize``."""
        return self._with_window(window_id, lambda w: {"success": bool(w.maximize())})

    async def minimize_window(self, window_id: int | str) -> Dict[str, Any]:
        """Minimize the window; ``success`` mirrors the result of ``minimize``."""
        return self._with_window(window_id, lambda w: {"success": bool(w.minimize())})

    async def activate_window(self, window_id: int | str) -> Dict[str, Any]:
        """Activate the window; ``success`` mirrors the result of ``activate``."""
        return self._with_window(window_id, lambda w: {"success": bool(w.activate())})

    async def close_window(self, window_id: int | str) -> Dict[str, Any]:
        """Close the window; ``success`` mirrors the result of ``close``."""
        return self._with_window(window_id, lambda w: {"success": bool(w.close())})
# ===== Cross-platform file system command handlers =====
class GenericFileHandler(BaseFileHandler):
"""
Generic file handler that provides file system operations for all operating systems.

View File

@@ -75,9 +75,14 @@ except Exception:
except Exception:
package_version = "unknown"
accessibility_handler, automation_handler, diorama_handler, file_handler = (
HandlerFactory.create_handlers()
)
(
accessibility_handler,
automation_handler,
diorama_handler,
file_handler,
desktop_handler,
window_handler,
) = HandlerFactory.create_handlers()
handlers = {
"version": lambda: {"protocol": protocol_version, "package": package_version},
# App-Use commands
@@ -99,6 +104,23 @@ handlers = {
"delete_file": file_handler.delete_file,
"create_dir": file_handler.create_dir,
"delete_dir": file_handler.delete_dir,
# Desktop commands
"get_desktop_environment": desktop_handler.get_desktop_environment,
"set_wallpaper": desktop_handler.set_wallpaper,
# Window management
"open": window_handler.open,
"launch": window_handler.launch,
"get_current_window_id": window_handler.get_current_window_id,
"get_application_windows": window_handler.get_application_windows,
"get_window_name": window_handler.get_window_name,
"get_window_size": window_handler.get_window_size,
"get_window_position": window_handler.get_window_position,
"set_window_size": window_handler.set_window_size,
"set_window_position": window_handler.set_window_position,
"maximize_window": window_handler.maximize_window,
"minimize_window": window_handler.minimize_window,
"activate_window": window_handler.activate_window,
"close_window": window_handler.close_window,
# Mouse commands
"mouse_down": automation_handler.mouse_down,
"mouse_up": automation_handler.mouse_up,

View File

@@ -0,0 +1,3 @@
from . import wallpaper
__all__ = ["wallpaper"]

View File

@@ -0,0 +1,321 @@
"""Set the desktop wallpaper."""
import os
import subprocess
import sys
from pathlib import Path
def get_desktop_environment() -> str:
    """
    Returns the name of the current desktop environment.

    Windows and macOS are identified from ``sys.platform``. Elsewhere the
    answer comes from, in order: ``$DESKTOP_SESSION``, the
    ``$KDE_FULL_SESSION`` / ``$GNOME_DESKTOP_SESSION_ID`` variables, and
    finally a scan of the process list. Falls back to ``"unknown"``.
    """
    # From https://stackoverflow.com/a/21213358/2624876
    # which takes from:
    # http://stackoverflow.com/questions/2035657/what-is-my-current-desktop-environment
    # and http://ubuntuforums.org/showthread.php?t=652320
    # and http://ubuntuforums.org/showthread.php?t=1139057
    if sys.platform in ("win32", "cygwin"):
        return "windows"
    if sys.platform == "darwin":
        return "mac"

    # Most likely a POSIX system from here on.
    session = os.environ.get("DESKTOP_SESSION")
    if session is not None:
        # Lower-cased so the comparisons below need not care about case.
        session = session.lower()
        exact_names = (
            "gnome",
            "unity",
            "cinnamon",
            "mate",
            "xfce4",
            "lxde",
            "fluxbox",
            "blackbox",
            "openbox",
            "icewm",
            "jwm",
            "afterstep",
            "trinity",
            "kde",
        )
        if session in exact_names:
            return session

        ## Special cases ##
        # Canonical sets $DESKTOP_SESSION to Lubuntu rather than LXDE if using LXDE.
        # There is no guarantee that they will not do the same with the other desktop environments.
        if "xfce" in session or session.startswith("xubuntu"):
            return "xfce4"
        # Order matters: "ubuntustudio" has to be tested before "ubuntu".
        prefix_table = (
            ("ubuntustudio", "kde"),
            ("ubuntu", "gnome"),
            ("lubuntu", "lxde"),
            ("kubuntu", "kde"),
            ("razor", "razor-qt"),  # e.g. razorkwin
            ("wmaker", "windowmaker"),  # e.g. wmaker-common
        )
        for prefix, environment in prefix_table:
            if session.startswith(prefix):
                return environment

    gnome_session_id = os.environ.get("GNOME_DESKTOP_SESSION_ID")
    if os.environ.get("KDE_FULL_SESSION") == "true":
        return "kde"
    if gnome_session_id:
        # Whenever this variable is set the process probes below are skipped,
        # even if its value is marked "deprecated".
        return "unknown" if "deprecated" in gnome_session_id else "gnome2"
    # From http://ubuntuforums.org/showthread.php?t=652320
    if is_running("xfce-mcs-manage"):
        return "xfce4"
    if is_running("ksmserver"):
        return "kde"
    return "unknown"
def is_running(process: str) -> bool:
    """Returns whether a process with the given name is (likely) currently running.

    Uses a basic text search over the output of ``ps axw`` (or
    ``tasklist /v`` when ``ps`` cannot be started), and so may have false
    positives.

    Args:
        process: Text to look for in each line of the process listing.

    Returns:
        ``True`` if any line of the listing contains ``process``; ``False``
        otherwise, including when neither listing command can be started.
    """
    # From http://www.bloggerpolis.com/2011/05/how-to-check-if-a-process-is-running-using-python/
    # and http://richarddingwall.name/2009/06/18/windows-equivalents-of-ps-and-kill-commands/
    for command in (["ps", "axw"], ["tasklist", "/v"]):  # Linux/Unix first, then Windows
        try:
            # run() waits for the child and closes the pipe, so nothing is leaked.
            listing = subprocess.run(command, stdout=subprocess.PIPE, check=False)
        except OSError:
            # Command not available on this system; try the next one.
            continue
        # Decode rather than str(bytes): the repr form ("b'...'") adds quoting
        # and escape characters that produce spurious matches.
        text = listing.stdout.decode(errors="replace")
        return any(process in line for line in text.splitlines())
    return False
def set_wallpaper(file_loc: str, first_run: bool = True) -> bool:
    """Sets the wallpaper to the given file location.

    The mechanism is selected from the value returned by
    ``get_desktop_environment()``. Most branches spawn a helper program
    without waiting for it, so ``True`` means a method was attempted, not
    that the wallpaper has visibly changed.

    Args:
        file_loc: Path of the image file to use as wallpaper.
        first_run: When true, the razor-qt branch rewrites its config file and
            the "unsupported environment" warning is written to stderr.

    Returns:
        ``False`` if the desktop environment is not supported or ``fbsetbg``
        could not be started; ``True`` otherwise.
    """
    # From https://stackoverflow.com/a/21213504/2624876
    # I have not personally tested most of this. -- @1j01
    # -----------------------------------------
    # Note: There are two common Linux desktop environments where
    # I have not been able to set the desktop background from
    # command line: KDE, Enlightenment
    desktop_env = get_desktop_environment()
    if desktop_env in ["gnome", "unity", "cinnamon"]:
        # Tested on Ubuntu 22 -- @1j01
        uri = Path(file_loc).as_uri()
        SCHEMA = "org.gnome.desktop.background"
        KEY = "picture-uri"
        # Needed for Ubuntu 22 in dark mode
        # Might be better to set only one or the other, depending on the current theme
        # In the settings it will say "This background selection only applies to the dark style"
        # even if it's set for both, arguably referring to the selection that you can make on that page.
        # -- @1j01
        KEY_DARK = "picture-uri-dark"
        try:
            from gi.repository import Gio  # type: ignore

            gsettings = Gio.Settings.new(SCHEMA)  # type: ignore
            gsettings.set_string(KEY, uri)
            gsettings.set_string(KEY_DARK, uri)
        except Exception:
            # Fallback tested on Ubuntu 22 -- @1j01
            args = ["gsettings", "set", SCHEMA, KEY, uri]
            subprocess.Popen(args)
            args = ["gsettings", "set", SCHEMA, KEY_DARK, uri]
            subprocess.Popen(args)
    elif desktop_env == "mate":
        try:  # MATE >= 1.6
            # info from http://wiki.mate-desktop.org/docs:gsettings
            args = ["gsettings", "set", "org.mate.background", "picture-filename", file_loc]
            subprocess.Popen(args)
        except Exception:  # MATE < 1.6
            # From https://bugs.launchpad.net/variety/+bug/1033918
            args = [
                "mateconftool-2",
                "-t",
                "string",
                "--set",
                "/desktop/mate/background/picture_filename",
                file_loc,
            ]
            subprocess.Popen(args)
    elif desktop_env == "gnome2":  # Not tested
        # From https://bugs.launchpad.net/variety/+bug/1033918
        args = [
            "gconftool-2",
            "-t",
            "string",
            "--set",
            "/desktop/gnome/background/picture_filename",
            file_loc,
        ]
        subprocess.Popen(args)
    ## KDE4 is difficult
    ## see http://blog.zx2c4.com/699 for a solution that might work
    elif desktop_env in ["kde3", "trinity"]:
        # From http://ubuntuforums.org/archive/index.php/t-803417.html
        args = ["dcop", "kdesktop", "KBackgroundIface", "setWallpaper", "0", file_loc, "6"]
        subprocess.Popen(args)
    elif desktop_env == "xfce4":
        # Iterate over all wallpaper-related keys and set to file_loc
        try:
            list_proc = subprocess.run(
                ["xfconf-query", "-c", "xfce4-desktop", "-l"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            keys = []
            if list_proc.stdout:
                for line in list_proc.stdout.splitlines():
                    line = line.strip()
                    if not line:
                        continue
                    # Common keys: .../last-image and .../image-path
                    if "/last-image" in line or "/image-path" in line:
                        keys.append(line)
            # Fallback: known defaults if none were listed
            if not keys:
                keys = [
                    "/backdrop/screen0/monitorVNC-0/workspace0/last-image",
                    "/backdrop/screen0/monitor0/image-path",
                ]
            for key in keys:
                subprocess.run(
                    [
                        "xfconf-query",
                        "-c",
                        "xfce4-desktop",
                        "-p",
                        key,
                        "-s",
                        file_loc,
                    ],
                    check=False,
                )
        except Exception:
            # Best effort: the reload below is still attempted.
            pass
        # Reload xfdesktop to apply changes
        subprocess.Popen(["xfdesktop", "--reload"])
    elif desktop_env == "razor-qt":  # TODO: implement reload of desktop when possible
        if first_run:
            import configparser

            desktop_conf = configparser.ConfigParser()
            # Development version
            desktop_conf_file = os.path.join(get_config_dir("razor"), "desktop.conf")
            if os.path.isfile(desktop_conf_file):
                config_option = R"screens\1\desktops\1\wallpaper"
            else:
                desktop_conf_file = os.path.join(get_home_dir(), ".razor/desktop.conf")
                config_option = R"desktops\1\wallpaper"
            desktop_conf.read(desktop_conf_file)
            try:
                if desktop_conf.has_option("razor", config_option):  # only replacing a value
                    desktop_conf.set("razor", config_option, file_loc)
                    with open(desktop_conf_file, "w", encoding="utf-8", errors="replace") as f:
                        desktop_conf.write(f)
            except Exception:
                pass
        else:
            # TODO: reload desktop when possible
            pass
    elif desktop_env in ["fluxbox", "jwm", "openbox", "afterstep"]:
        # http://fluxbox-wiki.org/index.php/Howto_set_the_background
        # used fbsetbg on jwm too since I am too lazy to edit the XML configuration
        # now where fbsetbg does the job excellent anyway.
        # and I have not figured out how else it can be set on Openbox and AfterSTep
        # but fbsetbg works excellent here too.
        try:
            args = ["fbsetbg", file_loc]
            subprocess.Popen(args)
        except Exception:
            sys.stderr.write("ERROR: Failed to set wallpaper with fbsetbg!\n")
            sys.stderr.write("Please make sure that You have fbsetbg installed.\n")
            # Nothing was applied, so do not report success to the caller.
            return False
    elif desktop_env == "icewm":
        # command found at http://urukrama.wordpress.com/2007/12/05/desktop-backgrounds-in-window-managers/
        args = ["icewmbg", file_loc]
        subprocess.Popen(args)
    elif desktop_env == "blackbox":
        # command found at http://blackboxwm.sourceforge.net/BlackboxDocumentation/BlackboxBackground
        args = ["bsetbg", "-full", file_loc]
        subprocess.Popen(args)
    elif desktop_env == "lxde":
        args = ["pcmanfm", "--set-wallpaper", file_loc, "--wallpaper-mode=scaled"]
        subprocess.Popen(args)
    elif desktop_env == "windowmaker":
        # From http://www.commandlinefu.com/commands/view/3857/set-wallpaper-on-windowmaker-in-one-line
        args = ["wmsetbg", "-s", "-u", file_loc]
        subprocess.Popen(args)
    # NOTE: "enlightenment" is not handled; the original author could not make
    # it work on e17 (on e16 `enlightenment_remote -desktop-bg-add` was the
    # likely direction).
    elif desktop_env == "windows":
        # From https://stackoverflow.com/questions/1977694/change-desktop-background
        # Tested on Windows 10. -- @1j01
        import ctypes

        SPI_SETDESKWALLPAPER = 20
        ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, file_loc, 0)  # type: ignore
    elif desktop_env == "mac":
        # From https://stackoverflow.com/questions/431205/how-can-i-programatically-change-the-background-in-mac-os-x
        try:
            # Tested on macOS 10.14.6 (Mojave) -- @1j01
            assert (
                sys.platform == "darwin"
            )  # ignore `Import "appscript" could not be resolved` for other platforms
            from appscript import app, mactypes

            app("Finder").desktop_picture.set(mactypes.File(file_loc))
        except ImportError:
            # Tested on macOS 10.14.6 (Mojave) -- @1j01
            # The path is handed to the script as an argument instead of being
            # interpolated into it, to protect against command injection
            # (both in the shell and in AppleScript).
            OSASCRIPT = """
on run (clp)
if clp's length is not 1 then error "Incorrect Parameters"
local file_loc
set file_loc to clp's item 1
tell application "Finder" to set desktop picture to POSIX file file_loc
end run
"""
            subprocess.Popen(["osascript", "-e", OSASCRIPT, "--", file_loc])
    else:
        if first_run:  # don't spam the user with the same message over and over again
            # Each message ends with a newline so the two sentences do not run
            # together on stderr.
            sys.stderr.write(
                "Warning: Failed to set wallpaper. Your desktop environment is not supported.\n"
            )
            sys.stderr.write(f"You can try manually to set your wallpaper to {file_loc}\n")
        return False
    return True
def get_config_dir(app_name: str) -> str:
    """Return the directory in which ``app_name`` keeps its configuration.

    The base directory is the first available of: ``$XDG_CONFIG_HOME``,
    ``$APPDATA`` (Windows), ``xdg.BaseDirectory.xdg_config_home`` when the
    ``xdg`` package can be imported, and finally ``~/.config``.
    """
    for variable in ("XDG_CONFIG_HOME", "APPDATA"):  # APPDATA: on Windows
        if variable in os.environ:
            return os.path.join(os.environ[variable], app_name)

    try:
        from xdg import BaseDirectory
    except ImportError:  # Most likely a Linux/Unix system anyway
        return os.path.join(get_home_dir(), ".config", app_name)
    return os.path.join(BaseDirectory.xdg_config_home, app_name)
def get_home_dir() -> str:
    """Return the current user's home directory, obtained by expanding ``~``."""
    home = os.path.expanduser("~")
    return home

View File

@@ -23,6 +23,7 @@ dependencies = [
"aiohttp>=3.9.1",
"pyperclip>=1.9.0",
"websockets>=12.0",
"pywinctl>=0.4.1",
# OS-specific runtime deps
"pyobjc-framework-Cocoa>=10.1; sys_platform == 'darwin'",
"pyobjc-framework-Quartz>=10.1; sys_platform == 'darwin'",

View File

@@ -436,6 +436,189 @@ class BaseComputerInterface(ABC):
"""
pass
# Desktop actions
@abstractmethod
async def get_desktop_environment(self) -> str:
"""Get the current desktop environment.
Returns:
The name of the current desktop environment.
"""
pass
@abstractmethod
async def set_wallpaper(self, path: str) -> None:
"""Set the desktop wallpaper to the specified path.
Args:
path: The file path to set as wallpaper
"""
pass
# Window management
@abstractmethod
async def open(self, target: str) -> None:
"""Open a target using the system's default handler.
Typically opens files, folders, or URLs with the associated application.
Args:
target: The file path, folder path, or URL to open.
"""
pass
@abstractmethod
async def launch(self, app: str, args: List[str] | None = None) -> Optional[int]:
"""Launch an application with optional arguments.
Args:
app: The application executable or bundle identifier.
args: Optional list of arguments to pass to the application.
Returns:
Optional process ID (PID) of the launched application if available, otherwise None.
"""
pass
@abstractmethod
async def get_current_window_id(self) -> int | str:
"""Get the identifier of the currently active/focused window.
Returns:
A window identifier that can be used with other window management methods.
"""
pass
@abstractmethod
async def get_application_windows(self, app: str) -> List[int | str]:
"""Get all window identifiers for a specific application.
Args:
app: The application name, executable, or identifier to query.
Returns:
A list of window identifiers belonging to the specified application.
"""
pass
@abstractmethod
async def get_window_name(self, window_id: int | str) -> str:
"""Get the title/name of a window.
Args:
window_id: The window identifier.
Returns:
The window's title or name string.
"""
pass
@abstractmethod
async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
"""Get the size of a window in pixels.
Args:
window_id: The window identifier.
Returns:
A tuple of (width, height) representing the window size in pixels.
"""
pass
@abstractmethod
async def get_window_position(self, window_id: int | str) -> tuple[int, int]:
"""Get the screen position of a window.
Args:
window_id: The window identifier.
Returns:
A tuple of (x, y) representing the window's top-left corner in screen coordinates.
"""
pass
@abstractmethod
async def set_window_size(self, window_id: int | str, width: int, height: int) -> None:
"""Set the size of a window in pixels.
Args:
window_id: The window identifier.
width: Desired width in pixels.
height: Desired height in pixels.
"""
pass
@abstractmethod
async def set_window_position(self, window_id: int | str, x: int, y: int) -> None:
"""Move a window to a specific position on the screen.
Args:
window_id: The window identifier.
x: X coordinate for the window's top-left corner.
y: Y coordinate for the window's top-left corner.
"""
pass
@abstractmethod
async def maximize_window(self, window_id: int | str) -> None:
"""Maximize a window.
Args:
window_id: The window identifier.
"""
pass
@abstractmethod
async def minimize_window(self, window_id: int | str) -> None:
"""Minimize a window.
Args:
window_id: The window identifier.
"""
pass
@abstractmethod
async def activate_window(self, window_id: int | str) -> None:
"""Bring a window to the foreground and focus it.
Args:
window_id: The window identifier.
"""
pass
@abstractmethod
async def close_window(self, window_id: int | str) -> None:
"""Close a window.
Args:
window_id: The window identifier.
"""
pass
# Convenience aliases
async def get_window_title(self, window_id: int | str) -> str:
"""Convenience alias for get_window_name().
Args:
window_id: The window identifier.
Returns:
The window's title or name string.
"""
return await self.get_window_name(window_id)
async def window_size(self, window_id: int | str) -> tuple[int, int]:
"""Convenience alias for get_window_size().
Args:
window_id: The window identifier.
Returns:
A tuple of (width, height) representing the window size in pixels.
"""
return await self.get_window_size(window_id)
# Shell actions
@abstractmethod
async def run_command(self, command: str) -> CommandResult:
"""Run shell command and return structured result.

View File

@@ -487,6 +487,104 @@ class GenericComputerInterface(BaseComputerInterface):
raise RuntimeError(result.get("error", "Failed to list directory"))
return result.get("files", [])
# Desktop actions
async def get_desktop_environment(self) -> str:
    """Ask the remote side which desktop environment it is running.

    Returns:
        The response's ``environment`` field, or ``"unknown"`` when the
        field is absent.

    Raises:
        RuntimeError: If the response does not report success; the message
            is the response's ``error`` field when present.
    """
    response = await self._send_command("get_desktop_environment")
    if response.get("success", False):
        return response.get("environment", "unknown")
    raise RuntimeError(response.get("error", "Failed to get desktop environment"))
async def set_wallpaper(self, path: str) -> None:
    """Send the ``set_wallpaper`` command for the image at ``path``.

    Args:
        path: Image path forwarded verbatim in the command payload.

    Raises:
        RuntimeError: If the response does not report success; the message
            is the response's ``error`` field when present.
    """
    response = await self._send_command("set_wallpaper", {"path": path})
    if response.get("success", False):
        return
    raise RuntimeError(response.get("error", "Failed to set wallpaper"))
# Window management
async def open(self, target: str) -> None:
    """Send the ``open`` command for ``target``.

    Args:
        target: Value forwarded verbatim as the ``target`` payload field.

    Raises:
        RuntimeError: If the response does not report success; the message
            is the response's ``error`` field when present.
    """
    response = await self._send_command("open", {"target": target})
    if response.get("success", False):
        return
    raise RuntimeError(response.get("error", "Failed to open target"))
async def launch(self, app: str, args: list[str] | None = None) -> int | None:
payload: dict[str, object] = {"app": app}
if args is not None:
payload["args"] = args
result = await self._send_command("launch", payload)
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to launch application"))
return result.get("pid") # type: ignore[return-value]
async def get_current_window_id(self) -> int | str:
result = await self._send_command("get_current_window_id")
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get current window id"))
return result["window_id"] # type: ignore[return-value]
async def get_application_windows(self, app: str) -> list[int | str]:
result = await self._send_command("get_application_windows", {"app": app})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get application windows"))
return list(result.get("windows", [])) # type: ignore[return-value]
async def get_window_name(self, window_id: int | str) -> str:
result = await self._send_command("get_window_name", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get window name"))
return result.get("name", "") # type: ignore[return-value]
async def get_window_size(self, window_id: int | str) -> tuple[int, int]:
result = await self._send_command("get_window_size", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get window size"))
return int(result.get("width", 0)), int(result.get("height", 0))
async def get_window_position(self, window_id: int | str) -> tuple[int, int]:
result = await self._send_command("get_window_position", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to get window position"))
return int(result.get("x", 0)), int(result.get("y", 0))
async def set_window_size(self, window_id: int | str, width: int, height: int) -> None:
result = await self._send_command(
"set_window_size", {"window_id": window_id, "width": width, "height": height}
)
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to set window size"))
async def set_window_position(self, window_id: int | str, x: int, y: int) -> None:
result = await self._send_command(
"set_window_position", {"window_id": window_id, "x": x, "y": y}
)
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to set window position"))
async def maximize_window(self, window_id: int | str) -> None:
result = await self._send_command("maximize_window", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to maximize window"))
async def minimize_window(self, window_id: int | str) -> None:
result = await self._send_command("minimize_window", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to minimize window"))
async def activate_window(self, window_id: int | str) -> None:
result = await self._send_command("activate_window", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to activate window"))
async def close_window(self, window_id: int | str) -> None:
result = await self._send_command("close_window", {"window_id": window_id})
if not result.get("success", False):
raise RuntimeError(result.get("error", "Failed to close window"))
# Convenience aliases
async def get_window_title(self, window_id: int | str) -> str:
return await self.get_window_name(window_id)
async def window_size(self, window_id: int | str) -> tuple[int, int]:
return await self.get_window_size(window_id)
# Command execution
async def run_command(self, command: str) -> CommandResult:
result = await self._send_command("run_command", {"command": command})

View File

@@ -314,6 +314,25 @@ export abstract class BaseComputerInterface {
abstract getScreenSize(): Promise<ScreenSize>;
abstract getCursorPosition(): Promise<CursorPosition>;
// Window Management
/** Open a file path or URL with the default handler. */
abstract open(target: string): Promise<void>;
/** Launch an application, optionally with arguments. Resolves to the pid if available. */
abstract launch(app: string, args?: string[]): Promise<number | undefined>;
/** Get the id of the current active window. */
abstract getCurrentWindowId(): Promise<number | string>;
/** Get the ids of the windows belonging to an application (by name). */
abstract getApplicationWindows(app: string): Promise<Array<number | string>>;
/** Get a window's title/name by id. */
abstract getWindowName(windowId: number | string): Promise<string>;
/** Get a window's size as [width, height]. */
abstract getWindowSize(windowId: number | string): Promise<[number, number]>;
/** Get a window's position as [x, y]. */
abstract getWindowPosition(windowId: number | string): Promise<[number, number]>;
/** Set a window's size. */
abstract setWindowSize(windowId: number | string, width: number, height: number): Promise<void>;
/** Set a window's position. */
abstract setWindowPosition(windowId: number | string, x: number, y: number): Promise<void>;
/** Maximize a window. */
abstract maximizeWindow(windowId: number | string): Promise<void>;
/** Minimize a window. */
abstract minimizeWindow(windowId: number | string): Promise<void>;
/** Activate a window by id. */
abstract activateWindow(windowId: number | string): Promise<void>;
/** Close a window by id. */
abstract closeWindow(windowId: number | string): Promise<void>;
// Desktop Actions
/** Get the current desktop environment string (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows'). */
abstract getDesktopEnvironment(): Promise<string>;
/** Set the desktop wallpaper image from a path to an image file. */
abstract setWallpaper(path: string): Promise<void>;
// Clipboard Actions
abstract copyToClipboard(): Promise<string>;
abstract setClipboard(text: string): Promise<void>;

View File

@@ -3,8 +3,8 @@
*/
import type { ScreenSize } from '../types';
import { BaseComputerInterface } from './base';
import type { AccessibilityNode, CursorPosition, MouseButton } from './base';
import { BaseComputerInterface } from './base';
export class MacOSComputerInterface extends BaseComputerInterface {
// Mouse Actions
@@ -212,6 +212,148 @@ export class MacOSComputerInterface extends BaseComputerInterface {
return response.position as CursorPosition;
}
// Window Management
/**
 * Open a file path or URL with the default handler.
 *
 * @param target File path or URL sent as the `target` field of the `open` command.
 * @throws Error when the response does not report success; the message is the
 *   response's `error` field when it is non-empty.
 */
async open(target: string): Promise<void> {
  const reply = await this.sendCommand('open', { target });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to open target');
}
/**
 * Launch an application (the string may include args).
 *
 * @param app Application string sent as the `app` field.
 * @param args Optional argument list; only included in the payload when truthy.
 * @returns The response's `pid` when it is truthy, otherwise `undefined`.
 * @throws Error when the response does not report success.
 */
async launch(app: string, args?: string[]): Promise<number | undefined> {
  const params = args ? { app, args } : { app };
  const reply = await this.sendCommand('launch', params);
  if (reply.success) {
    const pid = reply.pid as number;
    return pid || undefined;
  }
  throw new Error((reply.error as string) || 'Failed to launch application');
}
/**
 * Get the current active window id.
 *
 * @returns The response's `window_id` field.
 * @throws Error when the response does not report success or carries no `window_id`.
 */
async getCurrentWindowId(): Promise<number | string> {
  const reply = await this.sendCommand('get_current_window_id');
  if (reply.success && reply.window_id !== undefined) {
    return reply.window_id as number | string;
  }
  throw new Error((reply.error as string) || 'Failed to get current window id');
}
/**
 * Get windows belonging to an application (by name).
 *
 * @param app Application name sent as the `app` field.
 * @returns The response's `windows` field, or an empty array when it is falsy.
 * @throws Error when the response does not report success.
 */
async getApplicationWindows(app: string): Promise<Array<number | string>> {
  const reply = await this.sendCommand('get_application_windows', { app });
  if (reply.success) {
    const windowIds = reply.windows as Array<number | string>;
    return windowIds || [];
  }
  throw new Error((reply.error as string) || 'Failed to get application windows');
}
/**
 * Get window title/name by id.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @returns The response's `name` field, or `''` when it is falsy.
 * @throws Error when the response does not report success.
 */
async getWindowName(windowId: number | string): Promise<string> {
  const reply = await this.sendCommand('get_window_name', { window_id: windowId });
  if (reply.success) {
    const title = reply.name as string;
    return title || '';
  }
  throw new Error((reply.error as string) || 'Failed to get window name');
}
/**
 * Get window size as [width, height].
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @returns The response's `width` and `height` converted with `Number()`;
 *   each falls back to 0 when the conversion is falsy (including NaN).
 * @throws Error when the response does not report success.
 */
async getWindowSize(windowId: number | string): Promise<[number, number]> {
  const reply = await this.sendCommand('get_window_size', { window_id: windowId });
  if (!reply.success) {
    throw new Error((reply.error as string) || 'Failed to get window size');
  }
  const width = Number(reply.width) || 0;
  const height = Number(reply.height) || 0;
  return [width, height];
}
/**
 * Get window position as [x, y].
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @returns The response's `x` and `y` converted with `Number()`; each falls
 *   back to 0 when the conversion is falsy (including NaN).
 * @throws Error when the response does not report success.
 */
async getWindowPosition(windowId: number | string): Promise<[number, number]> {
  const reply = await this.sendCommand('get_window_position', { window_id: windowId });
  if (!reply.success) {
    throw new Error((reply.error as string) || 'Failed to get window position');
  }
  const x = Number(reply.x) || 0;
  const y = Number(reply.y) || 0;
  return [x, y];
}
/**
 * Set window size.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @param width Value sent as the `width` field.
 * @param height Value sent as the `height` field.
 * @throws Error when the response does not report success.
 */
async setWindowSize(windowId: number | string, width: number, height: number): Promise<void> {
  const params = { window_id: windowId, width, height };
  const reply = await this.sendCommand('set_window_size', params);
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to set window size');
}
/**
 * Set window position.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @param x Value sent as the `x` field.
 * @param y Value sent as the `y` field.
 * @throws Error when the response does not report success.
 */
async setWindowPosition(windowId: number | string, x: number, y: number): Promise<void> {
  const params = { window_id: windowId, x, y };
  const reply = await this.sendCommand('set_window_position', params);
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to set window position');
}
/**
 * Maximize a window.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @throws Error when the response does not report success.
 */
async maximizeWindow(windowId: number | string): Promise<void> {
  const reply = await this.sendCommand('maximize_window', { window_id: windowId });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to maximize window');
}
/**
 * Minimize a window.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @throws Error when the response does not report success.
 */
async minimizeWindow(windowId: number | string): Promise<void> {
  const reply = await this.sendCommand('minimize_window', { window_id: windowId });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to minimize window');
}
/**
 * Activate a window by id.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @throws Error when the response does not report success.
 */
async activateWindow(windowId: number | string): Promise<void> {
  const reply = await this.sendCommand('activate_window', { window_id: windowId });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to activate window');
}
/**
 * Close a window by id.
 *
 * @param windowId Window identifier sent as the `window_id` field.
 * @throws Error when the response does not report success.
 */
async closeWindow(windowId: number | string): Promise<void> {
  const reply = await this.sendCommand('close_window', { window_id: windowId });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to close window');
}
// Desktop Actions
/**
 * Get the current desktop environment string (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows').
 *
 * @returns The response's `environment` field, or `'unknown'` when it is falsy.
 * @throws Error when the response does not report success.
 */
async getDesktopEnvironment(): Promise<string> {
  const reply = await this.sendCommand('get_desktop_environment');
  if (reply.success) {
    const environment = reply.environment as string;
    return environment || 'unknown';
  }
  throw new Error((reply.error as string) || 'Failed to get desktop environment');
}
/**
 * Set the desktop wallpaper image.
 *
 * @param path Absolute path to the image file on the VM
 * @throws Error when the response does not report success.
 */
async setWallpaper(path: string): Promise<void> {
  const reply = await this.sendCommand('set_wallpaper', { path });
  if (reply.success) {
    return;
  }
  throw new Error((reply.error as string) || 'Failed to set wallpaper');
}
// Clipboard Actions
/**
* Copy current selection to clipboard and return the content.

2447
uv.lock generated

File diff suppressed because it is too large Load Diff