mirror of
https://github.com/trycua/computer.git
synced 2026-05-01 04:30:50 -05:00
Merge branch 'main' into feat/browser-tool
This commit is contained in:
@@ -8,6 +8,7 @@ from . import (
|
||||
composed_grounded,
|
||||
gelato,
|
||||
gemini,
|
||||
generic_vlm,
|
||||
glm45v,
|
||||
gta1,
|
||||
holo,
|
||||
@@ -16,7 +17,6 @@ from . import (
|
||||
omniparser,
|
||||
openai,
|
||||
opencua,
|
||||
generic_vlm,
|
||||
uiins,
|
||||
uitars,
|
||||
uitars2,
|
||||
@@ -24,19 +24,19 @@ from . import (
|
||||
|
||||
__all__ = [
|
||||
"anthropic",
|
||||
"openai",
|
||||
"uitars",
|
||||
"omniparser",
|
||||
"gta1",
|
||||
"composed_grounded",
|
||||
"glm45v",
|
||||
"opencua",
|
||||
"internvl",
|
||||
"holo",
|
||||
"moondream3",
|
||||
"gelato",
|
||||
"gemini",
|
||||
"generic_vlm",
|
||||
"glm45v",
|
||||
"gta1",
|
||||
"holo",
|
||||
"internvl",
|
||||
"moondream3",
|
||||
"omniparser",
|
||||
"openai",
|
||||
"opencua",
|
||||
"uiins",
|
||||
"gelato",
|
||||
"uitars",
|
||||
"uitars2",
|
||||
]
|
||||
|
||||
@@ -442,7 +442,7 @@ def get_all_element_descriptions(responses_items: List[Dict[str, Any]]) -> List[
|
||||
|
||||
# Conversion functions between responses_items and completion messages formats
|
||||
def convert_responses_items_to_completion_messages(
|
||||
messages: List[Dict[str, Any]],
|
||||
messages: List[Dict[str, Any]],
|
||||
allow_images_in_tool_results: bool = True,
|
||||
send_multiple_user_images_per_parallel_tool_results: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
@@ -573,25 +573,33 @@ def convert_responses_items_to_completion_messages(
|
||||
"computer_call_output",
|
||||
]
|
||||
# Send tool message + separate user message with image (OpenAI compatible)
|
||||
completion_messages += [
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": "[Execution completed. See screenshot below]",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": output.get("image_url")}}
|
||||
],
|
||||
},
|
||||
] if send_multiple_user_images_per_parallel_tool_results or (not is_next_message_image_result) else [
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": "[Execution completed. See screenshot below]",
|
||||
},
|
||||
]
|
||||
completion_messages += (
|
||||
[
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": "[Execution completed. See screenshot below]",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": output.get("image_url")},
|
||||
}
|
||||
],
|
||||
},
|
||||
]
|
||||
if send_multiple_user_images_per_parallel_tool_results
|
||||
or (not is_next_message_image_result)
|
||||
else [
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": "[Execution completed. See screenshot below]",
|
||||
},
|
||||
]
|
||||
)
|
||||
else:
|
||||
# Handle text output as tool response
|
||||
completion_messages.append(
|
||||
|
||||
@@ -24,7 +24,7 @@ dependencies = [
|
||||
"certifi>=2024.2.2",
|
||||
"litellm>=1.74.12"
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
|
||||
[project.optional-dependencies]
|
||||
openai = []
|
||||
|
||||
@@ -12,7 +12,7 @@ authors = [
|
||||
]
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
dependencies = [
|
||||
"fastapi>=0.111.0",
|
||||
"uvicorn[standard]>=0.27.0",
|
||||
|
||||
@@ -45,7 +45,9 @@ class CloudProvider(BaseVMProvider):
|
||||
# Fall back to environment variable if api_key not provided
|
||||
if api_key is None:
|
||||
api_key = os.getenv("CUA_API_KEY")
|
||||
assert api_key, "api_key required for CloudProvider (provide via parameter or CUA_API_KEY environment variable)"
|
||||
assert (
|
||||
api_key
|
||||
), "api_key required for CloudProvider (provide via parameter or CUA_API_KEY environment variable)"
|
||||
self.api_key = api_key
|
||||
self.verbose = verbose
|
||||
self.api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
|
||||
|
||||
@@ -19,7 +19,7 @@ dependencies = [
|
||||
"pydantic>=2.11.1",
|
||||
"mslex>=1.3.0",
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
|
||||
[project.optional-dependencies]
|
||||
lume = [
|
||||
|
||||
@@ -15,7 +15,7 @@ dependencies = [
|
||||
"httpx>=0.24.0",
|
||||
"posthog>=3.20.0"
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
|
||||
name = "cua-mcp-server"
|
||||
description = "MCP Server for Computer-Use Agent (CUA)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
version = "0.1.15"
|
||||
authors = [
|
||||
{name = "TryCua", email = "gh@trycua.com"}
|
||||
|
||||
@@ -24,7 +24,7 @@ dependencies = [
|
||||
"typing-extensions>=4.9.0",
|
||||
"pydantic>=2.6.3"
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.12,<3.14"
|
||||
readme = "README.md"
|
||||
license = {text = "AGPL-3.0-or-later"}
|
||||
keywords = ["computer-vision", "ocr", "ui-analysis", "icon-detection"]
|
||||
|
||||
Reference in New Issue
Block a user