mirror of
https://github.com/trycua/computer.git
synced 2026-05-06 23:21:32 -05:00
improved other grounding prompts
This commit is contained in:
@@ -1530,7 +1530,18 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig):
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": f"You are a UI grounding expert. Look at the image and {instruction}. Output ONLY a click action on the target element. No explanations, confirmations, or additional text."
|
||||
"text": f"""You are a UI grounding expert. Follow these guidelines:
|
||||
|
||||
1. NEVER ask for confirmation. Complete all tasks autonomously.
|
||||
2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
|
||||
3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
|
||||
4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
|
||||
5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
|
||||
6. The user has already given you permission by running this agent. No further confirmation is needed.
|
||||
7. Be decisive and action-oriented. Complete the requested task fully.
|
||||
|
||||
Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
|
||||
Task: Click {instruction}. Output ONLY a click action on the target element."""
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
|
||||
@@ -276,13 +276,15 @@ class ComposedGroundedConfig:
|
||||
grounding_agent = grounding_agent_conf.agent_class()
|
||||
|
||||
for desc in element_descriptions:
|
||||
coords = await grounding_agent.predict_click(
|
||||
model=grounding_model,
|
||||
image_b64=last_image_b64,
|
||||
instruction=desc
|
||||
)
|
||||
if coords:
|
||||
self.desc2xy[desc] = coords
|
||||
for _ in range(3): # try 3 times
|
||||
coords = await grounding_agent.predict_click(
|
||||
model=grounding_model,
|
||||
image_b64=last_image_b64,
|
||||
instruction=desc
|
||||
)
|
||||
if coords:
|
||||
self.desc2xy[desc] = coords
|
||||
break
|
||||
|
||||
# Step 6: Convert computer calls from descriptions back to xy coordinates
|
||||
final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy)
|
||||
|
||||
Reference in New Issue
Block a user