diff --git a/libs/python/agent/agent/loops/anthropic.py b/libs/python/agent/agent/loops/anthropic.py
index 50fbd24e..306c5f33 100644
--- a/libs/python/agent/agent/loops/anthropic.py
+++ b/libs/python/agent/agent/loops/anthropic.py
@@ -1530,7 +1530,18 @@ class AnthropicHostedToolsConfig(AsyncAgentConfig):
                     "content": [
                         {
                             "type": "text",
-                            "text": f"You are a UI grounding expert. Look at the image and {instruction}. Output ONLY a click action on the target element. No explanations, confirmations, or additional text."
+                            "text": f"""You are a UI grounding expert. Follow these guidelines:
+
+1. NEVER ask for confirmation. Complete all tasks autonomously.
+2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
+3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
+4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
+5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
+6. The user has already given you permission by running this agent. No further confirmation is needed.
+7. Be decisive and action-oriented. Complete the requested task fully.
+
+Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
+Task: Click {instruction}. Output ONLY a click action on the target element."""
                         },
                         {
                             "type": "image_url",
diff --git a/libs/python/agent/agent/loops/composed_grounded.py b/libs/python/agent/agent/loops/composed_grounded.py
index a5de9895..770f274d 100644
--- a/libs/python/agent/agent/loops/composed_grounded.py
+++ b/libs/python/agent/agent/loops/composed_grounded.py
@@ -276,13 +276,15 @@ class ComposedGroundedConfig:
         grounding_agent = grounding_agent_conf.agent_class()
 
         for desc in element_descriptions:
-            coords = await grounding_agent.predict_click(
-                model=grounding_model,
-                image_b64=last_image_b64,
-                instruction=desc
-            )
-            if coords:
-                self.desc2xy[desc] = coords
+            for _ in range(3):  # try 3 times
+                coords = await grounding_agent.predict_click(
+                    model=grounding_model,
+                    image_b64=last_image_b64,
+                    instruction=desc
+                )
+                if coords:
+                    self.desc2xy[desc] = coords
+                    break
 
         # Step 6: Convert computer calls from descriptions back to xy coordinates
         final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy)