From 7d267701a4bfcd3f09be3e75d861c2cc86c2241e Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Sat, 14 Jun 2025 10:05:12 -0400
Subject: [PATCH 1/6] Removed pre/post action screenshots in tools. Improved
 image retention (#289)

---
 libs/agent/agent/core/messages.py             |  47 ++++-
 .../providers/anthropic/tools/computer.py     | 174 +-----------------
 .../agent/providers/openai/tools/computer.py  |  50 +----
 3 files changed, 57 insertions(+), 214 deletions(-)

diff --git a/libs/agent/agent/core/messages.py b/libs/agent/agent/core/messages.py
index 2a582a7a..d2c70558 100644
--- a/libs/agent/agent/core/messages.py
+++ b/libs/agent/agent/core/messages.py
@@ -81,16 +81,27 @@ class StandardMessageManager:
         if not self.config.num_images_to_keep:
             return messages
 
-        # Find user messages with images
+        # Find messages with images (both user messages and tool call outputs)
         image_messages = []
         for msg in messages:
+            has_image = False
+            
+            # Check user messages with images
             if msg["role"] == "user" and isinstance(msg["content"], list):
                 has_image = any(
                     item.get("type") == "image_url" or item.get("type") == "image"
                     for item in msg["content"]
                 )
-                if has_image:
-                    image_messages.append(msg)
+            
+            # Check assistant messages for tool_result items that carry a base64_image
+            elif msg["role"] == "assistant" and isinstance(msg["content"], list):
+                for item in msg["content"]:
+                    if item.get("type") == "tool_result" and "base64_image" in item:
+                        has_image = True
+                        break
+            
+            if has_image:
+                image_messages.append(msg)
 
         # If we don't have more images than the limit, return all messages
         if len(image_messages) <= self.config.num_images_to_keep:
@@ -100,13 +111,35 @@ class StandardMessageManager:
         images_to_keep = image_messages[-self.config.num_images_to_keep :]
         images_to_remove = image_messages[: -self.config.num_images_to_keep]
 
-        # Create a new message list without the older images
+        # Create a new message list, removing images from older messages
         result = []
         for msg in messages:
             if msg in images_to_remove:
-                # Skip this message
-                continue
-            result.append(msg)
+                # Remove images from this message but keep the text content
+                if msg["role"] == "user" and isinstance(msg["content"], list):
+                    # Keep only text content, remove images
+                    new_content = [
+                        item for item in msg["content"] 
+                        if item.get("type") not in ["image_url", "image"]
+                    ]
+                    if new_content:  # Only add if there's still content
+                        result.append({"role": msg["role"], "content": new_content})
+                elif msg["role"] == "assistant" and isinstance(msg["content"], list):
+                    # Remove base64_image from tool_result items
+                    new_content = []
+                    for item in msg["content"]:
+                        if item.get("type") == "tool_result" and "base64_image" in item:
+                            # Create a copy without the base64_image
+                            new_item = {k: v for k, v in item.items() if k != "base64_image"}
+                            new_content.append(new_item)
+                        else:
+                            new_content.append(item)
+                    result.append({"role": msg["role"], "content": new_content})
+                else:
+                    # For other message types, keep as is
+                    result.append(msg)
+            else:
+                result.append(msg)
 
         return result
 
diff --git a/libs/agent/agent/providers/anthropic/tools/computer.py b/libs/agent/agent/providers/anthropic/tools/computer.py
index 2bb944ea..dd1dc281 100644
--- a/libs/agent/agent/providers/anthropic/tools/computer.py
+++ b/libs/agent/agent/providers/anthropic/tools/computer.py
@@ -205,26 +205,6 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                 self.logger.info(f"  Coordinates: ({x}, {y})")
 
                 try:
-                    # Take pre-action screenshot to get current dimensions
-                    pre_screenshot = await self.computer.interface.screenshot()
-                    pre_img = Image.open(io.BytesIO(pre_screenshot))
-
-                    # Scale image to match screen dimensions if needed
-                    if pre_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling image from {pre_img.size} to {self.width}x{self.height} to match screen dimensions"
-                        )
-                        if not isinstance(self.width, int) or not isinstance(self.height, int):
-                            raise ToolError("Screen dimensions must be integers")
-                        size = (int(self.width), int(self.height))
-                        pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
-                        # Save the scaled image back to bytes
-                        buffer = io.BytesIO()
-                        pre_img.save(buffer, format="PNG")
-                        pre_screenshot = buffer.getvalue()
-
-                    self.logger.info(f"  Current dimensions: {pre_img.width}x{pre_img.height}")
-
                     # Perform the click action
                     if action == "left_click":
                         self.logger.info(f"Clicking at ({x}, {y})")
@@ -242,45 +222,14 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                     # Wait briefly for any UI changes
                     await asyncio.sleep(0.5)
 
-                    # Take and save post-action screenshot
-                    post_screenshot = await self.computer.interface.screenshot()
-                    post_img = Image.open(io.BytesIO(post_screenshot))
-
-                    # Scale post-action image if needed
-                    if post_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling post-action image from {post_img.size} to {self.width}x{self.height}"
-                        )
-                        post_img = post_img.resize(
-                            (self.width, self.height), Image.Resampling.LANCZOS
-                        )
-                        buffer = io.BytesIO()
-                        post_img.save(buffer, format="PNG")
-                        post_screenshot = buffer.getvalue()
-
                     return ToolResult(
                         output=f"Performed {action} at ({x}, {y})",
-                        base64_image=base64.b64encode(post_screenshot).decode(),
                     )
                 except Exception as e:
                     self.logger.error(f"Error during {action} action: {str(e)}")
                     raise ToolError(f"Failed to perform {action}: {str(e)}")
             else:
                 try:
-                    # Take pre-action screenshot
-                    pre_screenshot = await self.computer.interface.screenshot()
-                    pre_img = Image.open(io.BytesIO(pre_screenshot))
-
-                    # Scale image if needed
-                    if pre_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
-                        )
-                        if not isinstance(self.width, int) or not isinstance(self.height, int):
-                            raise ToolError("Screen dimensions must be integers")
-                        size = (int(self.width), int(self.height))
-                        pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
-
                     # Perform the click action
                     if action == "left_click":
                         self.logger.info("Performing left click at current position")
@@ -295,25 +244,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                     # Wait briefly for any UI changes
                     await asyncio.sleep(0.5)
 
-                    # Take post-action screenshot
-                    post_screenshot = await self.computer.interface.screenshot()
-                    post_img = Image.open(io.BytesIO(post_screenshot))
-
-                    # Scale post-action image if needed
-                    if post_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling post-action image from {post_img.size} to {self.width}x{self.height}"
-                        )
-                        post_img = post_img.resize(
-                            (self.width, self.height), Image.Resampling.LANCZOS
-                        )
-                        buffer = io.BytesIO()
-                        post_img.save(buffer, format="PNG")
-                        post_screenshot = buffer.getvalue()
-
                     return ToolResult(
                         output=f"Performed {action} at current position",
-                        base64_image=base64.b64encode(post_screenshot).decode(),
                     )
                 except Exception as e:
                     self.logger.error(f"Error during {action} action: {str(e)}")
@@ -328,20 +260,6 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                 raise ToolError(f"{text} must be a string")
 
             try:
-                # Take pre-action screenshot
-                pre_screenshot = await self.computer.interface.screenshot()
-                pre_img = Image.open(io.BytesIO(pre_screenshot))
-
-                # Scale image if needed
-                if pre_img.size != (self.width, self.height):
-                    self.logger.info(
-                        f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
-                    )
-                    if not isinstance(self.width, int) or not isinstance(self.height, int):
-                        raise ToolError("Screen dimensions must be integers")
-                    size = (int(self.width), int(self.height))
-                    pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
-
                 if action == "key":
                     # Special handling for page up/down on macOS
                     if text.lower() in ["pagedown", "page_down", "page down"]:
@@ -378,25 +296,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                     # Wait briefly for UI changes
                     await asyncio.sleep(0.5)
 
-                    # Take post-action screenshot
-                    post_screenshot = await self.computer.interface.screenshot()
-                    post_img = Image.open(io.BytesIO(post_screenshot))
-
-                    # Scale post-action image if needed
-                    if post_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling post-action image from {post_img.size} to {self.width}x{self.height}"
-                        )
-                        post_img = post_img.resize(
-                            (self.width, self.height), Image.Resampling.LANCZOS
-                        )
-                        buffer = io.BytesIO()
-                        post_img.save(buffer, format="PNG")
-                        post_screenshot = buffer.getvalue()
-
                     return ToolResult(
                         output=f"Pressed key: {output_text}",
-                        base64_image=base64.b64encode(post_screenshot).decode(),
                     )
 
                 elif action == "type":
@@ -406,66 +307,13 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                     # Wait briefly for UI changes
                     await asyncio.sleep(0.5)
 
-                    # Take post-action screenshot
-                    post_screenshot = await self.computer.interface.screenshot()
-                    post_img = Image.open(io.BytesIO(post_screenshot))
-
-                    # Scale post-action image if needed
-                    if post_img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling post-action image from {post_img.size} to {self.width}x{self.height}"
-                        )
-                        post_img = post_img.resize(
-                            (self.width, self.height), Image.Resampling.LANCZOS
-                        )
-                        buffer = io.BytesIO()
-                        post_img.save(buffer, format="PNG")
-                        post_screenshot = buffer.getvalue()
-
                     return ToolResult(
                         output=f"Typed text: {text}",
-                        base64_image=base64.b64encode(post_screenshot).decode(),
                     )
             except Exception as e:
                 self.logger.error(f"Error during {action} action: {str(e)}")
                 raise ToolError(f"Failed to perform {action}: {str(e)}")
 
-        elif action in ("screenshot", "cursor_position"):
-            if text is not None:
-                raise ToolError(f"text is not accepted for {action}")
-            if coordinate is not None:
-                raise ToolError(f"coordinate is not accepted for {action}")
-
-            try:
-                if action == "screenshot":
-                    # Take screenshot
-                    screenshot = await self.computer.interface.screenshot()
-                    img = Image.open(io.BytesIO(screenshot))
-
-                    # Scale image if needed
-                    if img.size != (self.width, self.height):
-                        self.logger.info(
-                            f"Scaling image from {img.size} to {self.width}x{self.height}"
-                        )
-                        if not isinstance(self.width, int) or not isinstance(self.height, int):
-                            raise ToolError("Screen dimensions must be integers")
-                        size = (int(self.width), int(self.height))
-                        img = img.resize(size, Image.Resampling.LANCZOS)
-                        buffer = io.BytesIO()
-                        img.save(buffer, format="PNG")
-                        screenshot = buffer.getvalue()
-
-                    return ToolResult(base64_image=base64.b64encode(screenshot).decode())
-
-                elif action == "cursor_position":
-                    pos = await self.computer.interface.get_cursor_position()
-                    x, y = pos  # Unpack the tuple
-                    return ToolResult(output=f"X={int(x)},Y={int(y)}")
-
-            except Exception as e:
-                self.logger.error(f"Error during {action} action: {str(e)}")
-                raise ToolError(f"Failed to perform {action}: {str(e)}")
-
         elif action == "scroll":
             # Implement scroll action
             direction = kwargs.get("direction", "down")
@@ -487,28 +335,20 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
                 # Wait briefly for UI changes
                 await asyncio.sleep(0.5)
 
-                # Take post-action screenshot
-                post_screenshot = await self.computer.interface.screenshot()
-                post_img = Image.open(io.BytesIO(post_screenshot))
-
-                # Scale post-action image if needed
-                if post_img.size != (self.width, self.height):
-                    self.logger.info(
-                        f"Scaling post-action image from {post_img.size} to {self.width}x{self.height}"
-                    )
-                    post_img = post_img.resize((self.width, self.height), Image.Resampling.LANCZOS)
-                    buffer = io.BytesIO()
-                    post_img.save(buffer, format="PNG")
-                    post_screenshot = buffer.getvalue()
-
                 return ToolResult(
                     output=f"Scrolled {direction} by {amount} steps",
-                    base64_image=base64.b64encode(post_screenshot).decode(),
                 )
             except Exception as e:
                 self.logger.error(f"Error during scroll action: {str(e)}")
                 raise ToolError(f"Failed to perform scroll: {str(e)}")
 
+        elif action == "screenshot":
+            # Take screenshot
+            return await self.screenshot()
+        elif action == "cursor_position":
+            pos = await self.computer.interface.get_cursor_position()
+            x, y = pos  # Unpack the tuple
+            return ToolResult(output=f"X={int(x)},Y={int(y)}")
         raise ToolError(f"Invalid action: {action}")
 
     async def screenshot(self):
diff --git a/libs/agent/agent/providers/openai/tools/computer.py b/libs/agent/agent/providers/openai/tools/computer.py
index c5602f4e..5575c792 100644
--- a/libs/agent/agent/providers/openai/tools/computer.py
+++ b/libs/agent/agent/providers/openai/tools/computer.py
@@ -61,9 +61,6 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
     computer: Computer  # The CUA Computer instance
     logger = logging.getLogger(__name__)
 
-    _screenshot_delay = 1.0  # macOS is generally faster than X11
-    _scaling_enabled = True
-
     def __init__(self, computer: Computer):
         """Initialize the computer tool.
 
@@ -185,26 +182,23 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             raise ToolError(f"Failed to execute {type}: {str(e)}")
 
     async def handle_click(self, button: str, x: int, y: int) -> ToolResult:
-        """Handle different click actions."""
+        """Handle mouse clicks."""
         try:
-            # Perform requested click action
+            # Perform the click based on button type
             if button == "left":
                 await self.computer.interface.left_click(x, y)
             elif button == "right":
                 await self.computer.interface.right_click(x, y)
             elif button == "double":
                 await self.computer.interface.double_click(x, y)
+            else:
+                raise ToolError(f"Unsupported button type: {button}")
 
-            # Wait for UI to update
-            await asyncio.sleep(0.5)
-
-            # Take screenshot after action
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
+            # Wait briefly for UI to update
+            await asyncio.sleep(0.3)
 
             return ToolResult(
                 output=f"Performed {button} click at ({x}, {y})",
-                base64_image=base64_screenshot,
             )
         except Exception as e:
             self.logger.error(f"Error in handle_click: {str(e)}")
@@ -218,11 +212,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
 
             await asyncio.sleep(0.3)
 
-            # Take screenshot after typing
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
-
-            return ToolResult(output=f"Typed: {text}", base64_image=base64_screenshot)
+            return ToolResult(output=f"Typed: {text}")
         except Exception as e:
             self.logger.error(f"Error in handle_typing: {str(e)}")
             raise ToolError(f"Failed to type '{text}': {str(e)}")
@@ -254,11 +244,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             # Wait briefly
             await asyncio.sleep(0.3)
 
-            # Take screenshot after action
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
-
-            return ToolResult(output=f"Pressed key: {key}", base64_image=base64_screenshot)
+            return ToolResult(output=f"Pressed key: {key}")
         except Exception as e:
             self.logger.error(f"Error in handle_key: {str(e)}")
             raise ToolError(f"Failed to press key '{key}': {str(e)}")
@@ -272,11 +258,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             # Wait briefly
             await asyncio.sleep(0.2)
 
-            # Take screenshot after action
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
-
-            return ToolResult(output=f"Moved cursor to ({x}, {y})", base64_image=base64_screenshot)
+            return ToolResult(output=f"Moved cursor to ({x}, {y})")
         except Exception as e:
             self.logger.error(f"Error in handle_mouse_move: {str(e)}")
             raise ToolError(f"Failed to move cursor to ({x}, {y}): {str(e)}")
@@ -296,14 +278,7 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             # Wait for UI to update
             await asyncio.sleep(0.5)
 
-            # Take screenshot after action
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
-
-            return ToolResult(
-                output=f"Scrolled at ({x}, {y}) with delta ({scroll_x}, {scroll_y})",
-                base64_image=base64_screenshot,
-            )
+            return ToolResult(output=f"Scrolled at ({x}, {y}) by ({scroll_x}, {scroll_y})")
         except Exception as e:
             self.logger.error(f"Error in handle_scroll: {str(e)}")
             raise ToolError(f"Failed to scroll at ({x}, {y}): {str(e)}")
@@ -331,13 +306,8 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             # Wait for UI to update
             await asyncio.sleep(0.5)
             
-            # Take screenshot after action
-            screenshot = await self.computer.interface.screenshot()
-            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
-            
             return ToolResult(
                 output=f"Dragged from ({path[0]['x']}, {path[0]['y']}) to ({path[-1]['x']}, {path[-1]['y']})",
-                base64_image=base64_screenshot,
             )
         except Exception as e:
             self.logger.error(f"Error in handle_drag: {str(e)}")

From 22b8a236d632d8edc97331e9a4c7e04e3c425b0b Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Sat, 14 Jun 2025 10:52:05 -0400
Subject: [PATCH 2/6] Added cloud provider to computer UI

---
 libs/computer/computer/ui/gradio/app.py | 64 ++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 6 deletions(-)

diff --git a/libs/computer/computer/ui/gradio/app.py b/libs/computer/computer/ui/gradio/app.py
index b1d131d9..8dc77786 100644
--- a/libs/computer/computer/ui/gradio/app.py
+++ b/libs/computer/computer/ui/gradio/app.py
@@ -528,13 +528,15 @@ async def execute(name, action, arguments):
     
     return results
 
-async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
+async def handle_init_computer(os_choice: str, app_list=None, provider="lume", container_name=None, api_key=None):
     """Initialize the computer instance and tools for macOS or Ubuntu
     
     Args:
         os_choice: The OS to use ("macOS" or "Ubuntu")
         app_list: Optional list of apps to focus on using the app-use experiment
-        provider: The provider to use ("lume" or "self")
+        provider: The provider to use ("lume", "self", or "cloud")
+        container_name: The container name to use for cloud provider
+        api_key: The API key to use for cloud provider
     """
     global computer, tool_call_logs, tools
     
@@ -559,6 +561,16 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
             use_host_computer_server=True,
             experiments=experiments
         )
+    elif provider == "cloud":
+        # Prefer the CUA_API_KEY environment variable; fall back to the key entered in the UI field
+        cloud_api_key = os.environ.get("CUA_API_KEY") or api_key
+        computer = Computer(
+            os_type=os_type_str,
+            provider_type=VMProviderType.CLOUD,
+            name=container_name,
+            api_key=cloud_api_key,
+            experiments=experiments
+        )
     else:
         computer = Computer(
             image=image_str,
@@ -596,6 +608,10 @@ async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
         init_params["apps"] = app_list
         init_params["experiments"] = ["app-use"]
     
+    # Add container name to the log if using cloud provider
+    if provider == "cloud":
+        init_params["container_name"] = container_name
+    
     result = await execute("computer", "initialize", init_params)
 
     return result["screenshot"], json.dumps(tool_call_logs, indent=2)
@@ -1073,11 +1089,31 @@ def create_gradio_ui():
                             # Provider selection radio
                             provider_choice = gr.Radio(
                                 label="Provider",
-                                choices=["lume", "self"],
+                                choices=["lume", "self", "cloud"],
                                 value="lume",
-                                info="'lume' uses a VM, 'self' uses the host computer server"
+                                info="'lume' uses a VM, 'self' uses the host computer server, 'cloud' uses a cloud container"
                             )
                         
+                        # Container name field for cloud provider (initially hidden)
+                        container_name = gr.Textbox(
+                            label="Container Name",
+                            placeholder="Enter your container name",
+                            visible=False,
+                            info="Get your container from [trycua.com](https://trycua.com/)"
+                        )
+                        
+                        # Check if CUA_API_KEY is set in environment
+                        has_cua_key = os.environ.get("CUA_API_KEY") is not None
+                        
+                        # API key field for cloud provider (visible only if no env key and cloud selected)
+                        api_key_field = gr.Textbox(
+                            label="CUA API Key",
+                            placeholder="Enter your CUA API key",
+                            type="password",
+                            visible=False,
+                            info="Required for cloud provider. Set CUA_API_KEY environment variable to hide this field."
+                        )
+                        
                         # App filtering dropdown for app-use experiment
                         app_filter = gr.Dropdown(
                             label="Filter by apps (App-Use)",
@@ -1085,6 +1121,22 @@ def create_gradio_ui():
                             allow_custom_value=True,
                             info="When apps are selected, the computer will focus on those apps using the app-use experiment"
                         )
+                        
+                        # Function to show/hide container name and API key fields based on provider selection
+                        def update_cloud_fields_visibility(provider):
+                            show_container = provider == "cloud"
+                            show_api_key = provider == "cloud" and not has_cua_key
+                            return (
+                                gr.update(visible=show_container),
+                                gr.update(visible=show_api_key)
+                            )
+                        
+                        # Connect provider choice to field visibility
+                        provider_choice.change(
+                            update_cloud_fields_visibility,
+                            inputs=provider_choice,
+                            outputs=[container_name, api_key_field]
+                        )
                     
                     start_btn = gr.Button("Initialize Computer")
                 
@@ -1149,7 +1201,7 @@ def create_gradio_ui():
                         value=False
                     )
                     message_submit_btn = gr.Button("Submit Message")
-                    message_status = gr.Textbox(label="Status", value="")
+                    message_status = gr.Textbox(label="Status")
                 
                 with gr.Accordion("Clipboard Operations", open=False):
                     clipboard_content = gr.Textbox(label="Clipboard Content")
@@ -1250,7 +1302,7 @@ def create_gradio_ui():
         )
                 
         img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
-        start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice], outputs=[img, action_log])
+        start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice, container_name, api_key_field], outputs=[img, action_log])
         wait_btn.click(handle_wait, outputs=[img, action_log])
         
         # DONE and FAIL buttons just do a placeholder action

From ca9308fbd223704087b8f943307cf54946a09d87 Mon Sep 17 00:00:00 2001
From: Dillon DuPont <ddupont@mit.edu>
Date: Sat, 14 Jun 2025 11:15:53 -0400
Subject: [PATCH 3/6] Enabled os_choice

---
 libs/computer/computer/ui/gradio/app.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libs/computer/computer/ui/gradio/app.py b/libs/computer/computer/ui/gradio/app.py
index 8dc77786..a9ae2154 100644
--- a/libs/computer/computer/ui/gradio/app.py
+++ b/libs/computer/computer/ui/gradio/app.py
@@ -1083,7 +1083,6 @@ def create_gradio_ui():
                                 label="OS",
                                 choices=["macOS", "Ubuntu"],
                                 value="macOS",
-                                interactive=False # disable until the ubuntu image is ready
                             )
                             
                             # Provider selection radio

From 52fc5dd56391a55e4dcdcd64b8cff0ad6381c290 Mon Sep 17 00:00:00 2001
From: f-trycua <f@trycua.com>
Date: Mon, 16 Jun 2025 17:12:08 -0700
Subject: [PATCH 4/6] Add proper exit codes when notarization fails

---
 .../scripts/build/build-release-notarized.sh    | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/libs/lume/scripts/build/build-release-notarized.sh b/libs/lume/scripts/build/build-release-notarized.sh
index 19fb2e88..018570b2 100755
--- a/libs/lume/scripts/build/build-release-notarized.sh
+++ b/libs/lume/scripts/build/build-release-notarized.sh
@@ -89,24 +89,33 @@ if [ "$LOG_LEVEL" = "minimal" ] || [ "$LOG_LEVEL" = "none" ]; then
       --password "${APP_SPECIFIC_PASSWORD}" \
       --wait 2>&1)
   
-  # Just show success or failure
+  # Check if notarization was successful
   if echo "$NOTARY_OUTPUT" | grep -q "status: Accepted"; then
     log "essential" "Notarization successful!"
   else
     log "error" "Notarization failed. Please check logs."
+    log "error" "Notarization output:"
+    echo "$NOTARY_OUTPUT"
+    exit 1
   fi
 else
   # Normal verbose output
-  xcrun notarytool submit ./.release/lume.pkg \
+  if ! xcrun notarytool submit ./.release/lume.pkg \
       --apple-id "${APPLE_ID}" \
       --team-id "${TEAM_ID}" \
       --password "${APP_SPECIFIC_PASSWORD}" \
-      --wait
+      --wait; then
+    log "error" "Notarization failed"
+    exit 1
+  fi
 fi
 
 # Staple the notarization ticket
 log "essential" "Stapling notarization ticket..."
-xcrun stapler staple ./.release/lume.pkg > /dev/null 2>&1
+if ! xcrun stapler staple ./.release/lume.pkg > /dev/null 2>&1; then
+  log "error" "Failed to staple notarization ticket"
+  exit 1
+fi
 
 # Create temporary directory for package extraction
 EXTRACT_ROOT=$(mktemp -d)

From dad7d4c30393b0b522f10a38d924f210fb674fa9 Mon Sep 17 00:00:00 2001
From: f-trycua <f@trycua.com>
Date: Mon, 16 Jun 2025 17:19:46 -0700
Subject: [PATCH 5/6] Fix package build error handling and certificate
 verification

- Add proper error checking for pkgbuild command
- Verify package exists before attempting notarization
- Improve certificate verification in GitHub workflow
- Show actual certificate details instead of just count
- Add specific checks for required Developer ID certificates

This should fix the 'file doesn't exist' error during notarization.
---
 .github/workflows/publish-lume.yml            | 19 ++++++++++++++++---
 .../scripts/build/build-release-notarized.sh  | 15 +++++++++++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/publish-lume.yml b/.github/workflows/publish-lume.yml
index 3b2311ee..d90df18e 100644
--- a/.github/workflows/publish-lume.yml
+++ b/.github/workflows/publish-lume.yml
@@ -114,9 +114,22 @@ jobs:
           # Allow codesign to access the certificates (minimal output)
           security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain > /dev/null 2>&1
           
-          # Verify certificates were imported but only show count, not details
-          echo "Verifying signing identity (showing count only)..."
-          security find-identity -v -p codesigning | grep -c "valid identities found" || true
+          # Verify certificates were imported
+          echo "Verifying signing identities..."
+          security find-identity -v -p codesigning build.keychain
+          
+          # Verify specific certificates exist
+          if ! security find-identity -v -p codesigning build.keychain | grep -q "Developer ID Application: ${{ secrets.DEVELOPER_NAME }}"; then
+            echo "Error: Developer ID Application certificate not found"
+            exit 1
+          fi
+          
+          if ! security find-identity -v -p codesigning build.keychain | grep -q "Developer ID Installer: ${{ secrets.DEVELOPER_NAME }}"; then
+            echo "Error: Developer ID Installer certificate not found"
+            exit 1
+          fi
+          
+          echo "All required certificates verified successfully"
           
           # Clean up certificate files
           rm application.p12 installer.p12
diff --git a/libs/lume/scripts/build/build-release-notarized.sh b/libs/lume/scripts/build/build-release-notarized.sh
index 018570b2..603446b7 100755
--- a/libs/lume/scripts/build/build-release-notarized.sh
+++ b/libs/lume/scripts/build/build-release-notarized.sh
@@ -72,12 +72,23 @@ cp -f .build/release/lume "$TEMP_ROOT/usr/local/bin/"
 
 # Build the installer package
 log "essential" "Building installer package..."
-pkgbuild --root "$TEMP_ROOT" \
+if ! pkgbuild --root "$TEMP_ROOT" \
          --identifier "com.trycua.lume" \
          --version "1.0" \
          --install-location "/" \
          --sign "$CERT_INSTALLER_NAME" \
-         ./.release/lume.pkg 2> /dev/null
+         ./.release/lume.pkg; then
+    log "error" "Failed to build installer package"
+    exit 1
+fi
+
+# Verify the package was created
+if [ ! -f "./.release/lume.pkg" ]; then
+    log "error" "Package file ./.release/lume.pkg was not created"
+    exit 1
+fi
+
+log "essential" "Package created successfully"
 
 # Submit for notarization using stored credentials
 log "essential" "Submitting for notarization..."

From 8ccee43460d2df2da13f044d94bf9d245c6974d8 Mon Sep 17 00:00:00 2001
From: f-trycua <f@trycua.com>
Date: Mon, 16 Jun 2025 17:45:33 -0700
Subject: [PATCH 6/6] Fix certificate verification to handle GitHub secret
 masking

---
 .github/workflows/publish-lume.yml | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/publish-lume.yml b/.github/workflows/publish-lume.yml
index d90df18e..ec5e7550 100644
--- a/.github/workflows/publish-lume.yml
+++ b/.github/workflows/publish-lume.yml
@@ -116,19 +116,22 @@ jobs:
           
           # Verify certificates were imported
           echo "Verifying signing identities..."
-          security find-identity -v -p codesigning build.keychain
+          CERT_COUNT=$(security find-identity -v -p codesigning build.keychain | grep -c "Developer ID Application" || true)  # grep -c prints 0 itself on no match
+          INSTALLER_COUNT=$(security find-identity -v build.keychain | grep -c "Developer ID Installer" || true)
           
-          # Verify specific certificates exist
-          if ! security find-identity -v -p codesigning build.keychain | grep -q "Developer ID Application: ${{ secrets.DEVELOPER_NAME }}"; then
-            echo "Error: Developer ID Application certificate not found"
+          if [ "$CERT_COUNT" -eq 0 ]; then
+            echo "Error: No Developer ID Application certificate found"
+            security find-identity -v -p codesigning build.keychain
             exit 1
           fi
           
-          if ! security find-identity -v -p codesigning build.keychain | grep -q "Developer ID Installer: ${{ secrets.DEVELOPER_NAME }}"; then
-            echo "Error: Developer ID Installer certificate not found"
+          if [ "$INSTALLER_COUNT" -eq 0 ]; then
+            echo "Error: No Developer ID Installer certificate found"  
+            security find-identity -v build.keychain
             exit 1
           fi
           
+          echo "Found $CERT_COUNT Developer ID Application certificate(s) and $INSTALLER_COUNT Developer ID Installer certificate(s)"
           echo "All required certificates verified successfully"
           
           # Clean up certificate files