From a5b5bad05cd282c87fdd8346c2d351612a3b7f4f Mon Sep 17 00:00:00 2001
From: f-trycua <f@trycua.com>
Date: Sat, 10 May 2025 22:15:30 -0700
Subject: [PATCH 1/5] Add clipboard sharing and audio devices to Darwin and Linux VM configurations

---
 .../VMVirtualizationService.swift             | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/libs/lume/src/Virtualization/VMVirtualizationService.swift b/libs/lume/src/Virtualization/VMVirtualizationService.swift
index 93cb4db0a..b358659b2 100644
--- a/libs/lume/src/Virtualization/VMVirtualizationService.swift
+++ b/libs/lume/src/Virtualization/VMVirtualizationService.swift
@@ -246,6 +246,27 @@ final class DarwinVirtualizationService: BaseVirtualizationService {
         ]
         vzConfig.memoryBalloonDevices = [VZVirtioTraditionalMemoryBalloonDeviceConfiguration()]
         vzConfig.entropyDevices = [VZVirtioEntropyDeviceConfiguration()]
+        
+        // Audio configuration
+        let soundDeviceConfiguration = VZVirtioSoundDeviceConfiguration()
+        let inputAudioStreamConfiguration = VZVirtioSoundDeviceInputStreamConfiguration()
+        let outputAudioStreamConfiguration = VZVirtioSoundDeviceOutputStreamConfiguration()
+        
+        inputAudioStreamConfiguration.source = VZHostAudioInputStreamSource()
+        outputAudioStreamConfiguration.sink = VZHostAudioOutputStreamSink()
+        
+        soundDeviceConfiguration.streams = [inputAudioStreamConfiguration, outputAudioStreamConfiguration]
+        vzConfig.audioDevices = [soundDeviceConfiguration]
+        
+        // Clipboard sharing via Spice agent
+        let spiceAgentConsoleDevice = VZVirtioConsoleDeviceConfiguration()
+        let spiceAgentPort = VZVirtioConsolePortConfiguration()
+        spiceAgentPort.name = VZSpiceAgentPortAttachment.spiceAgentPortName
+        let spiceAgentPortAttachment = VZSpiceAgentPortAttachment()
+        spiceAgentPortAttachment.sharesClipboard = true
+        spiceAgentPort.attachment = spiceAgentPortAttachment
+        spiceAgentConsoleDevice.ports[0] = spiceAgentPort
+        vzConfig.consoleDevices.append(spiceAgentConsoleDevice)
 
         // Directory sharing
         let directorySharingDevices = createDirectorySharingDevices(
@@ -376,6 +397,27 @@ final class LinuxVirtualizationService: BaseVirtualizationService {
         ]
         vzConfig.memoryBalloonDevices = [VZVirtioTraditionalMemoryBalloonDeviceConfiguration()]
         vzConfig.entropyDevices = [VZVirtioEntropyDeviceConfiguration()]
+        
+        // Audio configuration
+        let soundDeviceConfiguration = VZVirtioSoundDeviceConfiguration()
+        let inputAudioStreamConfiguration = VZVirtioSoundDeviceInputStreamConfiguration()
+        let outputAudioStreamConfiguration = VZVirtioSoundDeviceOutputStreamConfiguration()
+        
+        inputAudioStreamConfiguration.source = VZHostAudioInputStreamSource()
+        outputAudioStreamConfiguration.sink = VZHostAudioOutputStreamSink()
+        
+        soundDeviceConfiguration.streams = [inputAudioStreamConfiguration, outputAudioStreamConfiguration]
+        vzConfig.audioDevices = [soundDeviceConfiguration]
+
+        // Clipboard sharing via Spice agent
+        let spiceAgentConsoleDevice = VZVirtioConsoleDeviceConfiguration()
+        let spiceAgentPort = VZVirtioConsolePortConfiguration()
+        spiceAgentPort.name = VZSpiceAgentPortAttachment.spiceAgentPortName
+        let spiceAgentPortAttachment = VZSpiceAgentPortAttachment()
+        spiceAgentPortAttachment.sharesClipboard = true
+        spiceAgentPort.attachment = spiceAgentPortAttachment
+        spiceAgentConsoleDevice.ports[0] = spiceAgentPort
+        vzConfig.consoleDevices.append(spiceAgentConsoleDevice)
 
         // Directory sharing
         var directorySharingDevices = createDirectorySharingDevices(

From e51bbe2c0f54987391646ab45c57b3e3af6be146 Mon Sep 17 00:00:00 2001
From: Francesco Bonacci <francesco.bonacci@outlook.com>
Date: Sun, 11 May 2025 09:51:08 -0700
Subject: [PATCH 2/5] Update README.md demo video and tidy its markup

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index dafef93da..c11134d96 100644
--- a/README.md
+++ b/README.md
@@ -13,10 +13,9 @@
 
 **c/ua** (pronounced "koo-ah") enables AI agents to control full operating systems in high-performance virtual containers with near-native speed on Apple Silicon.
 
-
-
 <div align="center">
-<video src="https://github.com/user-attachments/assets/06e1974f-8f73-477d-b18a-715d83148e45" width="800" controls></video></div>
+  <video src="https://github.com/user-attachments/assets/c619b4ea-bb8e-4382-860e-f3757e36af20" width="800" controls></video>
+</div>
 
 # 🚀 Quick Start
 

From ee7784e2ddc111f47ab82ab8aacf882854013c37 Mon Sep 17 00:00:00 2001
From: ddupont <3820588+ddupont808@users.noreply.github.com>
Date: Sun, 11 May 2025 21:13:53 -0400
Subject: [PATCH 3/5] Update README.md: rename OmniParser entry to OmniParser-v2.0

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c11134d96..61ac0b870 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ This script will:
 - [UITARS-1.5](https://github.com/trycua/cua/blob/main/libs/agent/README.md#agent-loops) - Run locally on Apple Silicon with MLX, or use cloud providers
 - [OpenAI CUA](https://github.com/trycua/cua/blob/main/libs/agent/README.md#agent-loops) - Use OpenAI's Computer-Use Preview model
 - [Anthropic CUA](https://github.com/trycua/cua/blob/main/libs/agent/README.md#agent-loops) - Use Anthropic's Computer-Use capabilities
-- [OmniParser](https://github.com/trycua/cua/blob/main/libs/agent/README.md#agent-loops) - Control UI with [Set-of-Marks prompting](https://som-gpt4v.github.io/) using any vision model
+- [OmniParser-v2.0](https://github.com/trycua/cua/blob/main/libs/agent/README.md#agent-loops) - Control UI with [Set-of-Marks prompting](https://som-gpt4v.github.io/) using any vision model
 
 ### System Requirements
 

From aa0132222038539e88934aee01820dbd3e5384bc Mon Sep 17 00:00:00 2001
From: Dillon DuPont <v-ddupont@microsoft.com>
Date: Mon, 12 May 2025 08:54:28 -0400
Subject: [PATCH 4/5] Fix issue #172: queue-based streaming in UITARS loop; store OpenAI queue on self

---
 libs/agent/agent/providers/openai/loop.py |   8 +-
 libs/agent/agent/providers/uitars/loop.py | 193 ++++++++++++----------
 2 files changed, 114 insertions(+), 87 deletions(-)

diff --git a/libs/agent/agent/providers/openai/loop.py b/libs/agent/agent/providers/openai/loop.py
index 87719d1b1..e791b8c98 100644
--- a/libs/agent/agent/providers/openai/loop.py
+++ b/libs/agent/agent/providers/openai/loop.py
@@ -133,22 +133,22 @@ class OpenAILoop(BaseLoop):
             logger.info("Starting OpenAI loop run")
 
             # Create queue for response streaming
-            queue = asyncio.Queue()
+            self.queue = asyncio.Queue()
 
             # Ensure tool manager is initialized
             await self.tool_manager.initialize()
 
             # Start loop in background task
-            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
+            self.loop_task = asyncio.create_task(self._run_loop(self.queue, messages))
 
             # Process and yield messages as they arrive
             while True:
                 try:
-                    item = await queue.get()
+                    item = await self.queue.get()
                     if item is None:  # Stop signal
                         break
                     yield item
-                    queue.task_done()
+                    self.queue.task_done()
                 except Exception as e:
                     logger.error(f"Error processing queue item: {str(e)}")
                     continue
diff --git a/libs/agent/agent/providers/uitars/loop.py b/libs/agent/agent/providers/uitars/loop.py
index 3766cd92d..133a3b83a 100644
--- a/libs/agent/agent/providers/uitars/loop.py
+++ b/libs/agent/agent/providers/uitars/loop.py
@@ -463,17 +463,40 @@ class UITARSLoop(BaseLoop):
         Yields:
             Agent response format
         """
-        # Initialize the message manager with the provided messages
-        self.message_manager.messages = messages.copy()
-        logger.info(f"Starting UITARSLoop run with {len(self.message_manager.messages)} messages")
-        
-        # Create a task to run the loop
-        self.loop_task = asyncio.create_task(self._run_loop(messages))
-
-        # Yield from the loop task
         try:
-            async for response in self.loop_task:
-                yield response
+            logger.info(f"Starting UITARSLoop run with {len(messages)} messages")
+            
+            # Initialize the message manager with the provided messages
+            self.message_manager.messages = messages.copy()
+            
+            # Create queue for response streaming
+            queue = asyncio.Queue()
+            
+            # Start loop in background task
+            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
+
+            # Process and yield messages as they arrive
+            while True:
+                try:
+                    item = await queue.get()
+                    if item is None:  # Stop signal
+                        break
+                    yield item
+                    queue.task_done()
+                except Exception as e:
+                    logger.error(f"Error processing queue item: {str(e)}")
+                    continue
+
+            # Wait for loop to complete
+            await self.loop_task
+
+            # Send completion message
+            yield {
+                "role": "assistant",
+                "content": "Task completed successfully.",
+                "metadata": {"title": "✅ Complete"},
+            }
+
         except Exception as e:
             logger.error(f"Error in run method: {str(e)}")
             yield {
@@ -482,14 +505,12 @@ class UITARSLoop(BaseLoop):
                 "metadata": {"title": "❌ Error"},
             }
             
-    async def _run_loop(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
+    async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
         """Internal method to run the agent loop with provided messages.
         
         Args:
+            queue: Queue to put responses into
             messages: List of messages in standard OpenAI format
-            
-        Yields:
-            Agent response format
         """
         # Continue running until explicitly told to stop
         running = True
@@ -500,88 +521,94 @@ class UITARSLoop(BaseLoop):
         attempt = 0
         max_attempts = 3
 
-        while running and attempt < max_attempts:
-            try:
-                # Create a new turn directory if it's not already created
-                if not turn_created:
-                    self._create_turn_dir()
-                    turn_created = True
+        try:
+            while running and attempt < max_attempts:
+                try:
+                    # Create a new turn directory if it's not already created
+                    if not turn_created:
+                        self._create_turn_dir()
+                        turn_created = True
 
-                # Ensure client is initialized
-                if self.client is None:
-                    logger.info("Initializing client...")
-                    await self.initialize_client()
+                    # Ensure client is initialized
                     if self.client is None:
-                        raise RuntimeError("Failed to initialize client")
-                    logger.info("Client initialized successfully")
+                        logger.info("Initializing client...")
+                        await self.initialize_client()
+                        if self.client is None:
+                            raise RuntimeError("Failed to initialize client")
+                        logger.info("Client initialized successfully")
 
-                # Get current screen
-                base64_screenshot = await self._get_current_screen()
-                
-                # Add screenshot to message history
-                self.message_manager.add_user_message(
-                    [
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/png;base64,{base64_screenshot}"},
-                        }
-                    ]
-                )
-                logger.info("Added screenshot to message history")
+                    # Get current screen
+                    base64_screenshot = await self._get_current_screen()
+                    
+                    # Add screenshot to message history
+                    self.message_manager.add_user_message(
+                        [
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": f"data:image/png;base64,{base64_screenshot}"},
+                            }
+                        ]
+                    )
+                    logger.info("Added screenshot to message history")
 
-                # Get system prompt
-                system_prompt = self._get_system_prompt()
+                    # Get system prompt
+                    system_prompt = self._get_system_prompt()
 
-                # Make API call with retries
-                response = await self._make_api_call(
-                    self.message_manager.messages, system_prompt
-                )
+                    # Make API call with retries
+                    response = await self._make_api_call(
+                        self.message_manager.messages, system_prompt
+                    )
 
-                # Handle the response (may execute actions)
-                # Returns: (should_continue, action_screenshot_saved)
-                should_continue, new_screenshot_saved = await self._handle_response(
-                    response, self.message_manager.messages
-                )
+                    # Handle the response (may execute actions)
+                    # Returns: (should_continue, action_screenshot_saved)
+                    should_continue, new_screenshot_saved = await self._handle_response(
+                        response, self.message_manager.messages
+                    )
 
-                # Update whether an action screenshot was saved this turn
-                action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
-                
-                agent_response = await to_agent_response_format(
-                    response,
-                    messages,
-                    model=self.model,
-                )
-                # Log standardized response for ease of parsing
-                self._log_api_call("agent_response", request=None, response=agent_response)
-                yield agent_response
-                
-                # Check if we should continue this conversation
-                running = should_continue
+                    # Update whether an action screenshot was saved this turn
+                    action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
+                    
+                    agent_response = await to_agent_response_format(
+                        response,
+                        messages,
+                        model=self.model,
+                    )
+                    # Log standardized response for ease of parsing
+                    self._log_api_call("agent_response", request=None, response=agent_response)
+                    
+                    # Put the response in the queue
+                    await queue.put(agent_response)
+                    
+                    # Check if we should continue this conversation
+                    running = should_continue
 
-                # Create a new turn directory if we're continuing
-                if running:
-                    turn_created = False
+                    # Create a new turn directory if we're continuing
+                    if running:
+                        turn_created = False
 
-                # Reset attempt counter on success
-                attempt = 0
+                    # Reset attempt counter on success
+                    attempt = 0
 
-            except Exception as e:
-                attempt += 1
-                error_msg = f"Error in run method (attempt {attempt}/{max_attempts}): {str(e)}"
-                logger.error(error_msg)
+                except Exception as e:
+                    attempt += 1
+                    error_msg = f"Error in run method (attempt {attempt}/{max_attempts}): {str(e)}"
+                    logger.error(error_msg)
 
-                # If this is our last attempt, provide more info about the error
-                if attempt >= max_attempts:
-                    logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
+                    # If this is our last attempt, provide more info about the error
+                    if attempt >= max_attempts:
+                        logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
 
-                yield {
-                    "role": "assistant",
-                    "content": f"Error: {str(e)}",
-                    "metadata": {"title": "❌ Error"},
-                }
+                    await queue.put({
+                        "role": "assistant",
+                        "content": f"Error: {str(e)}",
+                        "metadata": {"title": "❌ Error"},
+                    })
 
-                # Create a brief delay before retrying
-                await asyncio.sleep(1)
+                    # Create a brief delay before retrying
+                    await asyncio.sleep(1)
+        finally:
+            # Signal that we're done
+            await queue.put(None)
 
     async def cancel(self) -> None:
         """Cancel the currently running agent loop task.

From 1b1eb813741e00647649f774a58500bf4a1bd6cf Mon Sep 17 00:00:00 2001
From: Dillon DuPont <v-ddupont@microsoft.com>
Date: Mon, 12 May 2025 08:55:13 -0400
Subject: [PATCH 5/5] Fix issue #172: queue-based response streaming in the Omni loop

---
 libs/agent/agent/providers/omni/loop.py | 60 +++++++++++++++++--------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/libs/agent/agent/providers/omni/loop.py b/libs/agent/agent/providers/omni/loop.py
index b77194500..840b29166 100644
--- a/libs/agent/agent/providers/omni/loop.py
+++ b/libs/agent/agent/providers/omni/loop.py
@@ -581,17 +581,40 @@ class OmniLoop(BaseLoop):
         Yields:
             Agent response format
         """
-        # Initialize the message manager with the provided messages
-        self.message_manager.messages = messages.copy()
-        logger.info(f"Starting OmniLoop run with {len(self.message_manager.messages)} messages")
-        
-        # Create a task to run the loop
-        self.loop_task = asyncio.create_task(self._run_loop(messages))
-
-        # Yield from the loop task
         try:
-            async for response in self.loop_task:
-                yield response
+            logger.info(f"Starting OmniLoop run with {len(messages)} messages")
+            
+            # Initialize the message manager with the provided messages
+            self.message_manager.messages = messages.copy()
+            
+            # Create queue for response streaming
+            queue = asyncio.Queue()
+            
+            # Start loop in background task
+            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
+
+            # Process and yield messages as they arrive
+            while True:
+                try:
+                    item = await queue.get()
+                    if item is None:  # Stop signal
+                        break
+                    yield item
+                    queue.task_done()
+                except Exception as e:
+                    logger.error(f"Error processing queue item: {str(e)}")
+                    continue
+
+            # Wait for loop to complete
+            await self.loop_task
+
+            # Send completion message
+            yield {
+                "role": "assistant",
+                "content": "Task completed successfully.",
+                "metadata": {"title": "✅ Complete"},
+            }
+
         except Exception as e:
             logger.error(f"Error in run method: {str(e)}")
             yield {
@@ -600,14 +623,12 @@ class OmniLoop(BaseLoop):
                 "metadata": {"title": "❌ Error"},
             }
             
-    async def _run_loop(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
+    async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
         """Internal method to run the agent loop with provided messages.
         
         Args:
+            queue: Queue to put responses into
             messages: List of messages in standard OpenAI format
-            
-        Yields:
-            Agent response format
         """
         # Continue running until explicitly told to stop
         running = True
@@ -698,8 +719,8 @@ class OmniLoop(BaseLoop):
                 # Log standardized response for ease of parsing
                 self._log_api_call("agent_response", request=None, response=openai_compatible_response)
 
-                # Yield the response to the caller
-                yield openai_compatible_response
+                # Put the response in the queue
+                await queue.put(openai_compatible_response)
 
                 # Check if we should continue this conversation
                 running = should_continue
@@ -720,14 +741,17 @@ class OmniLoop(BaseLoop):
                 if attempt >= max_attempts:
                     logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
 
-                yield {
+                await queue.put({
                     "role": "assistant",
                     "content": f"Error: {str(e)}",
                     "metadata": {"title": "❌ Error"},
-                }
+                })
 
                 # Create a brief delay before retrying
                 await asyncio.sleep(1)
+        finally:
+            # Signal that we're done
+            await queue.put(None)
                 
     async def cancel(self) -> None:
         """Cancel the currently running agent loop task.