diff --git a/libs/agent/agent/providers/uitars/clients/mlxvlm.py b/libs/agent/agent/providers/uitars/clients/mlxvlm.py
index 24f41f34..197b08cb 100644
--- a/libs/agent/agent/providers/uitars/clients/mlxvlm.py
+++ b/libs/agent/agent/providers/uitars/clients/mlxvlm.py
@@ -202,8 +202,10 @@ class MLXVLMUITarsClient(BaseUITarsClient):
             )
             tokenizer = cast(PreTrainedTokenizer, self.processor)
             
+            logger.debug("generating response...")
+            
             # Generate response
-            output = generate(
+            text_content, usage = generate(
                 self.model, 
                 tokenizer, 
                 str(prompt), 
@@ -212,6 +214,8 @@ class MLXVLMUITarsClient(BaseUITarsClient):
                 max_tokens=max_tokens
             )
             
+            # Emit the raw generation through the module logger rather than stdout
+            logger.debug("Agent generation output: %r", text_content)
         except Exception as e:
             logger.error(f"Error generating response: {str(e)}")
             return {
@@ -235,9 +241,9 @@ class MLXVLMUITarsClient(BaseUITarsClient):
             model_size = model_sizes[0]
             
             # Check if output contains box tokens that need processing
-            if "<|box_start|>" in output:
+            if "<|box_start|>" in text_content:
                 # Process coordinates from model space back to original image space
-                output = self._process_coordinates(output, orig_size, model_size)
+                text_content = self._process_coordinates(text_content, orig_size, model_size)
         
         # Format response to match OpenAI format
         response = {
@@ -245,12 +251,13 @@ class MLXVLMUITarsClient(BaseUITarsClient):
                 {
                     "message": {
                         "role": "assistant",
-                        "content": output
+                        "content": text_content
                     },
                     "finish_reason": "stop"
                 }
             ],
-            "model": self.model_name
+            "model": self.model_name,
+            "usage": usage
         }
         
         return response
diff --git a/libs/agent/agent/providers/uitars/utils.py b/libs/agent/agent/providers/uitars/utils.py
index cc904115..bdfd58cd 100644
--- a/libs/agent/agent/providers/uitars/utils.py
+++ b/libs/agent/agent/providers/uitars/utils.py
@@ -105,7 +105,7 @@ async def to_agent_response_format(
             }
         ],
         truncation="auto",
-        usage=response["usage"],
+        usage=response.get("usage", {}),
         user=None,
         metadata={},
         response=response
diff --git a/libs/agent/pyproject.toml b/libs/agent/pyproject.toml
index 8772575c..1289adca 100644
--- a/libs/agent/pyproject.toml
+++ b/libs/agent/pyproject.toml
@@ -36,7 +36,7 @@ openai = [
 ]
 uitars = [
     "httpx>=0.27.0,<0.29.0",
-    "mlx-vlm>=0.1.25"
+    "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
 ]
 ui = [
     "gradio>=5.23.3,<6.0.0",
@@ -86,7 +86,7 @@ all = [
     "ollama>=0.4.7,<0.5.0",
     "gradio>=5.23.3,<6.0.0",
     "python-dotenv>=1.0.1,<2.0.0",
-    "mlx-vlm>=0.1.25"
+    "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
 ]
 
 [tool.pdm]