Use prncvrm's mlx-vlm patch (branch fix/qwen2-position-id) for testing

2026-01-04 20:40:15 -06:00 · 2025-05-05 10:31:15 -04:00
parent 21abb93473
commit 6a6fe48dbc
3 changed files with 15 additions and 8 deletions
--- a/libs/agent/agent/providers/uitars/clients/mlxvlm.py
+++ b/libs/agent/agent/providers/uitars/clients/mlxvlm.py
@@ -202,8 +202,10 @@ class MLXVLMUITarsClient(BaseUITarsClient):
            )
            tokenizer = cast(PreTrainedTokenizer, self.processor)
            
+            print("generating response...")
+            
            # Generate response
-            output = generate(
+            text_content, usage = generate(
                self.model, 
                tokenizer, 
                str(prompt), 
@@ -212,6 +214,10 @@ class MLXVLMUITarsClient(BaseUITarsClient):
                max_tokens=max_tokens
            )
            
+            from pprint import pprint
+            print("DEBUG - AGENT GENERATION --------")
+            pprint(text_content)
+            print("DEBUG - AGENT GENERATION --------")
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return {
@@ -235,9 +241,9 @@ class MLXVLMUITarsClient(BaseUITarsClient):
            model_size = model_sizes[0]
            
            # Check if output contains box tokens that need processing
-            if "<|box_start|>" in output:
+            if "<|box_start|>" in text_content:
                # Process coordinates from model space back to original image space
-                output = self._process_coordinates(output, orig_size, model_size)
+                text_content = self._process_coordinates(text_content, orig_size, model_size)
        
        # Format response to match OpenAI format
        response = {
@@ -245,12 +251,13 @@ class MLXVLMUITarsClient(BaseUITarsClient):
                {
                    "message": {
                        "role": "assistant",
-                        "content": output
+                        "content": text_content
                    },
                    "finish_reason": "stop"
                }
            ],
-            "model": self.model_name
+            "model": self.model_name,
+            "usage": usage
        }
        
        return response
--- a/libs/agent/agent/providers/uitars/utils.py
+++ b/libs/agent/agent/providers/uitars/utils.py
@@ -105,7 +105,7 @@ async def to_agent_response_format(
            }
        ],
        truncation="auto",
-        usage=response["usage"],
+        usage=response.get("usage", {}),
        user=None,
        metadata={},
        response=response
--- a/libs/agent/pyproject.toml
+++ b/libs/agent/pyproject.toml
@@ -36,7 +36,7 @@ openai = [
 ]
 uitars = [
    "httpx>=0.27.0,<0.29.0",
-    "mlx-vlm>=0.1.25"
+    "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
 ]
 ui = [
    "gradio>=5.23.3,<6.0.0",
@@ -86,7 +86,7 @@ all = [
    "ollama>=0.4.7,<0.5.0",
    "gradio>=5.23.3,<6.0.0",
    "python-dotenv>=1.0.1,<2.0.0",
-    "mlx-vlm>=0.1.25"
+    "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
 ]

 [tool.pdm]