diff --git a/libs/agent/agent/providers/uitars/clients/mlxvlm.py b/libs/agent/agent/providers/uitars/clients/mlxvlm.py index 24f41f34..197b08cb 100644 --- a/libs/agent/agent/providers/uitars/clients/mlxvlm.py +++ b/libs/agent/agent/providers/uitars/clients/mlxvlm.py @@ -202,8 +202,10 @@ class MLXVLMUITarsClient(BaseUITarsClient): ) tokenizer = cast(PreTrainedTokenizer, self.processor) + print("generating response...") + # Generate response - output = generate( + text_content, usage = generate( self.model, tokenizer, str(prompt), @@ -212,6 +214,10 @@ class MLXVLMUITarsClient(BaseUITarsClient): max_tokens=max_tokens ) + from pprint import pprint + print("DEBUG - AGENT GENERATION --------") + pprint(text_content) + print("DEBUG - AGENT GENERATION --------") except Exception as e: logger.error(f"Error generating response: {str(e)}") return { @@ -235,9 +241,9 @@ class MLXVLMUITarsClient(BaseUITarsClient): model_size = model_sizes[0] # Check if output contains box tokens that need processing - if "<|box_start|>" in output: + if "<|box_start|>" in text_content: # Process coordinates from model space back to original image space - output = self._process_coordinates(output, orig_size, model_size) + text_content = self._process_coordinates(text_content, orig_size, model_size) # Format response to match OpenAI format response = { @@ -245,12 +251,13 @@ class MLXVLMUITarsClient(BaseUITarsClient): { "message": { "role": "assistant", - "content": output + "content": text_content }, "finish_reason": "stop" } ], - "model": self.model_name + "model": self.model_name, + "usage": usage } return response diff --git a/libs/agent/agent/providers/uitars/utils.py b/libs/agent/agent/providers/uitars/utils.py index cc904115..bdfd58cd 100644 --- a/libs/agent/agent/providers/uitars/utils.py +++ b/libs/agent/agent/providers/uitars/utils.py @@ -105,7 +105,7 @@ async def to_agent_response_format( } ], truncation="auto", - usage=response["usage"], + usage=response.get("usage", {}), user=None, metadata={}, response=response diff --git a/libs/agent/pyproject.toml b/libs/agent/pyproject.toml index 8772575c..1289adca 100644 --- a/libs/agent/pyproject.toml +++ b/libs/agent/pyproject.toml @@ -36,7 +36,7 @@ openai = [ ] uitars = [ "httpx>=0.27.0,<0.29.0", - "mlx-vlm>=0.1.25" + "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id" ] ui = [ "gradio>=5.23.3,<6.0.0", @@ -86,7 +86,7 @@ all = [ "ollama>=0.4.7,<0.5.0", "gradio>=5.23.3,<6.0.0", "python-dotenv>=1.0.1,<2.0.0", - "mlx-vlm>=0.1.25" + "mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id" ] [tool.pdm]