mirror of
https://github.com/trycua/computer.git
synced 2026-01-04 20:40:15 -06:00
use prncvrm's mlx-vlm patch for testing
This commit is contained in:
@@ -202,8 +202,10 @@ class MLXVLMUITarsClient(BaseUITarsClient):
|
||||
)
|
||||
tokenizer = cast(PreTrainedTokenizer, self.processor)
|
||||
|
||||
print("generating response...")
|
||||
|
||||
# Generate response
|
||||
output = generate(
|
||||
text_content, usage = generate(
|
||||
self.model,
|
||||
tokenizer,
|
||||
str(prompt),
|
||||
@@ -212,6 +214,10 @@ class MLXVLMUITarsClient(BaseUITarsClient):
|
||||
max_tokens=max_tokens
|
||||
)
|
||||
|
||||
from pprint import pprint
|
||||
print("DEBUG - AGENT GENERATION --------")
|
||||
pprint(text_content)
|
||||
print("DEBUG - AGENT GENERATION --------")
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating response: {str(e)}")
|
||||
return {
|
||||
@@ -235,9 +241,9 @@ class MLXVLMUITarsClient(BaseUITarsClient):
|
||||
model_size = model_sizes[0]
|
||||
|
||||
# Check if output contains box tokens that need processing
|
||||
if "<|box_start|>" in output:
|
||||
if "<|box_start|>" in text_content:
|
||||
# Process coordinates from model space back to original image space
|
||||
output = self._process_coordinates(output, orig_size, model_size)
|
||||
text_content = self._process_coordinates(text_content, orig_size, model_size)
|
||||
|
||||
# Format response to match OpenAI format
|
||||
response = {
|
||||
@@ -245,12 +251,13 @@ class MLXVLMUITarsClient(BaseUITarsClient):
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": output
|
||||
"content": text_content
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"model": self.model_name
|
||||
"model": self.model_name,
|
||||
"usage": usage
|
||||
}
|
||||
|
||||
return response
|
||||
|
||||
@@ -105,7 +105,7 @@ async def to_agent_response_format(
|
||||
}
|
||||
],
|
||||
truncation="auto",
|
||||
usage=response["usage"],
|
||||
usage=response.get("usage", {}),
|
||||
user=None,
|
||||
metadata={},
|
||||
response=response
|
||||
|
||||
@@ -36,7 +36,7 @@ openai = [
|
||||
]
|
||||
uitars = [
|
||||
"httpx>=0.27.0,<0.29.0",
|
||||
"mlx-vlm>=0.1.25"
|
||||
"mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
|
||||
]
|
||||
ui = [
|
||||
"gradio>=5.23.3,<6.0.0",
|
||||
@@ -86,7 +86,7 @@ all = [
|
||||
"ollama>=0.4.7,<0.5.0",
|
||||
"gradio>=5.23.3,<6.0.0",
|
||||
"python-dotenv>=1.0.1,<2.0.0",
|
||||
"mlx-vlm>=0.1.25"
|
||||
"mlx-vlm @ git+https://github.com/prncvrm/mlx-vlm.git@fix/qwen2-position-id"
|
||||
]
|
||||
|
||||
[tool.pdm]
|
||||
|
||||
Reference in New Issue
Block a user