Mirror of https://github.com/trycua/computer.git (synced 2026-01-01 02:50:15 -06:00)
Improved trajectory saving
This commit is contained in:
@@ -94,6 +94,10 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
||||
# format: turn_000/0000_name.json
|
||||
artifact_filename = f"{self.current_artifact:04d}_{name}"
|
||||
artifact_path = turn_dir / f"{artifact_filename}.json"
|
||||
# add created_at
|
||||
if isinstance(artifact, dict):
|
||||
artifact = artifact.copy()
|
||||
artifact["created_at"] = str(uuid.uuid1().time)
|
||||
with open(artifact_path, "w") as f:
|
||||
json.dump(sanitize_image_urls(artifact), f, indent=2)
|
||||
self.current_artifact += 1
|
||||
@@ -171,7 +175,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
||||
"status": "completed",
|
||||
"completed_at": str(uuid.uuid1().time),
|
||||
"total_usage": self.total_usage,
|
||||
"new_items": sanitize_image_urls(new_items),
|
||||
"new_items": new_items,
|
||||
"total_turns": self.current_turn
|
||||
})
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ class ProxyOperatorAgent(OperatorAgent):
|
||||
*,
|
||||
model: str | None = None,
|
||||
allowed_tools: list[str] | None = None,
|
||||
trajectory_dir: str | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
model = model or "computer-use-preview"
|
||||
@@ -54,8 +55,7 @@ class ProxyOperatorAgent(OperatorAgent):
|
||||
computer_agent = BaseComputerAgent(
|
||||
model=model,
|
||||
tools=[computer_shim],
|
||||
verbosity=20,
|
||||
trajectory_dir='trajectories'
|
||||
trajectory_dir=trajectory_dir
|
||||
)
|
||||
model_client = FakeAsyncOpenAI(computer_agent)
|
||||
|
||||
@@ -115,6 +115,7 @@ async def run_full_dataset(
|
||||
max_concurrent: int = 30,
|
||||
max_steps: int = 50,
|
||||
split: str = "train",
|
||||
trajectory_dir: str | None = None,
|
||||
) -> list[Any]:
|
||||
"""Run evaluation across the entire dataset using hud.datasets.run_dataset."""
|
||||
|
||||
@@ -134,7 +135,7 @@ async def run_full_dataset(
|
||||
name=job_name,
|
||||
dataset=dataset,
|
||||
agent_class=ProxyOperatorAgent,
|
||||
agent_config={"model": model, "allowed_tools": allowed_tools},
|
||||
agent_config={"model": model, "allowed_tools": allowed_tools, "trajectory_dir": trajectory_dir},
|
||||
max_concurrent=max_concurrent,
|
||||
metadata={"dataset": dataset_name},
|
||||
max_steps=max_steps,
|
||||
|
||||
Reference in New Issue
Block a user