Improved trajectory saving

This commit is contained in:
Dillon DuPont
2025-08-27 16:48:57 -04:00
parent f14e9288ac
commit 0d3f8ea3ff
3 changed files with 63 additions and 9 deletions

View File

@@ -94,6 +94,10 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
# format: turn_000/0000_name.json
artifact_filename = f"{self.current_artifact:04d}_{name}"
artifact_path = turn_dir / f"{artifact_filename}.json"
+ # add created_at
+ if isinstance(artifact, dict):
+ artifact = artifact.copy()
+ artifact["created_at"] = str(uuid.uuid1().time)
with open(artifact_path, "w") as f:
json.dump(sanitize_image_urls(artifact), f, indent=2)
self.current_artifact += 1
@@ -171,7 +175,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
"status": "completed",
"completed_at": str(uuid.uuid1().time),
"total_usage": self.total_usage,
- "new_items": sanitize_image_urls(new_items),
+ "new_items": new_items,
"total_turns": self.current_turn
})

View File

@@ -41,6 +41,7 @@ class ProxyOperatorAgent(OperatorAgent):
*,
model: str | None = None,
allowed_tools: list[str] | None = None,
+ trajectory_dir: str | None = None,
**kwargs: Any,
) -> None:
model = model or "computer-use-preview"
@@ -54,8 +55,7 @@ class ProxyOperatorAgent(OperatorAgent):
computer_agent = BaseComputerAgent(
model=model,
tools=[computer_shim],
- verbosity=20,
- trajectory_dir='trajectories'
+ trajectory_dir=trajectory_dir
)
model_client = FakeAsyncOpenAI(computer_agent)
@@ -115,6 +115,7 @@ async def run_full_dataset(
max_concurrent: int = 30,
max_steps: int = 50,
split: str = "train",
+ trajectory_dir: str | None = None,
) -> list[Any]:
"""Run evaluation across the entire dataset using hud.datasets.run_dataset."""
@@ -134,7 +135,7 @@ async def run_full_dataset(
name=job_name,
dataset=dataset,
agent_class=ProxyOperatorAgent,
- agent_config={"model": model, "allowed_tools": allowed_tools},
+ agent_config={"model": model, "allowed_tools": allowed_tools, "trajectory_dir": trajectory_dir},
max_concurrent=max_concurrent,
metadata={"dataset": dataset_name},
max_steps=max_steps,