From 4dedd06c5b7a5a4c5bde69403561dd6c9ff03d10 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 12:39:37 -0400 Subject: [PATCH 1/5] Improve notebook structure --- notebooks/hud_hackathon.ipynb | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index edfdbd4e..e1e9d7b8 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -15,10 +15,16 @@ "id": "19f92431", "metadata": {}, "source": [ - "## Step 1: Connect to cloud services\n", - "\n", - "You will need a Cua account to run computer use agents in the cloud and a HUD account to evaluate them.\n", + "## ☁️ Connect to cloud services\n", "\n", + "Create Cua and HUD accounts and load your API keys. " + ] + }, + { + "cell_type": "markdown", + "id": "47171dc3", + "metadata": {}, + "source": [ "1. Create a Cua account at https://www.trycua.com/\n", "2. Start a Cua container at https://www.trycua.com/dashboard/containers\n", "3. Create a HUD account at https://www.hud.so/\n", @@ -56,8 +62,16 @@ "id": "5c8bef64", "metadata": {}, "source": [ - "## Step 2: Create a Computer Use Agent\n", + "## 🤖 Create a Computer Use Agent\n", "\n", + "Create and run a computer use agent using the Cua SDK." + ] + }, + { + "cell_type": "markdown", + "id": "54338496", + "metadata": {}, + "source": [ "Connect to your running Cua container using the Cua SDK and initialize an agent." 
] }, @@ -99,8 +113,6 @@ "id": "12b9c22c", "metadata": {}, "source": [ - "## Step 3: Run a Simple Task\n", - "\n", "Try running the computer use agent on a simple task.\n", "\n", "Trajectories are saved in the format: `trajectories/YYYY-MM-DD_computer-use-pre_XXX`.\n", @@ -135,9 +147,9 @@ "id": "eb4edbb5", "metadata": {}, "source": [ - "## Step 4: Evaluate the Agent with HUD\n", + "## 🧐 Evaluate the Agent with HUD\n", "\n", - "Test your agent's performance on a selection of tasks from the OSWorld benchmark:" + "Test your agent's performance on a selection of tasks from the OSWorld benchmark." ] }, { @@ -174,7 +186,7 @@ "id": "5b89a103", "metadata": {}, "source": [ - "# Step 5: Improve your Agent\n", + "## 🦾 Improve your Agent\n", "\n", "Improve your agent to get the highest score possible on OSWorld-Verified. Here are some ideas to get you started:\n", "\n", From 28f206d824541ac66bd6f583482109430ec9c7ae Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 12:54:50 -0400 Subject: [PATCH 2/5] Improve explanatory text in notebook --- notebooks/hud_hackathon.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index e1e9d7b8..560dd2e5 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -7,7 +7,9 @@ "source": [ "# Computer-Use Agents SOTA Challenge\n", "\n", - "This notebook demonstrates how to create a computer use agent with Cua and evaluate it using HUD." + "Congrats on joining the Cua + HUD hackathon at Hack The North 2025!\n", + "\n", + "This notebook will show you how to create a computer use agent with Cua and evaluate it using HUD." ] }, { @@ -188,11 +190,9 @@ "source": [ "## 🦾 Improve your Agent\n", "\n", - "Improve your agent to get the highest score possible on OSWorld-Verified. 
Here are some ideas to get you started:\n", + "To improve your agent for OSWorld-Verified, experiment with different models and add custom tools that fit your use case. You can also dive into the ComputerAgent source code to design an improved version or subclass tailored to your needs.\n", "\n", - "- Experiment with different models or combinations of models\n", - "- Try adding your custom tools to the agent\n", - "- Read the ComputerAgent source code, and come up with your own improved version/subclass" + "Learn more about [Customizing Your ComputerAgent](https://docs.trycua.com/docs/agent-sdk/customizing-computeragent) in the docs." ] } ], From 1aca043006115e16225d718e068f2855df3a650c Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 12:55:11 -0400 Subject: [PATCH 3/5] Automatically create .env file in notebook --- notebooks/hud_hackathon.ipynb | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index 560dd2e5..dbc8103e 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -30,10 +30,19 @@ "1. Create a Cua account at https://www.trycua.com/\n", "2. Start a Cua container at https://www.trycua.com/dashboard/containers\n", "3. Create a HUD account at https://www.hud.so/\n", - "4. Create a .env file like this:\n", + "4. 
Create a .env file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1757f145", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a .env template if one doesn't exist (an existing .env is never overwritten)\n", "\n", - "```\n", - "# Required environment variables:\n", + "ENV_TEMPLATE = \"\"\"# Required environment variables:\n", "CUA_API_KEY=\n", "CUA_CONTAINER_NAME=\n", "HUD_API_KEY=\n", @@ -41,7 +50,19 @@ "# Any LLM provider will work:\n", "ANTHROPIC_API_KEY=\n", "OPENAI_API_KEY=\n", - "```" + "\"\"\"\n", + "\n", + "from pathlib import Path\n", + "if not Path(\".env\").exists():\n", + " Path(\".env\").write_text(ENV_TEMPLATE) # write_text opens, writes and closes the file" + ] + }, + { + "cell_type": "markdown", + "id": "0949908d", + "metadata": {}, + "source": [ + "5. Fill in all missing values in the .env file" ] }, { From 68ecdcc99ad71c3cad7f57366ecd3029e5faa9a6 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 12:55:22 -0400 Subject: [PATCH 4/5] Assert Cua API keys exist in notebook --- notebooks/hud_hackathon.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index dbc8103e..af14f414 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -106,18 +106,21 @@ "outputs": [], "source": [ "import logging\n", - "from pathlib import Path\n", "import os\n", - "\n", + "from pathlib import Path\n", "from agent import ComputerAgent\n", "from computer import Computer, VMProviderType\n", "\n", + "api_key = os.getenv(\"CUA_API_KEY\")\n", + "container_name = os.getenv(\"CUA_CONTAINER_NAME\")\n", + "assert api_key and container_name, \"Set CUA_API_KEY and CUA_CONTAINER_NAME in your .env file\"\n", + "\n", "# Connect to your existing cloud container\n", "computer = Computer(\n", " os_type=\"linux\",\n", " provider_type=VMProviderType.CLOUD,\n", - " api_key=os.getenv(\"CUA_API_KEY\"),\n", - " name=os.getenv(\"CUA_CONTAINER_NAME\"),\n", + " api_key=api_key,\n", + " name=container_name,\n", " verbosity=logging.INFO\n", ")\n", "\n", From 
4b3e2077fbd19869a5115f28093526409d609162 Mon Sep 17 00:00:00 2001 From: James Murdza Date: Fri, 12 Sep 2025 12:55:43 -0400 Subject: [PATCH 5/5] Remove dataset size limit during HUD evaluation --- notebooks/hud_hackathon.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/hud_hackathon.ipynb b/notebooks/hud_hackathon.ipynb index af14f414..05f377b8 100644 --- a/notebooks/hud_hackathon.ipynb +++ b/notebooks/hud_hackathon.ipynb @@ -198,7 +198,7 @@ " model=\"openai/computer-use-preview\", # Or any supported model string\n", " max_concurrent=20, # Tune to your infra\n", " max_steps=50, # Safety cap per task\n", - " split=\"train[:3]\" # Limit to just 3 tasks\n", + " # split=\"train[:5]\", # Uncomment to limit the run to just 5 tasks\n", ")\n", "\n", "# results is a list from hud.datasets.run_dataset; inspect/aggregate as needed\n",