Files
computer/.github/workflows/test-cua-models.yml
T
Adam 2ba64f018d Fix/agent loop test (#532)
* Fix agent test condition trigger
2025-10-29 13:07:16 -07:00

117 lines
3.7 KiB
YAML

name: Test CUA Supporting Models

# Exercises the supported CUA models; the test job relies on provider API keys.
# Triggers:
#   - pull requests targeting main or master (pull_request_target event)
#   - manual dispatch, gated by the boolean `test_models` input (defaults to true)
on:
  workflow_dispatch:
    inputs:
      test_models:
        type: boolean
        default: true
        required: false
        description: 'Test all supported models (requires API keys)'
  pull_request_target:
    branches:
      - main
      - master
jobs:
  # Runs the agent-loop test once per entry in the model matrix.
  # Executes for pull_request_target events, or on manual dispatch when the
  # `test_models` input is true.
  test-all-models:
    if: ${{ github.event_name == 'pull_request_target' || fromJSON(inputs.test_models || 'false') }}
    runs-on: ubuntu-latest
    # Least privilege: this job only needs to read the repository. It runs on
    # pull_request_target with secrets in scope, so keep the token read-only.
    permissions:
      contents: read
    strategy:
      # Let every model finish even if another matrix entry fails.
      fail-fast: false
      matrix:
        model:
          # Anthropic Claude Models
          # - anthropic/claude-3-5-sonnet-20241022
          # - anthropic/claude-3-7-sonnet-20250219
          # - anthropic/claude-opus-4-20250514
          # - anthropic/claude-sonnet-4-20250514
          # - anthropic/claude-opus-4-1-20250805
          - anthropic/claude-sonnet-4-5-20250929
          # - anthropic/claude-haiku-4-5-20251001
          # OpenAI Models
          # - openai/computer-use-preview
          # Gemini Models
          # - gemini-2.5-computer-use-preview-10-2025
          # GLM-4.5V Models
          # - openrouter/z-ai/glm-4.5v
          # UI-TARS Models
          # - huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B
          # OpenCUA Models
          # - huggingface-local/xlangai/OpenCUA-7B
          # - huggingface-local/xlangai/OpenCUA-32B
          # GTA1 Family Models
          # - huggingface-local/HelloKKMe/GTA1-7B
          # - huggingface-local/HelloKKMe/GTA1-32B
          # - huggingface-local/HelloKKMe/GTA1-72B
          # Holo 1.5 Family Models
          # - huggingface-local/Hcompany/Holo1.5-3B
          # - huggingface-local/Hcompany/Holo1.5-7B
          # - huggingface-local/Hcompany/Holo1.5-72B
          # InternVL 3.5 Family Models
          # - huggingface-local/OpenGVLab/InternVL3_5-1B
          # - huggingface-local/OpenGVLab/InternVL3_5-2B
          # - huggingface-local/OpenGVLab/InternVL3_5-4B
          # - huggingface-local/OpenGVLab/InternVL3_5-8B
          # GLM-4.5V Local
          # - huggingface-local/zai-org/GLM-4.5V
          # Composed Models (Grounding + Planning)
          # - omniparser+anthropic/claude-3-5-sonnet-20241022
          # - omniparser+openai/gpt-4o-mini
          # - moondream3+anthropic/claude-3-5-sonnet-20241022
          # - moondream3+openai/gpt-4o-mini
    steps:
      # Model ids contain "/" (and "+" for composed models). upload-artifact
      # rejects "/" in artifact names, so derive a sanitized name up front.
      # This is the first step so the output exists even when a later step
      # fails and only the always() upload step still runs.
      - name: Derive artifact-safe model name
        id: artifact
        env:
          MODEL: ${{ matrix.model }}
        run: |
          echo "name=test-results-${MODEL//[^A-Za-z0-9._-]/-}" >> "$GITHUB_OUTPUT"

      # NOTE: no `ref` is given on purpose. Do not point this at the pull
      # request head: this job has API-key secrets in its environment.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up uv and Python
        uses: astral-sh/setup-uv@v4
        with:
          python-version: "3.12"

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libgl1-mesa-dri libglib2.0-0

      - name: Install CUA dependencies (uv)
        run: |
          uv venv
          uv pip install -e libs/python/agent -e libs/python/computer
          uv pip install -e libs/python/core
          uv pip install "cua-agent[uitars-hf]"
          uv pip install pytest

      - name: Test model with agent loop
        # The model id is passed through the environment and quoted in the
        # script instead of being expanded into the shell source.
        run: |
          cd tests/agent_loop_testing
          uv run python agent_test.py --model "$MODEL"
        env:
          MODEL: ${{ matrix.model }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          # OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

      - name: Upload test results
        # Upload whatever was produced, including after a failed test step.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          path: |
            tests/agent_loop_testing/test_images/
            *.log
          retention-days: 7