Added human/ provider to LiteLLM

This commit is contained in:
Dillon DuPont
2025-08-11 15:24:56 -04:00
parent 1ffb7d1a9a
commit d8b1792cc5
6 changed files with 1286 additions and 2 deletions


@@ -0,0 +1,348 @@
import os
import asyncio
import requests
from typing import List, Dict, Any, Iterator, AsyncIterator
from litellm.types.utils import GenericStreamingChunk, ModelResponse
from litellm.llms.custom_llm import CustomLLM
class HumanAdapter(CustomLLM):
"""Human Adapter for human-in-the-loop completions.
This adapter sends completion requests to a human completion server
where humans can review and respond to AI requests.
"""
def __init__(self, base_url: str | None = None, timeout: float = 300.0, **kwargs):
"""Initialize the human adapter.
Args:
base_url: Base URL for the human completion server.
Defaults to HUMAN_BASE_URL environment variable or http://localhost:8002
timeout: Timeout in seconds for waiting for human response
**kwargs: Additional arguments
"""
super().__init__()
self.base_url = base_url or os.getenv('HUMAN_BASE_URL', 'http://localhost:8002')
self.timeout = timeout
# Ensure base_url doesn't end with slash
self.base_url = self.base_url.rstrip('/')
def _queue_completion(self, messages: List[Dict[str, Any]], model: str) -> str:
"""Queue a completion request and return the call ID.
Args:
messages: Messages in OpenAI format
model: Model name
Returns:
Call ID for tracking the request
Raises:
Exception: If queueing fails
"""
try:
response = requests.post(
f"{self.base_url}/queue",
json={"messages": messages, "model": model},
timeout=10
)
response.raise_for_status()
return response.json()["id"]
        except requests.RequestException as e:
            raise Exception(f"Failed to queue completion request: {e}") from e
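    # Example round-trip against the bundled server (see server.py below),
    # assuming it runs on its default port:
    #   POST {base_url}/queue  body {"messages": [...], "model": "human"}
    #   -> 200 {"id": "<uuid4>", "status": "queued"}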
def _wait_for_completion(self, call_id: str) -> Dict[str, Any]:
"""Wait for human to complete the call.
Args:
call_id: ID of the queued completion call
Returns:
Dict containing response and/or tool_calls
Raises:
TimeoutError: If timeout is exceeded
Exception: If completion fails
"""
import time
start_time = time.time()
while True:
try:
# Check status
                status_response = requests.get(f"{self.base_url}/status/{call_id}", timeout=10)
status_response.raise_for_status()
status_data = status_response.json()
if status_data["status"] == "completed":
result = {}
if "response" in status_data and status_data["response"]:
result["response"] = status_data["response"]
if "tool_calls" in status_data and status_data["tool_calls"]:
result["tool_calls"] = status_data["tool_calls"]
return result
elif status_data["status"] == "failed":
error_msg = status_data.get("error", "Unknown error")
raise Exception(f"Completion failed: {error_msg}")
# Check timeout
if time.time() - start_time > self.timeout:
raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds")
# Wait before checking again
time.sleep(1.0)
except requests.RequestException as e:
if time.time() - start_time > self.timeout:
raise TimeoutError(f"Timeout waiting for human response: {e}")
# Continue trying if we haven't timed out
time.sleep(1.0)
async def _async_wait_for_completion(self, call_id: str) -> Dict[str, Any]:
"""Async version of wait_for_completion.
Args:
call_id: ID of the queued completion call
Returns:
Dict containing response and/or tool_calls
Raises:
TimeoutError: If timeout is exceeded
Exception: If completion fails
"""
import aiohttp
import time
start_time = time.time()
async with aiohttp.ClientSession() as session:
while True:
try:
# Check status
async with session.get(f"{self.base_url}/status/{call_id}") as response:
response.raise_for_status()
status_data = await response.json()
if status_data["status"] == "completed":
result = {}
if "response" in status_data and status_data["response"]:
result["response"] = status_data["response"]
if "tool_calls" in status_data and status_data["tool_calls"]:
result["tool_calls"] = status_data["tool_calls"]
return result
elif status_data["status"] == "failed":
error_msg = status_data.get("error", "Unknown error")
raise Exception(f"Completion failed: {error_msg}")
# Check timeout
if time.time() - start_time > self.timeout:
raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds")
# Wait before checking again
await asyncio.sleep(1.0)
                except aiohttp.ClientError as e:
if time.time() - start_time > self.timeout:
raise TimeoutError(f"Timeout waiting for human response: {e}")
# Continue trying if we haven't timed out
await asyncio.sleep(1.0)
def _generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]:
"""Generate a human response for the given messages.
Args:
messages: Messages in OpenAI format
model: Model name
Returns:
Dict containing response and/or tool_calls
"""
# Queue the completion request
call_id = self._queue_completion(messages, model)
# Wait for human response
response = self._wait_for_completion(call_id)
return response
async def _async_generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]:
"""Async version of _generate_response.
Args:
messages: Messages in OpenAI format
model: Model name
Returns:
Dict containing response and/or tool_calls
"""
# Queue the completion request (sync operation)
call_id = self._queue_completion(messages, model)
# Wait for human response (async)
response = await self._async_wait_for_completion(call_id)
return response
def completion(self, *args, **kwargs) -> ModelResponse:
"""Synchronous completion method.
Returns:
ModelResponse with human-generated text or tool calls
"""
messages = kwargs.get('messages', [])
model = kwargs.get('model', 'human')
# Generate human response
human_response_data = self._generate_response(messages, model)
# Create ModelResponse with proper structure
from litellm.types.utils import ModelResponse, Choices, Message
import uuid
import time
# Create message content based on response type
if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
# Tool calls response
message = Message(
role="assistant",
content=human_response_data.get("response", ""),
tool_calls=human_response_data["tool_calls"]
)
else:
# Text response
message = Message(
role="assistant",
content=human_response_data.get("response", "")
)
choice = Choices(
finish_reason="stop",
index=0,
message=message
)
result = ModelResponse(
id=f"human-{uuid.uuid4()}",
choices=[choice],
created=int(time.time()),
model=f"human/{model}",
object="chat.completion"
)
return result
async def acompletion(self, *args, **kwargs) -> ModelResponse:
"""Asynchronous completion method.
Returns:
ModelResponse with human-generated text or tool calls
"""
messages = kwargs.get('messages', [])
model = kwargs.get('model', 'human')
# Generate human response
human_response_data = await self._async_generate_response(messages, model)
# Create ModelResponse with proper structure
from litellm.types.utils import ModelResponse, Choices, Message
import uuid
import time
# Create message content based on response type
if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
# Tool calls response
message = Message(
role="assistant",
content=human_response_data.get("response", ""),
tool_calls=human_response_data["tool_calls"]
)
else:
# Text response
message = Message(
role="assistant",
content=human_response_data.get("response", "")
)
choice = Choices(
finish_reason="stop",
index=0,
message=message
)
result = ModelResponse(
id=f"human-{uuid.uuid4()}",
choices=[choice],
created=int(time.time()),
model=f"human/{model}",
object="chat.completion"
)
return result
def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
"""Synchronous streaming method.
Yields:
Streaming chunks with human-generated text or tool calls
"""
messages = kwargs.get('messages', [])
model = kwargs.get('model', 'human')
# Generate human response
human_response_data = self._generate_response(messages, model)
import time
# Handle tool calls vs text response
if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
# Stream tool calls as a single chunk
generic_chunk: GenericStreamingChunk = {
"finish_reason": "tool_calls",
"index": 0,
"is_finished": True,
"text": human_response_data.get("response", ""),
"tool_use": human_response_data["tool_calls"],
"usage": {"completion_tokens": 1, "prompt_tokens": 0, "total_tokens": 1},
}
yield generic_chunk
else:
# Stream text response
response_text = human_response_data.get("response", "")
generic_chunk: GenericStreamingChunk = {
"finish_reason": "stop",
"index": 0,
"is_finished": True,
"text": response_text,
"tool_use": None,
"usage": {"completion_tokens": len(response_text.split()), "prompt_tokens": 0, "total_tokens": len(response_text.split())},
}
yield generic_chunk
async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
"""Asynchronous streaming method.
Yields:
Streaming chunks with human-generated text or tool calls
"""
messages = kwargs.get('messages', [])
model = kwargs.get('model', 'human')
# Generate human response
        human_response_data = await self._async_generate_response(messages, model)
        # The awaited result is a dict (mirroring the sync streaming path),
        # so handle tool calls vs text rather than treating it as a string
        tool_calls = human_response_data.get("tool_calls") or None
        response_text = human_response_data.get("response", "")
        token_count = 1 if tool_calls else len(response_text.split())
        generic_streaming_chunk: GenericStreamingChunk = {
            "finish_reason": "tool_calls" if tool_calls else "stop",
            "index": 0,
            "is_finished": True,
            "text": response_text,
            "tool_use": tool_calls,
            "usage": {"completion_tokens": token_count, "prompt_tokens": 0, "total_tokens": token_count},
        }
        yield generic_streaming_chunk

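A minimal usage sketch for the adapter above, assuming the bundled human completion server is running on its default port and that HumanAdapter is importable from the package's adapters module; the "demo" model suffix and the prompt text are illustrative:

import litellm
from litellm import completion

from agent.adapters import HumanAdapter  # assumed import path

# Register under the "human" provider, mirroring the ComputerAgent change below.
litellm.custom_provider_map = [
    {"provider": "human", "custom_handler": HumanAdapter()}
]

# Blocks until a human replies in the Gradio UI (or the adapter times out).
response = completion(
    model="human/demo",  # "demo" is an illustrative model name
    messages=[{"role": "user", "content": "Should the agent retry this step?"}],
)
print(response.choices[0].message.content)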

@@ -13,7 +13,10 @@ import json
 import litellm
 import litellm.utils
 import inspect
-from .adapters import HuggingFaceLocalAdapter
+from .adapters import (
+    HuggingFaceLocalAdapter,
+    HumanAdapter,
+)
 from .callbacks import (
     ImageRetentionCallback,
     LoggingCallback,
@@ -215,8 +218,10 @@ class ComputerAgent:
         hf_adapter = HuggingFaceLocalAdapter(
             device="auto"
         )
+        human_adapter = HumanAdapter()
         litellm.custom_provider_map = [
-            {"provider": "huggingface-local", "custom_handler": hf_adapter}
+            {"provider": "huggingface-local", "custom_handler": hf_adapter},
+            {"provider": "human", "custom_handler": human_adapter}
         ]
         litellm.suppress_debug_info = True


@@ -0,0 +1,29 @@
"""
Human-in-the-Loop Completion Tool
This package provides a human-in-the-loop completion system that allows
AI agents to request human assistance for complex decisions or responses.
Components:
- server.py: FastAPI server with completion queue management
- ui.py: Gradio UI for human interaction
- __main__.py: Combined server and UI application
Usage:
# Run the server and UI
python -m agent.human_tool
# Or run components separately
python -m agent.human_tool.server # API server only
python -m agent.human_tool.ui # UI only
"""
from .server import CompletionQueue, completion_queue
from .ui import HumanCompletionUI, create_ui
__all__ = [
"CompletionQueue",
"completion_queue",
"HumanCompletionUI",
"create_ui"
]


@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
Human-in-the-Loop Completion Server and UI
This module combines the FastAPI server for handling completion requests
with a Gradio UI for human interaction.
"""
import gradio as gr
from fastapi import FastAPI
from .server import app as fastapi_app
from .ui import create_ui
# Create the Gradio demo
gradio_demo = create_ui()
# Mount Gradio on FastAPI
CUSTOM_PATH = "/gradio"
app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH)
# Add a redirect from root to Gradio UI
@fastapi_app.get("/")
async def redirect_to_ui():
"""Redirect root to Gradio UI."""
return {
"message": "Human Completion Server is running",
"ui_url": "/gradio",
"api_docs": "/docs"
}
if __name__ == "__main__":
import uvicorn
print("🚀 Starting Human-in-the-Loop Completion Server...")
print("📊 API Server: http://localhost:8002")
print("🎨 Gradio UI: http://localhost:8002/gradio")
print("📚 API Docs: http://localhost:8002/docs")
uvicorn.run(app, host="0.0.0.0", port=8002)


@@ -0,0 +1,234 @@
import asyncio
import uuid
from datetime import datetime
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
from enum import Enum
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
class CompletionStatus(str, Enum):
PENDING = "pending"
COMPLETED = "completed"
FAILED = "failed"
@dataclass
class CompletionCall:
id: str
messages: List[Dict[str, Any]]
model: str
status: CompletionStatus
created_at: datetime
completed_at: Optional[datetime] = None
response: Optional[str] = None
tool_calls: Optional[List[Dict[str, Any]]] = None
error: Optional[str] = None
class ToolCall(BaseModel):
id: str
type: str = "function"
function: Dict[str, Any]
class CompletionRequest(BaseModel):
messages: List[Dict[str, Any]]
model: str
class CompletionResponse(BaseModel):
response: Optional[str] = None
tool_calls: Optional[List[Dict[str, Any]]] = None
class CompletionQueue:
def __init__(self):
self._queue: Dict[str, CompletionCall] = {}
self._pending_order: List[str] = []
self._lock = asyncio.Lock()
async def add_completion(self, messages: List[Dict[str, Any]], model: str) -> str:
"""Add a completion call to the queue."""
async with self._lock:
call_id = str(uuid.uuid4())
completion_call = CompletionCall(
id=call_id,
messages=messages,
model=model,
status=CompletionStatus.PENDING,
created_at=datetime.now()
)
self._queue[call_id] = completion_call
self._pending_order.append(call_id)
return call_id
async def get_pending_calls(self) -> List[Dict[str, Any]]:
"""Get all pending completion calls."""
async with self._lock:
pending_calls = []
for call_id in self._pending_order:
if call_id in self._queue and self._queue[call_id].status == CompletionStatus.PENDING:
call = self._queue[call_id]
pending_calls.append({
"id": call.id,
"model": call.model,
"created_at": call.created_at.isoformat(),
"messages": call.messages
})
return pending_calls
async def get_call_status(self, call_id: str) -> Optional[Dict[str, Any]]:
"""Get the status of a specific completion call."""
async with self._lock:
if call_id not in self._queue:
return None
call = self._queue[call_id]
result = {
"id": call.id,
"status": call.status.value,
"created_at": call.created_at.isoformat(),
"model": call.model,
"messages": call.messages
}
if call.completed_at:
result["completed_at"] = call.completed_at.isoformat()
if call.response:
result["response"] = call.response
if call.tool_calls:
result["tool_calls"] = call.tool_calls
if call.error:
result["error"] = call.error
return result
async def complete_call(self, call_id: str, response: Optional[str] = None, tool_calls: Optional[List[Dict[str, Any]]] = None) -> bool:
"""Mark a completion call as completed with a response or tool calls."""
async with self._lock:
if call_id not in self._queue:
return False
call = self._queue[call_id]
if call.status != CompletionStatus.PENDING:
return False
call.status = CompletionStatus.COMPLETED
call.completed_at = datetime.now()
call.response = response
call.tool_calls = tool_calls
# Remove from pending order
if call_id in self._pending_order:
self._pending_order.remove(call_id)
return True
async def fail_call(self, call_id: str, error: str) -> bool:
"""Mark a completion call as failed with an error."""
async with self._lock:
if call_id not in self._queue:
return False
call = self._queue[call_id]
if call.status != CompletionStatus.PENDING:
return False
call.status = CompletionStatus.FAILED
call.completed_at = datetime.now()
call.error = error
# Remove from pending order
if call_id in self._pending_order:
self._pending_order.remove(call_id)
return True
async def wait_for_completion(self, call_id: str, timeout: float = 300.0) -> Optional[str]:
"""Wait for a completion call to be completed and return the response."""
start_time = asyncio.get_event_loop().time()
while True:
status = await self.get_call_status(call_id)
if not status:
return None
if status["status"] == CompletionStatus.COMPLETED.value:
return status.get("response")
elif status["status"] == CompletionStatus.FAILED.value:
raise Exception(f"Completion failed: {status.get('error', 'Unknown error')}")
# Check timeout
if asyncio.get_event_loop().time() - start_time > timeout:
await self.fail_call(call_id, "Timeout waiting for human response")
raise TimeoutError("Timeout waiting for human response")
# Wait a bit before checking again
await asyncio.sleep(0.5)
# Global queue instance
completion_queue = CompletionQueue()
# FastAPI app
app = FastAPI(title="Human Completion Server", version="1.0.0")
@app.post("/queue", response_model=Dict[str, str])
async def queue_completion(request: CompletionRequest):
"""Add a completion request to the queue."""
call_id = await completion_queue.add_completion(request.messages, request.model)
return {"id": call_id, "status": "queued"}
@app.get("/pending")
async def list_pending():
"""List all pending completion calls."""
pending_calls = await completion_queue.get_pending_calls()
return {"pending_calls": pending_calls}
@app.get("/status/{call_id}")
async def get_status(call_id: str):
"""Get the status of a specific completion call."""
status = await completion_queue.get_call_status(call_id)
if not status:
raise HTTPException(status_code=404, detail="Completion call not found")
return status
@app.post("/complete/{call_id}")
async def complete_call(call_id: str, response: CompletionResponse):
"""Complete a call with a human response."""
success = await completion_queue.complete_call(
call_id,
response=response.response,
tool_calls=response.tool_calls
)
if success:
return {"status": "success", "message": "Call completed"}
else:
raise HTTPException(status_code=404, detail="Call not found or already completed")
@app.post("/fail/{call_id}")
async def fail_call(call_id: str, error: Dict[str, str]):
"""Mark a call as failed."""
success = await completion_queue.fail_call(call_id, error.get("error", "Unknown error"))
if not success:
raise HTTPException(status_code=404, detail="Completion call not found or already completed")
return {"status": "failed"}
@app.get("/")
async def root():
"""Root endpoint."""
return {"message": "Human Completion Server is running"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8002)

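For reference, a sketch of the HTTP protocol this server speaks with the adapter and the UI, driven manually with requests; it assumes the server is running on its default port, and the prompt and reply strings are illustrative:

import requests

BASE = "http://localhost:8002"

# 1. Queue a completion request (what HumanAdapter._queue_completion sends).
call_id = requests.post(
    f"{BASE}/queue",
    json={"messages": [{"role": "user", "content": "Approve this deploy?"}],
          "model": "human"},
    timeout=10,
).json()["id"]

# 2. Answer it over the API, playing the human operator's role
#    (the Gradio UI posts the same payload to /complete/{call_id}).
requests.post(f"{BASE}/complete/{call_id}",
              json={"response": "Approved."}, timeout=10).raise_for_status()

# 3. Poll status, as HumanAdapter._wait_for_completion does.
status = requests.get(f"{BASE}/status/{call_id}", timeout=10).json()
assert status["status"] == "completed"
print(status["response"])  # "Approved."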

@@ -0,0 +1,630 @@
import gradio as gr
import json
import time
from typing import List, Dict, Any, Optional
from datetime import datetime
import requests
from .server import completion_queue
import base64
import io
from PIL import Image
class HumanCompletionUI:
def __init__(self, server_url: str = "http://localhost:8002"):
self.server_url = server_url
self.current_call_id: Optional[str] = None
self.refresh_interval = 2.0 # seconds
self.last_image = None # Store the last image for display
def format_messages_for_chatbot(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Format messages for display in gr.Chatbot with type='messages'."""
formatted = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
tool_calls = msg.get("tool_calls", [])
# Handle different content formats
if isinstance(content, list):
# Multi-modal content - can include text and images
formatted_content = []
for item in content:
if item.get("type") == "text":
text = item.get("text", "")
if text.strip(): # Only add non-empty text
formatted_content.append(text)
elif item.get("type") == "image_url":
image_url = item.get("image_url", {}).get("url", "")
if image_url:
# Check if it's a base64 image or URL
if image_url.startswith("data:image"):
# For base64 images, decode and create gr.Image
try:
header, data = image_url.split(",", 1)
image_data = base64.b64decode(data)
image = Image.open(io.BytesIO(image_data))
formatted_content.append(gr.Image(value=image))
except Exception as e:
print(f"Error loading image: {e}")
formatted_content.append(f"[Image loading error: {e}]")
else:
# For URL images, create gr.Image with URL
formatted_content.append(gr.Image(value=image_url))
# Determine final content format
if len(formatted_content) == 1:
content = formatted_content[0]
elif len(formatted_content) > 1:
content = formatted_content
else:
content = "[Empty content]"
# Ensure role is valid for Gradio Chatbot
if role not in ["user", "assistant"]:
role = "assistant" if role == "system" else "user"
# Invert roles for better display in human UI context
# (what the AI says becomes "user", what human should respond becomes "assistant")
if role == "user":
role = "assistant"
else:
role = "user"
# Add the main message if it has content
if content and str(content).strip():
formatted.append({"role": role, "content": content})
# Handle tool calls - create separate messages for each tool call
if tool_calls:
for tool_call in tool_calls:
function_name = tool_call.get("function", {}).get("name", "unknown")
arguments_str = tool_call.get("function", {}).get("arguments", "{}")
try:
# Parse arguments to format them nicely
arguments = json.loads(arguments_str)
formatted_args = json.dumps(arguments, indent=2)
except json.JSONDecodeError:
# If parsing fails, use the raw string
formatted_args = arguments_str
# Create a formatted message for the tool call
tool_call_content = f"```json\n{formatted_args}\n```"
formatted.append({
"role": role,
"content": tool_call_content,
"metadata": {"title": f"🛠️ Used {function_name}"}
})
return formatted
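    # Illustrative example of the transformation above: an assistant message
    #   {"role": "assistant", "content": "Clicking now",
    #    "tool_calls": [{"function": {"name": "computer",
    #                    "arguments": "{\"type\": \"click\", \"x\": 10, \"y\": 20}"}}]}
    # becomes two chatbot entries with role "user" (roles are inverted so the
    # AI's turn reads as the incoming side): a text bubble, plus a JSON bubble
    # titled "🛠️ Used computer".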
def get_pending_calls(self) -> List[Dict[str, Any]]:
"""Get pending calls from the server."""
try:
response = requests.get(f"{self.server_url}/pending", timeout=5)
if response.status_code == 200:
return response.json().get("pending_calls", [])
except Exception as e:
print(f"Error fetching pending calls: {e}")
return []
def complete_call_with_response(self, call_id: str, response: str) -> bool:
"""Complete a call with a text response."""
try:
response_data = {"response": response}
response_obj = requests.post(
f"{self.server_url}/complete/{call_id}",
json=response_data,
timeout=10
)
response_obj.raise_for_status()
return True
except requests.RequestException as e:
print(f"Error completing call: {e}")
return False
def complete_call_with_tool_calls(self, call_id: str, tool_calls: List[Dict[str, Any]]) -> bool:
"""Complete a call with tool calls."""
try:
response_data = {"tool_calls": tool_calls}
response_obj = requests.post(
f"{self.server_url}/complete/{call_id}",
json=response_data,
timeout=10
)
response_obj.raise_for_status()
return True
except requests.RequestException as e:
print(f"Error completing call: {e}")
return False
def complete_call(self, call_id: str, response: Optional[str] = None, tool_calls: Optional[List[Dict[str, Any]]] = None) -> bool:
"""Complete a call with either a response or tool calls."""
try:
response_data = {}
if response:
response_data["response"] = response
if tool_calls:
response_data["tool_calls"] = tool_calls
response_obj = requests.post(
f"{self.server_url}/complete/{call_id}",
json=response_data,
timeout=10
)
response_obj.raise_for_status()
return True
except requests.RequestException as e:
print(f"Error completing call: {e}")
return False
def get_last_image_from_messages(self, messages: List[Dict[str, Any]]) -> Optional[Any]:
"""Extract the last image from the messages for display above conversation."""
last_image = None
for msg in reversed(messages): # Start from the last message
content = msg.get("content", "")
if isinstance(content, list):
for item in reversed(content): # Get the last image in the message
if item.get("type") == "image_url":
image_url = item.get("image_url", {}).get("url", "")
if image_url:
if image_url.startswith("data:image"):
# For base64 images, create a gr.Image component
try:
header, data = image_url.split(",", 1)
image_data = base64.b64decode(data)
image = Image.open(io.BytesIO(image_data))
return image
except Exception as e:
print(f"Error loading image: {e}")
continue
else:
# For URL images, return the URL
return image_url
return last_image
def refresh_pending_calls(self):
"""Refresh the list of pending calls."""
pending_calls = self.get_pending_calls()
if not pending_calls:
return (
gr.update(choices=["latest"], value="latest"), # dropdown
gr.update(value=None), # image (no image)
gr.update(value=[]), # chatbot (empty messages)
gr.update(interactive=False) # submit button
)
# Sort pending calls by created_at to get oldest first
sorted_calls = sorted(pending_calls, key=lambda x: x.get("created_at", ""))
# Create choices for dropdown
choices = [("latest", "latest")] # Add "latest" option first
for call in sorted_calls:
call_id = call["id"]
model = call.get("model", "unknown")
created_at = call.get("created_at", "")
# Format timestamp
try:
dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
time_str = dt.strftime("%H:%M:%S")
            except ValueError:
time_str = created_at
choice_label = f"{call_id[:8]}... ({model}) - {time_str}"
choices.append((choice_label, call_id))
# Default to "latest" which shows the oldest pending conversation
selected_call_id = "latest"
if selected_call_id == "latest" and sorted_calls:
# Use the oldest call (first in sorted list)
selected_call = sorted_calls[0]
conversation = self.format_messages_for_chatbot(selected_call.get("messages", []))
self.current_call_id = selected_call["id"]
# Get the last image from messages
self.last_image = self.get_last_image_from_messages(selected_call.get("messages", []))
else:
conversation = []
self.current_call_id = None
self.last_image = None
return (
gr.update(choices=choices, value="latest"),
gr.update(value=self.last_image),
gr.update(value=conversation),
gr.update(interactive=bool(choices))
)
def on_call_selected(self, selected_choice):
"""Handle when a call is selected from the dropdown."""
if not selected_choice:
return (
gr.update(value=None), # no image
gr.update(value=[]), # empty chatbot
gr.update(interactive=False)
)
pending_calls = self.get_pending_calls()
if not pending_calls:
return (
gr.update(value=None), # no image
gr.update(value=[]), # empty chatbot
gr.update(interactive=False)
)
# Handle "latest" option
if selected_choice == "latest":
# Sort calls by created_at to get oldest first
sorted_calls = sorted(pending_calls, key=lambda x: x.get("created_at", ""))
selected_call = sorted_calls[0] # Get the oldest call
call_id = selected_call["id"]
else:
# Extract call_id from the choice for specific calls
call_id = None
for call in pending_calls:
call_id_short = call["id"][:8]
if call_id_short in selected_choice:
call_id = call["id"]
break
if not call_id:
return (
gr.update(value=None), # no image
gr.update(value=[]), # empty chatbot
gr.update(interactive=False)
)
# Find the selected call
selected_call = next((c for c in pending_calls if c["id"] == call_id), None)
if not selected_call:
return (
gr.update(value=None), # no image
gr.update(value=[]), # empty chatbot
gr.update(interactive=False)
)
conversation = self.format_messages_for_chatbot(selected_call.get("messages", []))
self.current_call_id = call_id
# Get the last image from messages
self.last_image = self.get_last_image_from_messages(selected_call.get("messages", []))
return (
gr.update(value=self.last_image),
gr.update(value=conversation),
gr.update(interactive=True)
)
def submit_response(self, response_text: str):
"""Submit a text response to the current call."""
if not self.current_call_id:
return (
gr.update(value=response_text), # keep response text
gr.update(value="❌ No call selected") # status
)
if not response_text.strip():
return (
gr.update(value=response_text), # keep response text
gr.update(value="❌ Response cannot be empty") # status
)
success = self.complete_call_with_response(self.current_call_id, response_text)
if success:
status_msg = "✅ Response submitted successfully!"
return (
gr.update(value=""), # clear response text
gr.update(value=status_msg) # status
)
else:
return (
gr.update(value=response_text), # keep response text
gr.update(value="❌ Failed to submit response") # status
)
def submit_action(self, action_type: str, **kwargs) -> str:
"""Submit a computer action as a tool call."""
if not self.current_call_id:
return "❌ No call selected"
import uuid
# Create tool call structure
action_data = {"type": action_type, **kwargs}
tool_call = {
"id": f"call_{uuid.uuid4().hex[:24]}",
"type": "function",
"function": {
"name": "computer",
"arguments": json.dumps(action_data)
}
}
success = self.complete_call_with_tool_calls(self.current_call_id, [tool_call])
if success:
return f"{action_type.capitalize()} action submitted as tool call"
else:
return f"❌ Failed to submit {action_type} action"
def submit_click_action(self, x: int, y: int, action_type: str = "click", button: str = "left") -> str:
"""Submit a coordinate-based action."""
if action_type == "click":
return self.submit_action(action_type, x=x, y=y, button=button)
else:
return self.submit_action(action_type, x=x, y=y)
def submit_type_action(self, text: str) -> str:
"""Submit a type action."""
return self.submit_action("type", text=text)
def submit_hotkey_action(self, keys: str) -> str:
"""Submit a hotkey action."""
return self.submit_action("keypress", keys=keys)
def submit_description_click(self, description: str, action_type: str = "click", button: str = "left") -> str:
"""Submit a description-based action."""
if action_type == "click":
return self.submit_action(action_type, element_description=description, button=button)
else:
return self.submit_action(action_type, element_description=description)
def wait_for_pending_calls(self, max_seconds: float = 10.0, check_interval: float = 0.2):
"""Wait for pending calls to appear or until max_seconds elapsed.
This method loops and checks for pending calls at regular intervals,
returning as soon as a pending call is found or the maximum wait time is reached.
Args:
max_seconds: Maximum number of seconds to wait
check_interval: How often to check for pending calls (in seconds)
"""
import time
start_time = time.time()
while time.time() - start_time < max_seconds:
# Check if there are any pending calls
pending_calls = self.get_pending_calls()
if pending_calls:
# Found pending calls, return immediately
return self.refresh_pending_calls()
# Wait before checking again
time.sleep(check_interval)
# Max wait time reached, return current state
return self.refresh_pending_calls()
def create_ui():
"""Create the Gradio interface."""
ui_handler = HumanCompletionUI()
with gr.Blocks(title="Human-in-the-Loop Agent Tool") as demo:
gr.Markdown("# 🤖 Human-in-the-Loop Agent Tool")
gr.Markdown("Review AI conversation requests and provide human responses.")
with gr.Row():
with gr.Column(scale=2):
with gr.Group():
screenshot_image = gr.Image(
label="Screenshot",
interactive=False,
height=600
)
# Action type selection for image clicks
with gr.Row():
action_type_radio = gr.Radio(
label="Action Type",
choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
value="click",
scale=2
)
action_button_radio = gr.Radio(
label="Button (for click only)",
choices=["left", "right", "wheel", "back", "forward"],
value="left",
visible=True,
scale=1
)
conversation_chatbot = gr.Chatbot(
label="Messages",
type="messages",
height=500,
show_copy_button=True
)
with gr.Column(scale=1):
with gr.Group():
call_dropdown = gr.Dropdown(
label="Select a pending call",
choices=["latest"],
interactive=True,
value="latest"
)
refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
with gr.Group():
response_text = gr.Textbox(
label="Response",
lines=3,
placeholder="Enter your response here..."
)
submit_btn = gr.Button("📤 Submit Response", variant="primary", interactive=False)
# Action Accordions
with gr.Accordion("🖱️ Click Actions", open=False):
with gr.Group():
with gr.Row():
click_x = gr.Number(label="X", value=0, minimum=0)
click_y = gr.Number(label="Y", value=0, minimum=0)
with gr.Row():
click_action_type = gr.Dropdown(
label="Action Type",
choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
value="click"
)
click_button = gr.Dropdown(
label="Button (for click only)",
choices=["left", "right", "wheel", "back", "forward"],
value="left"
)
click_submit_btn = gr.Button("Submit Action")
with gr.Accordion("📝 Type Action", open=False):
with gr.Group():
type_text = gr.Textbox(
label="Text to Type",
placeholder="Enter text to type..."
)
type_submit_btn = gr.Button("Submit Type")
with gr.Accordion("⌨️ Keypress Action", open=False):
with gr.Group():
keypress_text = gr.Textbox(
label="Keys",
placeholder="e.g., ctrl+c, alt+tab"
)
keypress_submit_btn = gr.Button("Submit Keypress")
with gr.Accordion("🎯 Description Action", open=False):
with gr.Group():
description_text = gr.Textbox(
label="Element Description",
placeholder="e.g., 'Privacy and security option in left sidebar'"
)
with gr.Row():
description_action_type = gr.Dropdown(
label="Action Type",
choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
value="click"
)
description_button = gr.Radio(
label="Button (for click only)",
choices=["left", "right", "wheel", "back", "forward"],
value="left"
)
description_submit_btn = gr.Button("Submit Description Action")
status_display = gr.Textbox(
label="Status",
interactive=False,
value="Ready to receive calls..."
)
# Event handlers
refresh_btn.click(
fn=ui_handler.refresh_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
call_dropdown.change(
fn=ui_handler.on_call_selected,
inputs=[call_dropdown],
outputs=[screenshot_image, conversation_chatbot, submit_btn]
)
        def handle_image_click(action_type, button, evt: gr.SelectData):
            # Read the radios via inputs: Component.value is the static
            # default, not the live UI state inside an event handler.
            if evt.index is not None:
                x, y = evt.index
                # The .then() chain below refreshes the pending-call view.
                return ui_handler.submit_click_action(x, y, action_type or "click", button or "left")
            return "No coordinates selected"
        screenshot_image.select(
            fn=handle_image_click,
            inputs=[action_type_radio, action_button_radio],
            outputs=[status_display]
        ).then(
            fn=ui_handler.wait_for_pending_calls,
            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
        )
# Response submission
submit_btn.click(
fn=ui_handler.submit_response,
inputs=[response_text],
outputs=[response_text, status_display]
).then(
fn=ui_handler.refresh_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
# Toggle button radio visibility based on action type
def toggle_button_visibility(action_type):
return gr.update(visible=(action_type == "click"))
action_type_radio.change(
fn=toggle_button_visibility,
inputs=[action_type_radio],
outputs=[action_button_radio]
)
# Action accordion handlers
click_submit_btn.click(
fn=ui_handler.submit_click_action,
inputs=[click_x, click_y, click_action_type, click_button],
outputs=[status_display]
).then(
fn=ui_handler.wait_for_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
type_submit_btn.click(
fn=ui_handler.submit_type_action,
inputs=[type_text],
outputs=[status_display]
).then(
fn=ui_handler.wait_for_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
keypress_submit_btn.click(
fn=ui_handler.submit_hotkey_action,
inputs=[keypress_text],
outputs=[status_display]
).then(
fn=ui_handler.wait_for_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
        def handle_description_submit(description, action_type, button):
            if description:
                # The .then() chain below refreshes the pending-call view.
                return ui_handler.submit_description_click(description, action_type, button)
            return "Please enter a description"
description_submit_btn.click(
fn=handle_description_submit,
inputs=[description_text, description_action_type, description_button],
outputs=[status_display]
).then(
fn=ui_handler.wait_for_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
# Load initial data
demo.load(
fn=ui_handler.refresh_pending_calls,
outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
)
return demo
if __name__ == "__main__":
demo = create_ui()
demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860)
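
Finally, a sketch of the tool-call payload the UI produces when an operator submits an action instead of text; the coordinates are illustrative, and the shape matches submit_action above and the tool_calls field that HumanAdapter forwards into its ModelResponse:

import json
import uuid

action = {"type": "click", "x": 412, "y": 88, "button": "left"}  # illustrative values
tool_call = {
    "id": f"call_{uuid.uuid4().hex[:24]}",
    "type": "function",
    "function": {"name": "computer", "arguments": json.dumps(action)},
}
# This is the body POSTed to /complete/{call_id}:
print(json.dumps({"tool_calls": [tool_call]}, indent=2))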