Fixed watchdog test

This commit is contained in:
Dillon DuPont
2025-07-01 13:18:21 -04:00
parent 62832f7bea
commit 46ac7e3017
2 changed files with 39 additions and 13 deletions

View File

@@ -13,10 +13,16 @@ import argparse
import sys
async def test_connection(host="localhost", port=8000, keep_alive=False):
async def test_connection(host="localhost", port=8000, keep_alive=False, container_name=None):
"""Test connection to the Computer Server."""
uri = f"ws://{host}:{port}/ws"
print(f"Connecting to {uri}...")
if container_name:
# Container mode: use WSS with container domain and port 8443
uri = f"wss://{container_name}.containers.cloud.trycua.com:8443/ws"
print(f"Connecting to container {container_name} at {uri}...")
else:
# Local mode: use WS with specified host and port
uri = f"ws://{host}:{port}/ws"
print(f"Connecting to local server at {uri}...")
try:
async with websockets.connect(uri) as websocket:
@@ -54,13 +60,23 @@ def parse_args():
parser = argparse.ArgumentParser(description="Test connection to Computer Server")
parser.add_argument("--host", default="localhost", help="Host address (default: localhost)")
parser.add_argument("--port", type=int, default=8000, help="Port number (default: 8000)")
parser.add_argument("--container-name", help="Container name for cloud connection (uses WSS and port 8443)")
parser.add_argument("--keep-alive", action="store_true", help="Keep connection alive")
return parser.parse_args()
async def main():
args = parse_args()
success = await test_connection(args.host, args.port, args.keep_alive)
# argparse already stores --container-name as args.container_name; getattr is only a defensive read that falls back to None
container_name = getattr(args, 'container_name', None)
success = await test_connection(
host=args.host,
port=args.port,
keep_alive=args.keep_alive,
container_name=container_name
)
return 0 if success else 1

View File

@@ -49,7 +49,7 @@ async def computer():
await computer.disconnect()
@pytest.mark.asyncio
@pytest.mark.asyncio(loop_scope="session")
async def test_simple_server_ping(computer):
"""
Simple test to verify server connectivity before running watchdog tests.
@@ -66,7 +66,7 @@ async def test_simple_server_ping(computer):
pytest.fail(f"Basic server connectivity test failed: {e}")
@pytest.mark.asyncio
@pytest.mark.asyncio(loop_scope="session")
async def test_watchdog_recovery_after_hanging_command(computer):
"""
Test that the watchdog can recover the server after a hanging command.
@@ -80,15 +80,16 @@ async def test_watchdog_recovery_after_hanging_command(computer):
print("Starting watchdog recovery test...")
async def hanging_command():
"""Execute a command that takes 5 minutes to complete."""
"""Execute a command that sleeps forever to hang the server."""
try:
print("Starting hanging command (sleep 300)...")
result = await computer.interface.run_command("sleep 300")
print(f"Hanging command completed: {result}")
return result
print("Starting hanging command (sleep infinity)...")
# Use a very long sleep that should never complete naturally
result = await computer.interface.run_command("sleep 999999")
print(f"Hanging command completed unexpectedly: {result}")
return True # Should never reach here if watchdog works
except Exception as e:
print(f"Hanging command failed (expected if watchdog restarts): {e}")
return None
print(f"Hanging command interrupted (expected if watchdog restarts): {e}")
return None # Expected result when watchdog kills the process
async def ping_server():
"""Ping the server every 30 seconds with echo commands."""
@@ -183,13 +184,22 @@ async def test_watchdog_recovery_after_hanging_command(computer):
# Test passes if we had some successful pings, indicating recovery
assert successful_pings > 0, f"No successful pings detected. Server may not have recovered."
# Check if hanging command was killed (indicating watchdog restart)
if hanging_result is None:
print("✅ SUCCESS: Hanging command was killed - watchdog restart detected")
elif hanging_result is True:
print("⚠️ WARNING: Hanging command completed naturally - watchdog may not have restarted")
# If we had failures followed by successes, that indicates watchdog recovery
if failed_pings > 0 and successful_pings > 0:
print("✅ SUCCESS: Watchdog recovery detected - server became unresponsive then recovered")
# Additional check: hanging command should be None if watchdog worked
assert hanging_result is None, "Expected hanging command to be killed by watchdog restart"
elif successful_pings > 0 and failed_pings == 0:
print("✅ SUCCESS: Server remained responsive throughout test")
print(f"Test completed: {successful_pings} successful pings, {failed_pings} failed pings")
print(f"Hanging command result: {hanging_result} (None = killed by watchdog, True = completed naturally)")
else:
pytest.fail("Ping task did not complete - unable to assess server recovery")