Add health check and graceful shutdown support

- Implemented a health check endpoint (`/health`) for Docker and orchestration systems.
- Added graceful shutdown configuration in Docker Compose and application code.
- Enhanced shutdown handling in main application and background tasks for improved diagnostics.
- Updated Dockerfile to include health check command.
- Introduced a readiness check endpoint (`/ready`) for Kubernetes-style orchestration.
This commit is contained in:
Admin9705
2025-06-22 20:39:19 -04:00
parent 8c3533991b
commit 0fee673acb
5 changed files with 173 additions and 11 deletions

View File

@@ -30,5 +30,9 @@ ENV TZ=UTC
# Expose port
EXPOSE 9705
# Add health check for Docker
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:9705/health || exit 1
# Run the main application using the new entry point
CMD ["python3", "main.py"]

View File

@@ -12,6 +12,16 @@ services:
- TZ=${TZ:-UTC}
- BASE_URL=${BASE_URL:-}
restart: unless-stopped
# Graceful shutdown configuration
stop_signal: SIGTERM
stop_grace_period: 30s
# Health check configuration
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9705/health"]
interval: 30s
timeout: 10s
start_period: 40s
retries: 3
volumes:
huntarr-config:

48
main.py
View File

@@ -11,6 +11,7 @@ import signal
import logging # Use standard logging for initial setup
import atexit
import time
import time
# Import path configuration early to set up environment
try:
@@ -144,6 +145,14 @@ except Exception as e:
waitress_server = None
shutdown_requested = threading.Event()
# Global shutdown flag for health checks
_global_shutdown_flag = False
def is_shutting_down():
    """Report whether any of the application's shutdown indicators is raised.

    Three module-level indicators are consulted in order: the
    ``_global_shutdown_flag`` value, the ``shutdown_requested`` event and
    the ``stop_event`` event. The first truthy one wins.

    Returns:
        A truthy value as soon as one indicator is set, otherwise the
        (falsy) state of ``stop_event``.
    """
    # Reading a module-level name needs no ``global`` declaration.
    flag = _global_shutdown_flag
    if flag:
        return flag
    return shutdown_requested.is_set() or stop_event.is_set()
def refresh_sponsors_on_startup():
"""Refresh sponsors database from manifest.json on startup"""
import os
@@ -317,7 +326,15 @@ def run_web_server():
def main_shutdown_handler(signum, frame):
"""Gracefully shut down the application."""
huntarr_logger.info(f"Received signal {signum}. Initiating graceful shutdown...")
global _global_shutdown_flag
_global_shutdown_flag = True # Set global shutdown flag immediately
signal_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM" if signum == signal.SIGTERM else f"Signal {signum}"
huntarr_logger.info(f"Received {signal_name}. Initiating graceful shutdown...")
# Set a reasonable timeout for shutdown operations
shutdown_start_time = time.time()
shutdown_timeout = 30 # 30 seconds total shutdown timeout
# Immediate database checkpoint to prevent corruption
try:
@@ -360,12 +377,19 @@ def main_shutdown_handler(signum, frame):
waitress_server.close()
except Exception as e:
huntarr_logger.warning(f"Error closing Waitress server: {e}")
# Force exit if shutdown takes too long (Docker container update scenario)
elapsed_time = time.time() - shutdown_start_time
if elapsed_time > shutdown_timeout:
huntarr_logger.warning(f"Shutdown timeout exceeded ({shutdown_timeout}s). Forcing exit with code 0.")
os._exit(0) # Clean exit for Docker updates
def cleanup_handler():
"""Cleanup function called at exit"""
cleanup_start_time = time.time()
huntarr_logger.info("Exit cleanup handler called")
# Shutdown databases gracefully
# Shutdown databases gracefully with timeout
try:
from primary.utils.database import get_database, get_logs_database
@@ -377,7 +401,8 @@ def cleanup_handler():
try:
with main_db.get_connection() as conn:
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") # Flush WAL to main database
conn.execute("VACUUM") # Optimize database before shutdown
# Skip VACUUM for faster shutdown during updates
huntarr_logger.debug("Main database WAL checkpoint completed")
except Exception as db_error:
huntarr_logger.warning(f"Error during main database cleanup: {db_error}")
@@ -388,7 +413,8 @@ def cleanup_handler():
try:
with logs_db.get_logs_connection() as conn:
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") # Flush WAL to logs database
conn.execute("VACUUM") # Optimize logs database before shutdown
# Skip VACUUM for faster shutdown during updates
huntarr_logger.debug("Logs database WAL checkpoint completed")
except Exception as logs_error:
huntarr_logger.warning(f"Error during logs database cleanup: {logs_error}")
@@ -397,10 +423,15 @@ def cleanup_handler():
except Exception as e:
huntarr_logger.warning(f"Error during database shutdown: {e}")
# Ensure stop events are set
if not stop_event.is_set():
stop_event.set()
if not shutdown_requested.is_set():
shutdown_requested.set()
# Log cleanup timing for Docker update diagnostics
cleanup_duration = time.time() - cleanup_start_time
huntarr_logger.info(f"Cleanup completed in {cleanup_duration:.2f} seconds")
def main():
"""Main entry point function for Huntarr application.
@@ -511,7 +542,14 @@ def main():
# shutdown_threads() # Uncomment if primary.main.shutdown_threads() does more cleanup
huntarr_logger.info("--- Huntarr Main Process Exiting ---")
return 0 # Success exit code
# Return appropriate exit code based on shutdown reason
if shutdown_requested.is_set() or stop_event.is_set():
huntarr_logger.info("Clean shutdown completed - Exit code 0")
return 0 # Clean shutdown
else:
huntarr_logger.warning("Unexpected shutdown - Exit code 1")
return 1 # Unexpected shutdown
if __name__ == '__main__':

View File

@@ -651,11 +651,17 @@ def check_and_restart_threads():
def shutdown_handler(signum, frame):
"""Handle termination signals (SIGINT, SIGTERM)."""
logger.info(f"Received signal {signum}. Initiating shutdown...")
signal_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM" if signum == signal.SIGTERM else f"Signal {signum}"
logger.info(f"Received {signal_name}. Initiating background tasks shutdown...")
stop_event.set() # Signal all threads to stop
# Log shutdown progress for Docker diagnostics
logger.info("Background shutdown initiated - threads will stop gracefully")
def shutdown_threads():
"""Wait for all threads to finish."""
import time
shutdown_start = time.time()
logger.info("Waiting for all app threads to stop...")
# Stop the hourly API cap scheduler
@@ -688,12 +694,22 @@ def shutdown_threads():
except Exception as e:
logger.error(f"Error stopping schedule action engine: {e}")
# Wait for all threads to terminate
for thread in app_threads.values():
if thread.is_alive():
thread.join(timeout=10.0)
# Wait for all app threads to terminate
active_threads = [name for name, thread in app_threads.items() if thread.is_alive()]
if active_threads:
logger.info(f"Waiting for {len(active_threads)} app threads to stop: {', '.join(active_threads)}")
for name, thread in app_threads.items():
if thread.is_alive():
logger.debug(f"Waiting for {name} thread to stop...")
thread.join(timeout=10.0)
if thread.is_alive():
logger.warning(f"{name} thread did not stop gracefully within 10 seconds")
else:
logger.debug(f"{name} thread stopped successfully")
logger.info("All app threads stopped.")
shutdown_duration = time.time() - shutdown_start
logger.info(f"All app threads stopped. Shutdown completed in {shutdown_duration:.2f} seconds")
def hourly_cap_scheduler_loop():
"""Main loop for the hourly API cap scheduler thread

View File

@@ -44,6 +44,100 @@ def logo_files(filename):
# --- API Routes --- #
def _shutdown_in_progress():
    """Return True when the application has started shutting down.

    The entry-point module exposes ``is_shutting_down()``. Because the
    application is started as a script, that module is registered as
    ``__main__``; importing it by the name ``main`` would load a second,
    independent copy whose shutdown flag is never updated (and would re-run
    its top-level code). The already-loaded module objects are therefore
    looked up in ``sys.modules`` instead of being imported.

    Falls back to the background ``stop_event`` when no usable
    ``is_shutting_down`` callable is found or when calling it fails.

    Raises:
        ImportError: if ``src.primary.background`` cannot be imported; the
            calling endpoint reports this as a failed check.
    """
    import sys

    # Imported before probing so an import failure surfaces to the caller.
    from src.primary.background import stop_event

    for module_name in ("__main__", "main"):
        checker = getattr(sys.modules.get(module_name), "is_shutting_down", None)
        if not callable(checker):
            continue
        try:
            return bool(checker())
        except Exception as probe_error:
            # Best effort only: a broken probe must not fail the endpoint.
            logger.debug(f"Shutdown probe via {module_name} failed: {probe_error}")
            break

    return stop_event.is_set()


@common_bp.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint for Docker and orchestration systems.

    Returns:
        A ``(json, status)`` tuple:
        * 503 with ``status == "shutting_down"`` while shutdown is in progress;
        * 200 with ``status == "healthy"`` and a UTC timestamp when a trivial
          ``SELECT 1`` against the main database succeeds;
        * 503 with ``status == "unhealthy"`` if any step raises.
    """
    try:
        if _shutdown_in_progress():
            return jsonify({
                "status": "shutting_down",
                "message": "Application is shutting down",
                "ready": False
            }), 503  # Service Unavailable

        # Basic database connectivity check
        from src.primary.utils.database import get_database
        db = get_database()

        # Cheapest possible query: proves a connection can be opened and used.
        with db.get_connection() as conn:
            conn.execute("SELECT 1")

        return jsonify({
            "status": "healthy",
            "message": "Application is running normally",
            "ready": True,
            # NOTE(review): utcnow() is naive and deprecated since Python 3.12;
            # kept so the timestamp format seen by existing clients is unchanged.
            "timestamp": datetime.utcnow().isoformat()
        }), 200

    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return jsonify({
            "status": "unhealthy",
            "message": f"Health check failed: {str(e)}",
            "ready": False
        }), 503  # Service Unavailable


@common_bp.route('/ready', methods=['GET'])
def readiness_check():
    """Readiness check endpoint for Kubernetes-style orchestration.

    Returns:
        A ``(json, status)`` tuple. 200 with ``ready == True`` only when the
        application is not shutting down, the database reports that setup is
        not in progress, and a user exists. Every other outcome, including
        any exception, yields 503 with ``ready == False`` and an explanatory
        message.
    """
    try:
        if _shutdown_in_progress():
            return jsonify({
                "ready": False,
                "message": "Application is shutting down"
            }), 503

        # Check if setup is complete
        from src.primary.utils.database import get_database
        db = get_database()

        if db.is_setup_in_progress():
            return jsonify({
                "ready": False,
                "message": "Application setup in progress"
            }), 503

        # Check if user exists (setup complete)
        from ..auth import user_exists
        if not user_exists():
            return jsonify({
                "ready": False,
                "message": "Application requires initial setup"
            }), 503

        return jsonify({
            "ready": True,
            "message": "Application is ready to serve traffic"
        }), 200

    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        return jsonify({
            "ready": False,
            "message": f"Readiness check failed: {str(e)}"
        }), 503
@common_bp.route('/api/sleep.json', methods=['GET'])
def api_get_sleep_json():
"""API endpoint to serve sleep/cycle data from the database for frontend access"""