mirror of
https://github.com/plexguide/Huntarr-Sonarr.git
synced 2025-12-16 20:04:16 -06:00
Add health check and graceful shutdown support
- Implemented a health check endpoint for Docker and orchestration systems.
- Added graceful shutdown configuration in Docker Compose and application code.
- Enhanced shutdown handling in the main application and background tasks for improved diagnostics.
- Updated the Dockerfile to include a health check command.
- Introduced a readiness check endpoint for Kubernetes-style orchestration.
This commit is contained in:
@@ -30,5 +30,9 @@ ENV TZ=UTC
|
||||
# Expose port
|
||||
EXPOSE 9705
|
||||
|
||||
# Add health check for Docker
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD curl -f http://localhost:9705/health || exit 1
|
||||
|
||||
# Run the main application using the new entry point
|
||||
CMD ["python3", "main.py"]
|
||||
@@ -12,6 +12,16 @@ services:
|
||||
- TZ=${TZ:-UTC}
|
||||
- BASE_URL=${BASE_URL:-}
|
||||
restart: unless-stopped
|
||||
# Graceful shutdown configuration
|
||||
stop_signal: SIGTERM
|
||||
stop_grace_period: 30s
|
||||
# Health check configuration
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9705/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 40s
|
||||
retries: 3
|
||||
|
||||
volumes:
|
||||
huntarr-config:
|
||||
|
||||
48
main.py
48
main.py
@@ -11,6 +11,7 @@ import signal
|
||||
import logging # Use standard logging for initial setup
|
||||
import atexit
|
||||
import time
|
||||
import time
|
||||
|
||||
# Import path configuration early to set up environment
|
||||
try:
|
||||
@@ -144,6 +145,14 @@ except Exception as e:
|
||||
waitress_server = None
|
||||
shutdown_requested = threading.Event()
|
||||
|
||||
# Global shutdown flag for health checks
|
||||
_global_shutdown_flag = False
|
||||
|
||||
def is_shutting_down():
    """Report whether the application has started shutting down.

    Three independent signals are consulted in order, stopping at the
    first one that is set: the module-level ``_global_shutdown_flag``,
    the ``shutdown_requested`` event, and the ``stop_event`` event.

    Returns:
        A truthy value if any of the three signals is set, otherwise the
        (falsy) result of ``stop_event.is_set()``.
    """
    # Only reading the module-level flag, so no ``global`` declaration is needed.
    if _global_shutdown_flag:
        return _global_shutdown_flag

    requested = shutdown_requested.is_set()
    if requested:
        return requested

    return stop_event.is_set()
|
||||
|
||||
def refresh_sponsors_on_startup():
|
||||
"""Refresh sponsors database from manifest.json on startup"""
|
||||
import os
|
||||
@@ -317,7 +326,15 @@ def run_web_server():
|
||||
|
||||
def main_shutdown_handler(signum, frame):
|
||||
"""Gracefully shut down the application."""
|
||||
huntarr_logger.info(f"Received signal {signum}. Initiating graceful shutdown...")
|
||||
global _global_shutdown_flag
|
||||
_global_shutdown_flag = True # Set global shutdown flag immediately
|
||||
|
||||
signal_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM" if signum == signal.SIGTERM else f"Signal {signum}"
|
||||
huntarr_logger.info(f"Received {signal_name}. Initiating graceful shutdown...")
|
||||
|
||||
# Set a reasonable timeout for shutdown operations
|
||||
shutdown_start_time = time.time()
|
||||
shutdown_timeout = 30 # 30 seconds total shutdown timeout
|
||||
|
||||
# Immediate database checkpoint to prevent corruption
|
||||
try:
|
||||
@@ -360,12 +377,19 @@ def main_shutdown_handler(signum, frame):
|
||||
waitress_server.close()
|
||||
except Exception as e:
|
||||
huntarr_logger.warning(f"Error closing Waitress server: {e}")
|
||||
|
||||
# Force exit if shutdown takes too long (Docker container update scenario)
|
||||
elapsed_time = time.time() - shutdown_start_time
|
||||
if elapsed_time > shutdown_timeout:
|
||||
huntarr_logger.warning(f"Shutdown timeout exceeded ({shutdown_timeout}s). Forcing exit with code 0.")
|
||||
os._exit(0) # Clean exit for Docker updates
|
||||
|
||||
def cleanup_handler():
|
||||
"""Cleanup function called at exit"""
|
||||
cleanup_start_time = time.time()
|
||||
huntarr_logger.info("Exit cleanup handler called")
|
||||
|
||||
# Shutdown databases gracefully
|
||||
# Shutdown databases gracefully with timeout
|
||||
try:
|
||||
from primary.utils.database import get_database, get_logs_database
|
||||
|
||||
@@ -377,7 +401,8 @@ def cleanup_handler():
|
||||
try:
|
||||
with main_db.get_connection() as conn:
|
||||
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") # Flush WAL to main database
|
||||
conn.execute("VACUUM") # Optimize database before shutdown
|
||||
# Skip VACUUM for faster shutdown during updates
|
||||
huntarr_logger.debug("Main database WAL checkpoint completed")
|
||||
except Exception as db_error:
|
||||
huntarr_logger.warning(f"Error during main database cleanup: {db_error}")
|
||||
|
||||
@@ -388,7 +413,8 @@ def cleanup_handler():
|
||||
try:
|
||||
with logs_db.get_logs_connection() as conn:
|
||||
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") # Flush WAL to logs database
|
||||
conn.execute("VACUUM") # Optimize logs database before shutdown
|
||||
# Skip VACUUM for faster shutdown during updates
|
||||
huntarr_logger.debug("Logs database WAL checkpoint completed")
|
||||
except Exception as logs_error:
|
||||
huntarr_logger.warning(f"Error during logs database cleanup: {logs_error}")
|
||||
|
||||
@@ -397,10 +423,15 @@ def cleanup_handler():
|
||||
except Exception as e:
|
||||
huntarr_logger.warning(f"Error during database shutdown: {e}")
|
||||
|
||||
# Ensure stop events are set
|
||||
if not stop_event.is_set():
|
||||
stop_event.set()
|
||||
if not shutdown_requested.is_set():
|
||||
shutdown_requested.set()
|
||||
|
||||
# Log cleanup timing for Docker update diagnostics
|
||||
cleanup_duration = time.time() - cleanup_start_time
|
||||
huntarr_logger.info(f"Cleanup completed in {cleanup_duration:.2f} seconds")
|
||||
|
||||
def main():
|
||||
"""Main entry point function for Huntarr application.
|
||||
@@ -511,7 +542,14 @@ def main():
|
||||
# shutdown_threads() # Uncomment if primary.main.shutdown_threads() does more cleanup
|
||||
|
||||
huntarr_logger.info("--- Huntarr Main Process Exiting ---")
|
||||
return 0 # Success exit code
|
||||
|
||||
# Return appropriate exit code based on shutdown reason
|
||||
if shutdown_requested.is_set() or stop_event.is_set():
|
||||
huntarr_logger.info("Clean shutdown completed - Exit code 0")
|
||||
return 0 # Clean shutdown
|
||||
else:
|
||||
huntarr_logger.warning("Unexpected shutdown - Exit code 1")
|
||||
return 1 # Unexpected shutdown
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -651,11 +651,17 @@ def check_and_restart_threads():
|
||||
|
||||
def shutdown_handler(signum, frame):
    """Handle termination signals (SIGINT, SIGTERM).

    Logs which signal arrived, then sets ``stop_event`` so that the
    background threads watching it can stop.

    Args:
        signum: Numeric signal identifier passed by the ``signal`` module.
        frame: Current stack frame passed by the ``signal`` module (unused).
    """
    # Resolve a readable name; anything other than SIGINT/SIGTERM is
    # reported by its number.
    if signum == signal.SIGINT:
        signal_name = "SIGINT"
    elif signum == signal.SIGTERM:
        signal_name = "SIGTERM"
    else:
        signal_name = f"Signal {signum}"

    # A single "Received ..." entry per signal: the older numeric-only
    # message duplicated this one and has been dropped.
    logger.info(f"Received {signal_name}. Initiating background tasks shutdown...")
    stop_event.set()  # Signal all threads to stop

    # Log shutdown progress for Docker diagnostics
    logger.info("Background shutdown initiated - threads will stop gracefully")
|
||||
|
||||
def shutdown_threads():
|
||||
"""Wait for all threads to finish."""
|
||||
import time
|
||||
shutdown_start = time.time()
|
||||
logger.info("Waiting for all app threads to stop...")
|
||||
|
||||
# Stop the hourly API cap scheduler
|
||||
@@ -688,12 +694,22 @@ def shutdown_threads():
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping schedule action engine: {e}")
|
||||
|
||||
# Wait for all threads to terminate
|
||||
for thread in app_threads.values():
|
||||
if thread.is_alive():
|
||||
thread.join(timeout=10.0)
|
||||
# Wait for all app threads to terminate
|
||||
active_threads = [name for name, thread in app_threads.items() if thread.is_alive()]
|
||||
if active_threads:
|
||||
logger.info(f"Waiting for {len(active_threads)} app threads to stop: {', '.join(active_threads)}")
|
||||
|
||||
for name, thread in app_threads.items():
|
||||
if thread.is_alive():
|
||||
logger.debug(f"Waiting for {name} thread to stop...")
|
||||
thread.join(timeout=10.0)
|
||||
if thread.is_alive():
|
||||
logger.warning(f"{name} thread did not stop gracefully within 10 seconds")
|
||||
else:
|
||||
logger.debug(f"{name} thread stopped successfully")
|
||||
|
||||
logger.info("All app threads stopped.")
|
||||
shutdown_duration = time.time() - shutdown_start
|
||||
logger.info(f"All app threads stopped. Shutdown completed in {shutdown_duration:.2f} seconds")
|
||||
|
||||
def hourly_cap_scheduler_loop():
|
||||
"""Main loop for the hourly API cap scheduler thread
|
||||
|
||||
@@ -44,6 +44,100 @@ def logo_files(filename):
|
||||
|
||||
# --- API Routes --- #
|
||||
|
||||
@common_bp.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint for Docker and orchestration systems.

    Returns:
        A ``(json_response, status_code)`` tuple:

        * ``503`` with ``status == "shutting_down"`` when a shutdown is
          in progress.
        * ``200`` with ``status == "healthy"`` once a ``SELECT 1`` against
          the main database succeeds.
        * ``503`` with ``status == "unhealthy"`` if any step raises.
    """
    try:
        import sys
        from src.primary.background import stop_event

        # Determine shutdown state. When the app is launched as a script
        # (``python3 main.py``) the entry module is registered as
        # ``__main__``; a plain ``import main`` would load a second copy
        # whose shutdown flag is never set by the signal handler. Prefer
        # the running entry module and only fall back to importing ``main``.
        try:
            entry_module = sys.modules.get('__main__')
            shutdown_probe = getattr(entry_module, 'is_shutting_down', None)
            if not callable(shutdown_probe):
                import main
                shutdown_probe = main.is_shutting_down
            is_shutting_down = shutdown_probe()
        except Exception:
            # Best-effort fallback; ``except Exception`` (not a bare
            # ``except``) so SystemExit/KeyboardInterrupt still propagate.
            is_shutting_down = stop_event.is_set()

        if is_shutting_down:
            return jsonify({
                "status": "shutting_down",
                "message": "Application is shutting down",
                "ready": False
            }), 503  # Service Unavailable

        # Basic database connectivity check
        from src.primary.utils.database import get_database
        db = get_database()

        # Quick database health check
        with db.get_connection() as conn:
            conn.execute("SELECT 1")

        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # kept as-is so the emitted timestamp format does not change.
        return jsonify({
            "status": "healthy",
            "message": "Application is running normally",
            "ready": True,
            "timestamp": datetime.utcnow().isoformat()
        }), 200

    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return jsonify({
            "status": "unhealthy",
            "message": f"Health check failed: {str(e)}",
            "ready": False
        }), 503  # Service Unavailable
|
||||
|
||||
@common_bp.route('/ready', methods=['GET'])
def readiness_check():
    """Readiness check endpoint for Kubernetes-style orchestration.

    Returns:
        A ``(json_response, status_code)`` tuple. ``200`` with
        ``ready == True`` only when no shutdown is in progress,
        ``db.is_setup_in_progress()`` is false and ``user_exists()`` is
        true; otherwise ``503`` with ``ready == False`` and a message
        naming the reason. Any exception is also reported as ``503``.
    """
    try:
        import sys
        from src.primary.background import stop_event

        # Determine shutdown state. When the app is launched as a script
        # (``python3 main.py``) the entry module is registered as
        # ``__main__``; a plain ``import main`` would load a second copy
        # whose shutdown flag is never set by the signal handler. Prefer
        # the running entry module and only fall back to importing ``main``.
        try:
            entry_module = sys.modules.get('__main__')
            shutdown_probe = getattr(entry_module, 'is_shutting_down', None)
            if not callable(shutdown_probe):
                import main
                shutdown_probe = main.is_shutting_down
            is_shutting_down = shutdown_probe()
        except Exception:
            # Best-effort fallback; ``except Exception`` (not a bare
            # ``except``) so SystemExit/KeyboardInterrupt still propagate.
            is_shutting_down = stop_event.is_set()

        if is_shutting_down:
            return jsonify({
                "ready": False,
                "message": "Application is shutting down"
            }), 503

        # Check if setup is complete
        from src.primary.utils.database import get_database
        db = get_database()

        if db.is_setup_in_progress():
            return jsonify({
                "ready": False,
                "message": "Application setup in progress"
            }), 503

        # Check if user exists (setup complete)
        from ..auth import user_exists
        if not user_exists():
            return jsonify({
                "ready": False,
                "message": "Application requires initial setup"
            }), 503

        return jsonify({
            "ready": True,
            "message": "Application is ready to serve traffic"
        }), 200

    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        return jsonify({
            "ready": False,
            "message": f"Readiness check failed: {str(e)}"
        }), 503
|
||||
|
||||
@common_bp.route('/api/sleep.json', methods=['GET'])
|
||||
def api_get_sleep_json():
|
||||
"""API endpoint to serve sleep/cycle data from the database for frontend access"""
|
||||
|
||||
Reference in New Issue
Block a user