From 2c5a35b6c2ca9d1d7392ac70168547fcbe6dc4dd Mon Sep 17 00:00:00 2001 From: Muhammad Ibrahim Date: Fri, 24 Oct 2025 21:25:15 +0100 Subject: [PATCH] Added Diagnostics scripts and improved setup with more redis db server handling --- setup.sh | 787 ++++++++++++++++++++++++++++++++++------ tools/diagnostics.sh | 715 ++++++++++++++++++++++++++++++++++++ tools/fix-migrations.sh | 286 +++++++++++++++ 3 files changed, 1686 insertions(+), 102 deletions(-) create mode 100755 tools/diagnostics.sh create mode 100755 tools/fix-migrations.sh diff --git a/setup.sh b/setup.sh index 00f590b..d8fa2f8 100755 --- a/setup.sh +++ b/setup.sh @@ -707,6 +707,10 @@ configure_redis() { chown redis:redis /etc/redis/users.acl chmod 640 /etc/redis/users.acl print_status "Created Redis ACL file" + else + # Backup existing ACL file + cp /etc/redis/users.acl /etc/redis/users.acl.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true + print_info "Backed up existing ACL file" fi # Configure ACL file in redis.conf @@ -727,8 +731,14 @@ configure_redis() { print_status "Removed user definitions from redis.conf" fi - # Create admin user in ACL file if it doesn't exist - if ! grep -q "^user admin" /etc/redis/users.acl; then + # Create or update admin user in ACL file + if grep -q "^user admin" /etc/redis/users.acl; then + print_info "Admin user already exists in ACL, updating password..." + # Remove old admin line and add new one + sed -i '/^user admin/d' /etc/redis/users.acl + echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl + print_status "Updated admin user password" + else echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl print_status "Added admin user to ACL file" fi @@ -737,65 +747,126 @@ configure_redis() { print_info "Restarting Redis to apply ACL configuration..." 
systemctl restart redis-server - # Wait for Redis to start - sleep 3 + # Wait for Redis to start with retry logic + sleep 5 - # Test admin connection - if ! redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then - print_error "Failed to configure Redis ACL authentication" - return 1 - fi + # Test admin connection with retries + local max_retries=3 + local retry=0 + local admin_works=false - print_status "Redis ACL authentication configuration successful" - - # Create Redis user with ACL - print_info "Creating Redis ACL user: $REDIS_USER" - - # Create user with password and permissions - capture output for error handling - local acl_result - acl_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1) - - if [ "$acl_result" = "OK" ]; then - print_status "Redis user '$REDIS_USER' created successfully" - - # Save ACL users to file to persist across restarts - local save_result - save_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE 2>&1) - - if [ "$save_result" = "OK" ]; then - print_status "Redis ACL users saved to file" - else - print_warning "Failed to save ACL users to file: $save_result" + while [ $retry -lt $max_retries ]; do + if redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then + admin_works=true + break fi + print_info "Waiting for Redis to be ready... 
(attempt $((retry + 1))/$max_retries)" + sleep 2 + retry=$((retry + 1)) + done + + if [ "$admin_works" = false ]; then + print_error "Failed to verify admin connection after Redis restart" + print_error "Redis ACL configuration may have issues" - # Verify user was actually created - local verify_result - verify_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL GETUSER "$REDIS_USER" 2>&1) + # Try to fix by disabling ACL and using requirepass instead + print_warning "Attempting fallback: using requirepass instead of ACL..." + sed -i 's/^aclfile/# aclfile/' /etc/redis/redis.conf + sed -i "s/^# requirepass .*/requirepass $REDIS_PASSWORD/" /etc/redis/redis.conf + if ! grep -q "^requirepass" /etc/redis/redis.conf; then + echo "requirepass $REDIS_PASSWORD" >> /etc/redis/redis.conf + fi + systemctl restart redis-server + sleep 3 - if [ "$verify_result" = "(nil)" ]; then - print_error "User creation reported OK but user does not exist" + # Test requirepass + if redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then + print_status "Fallback successful - using requirepass authentication" + # For requirepass mode, we'll set REDIS_USER empty later + print_info "Note: Using legacy requirepass mode instead of ACL" + else + print_error "Fallback also failed - Redis authentication is broken" return 1 fi else - print_error "Failed to create Redis user: $acl_result" - return 1 + print_status "Redis ACL authentication configuration successful" fi - # Test user connection - print_info "Testing Redis user connection..." 
- if redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then - print_status "Redis user connection test successful" + # Create Redis user with ACL (only if admin_works, meaning we're using ACL mode) + if [ "$admin_works" = true ]; then + print_info "Creating Redis ACL user: $REDIS_USER" + + # Create user with password and permissions - capture output for error handling + local acl_result + acl_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1) + + if [ "$acl_result" = "OK" ]; then + print_status "Redis user '$REDIS_USER' created successfully" + + # Save ACL users to file to persist across restarts + local save_result + save_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE 2>&1) + + if [ "$save_result" = "OK" ]; then + print_status "Redis ACL users saved to file" + else + print_warning "Failed to save ACL users to file: $save_result" + fi + + # Verify user was actually created + local verify_result + verify_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL GETUSER "$REDIS_USER" 2>&1) + + if [ "$verify_result" = "(nil)" ]; then + print_error "User creation reported OK but user does not exist" + return 1 + fi + + # Test user connection + print_info "Testing Redis user connection..." 
+ if redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then + print_status "Redis user connection test successful" + else + print_error "Redis user connection test failed" + return 1 + fi + + # Mark the selected database as in-use + redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null + print_status "Marked Redis database $REDIS_DB as in-use" + else + print_error "Failed to create Redis user: $acl_result" + return 1 + fi else - print_error "Redis user connection test failed" - return 1 + # Using requirepass mode - no per-user ACL + print_info "Using requirepass mode - testing connection..." + + # For requirepass, we don't use username, just password + if redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then + print_status "Redis requirepass connection test successful" + + # Mark the selected database as in-use + redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null + print_status "Marked Redis database $REDIS_DB as in-use" + + # Set REDIS_USER to empty for requirepass mode + REDIS_USER="" + REDIS_USER_PASSWORD="$REDIS_PASSWORD" + else + print_error "Redis requirepass connection test failed" + return 1 + fi fi - # Mark the selected database as in-use - redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null - print_status "Marked Redis database $REDIS_DB as in-use" - # Note: Redis credentials will be written to .env by create_env_files() function - print_status "Redis user '$REDIS_USER' configured successfully" + print_status "Redis configured successfully" + + 
if [ -n "$REDIS_USER" ]; then + print_info "Redis Mode: ACL with user '$REDIS_USER'" + else + print_info "Redis Mode: requirepass (legacy single-password auth)" + fi print_info "Redis credentials will be saved to backend/.env" return 0 @@ -1116,16 +1187,121 @@ EOF print_status "Environment files created" } -# Run database migrations +# Check and fix failed Prisma migrations +fix_failed_migrations() { + local db_name="$1" + local db_user="$2" + local db_pass="$3" + local db_host="${4:-localhost}" + local max_retries=3 + + print_info "Checking for failed migrations in database..." + + # Query for failed migrations (where started_at is set but finished_at is NULL) + local failed_migrations + failed_migrations=$(PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -t -A -c \ + "SELECT migration_name FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;" 2>/dev/null || echo "") + + if [ -z "$failed_migrations" ]; then + print_status "No failed migrations found" + return 0 + fi + + print_warning "Found failed migration(s):" + echo "$failed_migrations" | while read -r migration; do + [ -n "$migration" ] && print_warning " - $migration" + done + + print_info "Attempting to resolve failed migrations..." 
+ + # For each failed migration, mark it as rolled back and remove it + echo "$failed_migrations" | while read -r migration; do + if [ -n "$migration" ]; then + print_info "Processing failed migration: $migration" + + # Mark the migration as rolled back + PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -c \ + "UPDATE _prisma_migrations SET rolled_back_at = NOW() WHERE migration_name = '$migration' AND finished_at IS NULL;" >/dev/null 2>&1 + + # Delete the failed migration record to allow retry + PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -c \ + "DELETE FROM _prisma_migrations WHERE migration_name = '$migration' AND finished_at IS NULL;" >/dev/null 2>&1 + + print_status "Marked migration '$migration' for retry" + fi + done + + print_status "Failed migrations have been cleared for retry" + return 0 +} + +# Run database migrations with self-healing run_migrations() { print_info "Running database migrations as user $INSTANCE_USER..." cd "$APP_DIR/backend" - # Suppress Prisma CLI output (still logged to install log via tee) - run_as_user "$INSTANCE_USER" "cd $APP_DIR/backend && npx prisma migrate deploy" >/dev/null 2>&1 || true + + local max_attempts=3 + local attempt=1 + local migration_success=false + + while [ $attempt -le $max_attempts ]; do + print_info "Migration attempt $attempt of $max_attempts..." + + # Try to run migrations + local migrate_output + migrate_output=$(run_as_user "$INSTANCE_USER" "cd $APP_DIR/backend && npx prisma migrate deploy 2>&1" || echo "MIGRATION_FAILED") + + # Check if migration succeeded + if ! 
echo "$migrate_output" | grep -q "MIGRATION_FAILED\|Error:\|P3009"; then + print_status "Migrations completed successfully" + migration_success=true + break + fi + + # Check specifically for P3009 (failed migrations found) + if echo "$migrate_output" | grep -q "P3009\|migrate found failed migrations"; then + print_warning "Detected failed migrations (P3009 error)" + + # Extract the failed migration name if possible + local failed_migration + failed_migration=$(echo "$migrate_output" | grep -oP "The \`\K[^\`]+" | head -1 || echo "") + + if [ -n "$failed_migration" ]; then + print_info "Failed migration identified: $failed_migration" + fi + + # Attempt to fix failed migrations + print_info "Attempting to self-heal migration issues..." + if fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "localhost"; then + print_status "Migration issues resolved, retrying..." + attempt=$((attempt + 1)) + sleep 2 + continue + else + print_error "Failed to resolve migration issues" + break + fi + else + # Other migration error + print_error "Migration failed with error:" + echo "$migrate_output" | grep -A 5 "Error:" + break + fi + done + + if [ "$migration_success" = false ]; then + print_error "Migrations failed after $max_attempts attempts" + print_info "You may need to manually resolve migration issues" + print_info "Check migrations: cd $APP_DIR/backend && npx prisma migrate status" + return 1 + fi + + # Generate Prisma client run_as_user "$INSTANCE_USER" "cd $APP_DIR/backend && npx prisma generate" >/dev/null 2>&1 || true print_status "Database migrations completed as $INSTANCE_USER" + return 0 } # Admin account creation removed - handled by application's first-time setup @@ -1462,7 +1638,60 @@ start_services() { print_status "PatchMon service started successfully" else print_error "Failed to start PatchMon service" - systemctl status "$SERVICE_NAME" + echo "" + + # Show last 25 lines of service logs for debugging + print_warning "=== Last 25 lines of service logs ===" + 
journalctl -u "$SERVICE_NAME" -n 25 --no-pager || true + print_warning "===================================" + echo "" + + # Check for specific error patterns + local logs=$(journalctl -u "$SERVICE_NAME" -n 50 --no-pager 2>/dev/null || echo "") + + if echo "$logs" | grep -q "WRONGPASS\|NOAUTH"; then + print_error "❌ Detected Redis authentication error!" + print_info "The service cannot authenticate with Redis." + echo "" + print_info "Current Redis configuration in .env:" + grep "^REDIS_" "$APP_DIR/backend/.env" || true + echo "" + print_info "Debug steps:" + print_info " 1. Check Redis is running:" + print_info " systemctl status redis-server" + echo "" + print_info " 2. Check Redis ACL users:" + print_info " redis-cli ACL LIST" + echo "" + print_info " 3. Test Redis connection:" + local test_user=$(grep "^REDIS_USER=" "$APP_DIR/backend/.env" | cut -d'=' -f2) + local test_pass=$(grep "^REDIS_PASSWORD=" "$APP_DIR/backend/.env" | cut -d'=' -f2) + local test_db=$(grep "^REDIS_DB=" "$APP_DIR/backend/.env" | cut -d'=' -f2) + print_info " redis-cli --user $test_user --pass $test_pass -n ${test_db:-0} ping" + echo "" + print_info " 4. Check Redis configuration files:" + print_info " cat /etc/redis/redis.conf | grep aclfile" + print_info " cat /etc/redis/users.acl" + echo "" + elif echo "$logs" | grep -q "ECONNREFUSED.*postgresql\|Connection refused.*5432"; then + print_error "❌ Detected PostgreSQL connection error!" + print_info "Check if PostgreSQL is running:" + print_info " systemctl status postgresql" + elif echo "$logs" | grep -q "ECONNREFUSED.*redis\|Connection refused.*6379"; then + print_error "❌ Detected Redis connection error!" + print_info "Check if Redis is running:" + print_info " systemctl status redis-server" + elif echo "$logs" | grep -q "database.*does not exist"; then + print_error "❌ Database does not exist!" 
+ print_info "Database: $DB_NAME" + elif echo "$logs" | grep -q "Error:"; then + print_error "❌ Application error detected in logs" + fi + + echo "" + print_info "View full logs: journalctl -u $SERVICE_NAME -f" + print_info "Check service status: systemctl status $SERVICE_NAME" + return 1 fi } @@ -2012,6 +2241,65 @@ select_installation_to_update() { done } +# Repair/recreate Redis user with correct permissions +repair_redis_user() { + local redis_user="$1" + local redis_pass="$2" + local redis_db="${3:-0}" + + print_info "Attempting to repair Redis user: $redis_user" + + # Find admin password + local admin_password="" + if [ -f /etc/redis/users.acl ] && grep -q "^user admin" /etc/redis/users.acl; then + admin_password=$(grep "^user admin" /etc/redis/users.acl | grep -oP '>\K[^ ]+' | head -1) + fi + + if [ -z "$admin_password" ]; then + print_error "Cannot repair Redis user - no admin credentials found" + return 1 + fi + + # Test admin connection + if ! redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ping >/dev/null 2>&1; then + print_error "Admin credentials don't work - cannot repair user" + return 1 + fi + + print_status "Admin access confirmed" + + # Delete existing user if it exists (and is broken) + print_info "Removing old user configuration..." + redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ACL DELUSER "$redis_user" >/dev/null 2>&1 || true + + # Create user with full permissions + print_info "Creating user with full permissions..." 
+ local create_result + create_result=$(redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ACL SETUSER "$redis_user" on ">${redis_pass}" ~* +@all 2>&1) + + if echo "$create_result" | grep -q "OK"; then + # Save ACL + redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ACL SAVE >/dev/null 2>&1 + + # Verify the new user works + if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "$redis_db" ping >/dev/null 2>&1; then + if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "$redis_db" info >/dev/null 2>&1; then + print_status "Redis user repaired successfully" + return 0 + else + print_error "User created but INFO command still fails" + return 1 + fi + else + print_error "User created but PING command fails" + return 1 + fi + else + print_error "Failed to create user: $create_result" + return 1 + fi +} + # Check and update Redis configuration for existing installation update_redis_configuration() { print_info "Checking Redis configuration..." 
@@ -2021,12 +2309,57 @@ update_redis_configuration() { if grep -q "^REDIS_HOST=" "$instance_dir/backend/.env" && \ grep -q "^REDIS_PASSWORD=" "$instance_dir/backend/.env"; then print_status "Redis configuration already exists in .env" - return 0 + + # Verify the credentials actually work + local redis_user=$(grep "^REDIS_USER=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"') + local redis_pass=$(grep "^REDIS_PASSWORD=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"') + local redis_db=$(grep "^REDIS_DB=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"') + + if [ -n "$redis_user" ] && [ -n "$redis_pass" ]; then + # Test with username and password + local ping_works=false + local info_works=false + + if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "${redis_db:-0}" ping >/dev/null 2>&1; then + ping_works=true + fi + + if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "${redis_db:-0}" info >/dev/null 2>&1; then + info_works=true + fi + + if [ "$ping_works" = true ] && [ "$info_works" = true ]; then + print_status "Redis credentials verified with redis-cli (tested ping and info commands)" + + # Force refresh the Redis user during updates to ensure correct ACL permissions + # This prevents issues where redis-cli works but Node.js client doesn't + print_info "Refreshing Redis user permissions to ensure compatibility..." + + if repair_redis_user "$redis_user" "$redis_pass" "$redis_db"; then + print_status "Redis user permissions refreshed successfully" + return 0 + else + print_warning "Could not refresh Redis user, but credentials seem to work - continuing..." + return 0 + fi + else + print_warning "Redis credentials not working properly (ping: $ping_works, info: $info_works)" + print_info "Attempting to repair Redis user..." 
+ + if repair_redis_user "$redis_user" "$redis_pass" "$redis_db"; then + print_status "Redis user repaired successfully" + return 0 + else + print_warning "Could not repair Redis user, will reconfigure from scratch..." + fi + fi + else + print_warning "Redis credentials incomplete in .env (missing user or password)" + fi fi fi - print_warning "Redis configuration not found in .env - this is a legacy installation" - print_info "Setting up Redis for this instance..." + print_warning "Redis configuration not found or invalid in .env - setting up Redis for this instance..." # Detect package manager if not already set if [ -z "$PKG_INSTALL" ]; then @@ -2054,6 +2387,39 @@ update_redis_configuration() { REDIS_USER="patchmon_${DB_SAFE_NAME}" REDIS_USER_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) + # Test Redis connection to determine authentication status + print_info "Testing Redis authentication status..." + local needs_auth=false + local admin_password="" + + # Try ping without auth + if redis-cli -h localhost -p 6379 ping >/dev/null 2>&1; then + print_info "Redis is accessible without authentication" + needs_auth=false + else + print_info "Redis requires authentication" + needs_auth=true + + # Try to find existing admin password from ACL file + if [ -f /etc/redis/users.acl ] && grep -q "^user admin" /etc/redis/users.acl; then + # Extract password from ACL file (format: >password) + admin_password=$(grep "^user admin" /etc/redis/users.acl | grep -oP '>\K[^ ]+' | head -1) + + if [ -n "$admin_password" ]; then + print_info "Found existing admin credentials in ACL file" + + # Test admin credentials + if redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ping >/dev/null 2>&1; then + print_status "Existing admin credentials work" + REDIS_PASSWORD="$admin_password" + else + print_warning "Existing admin credentials don't work, will create new configuration" + admin_password="" + fi + fi + fi + fi + # Find available Redis 
database print_info "Finding available Redis database..." local redis_db=0 @@ -2061,9 +2427,14 @@ update_redis_configuration() { while [ $redis_db -lt $max_attempts ]; do local key_count - key_count=$(redis-cli -h localhost -p 6379 -n "$redis_db" DBSIZE 2>&1 | grep -v "ERR" || echo "1") - if [ "$key_count" = "0" ] || [ "$key_count" = "(integer) 0" ]; then + if [ "$needs_auth" = true ] && [ -n "$admin_password" ]; then + key_count=$(redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning -n "$redis_db" DBSIZE 2>&1 | grep -oP '\d+' || echo "1") + else + key_count=$(redis-cli -h localhost -p 6379 -n "$redis_db" DBSIZE 2>&1 | grep -oP '\d+' || echo "1") + fi + + if [ "$key_count" = "0" ]; then print_status "Found available Redis database: $redis_db" REDIS_DB=$redis_db break @@ -2076,50 +2447,146 @@ update_redis_configuration() { REDIS_DB=0 fi - # Generate admin password if not exists - REDIS_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) - # Configure Redis with ACL if needed - print_info "Configuring Redis ACL..." - - # Create ACL file if it doesn't exist - if [ ! -f /etc/redis/users.acl ]; then - touch /etc/redis/users.acl - chown redis:redis /etc/redis/users.acl - chmod 640 /etc/redis/users.acl - fi - - # Configure ACL file in redis.conf - if ! grep -q "^aclfile" /etc/redis/redis.conf 2>/dev/null; then - echo "aclfile /etc/redis/users.acl" >> /etc/redis/redis.conf - fi - - # Remove requirepass (incompatible with ACL) - if grep -q "^requirepass" /etc/redis/redis.conf 2>/dev/null; then - sed -i 's/^requirepass.*/# &/' /etc/redis/redis.conf - fi - - # Create admin user if it doesn't exist - if ! grep -q "^user admin" /etc/redis/users.acl; then - echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl + if [ "$needs_auth" = false ]; then + print_info "Configuring Redis ACL for security..." 
+ + # Generate new admin password + REDIS_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) + + # Backup redis.conf + if [ -f /etc/redis/redis.conf ]; then + cp /etc/redis/redis.conf /etc/redis/redis.conf.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true + fi + + # Create ACL file if it doesn't exist + if [ ! -f /etc/redis/users.acl ]; then + touch /etc/redis/users.acl + chown redis:redis /etc/redis/users.acl + chmod 640 /etc/redis/users.acl + print_status "Created Redis ACL file" + else + # Backup existing ACL file + cp /etc/redis/users.acl /etc/redis/users.acl.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true + print_info "Backed up existing ACL file" + fi + + # Configure ACL file in redis.conf + if ! grep -q "^aclfile" /etc/redis/redis.conf 2>/dev/null; then + echo "aclfile /etc/redis/users.acl" >> /etc/redis/redis.conf + print_status "Added ACL file configuration to redis.conf" + fi + + # Remove requirepass (incompatible with ACL) + if grep -q "^requirepass" /etc/redis/redis.conf 2>/dev/null; then + sed -i 's/^requirepass.*/# &/' /etc/redis/redis.conf + print_status "Disabled requirepass (incompatible with ACL)" + fi + + # Create or update admin user in ACL file + if grep -q "^user admin" /etc/redis/users.acl; then + print_info "Admin user already exists in ACL, updating password..." + # Remove old admin line and add new one + sed -i '/^user admin/d' /etc/redis/users.acl + echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl + print_status "Updated admin user password" + else + echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl + print_status "Created admin user in ACL" + fi + + # Restart Redis to apply ACL + print_info "Restarting Redis to apply ACL configuration..." 
systemctl restart redis-server - sleep 3 + sleep 5 + + # Verify admin can connect + local max_retries=3 + local retry=0 + local admin_works=false + + while [ $retry -lt $max_retries ]; do + if redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping >/dev/null 2>&1; then + admin_works=true + break + fi + print_info "Waiting for Redis to be ready... (attempt $((retry + 1))/$max_retries)" + sleep 2 + retry=$((retry + 1)) + done + + if [ "$admin_works" = false ]; then + print_error "Failed to verify admin connection after Redis restart" + print_error "Redis ACL configuration may have issues" + + # Try to fix by disabling ACL and using requirepass instead + print_warning "Attempting fallback: using requirepass instead of ACL..." + sed -i 's/^aclfile/# aclfile/' /etc/redis/redis.conf + sed -i "s/^# requirepass .*/requirepass $REDIS_PASSWORD/" /etc/redis/redis.conf + if ! grep -q "^requirepass" /etc/redis/redis.conf; then + echo "requirepass $REDIS_PASSWORD" >> /etc/redis/redis.conf + fi + systemctl restart redis-server + sleep 3 + + # Test requirepass + if redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning ping >/dev/null 2>&1; then + print_status "Fallback successful - using requirepass authentication" + # For requirepass, we don't use username + REDIS_USER="" + else + print_error "Fallback also failed - Redis authentication is broken" + return 1 + fi + else + print_status "Redis ACL configuration successful" + fi + elif [ -z "$admin_password" ]; then + print_error "Redis requires authentication but no valid admin credentials found" + print_error "Please check /etc/redis/users.acl or /etc/redis/redis.conf" + print_info "Manual fix: Reset Redis authentication or provide admin credentials" + return 1 fi - # Create instance-specific Redis user - print_info "Creating Redis user: $REDIS_USER" - - # Try to authenticate with admin (may already exist from another instance) - local acl_result - acl_result=$(redis-cli -h 
127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1) - - if [ "$acl_result" = "OK" ] || echo "$acl_result" | grep -q "OK"; then - print_status "Redis user created successfully" - redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE > /dev/null 2>&1 + # Create instance-specific Redis user (only if using ACL) + if [ -n "$REDIS_USER" ]; then + print_info "Creating Redis user: $REDIS_USER" + + local acl_result="" + if [ -n "$REDIS_PASSWORD" ]; then + # Try to create user with ACL + acl_result=$(redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1) + else + # Try without authentication (for legacy setups) + acl_result=$(redis-cli -h localhost -p 6379 ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1) + fi + + if echo "$acl_result" | grep -q "OK"; then + print_status "Redis user created successfully" + + # Save ACL users + if [ -n "$REDIS_PASSWORD" ]; then + redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE >/dev/null 2>&1 + else + redis-cli -h localhost -p 6379 ACL SAVE >/dev/null 2>&1 + fi + print_status "Redis ACL saved" + + # Verify user can connect + if redis-cli -h localhost -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping >/dev/null 2>&1; then + print_status "Redis user verified and working" + else + print_warning "Redis user created but verification failed" + fi + else + print_error "Failed to create Redis user: $acl_result" + print_warning "Will use requirepass mode instead of per-user ACL" + REDIS_USER="" + REDIS_USER_PASSWORD="$REDIS_PASSWORD" + fi else - print_warning "Could not create Redis user with ACL, trying without authentication..." 
- # Fallback for systems without ACL configured - redis-cli -h 127.0.0.1 -p 6379 CONFIG SET requirepass "$REDIS_USER_PASSWORD" > /dev/null 2>&1 || true + print_info "Using requirepass authentication (single password, no user-specific ACL)" + REDIS_USER_PASSWORD="$REDIS_PASSWORD" fi # Backup existing .env @@ -2128,18 +2595,27 @@ update_redis_configuration() { # Add Redis configuration to .env print_info "Adding Redis configuration to .env..." + + # Use correct password variable + local redis_pass_for_env="${REDIS_USER_PASSWORD:-$REDIS_PASSWORD}" + cat >> "$instance_dir/backend/.env" << EOF -# Redis Configuration (added during update) +# Redis Configuration (added during update on $(date)) REDIS_HOST=localhost REDIS_PORT=6379 REDIS_USER=$REDIS_USER -REDIS_PASSWORD=$REDIS_USER_PASSWORD +REDIS_PASSWORD=$redis_pass_for_env REDIS_DB=$REDIS_DB EOF print_status "Redis configuration added to .env" - print_info "Redis User: $REDIS_USER" + + if [ -n "$REDIS_USER" ]; then + print_info "Redis Mode: ACL with user '$REDIS_USER'" + else + print_info "Redis Mode: requirepass (legacy single-password auth)" + fi print_info "Redis Database: $REDIS_DB" return 0 @@ -2543,11 +3019,81 @@ update_installation() { print_info "Building frontend..." npm run build - # Run database migrations and generate Prisma client + # Run database migrations with self-healing print_info "Running database migrations..." cd "$instance_dir/backend" + + # Generate Prisma client first npx prisma generate - npx prisma migrate deploy + + local max_attempts=3 + local attempt=1 + local migration_success=false + + while [ $attempt -le $max_attempts ]; do + print_info "Migration attempt $attempt of $max_attempts..." + + # Try to run migrations + local migrate_output + migrate_output=$(npx prisma migrate deploy 2>&1 || echo "MIGRATION_FAILED") + + # Check if migration succeeded + if ! 
echo "$migrate_output" | grep -q "MIGRATION_FAILED\|Error:\|P3009"; then + print_status "Migrations completed successfully" + migration_success=true + break + fi + + # Check specifically for P3009 (failed migrations found) + if echo "$migrate_output" | grep -q "P3009\|migrate found failed migrations"; then + print_warning "Detected failed migrations (P3009 error)" + + # Extract the failed migration name if possible + local failed_migration + failed_migration=$(echo "$migrate_output" | grep -oP "The \`\K[^\`]+" | head -1 || echo "") + + if [ -n "$failed_migration" ]; then + print_info "Failed migration identified: $failed_migration" + fi + + # Attempt to fix failed migrations + print_info "Attempting to self-heal migration issues..." + if fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "$DB_HOST"; then + print_status "Migration issues resolved, retrying..." + attempt=$((attempt + 1)) + sleep 2 + continue + else + print_error "Failed to resolve migration issues" + print_warning "Attempting alternative resolution method..." + + # Alternative: Mark migration as completed if tables exist + print_info "Checking if migration changes are already applied..." 
+ PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c \ + "UPDATE _prisma_migrations SET finished_at = NOW(), logs = 'Manually resolved by update script' WHERE migration_name = '$failed_migration' AND finished_at IS NULL;" >/dev/null 2>&1 + + attempt=$((attempt + 1)) + sleep 2 + continue + fi + else + # Other migration error + print_error "Migration failed with error:" + echo "$migrate_output" | grep -A 10 "Error:" + + # Show helpful information + print_info "Migration status:" + npx prisma migrate status 2>&1 || true + break + fi + done + + if [ "$migration_success" = false ]; then + print_error "Migrations failed after $max_attempts attempts" + print_warning "The update will continue, but you may need to manually resolve migration issues" + print_info "Check migrations: cd $instance_dir/backend && npx prisma migrate status" + print_info "View failed migrations: PGPASSWORD=\"$DB_PASS\" psql -h \"$DB_HOST\" -U \"$DB_USER\" -d \"$DB_NAME\" -c \"SELECT * FROM _prisma_migrations WHERE finished_at IS NULL;\"" + fi # Check and update Redis configuration if needed (for legacy installations) update_redis_configuration @@ -2563,7 +3109,7 @@ update_installation() { systemctl start "$service_name" # Wait a moment and check status - sleep 3 + sleep 5 if systemctl is-active --quiet "$service_name"; then print_success "✅ Update completed successfully!" @@ -2582,6 +3128,43 @@ update_installation() { echo "" else print_error "Service failed to start after update" + echo "" + + # Show last 25 lines of service logs for debugging + print_warning "=== Last 25 lines of service logs ===" + journalctl -u "$service_name" -n 25 --no-pager || true + print_warning "===================================" + echo "" + + # Check for specific error patterns + local logs=$(journalctl -u "$service_name" -n 50 --no-pager 2>/dev/null || echo "") + + if echo "$logs" | grep -q "WRONGPASS\|NOAUTH"; then + print_error "❌ Detected Redis authentication error!" 
+ print_info "The service cannot authenticate with Redis." + echo "" + print_info "Current Redis configuration in .env:" + grep "^REDIS_" "$instance_dir/backend/.env" || true + echo "" + print_info "Quick fix - Try reconfiguring Redis:" + print_info " 1. Check Redis ACL users:" + print_info " redis-cli ACL LIST" + echo "" + print_info " 2. Test Redis connection with credentials from .env:" + local test_user=$(grep "^REDIS_USER=" "$instance_dir/backend/.env" | cut -d'=' -f2) + local test_pass=$(grep "^REDIS_PASSWORD=" "$instance_dir/backend/.env" | cut -d'=' -f2) + local test_db=$(grep "^REDIS_DB=" "$instance_dir/backend/.env" | cut -d'=' -f2) + print_info " redis-cli --user $test_user --pass $test_pass -n ${test_db:-0} ping" + echo "" + elif echo "$logs" | grep -q "ECONNREFUSED"; then + print_error "❌ Detected connection refused error!" + print_info "Check if required services are running:" + print_info " systemctl status postgresql" + print_info " systemctl status redis-server" + elif echo "$logs" | grep -q "Error:"; then + print_error "❌ Application error detected in logs" + fi + echo "" print_warning "ROLLBACK INSTRUCTIONS:" print_info "1. Restore code:" @@ -2594,7 +3177,7 @@ update_installation() { print_info "3. 
Restart service:" print_info " sudo systemctl start $service_name" echo "" - print_info "Check logs: journalctl -u $service_name -f" + print_info "View full logs: journalctl -u $service_name -f" exit 1 fi } diff --git a/tools/diagnostics.sh b/tools/diagnostics.sh new file mode 100755 index 0000000..38017ff --- /dev/null +++ b/tools/diagnostics.sh @@ -0,0 +1,715 @@ +#!/bin/bash +# PatchMon Diagnostics Collection Script +# Collects system information, logs, and configuration for troubleshooting +# Usage: sudo bash diagnostics.sh [instance-name] + +# Note: Not using 'set -e' because we want to continue even if some commands fail +set -o pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Print functions +print_status() { + echo -e "${GREEN}✅ $1${NC}" +} + +print_info() { + echo -e "${BLUE}ℹ️ $1${NC}" +} + +print_error() { + echo -e "${RED}❌ $1${NC}" +} + +print_warning() { + echo -e "${YELLOW}⚠️ $1${NC}" +} + +print_success() { + echo -e "${GREEN}🎉 $1${NC}" +} + +# Check if running as root +if [[ $EUID -ne 0 ]]; then + print_error "This script must be run as root" + print_info "Please run: sudo bash $0" + exit 1 +fi + +# Function to sanitize sensitive information +sanitize_sensitive() { + local input="$1" + # Replace passwords, secrets, and tokens with [REDACTED] + echo "$input" | \ + sed -E 's/(PASSWORD|SECRET|TOKEN|KEY|PASS)=[^"]*$/\1=[REDACTED]/gi' | \ + sed -E 's/(PASSWORD|SECRET|TOKEN|KEY|PASS)="[^"]*"/\1="[REDACTED]"/gi' | \ + sed -E 's/(password|secret|token|key|pass)": *"[^"]*"/\1": "[REDACTED]"/gi' | \ + sed -E 's/(>)[a-zA-Z0-9+\/=]{20,}/\1[REDACTED]/g' | \ + sed -E 's|postgresql://([^:]+):([^@]+)@|postgresql://\1:[REDACTED]@|g' | \ + sed -E 's|mysql://([^:]+):([^@]+)@|mysql://\1:[REDACTED]@|g' | \ + sed -E 's|mongodb://([^:]+):([^@]+)@|mongodb://\1:[REDACTED]@|g' +} + +# Function to detect PatchMon installations +detect_installations() { + local installations=() + + if [ 
! -d "/opt" ]; then + print_error "/opt directory does not exist" + return 1 + fi + + for dir in /opt/*/; do + # Skip if no directories found + [ -d "$dir" ] || continue + + local dirname=$(basename "$dir") + + # Skip backup directories + if [[ "$dirname" =~ \.backup\. ]]; then + continue + fi + + # Check if it's a PatchMon installation + if [ -f "$dir/backend/package.json" ]; then + if grep -q "patchmon" "$dir/backend/package.json" 2>/dev/null; then + installations+=("$dirname") + fi + fi + done + + echo "${installations[@]}" +} + +# Function to select installation +select_installation() { + local installations=($(detect_installations)) + + if [ ${#installations[@]} -eq 0 ]; then + print_error "No PatchMon installations found in /opt" >&2 + exit 1 + fi + + if [ -n "$1" ]; then + # Use provided instance name + if [[ " ${installations[@]} " =~ " $1 " ]]; then + echo "$1" + return 0 + else + print_error "Instance '$1' not found" >&2 + exit 1 + fi + fi + + # Send status messages to stderr so they don't contaminate the return value + print_info "Found ${#installations[@]} installation(s):" >&2 + echo "" >&2 + + local i=1 + declare -A install_map + for install in "${installations[@]}"; do + # Get service status + local status="unknown" + if systemctl is-active --quiet "$install" 2>/dev/null; then + status="${GREEN}running${NC}" + elif systemctl is-enabled --quiet "$install" 2>/dev/null; then + status="${RED}stopped${NC}" + fi + + printf "%2d. 
%-30s (%b)\n" "$i" "$install" "$status" >&2 + install_map[$i]="$install" + i=$((i + 1)) + done + + echo "" >&2 + + # If only one installation, select it automatically + if [ ${#installations[@]} -eq 1 ]; then + print_info "Only one installation found, selecting automatically: ${installations[0]}" >&2 + echo "${installations[0]}" + return 0 + fi + + # Multiple installations - prompt user + printf "${BLUE}Select installation number [1]: ${NC}" >&2 + read -r selection &2 + exit 1 + fi +} + +# Main script +main() { + # Capture the directory where script is run from at the very start + ORIGINAL_DIR=$(pwd) + + echo -e "${BLUE}====================================================${NC}" + echo -e "${BLUE} PatchMon Diagnostics Collection${NC}" + echo -e "${BLUE}====================================================${NC}" + echo "" + + # Select instance + instance_name=$(select_installation "$1") + instance_dir="/opt/$instance_name" + + print_info "Selected instance: $instance_name" + print_info "Directory: $instance_dir" + echo "" + + # Create single diagnostics file in the original directory + timestamp=$(date +%Y%m%d_%H%M%S) + diag_file="${ORIGINAL_DIR}/patchmon_diagnostics_${instance_name}_${timestamp}.txt" + + print_info "Collecting diagnostics to: $diag_file" + echo "" + + # Initialize the diagnostics file with header + cat > "$diag_file" << EOF +=================================================== +PatchMon Diagnostics Report +=================================================== +Instance: $instance_name +Generated: $(date) +Hostname: $(hostname) +Generated from: ${ORIGINAL_DIR} +=================================================== + +EOF + + # ======================================== + # 1. System Information + # ======================================== + print_info "Collecting system information..." 
+ + cat >> "$diag_file" << EOF +=== System Information === +OS: $(cat /etc/os-release 2>/dev/null | grep PRETTY_NAME | cut -d'"' -f2 || echo "Unknown") +Kernel: $(uname -r) +Uptime: $(uptime) + +=== CPU Information === +$(lscpu | grep -E "Model name|CPU\(s\)|Thread|Core" || echo "Not available") + +=== Memory Information === +$(free -h) + +=== Disk Usage === +$(df -h | grep -E "Filesystem|/dev/|/opt") + +=== Network Interfaces === +$(ip -br addr) + +=================================================== +EOF + + # ======================================== + # 2. PatchMon Instance Information + # ======================================== + print_info "Collecting instance information..." + + cat >> "$diag_file" << EOF + +=== PatchMon Instance Information === + +=== Directory Structure === +$(ls -lah "$instance_dir" 2>/dev/null || echo "Cannot access directory") + +=== Backend Package Info === +$(cat "$instance_dir/backend/package.json" 2>/dev/null | grep -E "name|version" || echo "Not found") + +=== Frontend Package Info === +$(cat "$instance_dir/frontend/package.json" 2>/dev/null | grep -E "name|version" || echo "Not found") + +=== Deployment Info === +$(cat "$instance_dir/deployment-info.txt" 2>/dev/null || echo "No deployment-info.txt found") + +=================================================== +EOF + + # ======================================== + # 3. Environment Configuration (Sanitized) + # ======================================== + print_info "Collecting environment configuration (sanitized)..." + + echo "" >> "$diag_file" + echo "=== Backend Environment Configuration (Sanitized) ===" >> "$diag_file" + if [ -f "$instance_dir/backend/.env" ]; then + sanitize_sensitive "$(cat "$instance_dir/backend/.env")" >> "$diag_file" + else + echo "Backend .env file not found" >> "$diag_file" + fi + echo "" >> "$diag_file" + + # ======================================== + # 4. 
Service Status and Configuration + # ======================================== + print_info "Collecting service information..." + + cat >> "$diag_file" << EOF + +=== Service Status and Configuration === + +=== Service Status === +$(systemctl status "$instance_name" 2>/dev/null || echo "Service not found") + +=== Service File === +$(cat "/etc/systemd/system/${instance_name}.service" 2>/dev/null || echo "Service file not found") + +=== Service is-enabled === +$(systemctl is-enabled "$instance_name" 2>/dev/null || echo "unknown") + +=== Service is-active === +$(systemctl is-active "$instance_name" 2>/dev/null || echo "unknown") + +=================================================== +EOF + + # ======================================== + # 5. Service Logs + # ======================================== + print_info "Collecting service logs..." + + echo "" >> "$diag_file" + echo "=== Service Logs (last 500 lines) ===" >> "$diag_file" + journalctl -u "$instance_name" -n 500 --no-pager >> "$diag_file" 2>&1 || \ + echo "Could not retrieve service logs" >> "$diag_file" + echo "" >> "$diag_file" + + # ======================================== + # 6. Nginx Configuration + # ======================================== + print_info "Collecting nginx configuration..." + + cat >> "$diag_file" << EOF + +=== Nginx Configuration === + +=== Nginx Status === +$(systemctl status nginx 2>/dev/null | head -20 || echo "Nginx not found") + +=== Site Configuration === +$(cat "/etc/nginx/sites-available/$instance_name" 2>/dev/null || echo "Nginx config not found") + +=== Nginx Error Log (last 100 lines) === +$(tail -100 /var/log/nginx/error.log 2>/dev/null || echo "Error log not accessible") + +=== Nginx Access Log (last 50 lines) === +$(tail -50 /var/log/nginx/access.log 2>/dev/null || echo "Access log not accessible") + +=== Nginx Test === +$(nginx -t 2>&1 || echo "Nginx test failed") + +=================================================== +EOF + + # ======================================== + # 7. 
Database Connection Test + # ======================================== + print_info "Testing database connection..." + + echo "" >> "$diag_file" + echo "=== Database Information ===" >> "$diag_file" + echo "" >> "$diag_file" + + if [ -f "$instance_dir/backend/.env" ]; then + # Load .env + set -a + source "$instance_dir/backend/.env" + set +a + + # Parse DATABASE_URL + if [ -n "$DATABASE_URL" ]; then + DB_USER=$(echo "$DATABASE_URL" | sed -n 's|postgresql://\([^:]*\):.*|\1|p') + DB_PASS=$(echo "$DATABASE_URL" | sed -n 's|postgresql://[^:]*:\([^@]*\)@.*|\1|p') + DB_HOST=$(echo "$DATABASE_URL" | sed -n 's|.*@\([^:]*\):.*|\1|p') + DB_PORT=$(echo "$DATABASE_URL" | sed -n 's|.*:\([0-9]*\)/.*|\1|p') + DB_NAME=$(echo "$DATABASE_URL" | sed -n 's|.*/\([^?]*\).*|\1|p') + + cat >> "$diag_file" << EOF +=== Database Connection Details === +Host: $DB_HOST +Port: $DB_PORT +Database: $DB_NAME +User: $DB_USER + +=== PostgreSQL Status === +$(systemctl status postgresql 2>/dev/null | head -20 || echo "PostgreSQL status not available") + +=== Connection Test === +EOF + + if PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "SELECT version();" >> "$diag_file" 2>&1; then + echo "✅ Database connection: SUCCESSFUL" >> "$diag_file" + else + echo "❌ Database connection: FAILED" >> "$diag_file" + fi + + echo "" >> "$diag_file" + echo "=== Database Size ===" >> "$diag_file" + PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c " + SELECT + pg_size_pretty(pg_database_size('$DB_NAME')) as database_size; + " >> "$diag_file" 2>&1 || echo "Could not get database size" >> "$diag_file" + + echo "" >> "$diag_file" + echo "=== Table Sizes ===" >> "$diag_file" + PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c " + SELECT + schemaname, + tablename, + pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size + FROM pg_tables + WHERE schemaname = 'public' + ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC + 
LIMIT 10; + " >> "$diag_file" 2>&1 || echo "Could not get table sizes" >> "$diag_file" + + echo "" >> "$diag_file" + echo "=== Migration Status ===" >> "$diag_file" + cd "$instance_dir/backend" + npx prisma migrate status >> "$diag_file" 2>&1 || echo "Could not get migration status" >> "$diag_file" + + echo "===================================================" >> "$diag_file" + else + echo "DATABASE_URL not found in .env" >> "$diag_file" + fi + else + echo ".env file not found" >> "$diag_file" + fi + + # ======================================== + # 8. Redis Connection Test + # ======================================== + print_info "Testing Redis connection..." + + if [ -f "$instance_dir/backend/.env" ]; then + # Load .env + set -a + source "$instance_dir/backend/.env" + set +a + + cat >> "$diag_file" << EOF +=================================================== +Redis Information +=================================================== + +=== Redis Connection Details === +Host: ${REDIS_HOST:-localhost} +Port: ${REDIS_PORT:-6379} +User: ${REDIS_USER:-(none)} +Database: ${REDIS_DB:-0} + +=== Redis Status === +$(systemctl status redis-server 2>/dev/null | head -20 || echo "Redis status not available") + +=== Connection Test === +EOF + + # Test connection + if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then + if redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" ping >> "$diag_file" 2>&1; then + echo "✅ Redis connection (with user): SUCCESSFUL" >> "$diag_file" + + echo "" >> "$diag_file" + echo "=== Redis INFO ===" >> "$diag_file" + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" INFO >> "$diag_file" 2>&1 + + echo "" >> "$diag_file" + echo "=== Redis Database Size ===" >> "$diag_file" + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" 
--pass "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" DBSIZE >> "$diag_file" 2>&1 + else + echo "❌ Redis connection (with user): FAILED" >> "$diag_file" + fi + elif [ -n "$REDIS_PASSWORD" ]; then + if redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" ping >> "$diag_file" 2>&1; then + echo "✅ Redis connection (requirepass): SUCCESSFUL" >> "$diag_file" + + echo "" >> "$diag_file" + echo "=== Redis INFO ===" >> "$diag_file" + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" INFO >> "$diag_file" 2>&1 + + echo "" >> "$diag_file" + echo "=== Redis Database Size ===" >> "$diag_file" + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" DBSIZE >> "$diag_file" 2>&1 + else + echo "❌ Redis connection (requirepass): FAILED" >> "$diag_file" + fi + else + if redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -n "${REDIS_DB:-0}" ping >> "$diag_file" 2>&1; then + echo "✅ Redis connection (no auth): SUCCESSFUL" >> "$diag_file" + else + echo "❌ Redis connection: FAILED" >> "$diag_file" + fi + fi + + echo "" >> "$diag_file" + echo "=== Redis ACL Users ===" >> "$diag_file" + if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning ACL LIST >> "$diag_file" + elif [ -n "$REDIS_PASSWORD" ]; then + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning ACL LIST >> "$diag_file" + fi + + echo "===================================================" >> "$diag_file" + else + echo ".env file not found" >> "$diag_file" + fi + + # ======================================== + # 9. 
Network and Port Information + # ======================================== + print_info "Collecting network information..." + + # Get backend port from .env + local backend_port=$(grep '^PORT=' "$instance_dir/backend/.env" 2>/dev/null | cut -d'=' -f2 | tr -d ' ' || echo "3000") + + cat >> "$diag_file" << EOF +=================================================== +Network and Port Information +=================================================== + +=== Listening Ports === +$(ss -tlnp | grep -E "LISTEN|nginx|node|postgres|redis" || netstat -tlnp | grep -E "LISTEN|nginx|node|postgres|redis" || echo "Could not get port information") + +=== Active Connections === +$(ss -tn state established | head -20 || echo "Could not get connection information") + +=== Backend Port Connections (Port $backend_port) === +Total connections to backend: $(ss -tn | grep ":$backend_port" | wc -l || echo "0") +$(ss -tn | grep ":$backend_port" | head -10 || echo "No connections found") + +=== PostgreSQL Connections === +EOF + + # Get PostgreSQL connection count + if [ -n "$DB_PASS" ] && [ -n "$DB_USER" ] && [ -n "$DB_NAME" ]; then + PGPASSWORD="$DB_PASS" psql -h "${DB_HOST:-localhost}" -U "$DB_USER" -d "$DB_NAME" -c " + SELECT + count(*) as total_connections, + count(*) FILTER (WHERE state = 'active') as active_connections, + count(*) FILTER (WHERE state = 'idle') as idle_connections + FROM pg_stat_activity + WHERE datname = '$DB_NAME'; + " >> "$diag_file" 2>&1 || echo "Could not get PostgreSQL connection stats" >> "$diag_file" + + echo "" >> "$diag_file" + echo "=== PostgreSQL Connection Details ===" >> "$diag_file" + PGPASSWORD="$DB_PASS" psql -h "${DB_HOST:-localhost}" -U "$DB_USER" -d "$DB_NAME" -c " + SELECT + pid, + usename, + application_name, + client_addr, + state, + query_start, + state_change + FROM pg_stat_activity + WHERE datname = '$DB_NAME' + ORDER BY query_start DESC + LIMIT 20; + " >> "$diag_file" 2>&1 || echo "Could not get connection details" >> "$diag_file" + else + echo 
"Database credentials not available" >> "$diag_file" + fi + + echo "" >> "$diag_file" + echo "=== Redis Connections ===" >> "$diag_file" + + # Get Redis connection count + if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" INFO clients >> "$diag_file" 2>&1 || echo "Could not get Redis connection info" >> "$diag_file" + elif [ -n "$REDIS_PASSWORD" ]; then + redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning -n "${REDIS_DB:-0}" INFO clients >> "$diag_file" 2>&1 || echo "Could not get Redis connection info" >> "$diag_file" + fi + + cat >> "$diag_file" << EOF + +=== Firewall Status (UFW) === +$(ufw status 2>/dev/null || echo "UFW not available") + +=== Firewall Status (iptables) === +$(iptables -L -n | head -50 2>/dev/null || echo "iptables not available") + +=================================================== +EOF + + # ======================================== + # 10. Process Information + # ======================================== + print_info "Collecting process information..." + + cat >> "$diag_file" << EOF +=================================================== +Process Information +=================================================== + +=== PatchMon Node Processes === +$(ps aux | grep -E "node.*$instance_dir|PID" | grep -v grep || echo "No processes found") + +=== Top Processes (CPU) === +$(ps aux --sort=-%cpu | head -15) + +=== Top Processes (Memory) === +$(ps aux --sort=-%mem | head -15) + +=================================================== +EOF + + # ======================================== + # 11. SSL Certificate Information + # ======================================== + print_info "Collecting SSL certificate information..." 
+ + cat >> "$diag_file" << EOF +=================================================== +SSL Certificate Information +=================================================== + +=== Certbot Certificates === +$(certbot certificates 2>/dev/null || echo "Certbot not available or no certificates") + +=== SSL Certificate Files === +$(ls -lh /etc/letsencrypt/live/$instance_name/ 2>/dev/null || echo "No SSL certificates found for $instance_name") + +=================================================== +EOF + + # ======================================== + # 12. Recent System Logs + # ======================================== + print_info "Collecting recent system logs..." + + journalctl -n 200 --no-pager >> "$diag_file" 2>&1 || \ + echo "Could not retrieve system logs" >> "$diag_file" + + # ======================================== + # 13. Installation Log (if exists) + # ======================================== + print_info "Collecting installation log..." + + echo "" >> "$diag_file" + echo "=== Installation Log (last 200 lines) ===" >> "$diag_file" + if [ -f "$instance_dir/patchmon-install.log" ]; then + tail -200 "$instance_dir/patchmon-install.log" >> "$diag_file" 2>&1 + else + echo "No installation log found" >> "$diag_file" + fi + echo "" >> "$diag_file" + + # ======================================== + # 14. Node.js and npm Information + # ======================================== + print_info "Collecting Node.js information..." 
+ + cat >> "$diag_file" << EOF +=================================================== +Node.js and npm Information +=================================================== + +=== Node.js Version === +$(node --version 2>/dev/null || echo "Node.js not found") + +=== npm Version === +$(npm --version 2>/dev/null || echo "npm not found") + +=== Backend Dependencies === +$(cd "$instance_dir/backend" && npm list --depth=0 2>/dev/null || echo "Could not list backend dependencies") + +=================================================== +EOF + + # ======================================== + # Finalize diagnostics file + # ======================================== + print_info "Finalizing diagnostics file..." + + echo "" >> "$diag_file" + echo "====================================================" >> "$diag_file" + echo "END OF DIAGNOSTICS REPORT" >> "$diag_file" + echo "====================================================" >> "$diag_file" + echo "" >> "$diag_file" + echo "IMPORTANT: Sensitive Information" >> "$diag_file" + echo "Passwords, secrets, and tokens have been sanitized" >> "$diag_file" + echo "and replaced with [REDACTED]. However, please review" >> "$diag_file" + echo "before sharing to ensure no sensitive data is included." 
>> "$diag_file" + echo "====================================================" >> "$diag_file" + + print_status "Diagnostics file created: $diag_file" + + # ======================================== + # Display summary + # ======================================== + echo "" + echo -e "${GREEN}====================================================${NC}" + echo -e "${GREEN} Diagnostics Collection Complete!${NC}" + echo -e "${GREEN}====================================================${NC}" + echo "" + + # Get service statuses and file size + local service_status=$(systemctl is-active "$instance_name" 2>/dev/null || echo "unknown") + local nginx_status=$(systemctl is-active nginx 2>/dev/null || echo "unknown") + local postgres_status=$(systemctl is-active postgresql 2>/dev/null || echo "unknown") + local redis_status=$(systemctl is-active redis-server 2>/dev/null || echo "unknown") + local file_size=$(du -h "$diag_file" 2>/dev/null | cut -f1 || echo "unknown") + local line_count=$(wc -l < "$diag_file" 2>/dev/null || echo "unknown") + + # Get connection counts for summary + local backend_port=$(grep '^PORT=' "$instance_dir/backend/.env" 2>/dev/null | cut -d'=' -f2 | tr -d ' ' || echo "3000") + local backend_conn_count=$(ss -tn 2>/dev/null | grep ":$backend_port" | wc -l || echo "0") + + local db_conn_count="N/A" + if [ -n "$DB_PASS" ] && [ -n "$DB_USER" ] && [ -n "$DB_NAME" ]; then + db_conn_count=$(PGPASSWORD="$DB_PASS" psql -h "${DB_HOST:-localhost}" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT count(*) FROM pg_stat_activity WHERE datname = '$DB_NAME';" 2>/dev/null || echo "N/A") + fi + + local redis_conn_count="N/A" + if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then + redis_conn_count=$(redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" --user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning INFO clients 2>/dev/null | grep "connected_clients:" | cut -d':' -f2 | tr -d '\r' || echo "N/A") + elif [ -n "$REDIS_PASSWORD" ]; then + 
redis_conn_count=$(redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" -a "$REDIS_PASSWORD" --no-auth-warning INFO clients 2>/dev/null | grep "connected_clients:" | cut -d':' -f2 | tr -d '\r' || echo "N/A") + fi + + # Compact, copyable summary + echo -e "${BLUE}═══════════════════════════════════════════════════${NC}" + echo -e "${BLUE}DIAGNOSTICS SUMMARY (copy-paste friendly)${NC}" + echo -e "${BLUE}═══════════════════════════════════════════════════${NC}" + echo "Instance: $instance_name" + echo "File: $diag_file" + echo "Size: $file_size ($line_count lines)" + echo "Generated: $(date '+%Y-%m-%d %H:%M:%S')" + echo "---" + echo "Service Status: $service_status" + echo "Nginx Status: $nginx_status" + echo "PostgreSQL: $postgres_status" + echo "Redis: $redis_status" + echo "---" + echo "Backend Port: $backend_port (Active Connections: $backend_conn_count)" + echo "Database Connections: $db_conn_count" + echo "Redis Connections: $redis_conn_count" + echo "---" + echo "View: cat $(basename "$diag_file")" + echo "Or: less $(basename "$diag_file")" + echo "Share: Send $(basename "$diag_file") to support" + echo -e "${BLUE}═══════════════════════════════════════════════════${NC}" + echo "" + print_warning "Review file before sharing - sensitive data has been sanitized" + echo "" + + print_success "Done!" 
+} + +# Run main function +main "$@" + diff --git a/tools/fix-migrations.sh b/tools/fix-migrations.sh new file mode 100755 index 0000000..dc36bab --- /dev/null +++ b/tools/fix-migrations.sh @@ -0,0 +1,286 @@ +#!/bin/bash +# PatchMon Migration Fixer +# Standalone script to detect and fix failed Prisma migrations +# Usage: sudo bash fix-migrations.sh [instance-name] + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Print functions +print_status() { + echo -e "${GREEN}✅ $1${NC}" +} + +print_info() { + echo -e "${BLUE}ℹ️ $1${NC}" +} + +print_error() { + echo -e "${RED}❌ $1${NC}" +} + +print_warning() { + echo -e "${YELLOW}⚠️ $1${NC}" +} + +# Check if running as root +if [[ $EUID -ne 0 ]]; then + print_error "This script must be run as root" + print_info "Please run: sudo bash $0" + exit 1 +fi + +# Function to detect PatchMon installations +detect_installations() { + local installations=() + + if [ -d "/opt" ]; then + for dir in /opt/*/; do + local dirname=$(basename "$dir") + # Skip backup directories + if [[ "$dirname" =~ \.backup\. ]]; then + continue + fi + # Check if it's a PatchMon installation + if [ -f "$dir/backend/package.json" ] && grep -q "patchmon" "$dir/backend/package.json" 2>/dev/null; then + installations+=("$dirname") + fi + done + fi + + echo "${installations[@]}" +} + +# Function to select installation +select_installation() { + local installations=($(detect_installations)) + + if [ ${#installations[@]} -eq 0 ]; then + print_error "No PatchMon installations found in /opt" + exit 1 + fi + + if [ -n "$1" ]; then + # Use provided instance name + if [[ " ${installations[@]} " =~ " $1 " ]]; then + echo "$1" + return 0 + else + print_error "Instance '$1' not found" + exit 1 + fi + fi + + print_info "Found ${#installations[@]} installation(s):" + echo "" + + local i=1 + declare -A install_map + for install in "${installations[@]}"; do + printf "%2d. 
%s\n" "$i" "$install"
        install_map[$i]="$install"
        i=$((i + 1))
    done

    echo ""
    echo -n -e "${BLUE}Select installation number [1]: ${NC}"
    read -r selection

    selection=${selection:-1}

    if [[ "$selection" =~ ^[0-9]+$ ]] && [ -n "${install_map[$selection]}" ]; then
        echo "${install_map[$selection]}"
        return 0
    else
        print_error "Invalid selection"
        exit 1
    fi
}

#######################################
# Find Prisma migrations that started but never finished and let the
# operator decide how to resolve them.
# Arguments:
#   $1 - database name
#   $2 - database user
#   $3 - database password (handed to psql via PGPASSWORD)
#   $4 - database host (optional, default: localhost)
#   $5 - database port (optional, default: 5432)
# Inputs:
#   Reads the chosen menu option from stdin.
# Returns:
#   0 - nothing to fix, details shown, or every selected record updated
#   1 - query failed, user cancelled, invalid option, or an update failed
#######################################
fix_failed_migrations() {
    local db_name="$1"
    local db_user="$2"
    local db_pass="$3"
    local db_host="${4:-localhost}"
    local db_port="${5:-5432}"

    # One place for the connection arguments so every call targets the same DB.
    # -X keeps a personal ~/.psqlrc from altering script behaviour.
    local -a psql_cmd=(psql -X -h "$db_host" -p "$db_port" -U "$db_user" -d "$db_name")

    print_info "Checking for failed migrations in database..."

    # Query for failed migrations. A failing query must not be mistaken for
    # "nothing failed", so the exit status is checked explicitly.
    local failed_migrations
    if ! failed_migrations=$(PGPASSWORD="$db_pass" "${psql_cmd[@]}" -t -A -c \
        "SELECT migration_name FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;" 2>/dev/null); then
        print_warning "Could not query the _prisma_migrations table; skipping failed-migration check"
        return 1
    fi

    if [ -z "$failed_migrations" ]; then
        print_status "No failed migrations found"
        return 0
    fi

    local migration
    print_warning "Found failed migration(s):"
    while IFS= read -r migration; do
        if [ -n "$migration" ]; then
            print_warning "  - $migration"
        fi
    done <<< "$failed_migrations"
    echo ""

    print_info "What would you like to do?"
    echo "  1. Clean and retry (delete failed records and re-run migration)"
    echo "  2. Mark as completed (if schema changes are already applied)"
    echo "  3. Show migration details only"
    echo "  4. Cancel"
    echo ""
    echo -n -e "${BLUE}Select option [1]: ${NC}"
    local option
    read -r option

    option=${option:-1}

    # Options 1 and 2 differ only in the SQL they run and the messages they
    # print; both are described here and executed by the shared loop below.
    # :'mig' is a psql variable reference that psql expands to a properly
    # quoted SQL literal, so the migration name is never pasted into the text.
    local action_sql start_msg each_msg done_msg final_msg
    case "$option" in
        1)
            start_msg="Cleaning failed migrations and preparing for retry..."
            each_msg="Processing"
            done_msg="Cleared"
            final_msg="Failed migrations cleared - ready to retry"
            action_sql="UPDATE _prisma_migrations SET rolled_back_at = NOW() WHERE migration_name = :'mig' AND finished_at IS NULL;
DELETE FROM _prisma_migrations WHERE migration_name = :'mig' AND finished_at IS NULL;"
            ;;
        2)
            start_msg="Marking migrations as completed..."
            each_msg="Marking as complete"
            done_msg="Marked complete"
            final_msg="All migrations marked as completed"
            action_sql="UPDATE _prisma_migrations SET finished_at = NOW(), logs = 'Manually resolved by fix-migrations.sh' WHERE migration_name = :'mig' AND finished_at IS NULL;"
            ;;
        3)
            print_info "Migration details:"
            PGPASSWORD="$db_pass" "${psql_cmd[@]}" -c \
                "SELECT migration_name, started_at, finished_at, rolled_back_at, logs FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;"
            return 0
            ;;
        4)
            print_info "Cancelled"
            return 1
            ;;
        *)
            print_error "Invalid option"
            return 1
            ;;
    esac

    print_info "$start_msg"

    # The loop reads from a here-string (not a pipe) so it runs in the current
    # shell and the failure counter survives it.
    local failures=0
    while IFS= read -r migration; do
        [ -n "$migration" ] || continue
        print_info "$each_msg: $migration"

        # SQL is fed on stdin because psql does not expand variables in -c
        # strings. ON_ERROR_STOP makes a failing statement produce a non-zero
        # exit status; --single-transaction keeps multi-statement actions atomic.
        if PGPASSWORD="$db_pass" "${psql_cmd[@]}" -q --single-transaction \
            -v ON_ERROR_STOP=1 -v mig="$migration" >/dev/null <<< "$action_sql"; then
            print_status "$done_msg: $migration"
        else
            print_error "Failed to update migration record: $migration"
            failures=$((failures + 1))
        fi
    done <<< "$failed_migrations"

    if [ "$failures" -gt 0 ]; then
        print_error "$failures migration record(s) could not be updated"
        return 1
    fi

    print_status "$final_msg"
    return 0
}

#######################################
# Entry point: pick an installation, read its database settings from
# backend/.env, show the Prisma migration status, offer to repair failed
# migration records and optionally run 'npx prisma migrate deploy'.
# Arguments:
#   $1 - passed through to select_installation (optional)
# Globals written:
#   instance_name, instance_dir, DB_USER, DB_PASS, DB_HOST, DB_PORT, DB_NAME,
#   plus every variable defined in the sourced .env (exported via set -a)
#######################################
main() {
    echo -e "${BLUE}====================================================${NC}"
    echo -e "${BLUE}        PatchMon Migration Fixer${NC}"
    echo -e "${BLUE}====================================================${NC}"
    echo ""

    # Select instance.
    # An 'exit 1' inside select_installation only terminates the command
    # substitution's subshell, so its status has to be checked here.
    # NOTE(review): select_installation echoes its selection prompt to stdout,
    # which this command substitution captures together with the chosen name;
    # the prompt should be sent to stderr inside that function.
    if ! instance_name=$(select_installation "${1:-}") || [ -z "$instance_name" ]; then
        print_error "No installation selected"
        exit 1
    fi
    instance_dir="/opt/$instance_name"

    print_info "Selected instance: $instance_name"
    print_info "Directory: $instance_dir"
    echo ""

    # Load .env to get database credentials
    if [ ! -f "$instance_dir/backend/.env" ]; then
        print_error "Cannot find .env file at $instance_dir/backend/.env"
        exit 1
    fi

    # Source .env with auto-export so child processes (npx) see the values too
    set -a
    # shellcheck disable=SC1091
    source "$instance_dir/backend/.env"
    set +a

    # Parse DATABASE_URL
    if [ -z "${DATABASE_URL:-}" ]; then
        print_error "DATABASE_URL not found in .env file"
        exit 1
    fi

    # scheme://user[:password]@host[:port]/database[?params]
    # Groups: 2=user 4=password 5=host 7=port 8=database
    local url_re='^postgres(ql)?://([^:@/]*)(:([^@]*))?@([^:/?]+)(:([0-9]+))?/([^?]+)'
    if [[ "$DATABASE_URL" =~ $url_re ]]; then
        DB_USER=${BASH_REMATCH[2]}
        DB_PASS=${BASH_REMATCH[4]}
        DB_HOST=${BASH_REMATCH[5]}
        DB_PORT=${BASH_REMATCH[7]:-5432}
        DB_NAME=${BASH_REMATCH[8]}
    else
        print_error "DATABASE_URL is not a valid postgresql:// connection URL"
        exit 1
    fi

    print_info "Database: $DB_NAME"
    print_info "User: $DB_USER"
    print_info "Host: $DB_HOST"
    print_info "Port: $DB_PORT"
    echo ""

    # Test database connection
    print_info "Testing database connection..."
    if ! PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "SELECT 1;" >/dev/null 2>&1; then
        print_error "Cannot connect to database"
        exit 1
    fi
    print_status "Database connection successful"
    echo ""

    # Check Prisma migration status
    print_info "Checking Prisma migration status..."
    if ! cd "$instance_dir/backend"; then
        print_error "Cannot change to directory $instance_dir/backend"
        exit 1
    fi

    echo ""
    echo -e "${YELLOW}=== Prisma Migration Status ===${NC}"
    # Status is informational only; a non-zero exit must not abort the script
    npx prisma migrate status 2>&1 || true
    echo -e "${YELLOW}==============================${NC}"
    echo ""

    # Check for failed migrations. A non-zero status (cancelled, query failed,
    # update failed) has already been reported by the function; the operator
    # is still offered the deploy step below, so it is deliberately not fatal.
    fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "$DB_HOST" "$DB_PORT" || true

    # Ask if user wants to run migrations now
    echo ""
    echo -n -e "${BLUE}Do you want to run 'npx prisma migrate deploy' now? [y/N]: ${NC}"
    local run_migrate
    read -r run_migrate

    if [[ "$run_migrate" =~ ^[Yy] ]]; then
        print_info "Running migrations..."

        # Still in $instance_dir/backend from the cd above
        if npx prisma migrate deploy; then
            print_status "Migrations completed successfully!"
        else
            print_error "Migration failed"
            print_info "You may need to run this script again or investigate further"
            exit 1
        fi
    else
        print_info "Skipped migration deployment"
        print_info "Run manually: cd $instance_dir/backend && npx prisma migrate deploy"
    fi

    echo ""
    print_status "Done!"
}

# Run main function
main "$@"