mirror of
https://github.com/9technologygroup/patchmon.net.git
synced 2026-01-08 22:19:35 -06:00
Added diagnostics scripts and improved setup with more Redis DB server handling
This commit is contained in:
787
setup.sh
787
setup.sh
@@ -707,6 +707,10 @@ configure_redis() {
|
||||
chown redis:redis /etc/redis/users.acl
|
||||
chmod 640 /etc/redis/users.acl
|
||||
print_status "Created Redis ACL file"
|
||||
else
|
||||
# Backup existing ACL file
|
||||
cp /etc/redis/users.acl /etc/redis/users.acl.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true
|
||||
print_info "Backed up existing ACL file"
|
||||
fi
|
||||
|
||||
# Configure ACL file in redis.conf
|
||||
@@ -727,8 +731,14 @@ configure_redis() {
|
||||
print_status "Removed user definitions from redis.conf"
|
||||
fi
|
||||
|
||||
# Create admin user in ACL file if it doesn't exist
|
||||
if ! grep -q "^user admin" /etc/redis/users.acl; then
|
||||
# Create or update admin user in ACL file
|
||||
if grep -q "^user admin" /etc/redis/users.acl; then
|
||||
print_info "Admin user already exists in ACL, updating password..."
|
||||
# Remove old admin line and add new one
|
||||
sed -i '/^user admin/d' /etc/redis/users.acl
|
||||
echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl
|
||||
print_status "Updated admin user password"
|
||||
else
|
||||
echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl
|
||||
print_status "Added admin user to ACL file"
|
||||
fi
|
||||
@@ -737,65 +747,126 @@ configure_redis() {
|
||||
print_info "Restarting Redis to apply ACL configuration..."
|
||||
systemctl restart redis-server
|
||||
|
||||
# Wait for Redis to start
|
||||
sleep 3
|
||||
# Wait for Redis to start with retry logic
|
||||
sleep 5
|
||||
|
||||
# Test admin connection
|
||||
if ! redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then
|
||||
print_error "Failed to configure Redis ACL authentication"
|
||||
return 1
|
||||
fi
|
||||
# Test admin connection with retries
|
||||
local max_retries=3
|
||||
local retry=0
|
||||
local admin_works=false
|
||||
|
||||
print_status "Redis ACL authentication configuration successful"
|
||||
|
||||
# Create Redis user with ACL
|
||||
print_info "Creating Redis ACL user: $REDIS_USER"
|
||||
|
||||
# Create user with password and permissions - capture output for error handling
|
||||
local acl_result
|
||||
acl_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1)
|
||||
|
||||
if [ "$acl_result" = "OK" ]; then
|
||||
print_status "Redis user '$REDIS_USER' created successfully"
|
||||
|
||||
# Save ACL users to file to persist across restarts
|
||||
local save_result
|
||||
save_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE 2>&1)
|
||||
|
||||
if [ "$save_result" = "OK" ]; then
|
||||
print_status "Redis ACL users saved to file"
|
||||
else
|
||||
print_warning "Failed to save ACL users to file: $save_result"
|
||||
while [ $retry -lt $max_retries ]; do
|
||||
if redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then
|
||||
admin_works=true
|
||||
break
|
||||
fi
|
||||
print_info "Waiting for Redis to be ready... (attempt $((retry + 1))/$max_retries)"
|
||||
sleep 2
|
||||
retry=$((retry + 1))
|
||||
done
|
||||
|
||||
if [ "$admin_works" = false ]; then
|
||||
print_error "Failed to verify admin connection after Redis restart"
|
||||
print_error "Redis ACL configuration may have issues"
|
||||
|
||||
# Verify user was actually created
|
||||
local verify_result
|
||||
verify_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL GETUSER "$REDIS_USER" 2>&1)
|
||||
# Try to fix by disabling ACL and using requirepass instead
|
||||
print_warning "Attempting fallback: using requirepass instead of ACL..."
|
||||
sed -i 's/^aclfile/# aclfile/' /etc/redis/redis.conf
|
||||
sed -i "s/^# requirepass .*/requirepass $REDIS_PASSWORD/" /etc/redis/redis.conf
|
||||
if ! grep -q "^requirepass" /etc/redis/redis.conf; then
|
||||
echo "requirepass $REDIS_PASSWORD" >> /etc/redis/redis.conf
|
||||
fi
|
||||
systemctl restart redis-server
|
||||
sleep 3
|
||||
|
||||
if [ "$verify_result" = "(nil)" ]; then
|
||||
print_error "User creation reported OK but user does not exist"
|
||||
# Test requirepass
|
||||
if redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning ping > /dev/null 2>&1; then
|
||||
print_status "Fallback successful - using requirepass authentication"
|
||||
# For requirepass mode, we'll set REDIS_USER empty later
|
||||
print_info "Note: Using legacy requirepass mode instead of ACL"
|
||||
else
|
||||
print_error "Fallback also failed - Redis authentication is broken"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
print_error "Failed to create Redis user: $acl_result"
|
||||
return 1
|
||||
print_status "Redis ACL authentication configuration successful"
|
||||
fi
|
||||
|
||||
# Test user connection
|
||||
print_info "Testing Redis user connection..."
|
||||
if redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then
|
||||
print_status "Redis user connection test successful"
|
||||
# Create Redis user with ACL (only if admin_works, meaning we're using ACL mode)
|
||||
if [ "$admin_works" = true ]; then
|
||||
print_info "Creating Redis ACL user: $REDIS_USER"
|
||||
|
||||
# Create user with password and permissions - capture output for error handling
|
||||
local acl_result
|
||||
acl_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1)
|
||||
|
||||
if [ "$acl_result" = "OK" ]; then
|
||||
print_status "Redis user '$REDIS_USER' created successfully"
|
||||
|
||||
# Save ACL users to file to persist across restarts
|
||||
local save_result
|
||||
save_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE 2>&1)
|
||||
|
||||
if [ "$save_result" = "OK" ]; then
|
||||
print_status "Redis ACL users saved to file"
|
||||
else
|
||||
print_warning "Failed to save ACL users to file: $save_result"
|
||||
fi
|
||||
|
||||
# Verify user was actually created
|
||||
local verify_result
|
||||
verify_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL GETUSER "$REDIS_USER" 2>&1)
|
||||
|
||||
if [ "$verify_result" = "(nil)" ]; then
|
||||
print_error "User creation reported OK but user does not exist"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Test user connection
|
||||
print_info "Testing Redis user connection..."
|
||||
if redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then
|
||||
print_status "Redis user connection test successful"
|
||||
else
|
||||
print_error "Redis user connection test failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Mark the selected database as in-use
|
||||
redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null
|
||||
print_status "Marked Redis database $REDIS_DB as in-use"
|
||||
else
|
||||
print_error "Failed to create Redis user: $acl_result"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
print_error "Redis user connection test failed"
|
||||
return 1
|
||||
# Using requirepass mode - no per-user ACL
|
||||
print_info "Using requirepass mode - testing connection..."
|
||||
|
||||
# For requirepass, we don't use username, just password
|
||||
if redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping > /dev/null 2>&1; then
|
||||
print_status "Redis requirepass connection test successful"
|
||||
|
||||
# Mark the selected database as in-use
|
||||
redis-cli -h 127.0.0.1 -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null
|
||||
print_status "Marked Redis database $REDIS_DB as in-use"
|
||||
|
||||
# Set REDIS_USER to empty for requirepass mode
|
||||
REDIS_USER=""
|
||||
REDIS_USER_PASSWORD="$REDIS_PASSWORD"
|
||||
else
|
||||
print_error "Redis requirepass connection test failed"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Mark the selected database as in-use
|
||||
redis-cli -h 127.0.0.1 -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" SET "patchmon:initialized" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /dev/null
|
||||
print_status "Marked Redis database $REDIS_DB as in-use"
|
||||
|
||||
# Note: Redis credentials will be written to .env by create_env_files() function
|
||||
print_status "Redis user '$REDIS_USER' configured successfully"
|
||||
print_status "Redis configured successfully"
|
||||
|
||||
if [ -n "$REDIS_USER" ]; then
|
||||
print_info "Redis Mode: ACL with user '$REDIS_USER'"
|
||||
else
|
||||
print_info "Redis Mode: requirepass (legacy single-password auth)"
|
||||
fi
|
||||
print_info "Redis credentials will be saved to backend/.env"
|
||||
|
||||
return 0
|
||||
@@ -1116,16 +1187,121 @@ EOF
|
||||
print_status "Environment files created"
|
||||
}
|
||||
|
||||
# Run database migrations
|
||||
# Check and fix failed Prisma migrations
#
# Looks up rows in _prisma_migrations that were started but never finished
# and deletes them so that a following `prisma migrate deploy` can retry
# those migrations from scratch.
#
# Arguments:
#   $1 - database name
#   $2 - database user
#   $3 - database password (handed to psql through PGPASSWORD)
#   $4 - database host (default: localhost)
# Returns:
#   0 if no failed migrations were found, or all of them were cleared
#   1 if the lookup failed or at least one record could not be removed
fix_failed_migrations() {
    local db_name="$1"
    local db_user="$2"
    local db_pass="$3"
    local db_host="${4:-localhost}"

    print_info "Checking for failed migrations in database..."

    # Query for failed migrations (where started_at is set but finished_at is NULL).
    # A failing query is reported instead of being treated as "nothing to fix",
    # so the caller does not retry against a database we could not inspect.
    local failed_migrations
    if ! failed_migrations=$(PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -t -A -c \
        "SELECT migration_name FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;" 2>/dev/null); then
        print_error "Could not query _prisma_migrations for failed migrations"
        return 1
    fi

    if [ -z "$failed_migrations" ]; then
        print_status "No failed migrations found"
        return 0
    fi

    local migration
    print_warning "Found failed migration(s):"
    while IFS= read -r migration; do
        if [ -n "$migration" ]; then
            print_warning " - $migration"
        fi
    done <<< "$failed_migrations"

    print_info "Attempting to resolve failed migrations..."

    # The loops read from a here-string rather than a pipe so that the failure
    # counter below is updated in this shell and not in a pipeline subshell.
    local sq="'"
    local escaped_name
    local failures=0
    while IFS= read -r migration; do
        [ -n "$migration" ] || continue
        print_info "Processing failed migration: $migration"

        # Double any single quote so the name stays inside the SQL string literal.
        escaped_name=${migration//$sq/$sq$sq}

        # Deleting the unfinished row is what allows the retry; setting
        # rolled_back_at on a row that is removed right away has no lasting effect.
        if PGPASSWORD="$db_pass" psql -v ON_ERROR_STOP=1 -h "$db_host" -U "$db_user" -d "$db_name" -c \
            "DELETE FROM _prisma_migrations WHERE migration_name = '$escaped_name' AND finished_at IS NULL;" >/dev/null 2>&1; then
            print_status "Marked migration '$migration' for retry"
        else
            print_error "Could not clear failed migration '$migration'"
            failures=$((failures + 1))
        fi
    done <<< "$failed_migrations"

    if [ "$failures" -gt 0 ]; then
        print_error "$failures failed migration(s) could not be cleared"
        return 1
    fi

    print_status "Failed migrations have been cleared for retry"
    return 0
}
|
||||
|
||||
# Run database migrations with self-healing
#
# Runs `npx prisma migrate deploy` as $INSTANCE_USER inside $APP_DIR/backend,
# making at most three attempts. When the output indicates Prisma error P3009
# (failed migrations recorded in the database), fix_failed_migrations is
# called and the deploy is retried; any other failure stops immediately.
# After a successful deploy the Prisma client is regenerated.
#
# Globals (read): APP_DIR, INSTANCE_USER, DB_NAME, DB_USER, DB_PASS
# Returns: 0 when migrations were applied, 1 otherwise
run_migrations() {
    print_info "Running database migrations as user $INSTANCE_USER..."

    local backend_dir="$APP_DIR/backend"
    if ! cd "$backend_dir"; then
        print_error "Cannot access backend directory: $backend_dir"
        return 1
    fi

    # No silent deploy before the loop: the first attempt below is the first
    # real run, so its output is captured and P3009 handling can kick in.
    local max_attempts=3
    local attempt=1
    local attempts_made=0
    local migration_success=false
    local migrate_output
    local failed_migration

    while [ "$attempt" -le "$max_attempts" ]; do
        print_info "Migration attempt $attempt of $max_attempts..."
        attempts_made=$attempt

        # Try to run migrations; the sentinel marks a non-zero exit status
        migrate_output=$(run_as_user "$INSTANCE_USER" "cd $APP_DIR/backend && npx prisma migrate deploy 2>&1" || echo "MIGRATION_FAILED")

        # Check if migration succeeded
        if ! echo "$migrate_output" | grep -q "MIGRATION_FAILED\|Error:\|P3009"; then
            print_status "Migrations completed successfully"
            migration_success=true
            break
        fi

        # Check specifically for P3009 (failed migrations found)
        if echo "$migrate_output" | grep -q "P3009\|migrate found failed migrations"; then
            print_warning "Detected failed migrations (P3009 error)"

            # Extract the failed migration name if possible (informational only)
            failed_migration=$(echo "$migrate_output" | grep -oP "The \`\K[^\`]+" | head -1 || echo "")
            if [ -n "$failed_migration" ]; then
                print_info "Failed migration identified: $failed_migration"
            fi

            # Attempt to fix failed migrations
            print_info "Attempting to self-heal migration issues..."
            if fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "localhost"; then
                print_status "Migration issues resolved, retrying..."
                attempt=$((attempt + 1))
                sleep 2
                continue
            fi

            print_error "Failed to resolve migration issues"
            break
        fi

        # Other migration error
        print_error "Migration failed with error:"
        if ! echo "$migrate_output" | grep -A 5 "Error:"; then
            # No "Error:" marker in the output (e.g. the command could not be
            # started at all): show the tail instead. Testing grep inside `if`
            # also keeps its non-zero status from ending the script should it
            # run under `set -e`.
            echo "$migrate_output" | tail -n 20
        fi
        break
    done

    if [ "$migration_success" = false ]; then
        print_error "Migrations failed after $attempts_made attempt(s)"
        print_info "You may need to manually resolve migration issues"
        print_info "Check migrations: cd $APP_DIR/backend && npx prisma migrate status"
        return 1
    fi

    # Generate Prisma client; a failure is reported but does not undo the
    # successful migration result.
    if ! run_as_user "$INSTANCE_USER" "cd $APP_DIR/backend && npx prisma generate" >/dev/null 2>&1; then
        print_warning "Prisma client generation failed - run 'npx prisma generate' in $backend_dir manually"
    fi

    print_status "Database migrations completed as $INSTANCE_USER"
    return 0
}
|
||||
|
||||
# Admin account creation removed - handled by application's first-time setup
|
||||
@@ -1462,7 +1638,60 @@ start_services() {
|
||||
print_status "PatchMon service started successfully"
|
||||
else
|
||||
print_error "Failed to start PatchMon service"
|
||||
systemctl status "$SERVICE_NAME"
|
||||
echo ""
|
||||
|
||||
# Show last 25 lines of service logs for debugging
|
||||
print_warning "=== Last 25 lines of service logs ==="
|
||||
journalctl -u "$SERVICE_NAME" -n 25 --no-pager || true
|
||||
print_warning "==================================="
|
||||
echo ""
|
||||
|
||||
# Check for specific error patterns
|
||||
local logs=$(journalctl -u "$SERVICE_NAME" -n 50 --no-pager 2>/dev/null || echo "")
|
||||
|
||||
if echo "$logs" | grep -q "WRONGPASS\|NOAUTH"; then
|
||||
print_error "❌ Detected Redis authentication error!"
|
||||
print_info "The service cannot authenticate with Redis."
|
||||
echo ""
|
||||
print_info "Current Redis configuration in .env:"
|
||||
grep "^REDIS_" "$APP_DIR/backend/.env" || true
|
||||
echo ""
|
||||
print_info "Debug steps:"
|
||||
print_info " 1. Check Redis is running:"
|
||||
print_info " systemctl status redis-server"
|
||||
echo ""
|
||||
print_info " 2. Check Redis ACL users:"
|
||||
print_info " redis-cli ACL LIST"
|
||||
echo ""
|
||||
print_info " 3. Test Redis connection:"
|
||||
local test_user=$(grep "^REDIS_USER=" "$APP_DIR/backend/.env" | cut -d'=' -f2)
|
||||
local test_pass=$(grep "^REDIS_PASSWORD=" "$APP_DIR/backend/.env" | cut -d'=' -f2)
|
||||
local test_db=$(grep "^REDIS_DB=" "$APP_DIR/backend/.env" | cut -d'=' -f2)
|
||||
print_info " redis-cli --user $test_user --pass $test_pass -n ${test_db:-0} ping"
|
||||
echo ""
|
||||
print_info " 4. Check Redis configuration files:"
|
||||
print_info " cat /etc/redis/redis.conf | grep aclfile"
|
||||
print_info " cat /etc/redis/users.acl"
|
||||
echo ""
|
||||
elif echo "$logs" | grep -q "ECONNREFUSED.*postgresql\|Connection refused.*5432"; then
|
||||
print_error "❌ Detected PostgreSQL connection error!"
|
||||
print_info "Check if PostgreSQL is running:"
|
||||
print_info " systemctl status postgresql"
|
||||
elif echo "$logs" | grep -q "ECONNREFUSED.*redis\|Connection refused.*6379"; then
|
||||
print_error "❌ Detected Redis connection error!"
|
||||
print_info "Check if Redis is running:"
|
||||
print_info " systemctl status redis-server"
|
||||
elif echo "$logs" | grep -q "database.*does not exist"; then
|
||||
print_error "❌ Database does not exist!"
|
||||
print_info "Database: $DB_NAME"
|
||||
elif echo "$logs" | grep -q "Error:"; then
|
||||
print_error "❌ Application error detected in logs"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
print_info "View full logs: journalctl -u $SERVICE_NAME -f"
|
||||
print_info "Check service status: systemctl status $SERVICE_NAME"
|
||||
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
@@ -2012,6 +2241,65 @@ select_installation_to_update() {
|
||||
done
|
||||
}
|
||||
|
||||
# Repair/recreate Redis user with correct permissions
#
# Using the "admin" ACL account (password read from the ACL file), resets the
# given user to: enabled, the supplied password, all keys, all commands.
# The ACL is then saved and the user is verified with PING and INFO against
# its database.
#
# Arguments:
#   $1 - Redis ACL user to repair
#   $2 - password to assign to that user
#   $3 - Redis database index used for verification (default: 0)
#   $4 - path of the Redis ACL file (default: /etc/redis/users.acl)
# Returns:
#   0 when the user was reset and verified, 1 otherwise
repair_redis_user() {
    local redis_user="$1"
    local redis_pass="$2"
    local redis_db="${3:-0}"
    local acl_file="${4:-/etc/redis/users.acl}"

    print_info "Attempting to repair Redis user: $redis_user"

    # Find admin password: the first ">password" token on the line of the user
    # named exactly "admin" (a prefix match would also pick up e.g. "administrator").
    # NOTE(review): ACL SAVE is documented to write hashed ("#...") passwords, so
    # after a save there may be no ">password" token left to read - confirm
    # against a deployed ACL file.
    local admin_password=""
    if [ -f "$acl_file" ]; then
        admin_password=$(awk '$1 == "user" && $2 == "admin" {
            for (i = 3; i <= NF; i++) {
                if (substr($i, 1, 1) == ">") { print substr($i, 2); exit }
            }
        }' "$acl_file" 2>/dev/null) || admin_password=""
    fi

    if [ -z "$admin_password" ]; then
        print_error "Cannot repair Redis user - no admin credentials found"
        return 1
    fi

    # Test admin connection
    if ! redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ping >/dev/null 2>&1; then
        print_error "Admin credentials don't work - cannot repair user"
        return 1
    fi

    print_status "Admin access confirmed"

    # Create user with full permissions. The "reset" rule returns an existing
    # user to its just-created state inside the same SETUSER call, so there is
    # no separate delete step that could leave the user missing if creation
    # fails. '~*' is quoted so the shell cannot glob-expand it.
    print_info "Creating user with full permissions..."
    local create_result
    create_result=$(redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning \
        ACL SETUSER "$redis_user" reset on ">${redis_pass}" '~*' +@all 2>&1) || true

    # Require a line that is exactly "OK" rather than any text containing it
    if ! printf '%s\n' "$create_result" | grep -qx "OK"; then
        print_error "Failed to create user: $create_result"
        return 1
    fi

    # Save ACL; the user already works at runtime, so a failed save is a warning
    local save_result
    save_result=$(redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ACL SAVE 2>&1) || true
    if ! printf '%s\n' "$save_result" | grep -qx "OK"; then
        print_warning "Failed to save ACL users to file: $save_result"
    fi

    # Verify the new user works
    if ! redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "$redis_db" ping >/dev/null 2>&1; then
        print_error "User created but PING command fails"
        return 1
    fi

    if ! redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "$redis_db" info >/dev/null 2>&1; then
        print_error "User created but INFO command still fails"
        return 1
    fi

    print_status "Redis user repaired successfully"
    return 0
}
|
||||
|
||||
# Check and update Redis configuration for existing installation
|
||||
update_redis_configuration() {
|
||||
print_info "Checking Redis configuration..."
|
||||
@@ -2021,12 +2309,57 @@ update_redis_configuration() {
|
||||
if grep -q "^REDIS_HOST=" "$instance_dir/backend/.env" && \
|
||||
grep -q "^REDIS_PASSWORD=" "$instance_dir/backend/.env"; then
|
||||
print_status "Redis configuration already exists in .env"
|
||||
return 0
|
||||
|
||||
# Verify the credentials actually work
|
||||
local redis_user=$(grep "^REDIS_USER=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"')
|
||||
local redis_pass=$(grep "^REDIS_PASSWORD=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"')
|
||||
local redis_db=$(grep "^REDIS_DB=" "$instance_dir/backend/.env" | cut -d'=' -f2 | tr -d '"')
|
||||
|
||||
if [ -n "$redis_user" ] && [ -n "$redis_pass" ]; then
|
||||
# Test with username and password
|
||||
local ping_works=false
|
||||
local info_works=false
|
||||
|
||||
if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "${redis_db:-0}" ping >/dev/null 2>&1; then
|
||||
ping_works=true
|
||||
fi
|
||||
|
||||
if redis-cli -h localhost -p 6379 --user "$redis_user" --pass "$redis_pass" --no-auth-warning -n "${redis_db:-0}" info >/dev/null 2>&1; then
|
||||
info_works=true
|
||||
fi
|
||||
|
||||
if [ "$ping_works" = true ] && [ "$info_works" = true ]; then
|
||||
print_status "Redis credentials verified with redis-cli (tested ping and info commands)"
|
||||
|
||||
# Force refresh the Redis user during updates to ensure correct ACL permissions
|
||||
# This prevents issues where redis-cli works but Node.js client doesn't
|
||||
print_info "Refreshing Redis user permissions to ensure compatibility..."
|
||||
|
||||
if repair_redis_user "$redis_user" "$redis_pass" "$redis_db"; then
|
||||
print_status "Redis user permissions refreshed successfully"
|
||||
return 0
|
||||
else
|
||||
print_warning "Could not refresh Redis user, but credentials seem to work - continuing..."
|
||||
return 0
|
||||
fi
|
||||
else
|
||||
print_warning "Redis credentials not working properly (ping: $ping_works, info: $info_works)"
|
||||
print_info "Attempting to repair Redis user..."
|
||||
|
||||
if repair_redis_user "$redis_user" "$redis_pass" "$redis_db"; then
|
||||
print_status "Redis user repaired successfully"
|
||||
return 0
|
||||
else
|
||||
print_warning "Could not repair Redis user, will reconfigure from scratch..."
|
||||
fi
|
||||
fi
|
||||
else
|
||||
print_warning "Redis credentials incomplete in .env (missing user or password)"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
print_warning "Redis configuration not found in .env - this is a legacy installation"
|
||||
print_info "Setting up Redis for this instance..."
|
||||
print_warning "Redis configuration not found or invalid in .env - setting up Redis for this instance..."
|
||||
|
||||
# Detect package manager if not already set
|
||||
if [ -z "$PKG_INSTALL" ]; then
|
||||
@@ -2054,6 +2387,39 @@ update_redis_configuration() {
|
||||
REDIS_USER="patchmon_${DB_SAFE_NAME}"
|
||||
REDIS_USER_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32)
|
||||
|
||||
# Test Redis connection to determine authentication status
|
||||
print_info "Testing Redis authentication status..."
|
||||
local needs_auth=false
|
||||
local admin_password=""
|
||||
|
||||
# Try ping without auth
|
||||
if redis-cli -h localhost -p 6379 ping >/dev/null 2>&1; then
|
||||
print_info "Redis is accessible without authentication"
|
||||
needs_auth=false
|
||||
else
|
||||
print_info "Redis requires authentication"
|
||||
needs_auth=true
|
||||
|
||||
# Try to find existing admin password from ACL file
|
||||
if [ -f /etc/redis/users.acl ] && grep -q "^user admin" /etc/redis/users.acl; then
|
||||
# Extract password from ACL file (format: >password)
|
||||
admin_password=$(grep "^user admin" /etc/redis/users.acl | grep -oP '>\K[^ ]+' | head -1)
|
||||
|
||||
if [ -n "$admin_password" ]; then
|
||||
print_info "Found existing admin credentials in ACL file"
|
||||
|
||||
# Test admin credentials
|
||||
if redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning ping >/dev/null 2>&1; then
|
||||
print_status "Existing admin credentials work"
|
||||
REDIS_PASSWORD="$admin_password"
|
||||
else
|
||||
print_warning "Existing admin credentials don't work, will create new configuration"
|
||||
admin_password=""
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Find available Redis database
|
||||
print_info "Finding available Redis database..."
|
||||
local redis_db=0
|
||||
@@ -2061,9 +2427,14 @@ update_redis_configuration() {
|
||||
|
||||
while [ $redis_db -lt $max_attempts ]; do
|
||||
local key_count
|
||||
key_count=$(redis-cli -h localhost -p 6379 -n "$redis_db" DBSIZE 2>&1 | grep -v "ERR" || echo "1")
|
||||
|
||||
if [ "$key_count" = "0" ] || [ "$key_count" = "(integer) 0" ]; then
|
||||
if [ "$needs_auth" = true ] && [ -n "$admin_password" ]; then
|
||||
key_count=$(redis-cli -h localhost -p 6379 --user admin --pass "$admin_password" --no-auth-warning -n "$redis_db" DBSIZE 2>&1 | grep -oP '\d+' || echo "1")
|
||||
else
|
||||
key_count=$(redis-cli -h localhost -p 6379 -n "$redis_db" DBSIZE 2>&1 | grep -oP '\d+' || echo "1")
|
||||
fi
|
||||
|
||||
if [ "$key_count" = "0" ]; then
|
||||
print_status "Found available Redis database: $redis_db"
|
||||
REDIS_DB=$redis_db
|
||||
break
|
||||
@@ -2076,50 +2447,146 @@ update_redis_configuration() {
|
||||
REDIS_DB=0
|
||||
fi
|
||||
|
||||
# Generate admin password if not exists
|
||||
REDIS_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32)
|
||||
|
||||
# Configure Redis with ACL if needed
|
||||
print_info "Configuring Redis ACL..."
|
||||
|
||||
# Create ACL file if it doesn't exist
|
||||
if [ ! -f /etc/redis/users.acl ]; then
|
||||
touch /etc/redis/users.acl
|
||||
chown redis:redis /etc/redis/users.acl
|
||||
chmod 640 /etc/redis/users.acl
|
||||
fi
|
||||
|
||||
# Configure ACL file in redis.conf
|
||||
if ! grep -q "^aclfile" /etc/redis/redis.conf 2>/dev/null; then
|
||||
echo "aclfile /etc/redis/users.acl" >> /etc/redis/redis.conf
|
||||
fi
|
||||
|
||||
# Remove requirepass (incompatible with ACL)
|
||||
if grep -q "^requirepass" /etc/redis/redis.conf 2>/dev/null; then
|
||||
sed -i 's/^requirepass.*/# &/' /etc/redis/redis.conf
|
||||
fi
|
||||
|
||||
# Create admin user if it doesn't exist
|
||||
if ! grep -q "^user admin" /etc/redis/users.acl; then
|
||||
echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl
|
||||
if [ "$needs_auth" = false ]; then
|
||||
print_info "Configuring Redis ACL for security..."
|
||||
|
||||
# Generate new admin password
|
||||
REDIS_PASSWORD=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32)
|
||||
|
||||
# Backup redis.conf
|
||||
if [ -f /etc/redis/redis.conf ]; then
|
||||
cp /etc/redis/redis.conf /etc/redis/redis.conf.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Create ACL file if it doesn't exist
|
||||
if [ ! -f /etc/redis/users.acl ]; then
|
||||
touch /etc/redis/users.acl
|
||||
chown redis:redis /etc/redis/users.acl
|
||||
chmod 640 /etc/redis/users.acl
|
||||
print_status "Created Redis ACL file"
|
||||
else
|
||||
# Backup existing ACL file
|
||||
cp /etc/redis/users.acl /etc/redis/users.acl.backup.$(date +%Y%m%d_%H%M%S) 2>/dev/null || true
|
||||
print_info "Backed up existing ACL file"
|
||||
fi
|
||||
|
||||
# Configure ACL file in redis.conf
|
||||
if ! grep -q "^aclfile" /etc/redis/redis.conf 2>/dev/null; then
|
||||
echo "aclfile /etc/redis/users.acl" >> /etc/redis/redis.conf
|
||||
print_status "Added ACL file configuration to redis.conf"
|
||||
fi
|
||||
|
||||
# Remove requirepass (incompatible with ACL)
|
||||
if grep -q "^requirepass" /etc/redis/redis.conf 2>/dev/null; then
|
||||
sed -i 's/^requirepass.*/# &/' /etc/redis/redis.conf
|
||||
print_status "Disabled requirepass (incompatible with ACL)"
|
||||
fi
|
||||
|
||||
# Create or update admin user in ACL file
|
||||
if grep -q "^user admin" /etc/redis/users.acl; then
|
||||
print_info "Admin user already exists in ACL, updating password..."
|
||||
# Remove old admin line and add new one
|
||||
sed -i '/^user admin/d' /etc/redis/users.acl
|
||||
echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl
|
||||
print_status "Updated admin user password"
|
||||
else
|
||||
echo "user admin on sanitize-payload >$REDIS_PASSWORD ~* &* +@all" >> /etc/redis/users.acl
|
||||
print_status "Created admin user in ACL"
|
||||
fi
|
||||
|
||||
# Restart Redis to apply ACL
|
||||
print_info "Restarting Redis to apply ACL configuration..."
|
||||
systemctl restart redis-server
|
||||
sleep 3
|
||||
sleep 5
|
||||
|
||||
# Verify admin can connect
|
||||
local max_retries=3
|
||||
local retry=0
|
||||
local admin_works=false
|
||||
|
||||
while [ $retry -lt $max_retries ]; do
|
||||
if redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ping >/dev/null 2>&1; then
|
||||
admin_works=true
|
||||
break
|
||||
fi
|
||||
print_info "Waiting for Redis to be ready... (attempt $((retry + 1))/$max_retries)"
|
||||
sleep 2
|
||||
retry=$((retry + 1))
|
||||
done
|
||||
|
||||
if [ "$admin_works" = false ]; then
|
||||
print_error "Failed to verify admin connection after Redis restart"
|
||||
print_error "Redis ACL configuration may have issues"
|
||||
|
||||
# Try to fix by disabling ACL and using requirepass instead
|
||||
print_warning "Attempting fallback: using requirepass instead of ACL..."
|
||||
sed -i 's/^aclfile/# aclfile/' /etc/redis/redis.conf
|
||||
sed -i "s/^# requirepass .*/requirepass $REDIS_PASSWORD/" /etc/redis/redis.conf
|
||||
if ! grep -q "^requirepass" /etc/redis/redis.conf; then
|
||||
echo "requirepass $REDIS_PASSWORD" >> /etc/redis/redis.conf
|
||||
fi
|
||||
systemctl restart redis-server
|
||||
sleep 3
|
||||
|
||||
# Test requirepass
|
||||
if redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" --no-auth-warning ping >/dev/null 2>&1; then
|
||||
print_status "Fallback successful - using requirepass authentication"
|
||||
# For requirepass, we don't use username
|
||||
REDIS_USER=""
|
||||
else
|
||||
print_error "Fallback also failed - Redis authentication is broken"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
print_status "Redis ACL configuration successful"
|
||||
fi
|
||||
elif [ -z "$admin_password" ]; then
|
||||
print_error "Redis requires authentication but no valid admin credentials found"
|
||||
print_error "Please check /etc/redis/users.acl or /etc/redis/redis.conf"
|
||||
print_info "Manual fix: Reset Redis authentication or provide admin credentials"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Create instance-specific Redis user
|
||||
print_info "Creating Redis user: $REDIS_USER"
|
||||
|
||||
# Try to authenticate with admin (may already exist from another instance)
|
||||
local acl_result
|
||||
acl_result=$(redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1)
|
||||
|
||||
if [ "$acl_result" = "OK" ] || echo "$acl_result" | grep -q "OK"; then
|
||||
print_status "Redis user created successfully"
|
||||
redis-cli -h 127.0.0.1 -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE > /dev/null 2>&1
|
||||
# Create instance-specific Redis user (only if using ACL)
|
||||
if [ -n "$REDIS_USER" ]; then
|
||||
print_info "Creating Redis user: $REDIS_USER"
|
||||
|
||||
local acl_result=""
|
||||
if [ -n "$REDIS_PASSWORD" ]; then
|
||||
# Try to create user with ACL
|
||||
acl_result=$(redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1)
|
||||
else
|
||||
# Try without authentication (for legacy setups)
|
||||
acl_result=$(redis-cli -h localhost -p 6379 ACL SETUSER "$REDIS_USER" on ">${REDIS_USER_PASSWORD}" ~* +@all 2>&1)
|
||||
fi
|
||||
|
||||
if echo "$acl_result" | grep -q "OK"; then
|
||||
print_status "Redis user created successfully"
|
||||
|
||||
# Save ACL users
|
||||
if [ -n "$REDIS_PASSWORD" ]; then
|
||||
redis-cli -h localhost -p 6379 --user admin --pass "$REDIS_PASSWORD" --no-auth-warning ACL SAVE >/dev/null 2>&1
|
||||
else
|
||||
redis-cli -h localhost -p 6379 ACL SAVE >/dev/null 2>&1
|
||||
fi
|
||||
print_status "Redis ACL saved"
|
||||
|
||||
# Verify user can connect
|
||||
if redis-cli -h localhost -p 6379 --user "$REDIS_USER" --pass "$REDIS_USER_PASSWORD" --no-auth-warning -n "$REDIS_DB" ping >/dev/null 2>&1; then
|
||||
print_status "Redis user verified and working"
|
||||
else
|
||||
print_warning "Redis user created but verification failed"
|
||||
fi
|
||||
else
|
||||
print_error "Failed to create Redis user: $acl_result"
|
||||
print_warning "Will use requirepass mode instead of per-user ACL"
|
||||
REDIS_USER=""
|
||||
REDIS_USER_PASSWORD="$REDIS_PASSWORD"
|
||||
fi
|
||||
else
|
||||
print_warning "Could not create Redis user with ACL, trying without authentication..."
|
||||
# Fallback for systems without ACL configured
|
||||
redis-cli -h 127.0.0.1 -p 6379 CONFIG SET requirepass "$REDIS_USER_PASSWORD" > /dev/null 2>&1 || true
|
||||
print_info "Using requirepass authentication (single password, no user-specific ACL)"
|
||||
REDIS_USER_PASSWORD="$REDIS_PASSWORD"
|
||||
fi
|
||||
|
||||
# Backup existing .env
|
||||
@@ -2128,18 +2595,27 @@ update_redis_configuration() {
|
||||
|
||||
# Add Redis configuration to .env
|
||||
print_info "Adding Redis configuration to .env..."
|
||||
|
||||
# Use correct password variable
|
||||
local redis_pass_for_env="${REDIS_USER_PASSWORD:-$REDIS_PASSWORD}"
|
||||
|
||||
cat >> "$instance_dir/backend/.env" << EOF
|
||||
|
||||
# Redis Configuration (added during update)
|
||||
# Redis Configuration (added during update on $(date))
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_USER=$REDIS_USER
|
||||
REDIS_PASSWORD=$REDIS_USER_PASSWORD
|
||||
REDIS_PASSWORD=$redis_pass_for_env
|
||||
REDIS_DB=$REDIS_DB
|
||||
EOF
|
||||
|
||||
print_status "Redis configuration added to .env"
|
||||
print_info "Redis User: $REDIS_USER"
|
||||
|
||||
if [ -n "$REDIS_USER" ]; then
|
||||
print_info "Redis Mode: ACL with user '$REDIS_USER'"
|
||||
else
|
||||
print_info "Redis Mode: requirepass (legacy single-password auth)"
|
||||
fi
|
||||
print_info "Redis Database: $REDIS_DB"
|
||||
|
||||
return 0
|
||||
@@ -2543,11 +3019,81 @@ update_installation() {
|
||||
print_info "Building frontend..."
|
||||
npm run build
|
||||
|
||||
# Run database migrations and generate Prisma client
|
||||
# Run database migrations with self-healing
|
||||
print_info "Running database migrations..."
|
||||
cd "$instance_dir/backend"
|
||||
|
||||
# Generate Prisma client first
|
||||
npx prisma generate
|
||||
npx prisma migrate deploy
|
||||
|
||||
local max_attempts=3
|
||||
local attempt=1
|
||||
local migration_success=false
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
print_info "Migration attempt $attempt of $max_attempts..."
|
||||
|
||||
# Try to run migrations
|
||||
local migrate_output
|
||||
migrate_output=$(npx prisma migrate deploy 2>&1 || echo "MIGRATION_FAILED")
|
||||
|
||||
# Check if migration succeeded
|
||||
if ! echo "$migrate_output" | grep -q "MIGRATION_FAILED\|Error:\|P3009"; then
|
||||
print_status "Migrations completed successfully"
|
||||
migration_success=true
|
||||
break
|
||||
fi
|
||||
|
||||
# Check specifically for P3009 (failed migrations found)
|
||||
if echo "$migrate_output" | grep -q "P3009\|migrate found failed migrations"; then
|
||||
print_warning "Detected failed migrations (P3009 error)"
|
||||
|
||||
# Extract the failed migration name if possible
|
||||
local failed_migration
|
||||
failed_migration=$(echo "$migrate_output" | grep -oP "The \`\K[^\`]+" | head -1 || echo "")
|
||||
|
||||
if [ -n "$failed_migration" ]; then
|
||||
print_info "Failed migration identified: $failed_migration"
|
||||
fi
|
||||
|
||||
# Attempt to fix failed migrations
|
||||
print_info "Attempting to self-heal migration issues..."
|
||||
if fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "$DB_HOST"; then
|
||||
print_status "Migration issues resolved, retrying..."
|
||||
attempt=$((attempt + 1))
|
||||
sleep 2
|
||||
continue
|
||||
else
|
||||
print_error "Failed to resolve migration issues"
|
||||
print_warning "Attempting alternative resolution method..."
|
||||
|
||||
# Alternative: Mark migration as completed if tables exist
|
||||
print_info "Checking if migration changes are already applied..."
|
||||
PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c \
|
||||
"UPDATE _prisma_migrations SET finished_at = NOW(), logs = 'Manually resolved by update script' WHERE migration_name = '$failed_migration' AND finished_at IS NULL;" >/dev/null 2>&1
|
||||
|
||||
attempt=$((attempt + 1))
|
||||
sleep 2
|
||||
continue
|
||||
fi
|
||||
else
|
||||
# Other migration error
|
||||
print_error "Migration failed with error:"
|
||||
echo "$migrate_output" | grep -A 10 "Error:"
|
||||
|
||||
# Show helpful information
|
||||
print_info "Migration status:"
|
||||
npx prisma migrate status 2>&1 || true
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$migration_success" = false ]; then
|
||||
print_error "Migrations failed after $max_attempts attempts"
|
||||
print_warning "The update will continue, but you may need to manually resolve migration issues"
|
||||
print_info "Check migrations: cd $instance_dir/backend && npx prisma migrate status"
|
||||
print_info "View failed migrations: PGPASSWORD=\"$DB_PASS\" psql -h \"$DB_HOST\" -U \"$DB_USER\" -d \"$DB_NAME\" -c \"SELECT * FROM _prisma_migrations WHERE finished_at IS NULL;\""
|
||||
fi
|
||||
|
||||
# Check and update Redis configuration if needed (for legacy installations)
|
||||
update_redis_configuration
|
||||
@@ -2563,7 +3109,7 @@ update_installation() {
|
||||
systemctl start "$service_name"
|
||||
|
||||
# Wait a moment and check status
|
||||
sleep 3
|
||||
sleep 5
|
||||
|
||||
if systemctl is-active --quiet "$service_name"; then
|
||||
print_success "✅ Update completed successfully!"
|
||||
@@ -2582,6 +3128,43 @@ update_installation() {
|
||||
echo ""
|
||||
else
|
||||
print_error "Service failed to start after update"
|
||||
echo ""
|
||||
|
||||
# Show last 25 lines of service logs for debugging
|
||||
print_warning "=== Last 25 lines of service logs ==="
|
||||
journalctl -u "$service_name" -n 25 --no-pager || true
|
||||
print_warning "==================================="
|
||||
echo ""
|
||||
|
||||
# Check for specific error patterns
|
||||
local logs=$(journalctl -u "$service_name" -n 50 --no-pager 2>/dev/null || echo "")
|
||||
|
||||
if echo "$logs" | grep -q "WRONGPASS\|NOAUTH"; then
|
||||
print_error "❌ Detected Redis authentication error!"
|
||||
print_info "The service cannot authenticate with Redis."
|
||||
echo ""
|
||||
print_info "Current Redis configuration in .env:"
|
||||
grep "^REDIS_" "$instance_dir/backend/.env" || true
|
||||
echo ""
|
||||
print_info "Quick fix - Try reconfiguring Redis:"
|
||||
print_info " 1. Check Redis ACL users:"
|
||||
print_info " redis-cli ACL LIST"
|
||||
echo ""
|
||||
print_info " 2. Test Redis connection with credentials from .env:"
|
||||
local test_user=$(grep "^REDIS_USER=" "$instance_dir/backend/.env" | cut -d'=' -f2)
|
||||
local test_pass=$(grep "^REDIS_PASSWORD=" "$instance_dir/backend/.env" | cut -d'=' -f2)
|
||||
local test_db=$(grep "^REDIS_DB=" "$instance_dir/backend/.env" | cut -d'=' -f2)
|
||||
print_info " redis-cli --user $test_user --pass $test_pass -n ${test_db:-0} ping"
|
||||
echo ""
|
||||
elif echo "$logs" | grep -q "ECONNREFUSED"; then
|
||||
print_error "❌ Detected connection refused error!"
|
||||
print_info "Check if required services are running:"
|
||||
print_info " systemctl status postgresql"
|
||||
print_info " systemctl status redis-server"
|
||||
elif echo "$logs" | grep -q "Error:"; then
|
||||
print_error "❌ Application error detected in logs"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
print_warning "ROLLBACK INSTRUCTIONS:"
|
||||
print_info "1. Restore code:"
|
||||
@@ -2594,7 +3177,7 @@ update_installation() {
|
||||
print_info "3. Restart service:"
|
||||
print_info " sudo systemctl start $service_name"
|
||||
echo ""
|
||||
print_info "Check logs: journalctl -u $service_name -f"
|
||||
print_info "View full logs: journalctl -u $service_name -f"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
715
tools/diagnostics.sh
Executable file
715
tools/diagnostics.sh
Executable file
@@ -0,0 +1,715 @@
|
||||
#!/bin/bash
|
||||
# PatchMon Diagnostics Collection Script
|
||||
# Collects system information, logs, and configuration for troubleshooting
|
||||
# Usage: sudo bash diagnostics.sh [instance-name]
|
||||
|
||||
# Note: Not using 'set -e' because we want to continue even if some commands fail
|
||||
set -o pipefail
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Print functions
|
||||
# Print a success line: green text prefixed with a check mark.
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
print_status() {
    local message=$1
    echo -e "${GREEN}✅ ${message}${NC}"
}
|
||||
|
||||
# Print an informational line in blue with an info-symbol prefix.
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
print_info() {
    local message=$1
    echo -e "${BLUE}ℹ️ ${message}${NC}"
}
|
||||
|
||||
# Print an error line in red with a cross-mark prefix (written to stdout;
# callers redirect to stderr themselves where needed).
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
print_error() {
    local message=$1
    echo -e "${RED}❌ ${message}${NC}"
}
|
||||
|
||||
# Print a warning line in yellow with a warning-sign prefix.
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
print_warning() {
    local message=$1
    echo -e "${YELLOW}⚠️ ${message}${NC}"
}
|
||||
|
||||
# Print a final success line in green with a celebration prefix.
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
print_success() {
    local message=$1
    echo -e "${GREEN}🎉 ${message}${NC}"
}
|
||||
|
||||
# Check if running as root
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
print_error "This script must be run as root"
|
||||
print_info "Please run: sudo bash $0"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Function to sanitize sensitive information
|
||||
# Redact credentials from a block of text before it is written to the report.
#
# Arguments: $1 - text to sanitize (may span many lines)
# Outputs:   the text on stdout with secrets replaced by [REDACTED]
#
# Rules, applied to every line in this order:
#   1. NAME=value where NAME ends in PASSWORD/SECRET/TOKEN/KEY/PASS
#      (case-insensitive) and the value is not double-quoted
#   2. the same names with a double-quoted value
#   3. JSON-style "password": "value" pairs
#   4. Redis ACL ">password" tokens of 20+ base64-ish characters
#   5-7. postgresql://, mysql:// and mongodb:// URLs with user:password@
#   8. any other scheme://user:password@ URL (postgres://, redis://, amqp://,
#      ...), including an empty user as in redis://:password@host
sanitize_sensitive() {
    local input="$1"

    # printf rather than echo: echo would swallow input that starts with -n/-e.
    # A single sed with several -e expressions applies them sequentially per
    # line, exactly like a chain of piped sed processes.
    printf '%s\n' "$input" | sed -E \
        -e 's/(PASSWORD|SECRET|TOKEN|KEY|PASS)=[^"]*$/\1=[REDACTED]/gi' \
        -e 's/(PASSWORD|SECRET|TOKEN|KEY|PASS)="[^"]*"/\1="[REDACTED]"/gi' \
        -e 's/(password|secret|token|key|pass)": *"[^"]*"/\1": "[REDACTED]"/gi' \
        -e 's/(>)[a-zA-Z0-9+\/=]{20,}/\1[REDACTED]/g' \
        -e 's|postgresql://([^:]+):([^@]+)@|postgresql://\1:[REDACTED]@|g' \
        -e 's|mysql://([^:]+):([^@]+)@|mysql://\1:[REDACTED]@|g' \
        -e 's|mongodb://([^:]+):([^@]+)@|mongodb://\1:[REDACTED]@|g' \
        -e 's|([a-zA-Z][a-zA-Z0-9+.-]*://[^:/@[:space:]]*):[^@/[:space:]]+@|\1:[REDACTED]@|g'
}
|
||||
|
||||
# Function to detect PatchMon installations
|
||||
# List PatchMon installations found directly under a base directory.
#
# A sub-directory counts as an installation when it contains
# backend/package.json mentioning "patchmon"; directories whose name contains
# ".backup." are skipped.
#
# Arguments: $1 - base directory to scan (optional, defaults to /opt)
# Outputs:   installation names on stdout as one space-separated line;
#            diagnostics go to stderr so callers capturing stdout only ever
#            receive names
# Returns:   1 when the base directory does not exist, 0 otherwise
detect_installations() {
    local base_dir="${1:-/opt}"
    local -a installations=()
    local dir dirname

    if [ ! -d "$base_dir" ]; then
        # stderr: stdout is captured by the caller as the installation list
        print_error "$base_dir directory does not exist" >&2
        return 1
    fi

    for dir in "$base_dir"/*/; do
        # An unmatched glob stays literal; skip it
        [ -d "$dir" ] || continue

        dirname=$(basename "$dir")

        # Skip backup directories
        if [[ "$dirname" =~ \.backup\. ]]; then
            continue
        fi

        # Check if it's a PatchMon installation ($dir already ends in "/")
        if [ -f "${dir}backend/package.json" ] &&
            grep -q "patchmon" "${dir}backend/package.json" 2>/dev/null; then
            installations+=("$dirname")
        fi
    done

    echo "${installations[*]}"
}
|
||||
|
||||
# Function to select installation
|
||||
# Choose which PatchMon installation to operate on.
#
# Arguments: $1 - instance name (optional); when given it must be one of the
#                 detected installations
# Outputs:   the chosen instance name on stdout; every prompt and status
#            message goes to stderr so that $(select_installation ...) yields
#            only the name
# Exits:     with status 1 when nothing is installed, the requested instance
#            is unknown, or the interactive selection is invalid. When called
#            inside $(...) this ends only the subshell, so callers must check
#            the substitution's exit status.
select_installation() {
    local -a installations=()
    local install

    # detect_installations prints one space-separated line. read -a splits it
    # into words without the pathname expansion an unquoted $(...) would get.
    read -r -a installations <<< "$(detect_installations)"

    if [ ${#installations[@]} -eq 0 ]; then
        print_error "No PatchMon installations found in /opt" >&2
        exit 1
    fi

    if [ -n "$1" ]; then
        # Use provided instance name (exact match against each entry)
        for install in "${installations[@]}"; do
            if [ "$install" = "$1" ]; then
                echo "$1"
                return 0
            fi
        done
        print_error "Instance '$1' not found" >&2
        exit 1
    fi

    # Send status messages to stderr so they don't contaminate the return value
    print_info "Found ${#installations[@]} installation(s):" >&2
    echo "" >&2

    local i=1
    local status
    local -A install_map=()
    for install in "${installations[@]}"; do
        # Service status: running, stopped (enabled but inactive) or unknown
        status="unknown"
        if systemctl is-active --quiet "$install" 2>/dev/null; then
            status="${GREEN}running${NC}"
        elif systemctl is-enabled --quiet "$install" 2>/dev/null; then
            status="${RED}stopped${NC}"
        fi

        printf "%2d. %-30s (%b)\n" "$i" "$install" "$status" >&2
        install_map[$i]="$install"
        i=$((i + 1))
    done

    echo "" >&2

    # If only one installation, select it automatically
    if [ ${#installations[@]} -eq 1 ]; then
        print_info "Only one installation found, selecting automatically: ${installations[0]}" >&2
        echo "${installations[0]}"
        return 0
    fi

    # Multiple installations - prompt user on the terminal (stdin may be a pipe)
    local selection=""
    printf '%b' "${BLUE}Select installation number [1]: ${NC}" >&2
    read -r selection </dev/tty

    selection=${selection:-1}

    if [[ "$selection" =~ ^[0-9]+$ ]] && [ -n "${install_map[$selection]}" ]; then
        echo "${install_map[$selection]}"
        return 0
    else
        print_error "Invalid selection" >&2
        exit 1
    fi
}
|
||||
|
||||
# Main script
|
||||
# Print the stdout of a command, or FALLBACK when the command printed nothing.
# The decision is based on output rather than exit status because tools such
# as `systemctl status` / `systemctl is-active` exit non-zero for a stopped
# unit while still printing a useful answer; `cmd || echo fallback` would then
# append the fallback to real output.
# Arguments: $1 - fallback text; $2... - command and its arguments
_diag_or() {
    local fallback="$1"
    shift
    local output
    output=$("$@" 2>/dev/null)
    printf '%s\n' "${output:-$fallback}"
}

# Run redis-cli against the instance's Redis using the REDIS_* values loaded
# from .env: ACL user + password, password only (requirepass), or no auth.
# Arguments: redis-cli options/command to run
_diag_redis() {
    local -a auth=()
    if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then
        auth=(--user "$REDIS_USER" --pass "$REDIS_PASSWORD" --no-auth-warning)
    elif [ -n "$REDIS_PASSWORD" ]; then
        auth=(-a "$REDIS_PASSWORD" --no-auth-warning)
    fi
    redis-cli -h "${REDIS_HOST:-localhost}" -p "${REDIS_PORT:-6379}" "${auth[@]}" "$@"
}

# Run psql with the DB_* values parsed from DATABASE_URL (including the port).
# Arguments: extra psql options, e.g. -c "SQL"
_diag_psql() {
    PGPASSWORD="$DB_PASS" psql -h "${DB_HOST:-localhost}" -p "${DB_PORT:-5432}" \
        -U "$DB_USER" -d "$DB_NAME" "$@"
}

# Collect a single-file diagnostics report for one PatchMon instance.
#
# Arguments: $1 - instance name (optional; prompts when several exist)
# Outputs:   progress and a summary on stdout; the report is written to
#            ./patchmon_diagnostics_<instance>_<timestamp>.txt in the
#            directory the script was started from
# Exits:     1 when no instance can be selected or the report cannot be created
main() {
    # Capture the directory where script is run from at the very start
    ORIGINAL_DIR=$(pwd)

    echo -e "${BLUE}====================================================${NC}"
    echo -e "${BLUE} PatchMon Diagnostics Collection${NC}"
    echo -e "${BLUE}====================================================${NC}"
    echo ""

    # Select instance. select_installation runs in a subshell here, so its
    # `exit 1` cannot stop this script - check the result explicitly.
    instance_name=$(select_installation "$1") || exit 1
    if [ -z "$instance_name" ]; then
        print_error "No instance selected"
        exit 1
    fi
    instance_dir="/opt/$instance_name"
    local env_file="$instance_dir/backend/.env"

    print_info "Selected instance: $instance_name"
    print_info "Directory: $instance_dir"
    echo ""

    # Create single diagnostics file in the original directory
    timestamp=$(date +%Y%m%d_%H%M%S)
    diag_file="${ORIGINAL_DIR}/patchmon_diagnostics_${instance_name}_${timestamp}.txt"

    print_info "Collecting diagnostics to: $diag_file"
    echo ""

    # Initialize the diagnostics file with header
    if ! cat > "$diag_file" << EOF
===================================================
PatchMon Diagnostics Report
===================================================
Instance: $instance_name
Generated: $(date)
Hostname: $(hostname)
Generated from: ${ORIGINAL_DIR}
===================================================

EOF
    then
        print_error "Cannot write diagnostics file: $diag_file"
        exit 1
    fi

    # ========================================
    # 1. System Information
    # ========================================
    print_info "Collecting system information..."

    cat >> "$diag_file" << EOF
=== System Information ===
OS: $(grep PRETTY_NAME /etc/os-release 2>/dev/null | cut -d'"' -f2 || echo "Unknown")
Kernel: $(uname -r)
Uptime: $(uptime)

=== CPU Information ===
$(lscpu 2>/dev/null | grep -E "Model name|CPU\(s\)|Thread|Core" || echo "Not available")

=== Memory Information ===
$(free -h)

=== Disk Usage ===
$(df -h | grep -E "Filesystem|/dev/|/opt")

=== Network Interfaces ===
$(_diag_or "Not available" ip -br addr)

===================================================
EOF

    # ========================================
    # 2. PatchMon Instance Information
    # ========================================
    print_info "Collecting instance information..."

    cat >> "$diag_file" << EOF

=== PatchMon Instance Information ===

=== Directory Structure ===
$(ls -lah "$instance_dir" 2>/dev/null || echo "Cannot access directory")

=== Backend Package Info ===
$(grep -E "name|version" "$instance_dir/backend/package.json" 2>/dev/null || echo "Not found")

=== Frontend Package Info ===
$(grep -E "name|version" "$instance_dir/frontend/package.json" 2>/dev/null || echo "Not found")

=== Deployment Info ===
$(sanitize_sensitive "$(cat "$instance_dir/deployment-info.txt" 2>/dev/null || echo "No deployment-info.txt found")")

===================================================
EOF

    # ========================================
    # 3. Environment Configuration (Sanitized)
    # ========================================
    print_info "Collecting environment configuration (sanitized)..."

    echo "" >> "$diag_file"
    echo "=== Backend Environment Configuration (Sanitized) ===" >> "$diag_file"
    if [ -f "$env_file" ]; then
        sanitize_sensitive "$(cat "$env_file")" >> "$diag_file"
    else
        echo "Backend .env file not found" >> "$diag_file"
    fi
    echo "" >> "$diag_file"

    # ========================================
    # 4. Service Status and Configuration
    # ========================================
    print_info "Collecting service information..."

    # Status output embeds recent log lines and the unit file may carry
    # Environment= secrets, so both pass through the sanitizer.
    cat >> "$diag_file" << EOF

=== Service Status and Configuration ===

=== Service Status ===
$(sanitize_sensitive "$(_diag_or "Service not found" systemctl status "$instance_name")")

=== Service File ===
$(sanitize_sensitive "$(cat "/etc/systemd/system/${instance_name}.service" 2>/dev/null || echo "Service file not found")")

=== Service is-enabled ===
$(_diag_or "unknown" systemctl is-enabled "$instance_name")

=== Service is-active ===
$(_diag_or "unknown" systemctl is-active "$instance_name")

===================================================
EOF

    # ========================================
    # 5. Service Logs
    # ========================================
    print_info "Collecting service logs..."

    echo "" >> "$diag_file"
    echo "=== Service Logs (last 500 lines) ===" >> "$diag_file"
    local service_logs
    if service_logs=$(journalctl -u "$instance_name" -n 500 --no-pager 2>&1); then
        sanitize_sensitive "$service_logs" >> "$diag_file"
    else
        [ -n "$service_logs" ] && sanitize_sensitive "$service_logs" >> "$diag_file"
        echo "Could not retrieve service logs" >> "$diag_file"
    fi
    echo "" >> "$diag_file"

    # ========================================
    # 6. Nginx Configuration
    # ========================================
    print_info "Collecting nginx configuration..."

    cat >> "$diag_file" << EOF

=== Nginx Configuration ===

=== Nginx Status ===
$(_diag_or "Nginx not found" systemctl status nginx | head -20)

=== Site Configuration ===
$(cat "/etc/nginx/sites-available/$instance_name" 2>/dev/null || echo "Nginx config not found")

=== Nginx Error Log (last 100 lines) ===
$(sanitize_sensitive "$(tail -100 /var/log/nginx/error.log 2>/dev/null || echo "Error log not accessible")")

=== Nginx Access Log (last 50 lines) ===
$(sanitize_sensitive "$(tail -50 /var/log/nginx/access.log 2>/dev/null || echo "Access log not accessible")")

=== Nginx Test ===
$(nginx -t 2>&1 || echo "Nginx test failed")

===================================================
EOF

    # Load .env once; both the database and the Redis sections read from it.
    local env_loaded=false
    if [ -f "$env_file" ]; then
        set -a
        # shellcheck disable=SC1090
        source "$env_file"
        set +a
        env_loaded=true
    fi

    # ========================================
    # 7. Database Connection Test
    # ========================================
    print_info "Testing database connection..."

    echo "" >> "$diag_file"
    echo "=== Database Information ===" >> "$diag_file"
    echo "" >> "$diag_file"

    DB_USER="" DB_PASS="" DB_HOST="" DB_PORT="" DB_NAME=""
    # scheme://user[:password]@host[:port]/database[?params]
    local db_url_re='^postgres(ql)?://([^:@/]*)(:([^@]*))?@([^:/?]+)(:([0-9]+))?/([^?]*)'

    if [ "$env_loaded" != true ]; then
        echo ".env file not found" >> "$diag_file"
    elif [ -z "$DATABASE_URL" ]; then
        echo "DATABASE_URL not found in .env" >> "$diag_file"
    elif [[ ! "$DATABASE_URL" =~ $db_url_re ]]; then
        echo "DATABASE_URL could not be parsed" >> "$diag_file"
    else
        DB_USER=${BASH_REMATCH[2]}
        DB_PASS=${BASH_REMATCH[4]}
        DB_HOST=${BASH_REMATCH[5]}
        DB_PORT=${BASH_REMATCH[7]:-5432}
        DB_NAME=${BASH_REMATCH[8]}

        cat >> "$diag_file" << EOF
=== Database Connection Details ===
Host: $DB_HOST
Port: $DB_PORT
Database: $DB_NAME
User: $DB_USER

=== PostgreSQL Status ===
$(_diag_or "PostgreSQL status not available" systemctl status postgresql | head -20)

=== Connection Test ===
EOF

        if _diag_psql -c "SELECT version();" >> "$diag_file" 2>&1; then
            echo "✅ Database connection: SUCCESSFUL" >> "$diag_file"
        else
            echo "❌ Database connection: FAILED" >> "$diag_file"
        fi

        # current_database() avoids interpolating the name into the SQL text
        echo "" >> "$diag_file"
        echo "=== Database Size ===" >> "$diag_file"
        _diag_psql -c "
SELECT
pg_size_pretty(pg_database_size(current_database())) as database_size;
" >> "$diag_file" 2>&1 || echo "Could not get database size" >> "$diag_file"

        echo "" >> "$diag_file"
        echo "=== Table Sizes ===" >> "$diag_file"
        _diag_psql -c "
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 10;
" >> "$diag_file" 2>&1 || echo "Could not get table sizes" >> "$diag_file"

        echo "" >> "$diag_file"
        echo "=== Migration Status ===" >> "$diag_file"
        # Subshell: a failed cd is reported and the working directory of the
        # rest of the script is left untouched.
        (cd "$instance_dir/backend" && npx prisma migrate status) >> "$diag_file" 2>&1 ||
            echo "Could not get migration status" >> "$diag_file"

        echo "===================================================" >> "$diag_file"
    fi

    # ========================================
    # 8. Redis Connection Test
    # ========================================
    print_info "Testing Redis connection..."

    if [ "$env_loaded" = true ]; then
        local redis_mode="no auth"
        local redis_fail_label="Redis connection"
        if [ -n "$REDIS_USER" ] && [ -n "$REDIS_PASSWORD" ]; then
            redis_mode="with user"
            redis_fail_label="Redis connection (with user)"
        elif [ -n "$REDIS_PASSWORD" ]; then
            redis_mode="requirepass"
            redis_fail_label="Redis connection (requirepass)"
        fi

        cat >> "$diag_file" << EOF
===================================================
Redis Information
===================================================

=== Redis Connection Details ===
Host: ${REDIS_HOST:-localhost}
Port: ${REDIS_PORT:-6379}
User: ${REDIS_USER:-(none)}
Database: ${REDIS_DB:-0}

=== Redis Status ===
$(_diag_or "Redis status not available" systemctl status redis-server | head -20)

=== Connection Test ===
EOF

        # Require an exact PONG: an authentication warning or error reply
        # must not be reported as a working connection.
        local ping_reply
        ping_reply=$(_diag_redis -n "${REDIS_DB:-0}" ping 2>&1)
        printf '%s\n' "$ping_reply" >> "$diag_file"
        if [ "$ping_reply" = "PONG" ]; then
            echo "✅ Redis connection ($redis_mode): SUCCESSFUL" >> "$diag_file"

            echo "" >> "$diag_file"
            echo "=== Redis INFO ===" >> "$diag_file"
            _diag_redis -n "${REDIS_DB:-0}" INFO >> "$diag_file" 2>&1

            echo "" >> "$diag_file"
            echo "=== Redis Database Size ===" >> "$diag_file"
            _diag_redis -n "${REDIS_DB:-0}" DBSIZE >> "$diag_file" 2>&1
        else
            echo "❌ $redis_fail_label: FAILED" >> "$diag_file"
        fi

        echo "" >> "$diag_file"
        echo "=== Redis ACL Users ===" >> "$diag_file"
        if [ -n "$REDIS_PASSWORD" ]; then
            # ACL LIST prints each user's password as "#<sha256 hex>"; redact it
            _diag_redis ACL LIST 2>&1 |
                sed -E 's/#[0-9a-fA-F]{64}/#[REDACTED]/g' >> "$diag_file"
        fi

        echo "===================================================" >> "$diag_file"
    else
        echo ".env file not found" >> "$diag_file"
    fi

    # ========================================
    # 9. Network and Port Information
    # ========================================
    print_info "Collecting network information..."

    # Get backend port from .env (first PORT= line), defaulting to 3000
    local backend_port
    backend_port=$(grep -m1 '^PORT=' "$env_file" 2>/dev/null | cut -d'=' -f2 | tr -d ' ')
    backend_port=${backend_port:-3000}

    # Computed once and reused in the summary. Counting the captured lines
    # avoids `grep | wc -l || echo 0`, which under pipefail yields "0" twice
    # when nothing matches.
    local backend_conn_lines
    local backend_conn_count=0
    backend_conn_lines=$(ss -tn 2>/dev/null | grep -E ":${backend_port}([[:space:]]|\$)" || true)
    if [ -n "$backend_conn_lines" ]; then
        backend_conn_count=$(printf '%s\n' "$backend_conn_lines" | grep -c '')
    fi

    cat >> "$diag_file" << EOF
===================================================
Network and Port Information
===================================================

=== Listening Ports ===
$(ss -tlnp 2>/dev/null | grep -E "LISTEN|nginx|node|postgres|redis" || netstat -tlnp 2>/dev/null | grep -E "LISTEN|nginx|node|postgres|redis" || echo "Could not get port information")

=== Active Connections ===
$(ss -tn state established 2>/dev/null | head -20 || echo "Could not get connection information")

=== Backend Port Connections (Port $backend_port) ===
Total connections to backend: $backend_conn_count
$(printf '%s\n' "${backend_conn_lines:-No connections found}" | head -10)

=== PostgreSQL Connections ===
EOF

    # Get PostgreSQL connection count
    if [ -n "$DB_PASS" ] && [ -n "$DB_USER" ] && [ -n "$DB_NAME" ]; then
        _diag_psql -c "
SELECT
count(*) as total_connections,
count(*) FILTER (WHERE state = 'active') as active_connections,
count(*) FILTER (WHERE state = 'idle') as idle_connections
FROM pg_stat_activity
WHERE datname = current_database();
" >> "$diag_file" 2>&1 || echo "Could not get PostgreSQL connection stats" >> "$diag_file"

        echo "" >> "$diag_file"
        echo "=== PostgreSQL Connection Details ===" >> "$diag_file"
        _diag_psql -c "
SELECT
pid,
usename,
application_name,
client_addr,
state,
query_start,
state_change
FROM pg_stat_activity
WHERE datname = current_database()
ORDER BY query_start DESC
LIMIT 20;
" >> "$diag_file" 2>&1 || echo "Could not get connection details" >> "$diag_file"
    else
        echo "Database credentials not available" >> "$diag_file"
    fi

    echo "" >> "$diag_file"
    echo "=== Redis Connections ===" >> "$diag_file"

    # Get Redis connection count
    if [ -n "$REDIS_PASSWORD" ]; then
        _diag_redis -n "${REDIS_DB:-0}" INFO clients >> "$diag_file" 2>&1 ||
            echo "Could not get Redis connection info" >> "$diag_file"
    fi

    # 2>/dev/null belongs on iptables itself, not on head
    cat >> "$diag_file" << EOF

=== Firewall Status (UFW) ===
$(ufw status 2>/dev/null || echo "UFW not available")

=== Firewall Status (iptables) ===
$(iptables -L -n 2>/dev/null | head -50 || echo "iptables not available")

===================================================
EOF

    # ========================================
    # 10. Process Information
    # ========================================
    print_info "Collecting process information..."

    cat >> "$diag_file" << EOF
===================================================
Process Information
===================================================

=== PatchMon Node Processes ===
$(ps aux | grep -E "node.*$instance_dir|PID" | grep -v grep || echo "No processes found")

=== Top Processes (CPU) ===
$(ps aux --sort=-%cpu | head -15)

=== Top Processes (Memory) ===
$(ps aux --sort=-%mem | head -15)

===================================================
EOF

    # ========================================
    # 11. SSL Certificate Information
    # ========================================
    print_info "Collecting SSL certificate information..."

    cat >> "$diag_file" << EOF
===================================================
SSL Certificate Information
===================================================

=== Certbot Certificates ===
$(certbot certificates 2>/dev/null || echo "Certbot not available or no certificates")

=== SSL Certificate Files ===
$(ls -lh "/etc/letsencrypt/live/$instance_name/" 2>/dev/null || echo "No SSL certificates found for $instance_name")

===================================================
EOF

    # ========================================
    # 12. Recent System Logs
    # ========================================
    print_info "Collecting recent system logs..."

    echo "" >> "$diag_file"
    echo "=== Recent System Logs (last 200 lines) ===" >> "$diag_file"
    local system_logs
    if system_logs=$(journalctl -n 200 --no-pager 2>&1); then
        sanitize_sensitive "$system_logs" >> "$diag_file"
    else
        [ -n "$system_logs" ] && sanitize_sensitive "$system_logs" >> "$diag_file"
        echo "Could not retrieve system logs" >> "$diag_file"
    fi

    # ========================================
    # 13. Installation Log (if exists)
    # ========================================
    print_info "Collecting installation log..."

    echo "" >> "$diag_file"
    echo "=== Installation Log (last 200 lines) ===" >> "$diag_file"
    if [ -f "$instance_dir/patchmon-install.log" ]; then
        sanitize_sensitive "$(tail -200 "$instance_dir/patchmon-install.log" 2>&1)" >> "$diag_file"
    else
        echo "No installation log found" >> "$diag_file"
    fi
    echo "" >> "$diag_file"

    # ========================================
    # 14. Node.js and npm Information
    # ========================================
    print_info "Collecting Node.js information..."

    cat >> "$diag_file" << EOF
===================================================
Node.js and npm Information
===================================================

=== Node.js Version ===
$(node --version 2>/dev/null || echo "Node.js not found")

=== npm Version ===
$(npm --version 2>/dev/null || echo "npm not found")

=== Backend Dependencies ===
$(cd "$instance_dir/backend" 2>/dev/null && npm list --depth=0 2>/dev/null || echo "Could not list backend dependencies")

===================================================
EOF

    # ========================================
    # Finalize diagnostics file
    # ========================================
    print_info "Finalizing diagnostics file..."

    {
        echo ""
        echo "===================================================="
        echo "END OF DIAGNOSTICS REPORT"
        echo "===================================================="
        echo ""
        echo "IMPORTANT: Sensitive Information"
        echo "Passwords, secrets, and tokens have been sanitized"
        echo "and replaced with [REDACTED]. However, please review"
        echo "before sharing to ensure no sensitive data is included."
        echo "===================================================="
    } >> "$diag_file"

    print_status "Diagnostics file created: $diag_file"

    # ========================================
    # Display summary
    # ========================================
    echo ""
    echo -e "${GREEN}====================================================${NC}"
    echo -e "${GREEN} Diagnostics Collection Complete!${NC}"
    echo -e "${GREEN}====================================================${NC}"
    echo ""

    # Get service statuses and file size. is-active prints its answer even
    # when it exits non-zero, so the fallback is used only for empty output.
    local service_status nginx_status postgres_status redis_status
    service_status=$(_diag_or "unknown" systemctl is-active "$instance_name")
    nginx_status=$(_diag_or "unknown" systemctl is-active nginx)
    postgres_status=$(_diag_or "unknown" systemctl is-active postgresql)
    redis_status=$(_diag_or "unknown" systemctl is-active redis-server)

    local file_size line_count
    file_size=$(du -h "$diag_file" 2>/dev/null | cut -f1)
    file_size=${file_size:-unknown}
    line_count=$(wc -l < "$diag_file" 2>/dev/null)
    line_count=${line_count:-unknown}

    # Get connection counts for summary
    local db_conn_count="N/A"
    if [ -n "$DB_PASS" ] && [ -n "$DB_USER" ] && [ -n "$DB_NAME" ]; then
        db_conn_count=$(_diag_psql -t -A -c "SELECT count(*) FROM pg_stat_activity WHERE datname = current_database();" 2>/dev/null)
        db_conn_count=${db_conn_count:-N/A}
    fi

    local redis_conn_count="N/A"
    if [ -n "$REDIS_PASSWORD" ]; then
        redis_conn_count=$(_diag_redis INFO clients 2>/dev/null | grep "connected_clients:" | cut -d':' -f2 | tr -d '\r')
        redis_conn_count=${redis_conn_count:-N/A}
    fi

    # Compact, copyable summary
    echo -e "${BLUE}═══════════════════════════════════════════════════${NC}"
    echo -e "${BLUE}DIAGNOSTICS SUMMARY (copy-paste friendly)${NC}"
    echo -e "${BLUE}═══════════════════════════════════════════════════${NC}"
    echo "Instance: $instance_name"
    echo "File: $diag_file"
    echo "Size: $file_size ($line_count lines)"
    echo "Generated: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "---"
    echo "Service Status: $service_status"
    echo "Nginx Status: $nginx_status"
    echo "PostgreSQL: $postgres_status"
    echo "Redis: $redis_status"
    echo "---"
    echo "Backend Port: $backend_port (Active Connections: $backend_conn_count)"
    echo "Database Connections: $db_conn_count"
    echo "Redis Connections: $redis_conn_count"
    echo "---"
    echo "View: cat $(basename "$diag_file")"
    echo "Or: less $(basename "$diag_file")"
    echo "Share: Send $(basename "$diag_file") to support"
    echo -e "${BLUE}═══════════════════════════════════════════════════${NC}"
    echo ""
    print_warning "Review file before sharing - sensitive data has been sanitized"
    echo ""

    print_success "Done!"
}
|
||||
|
||||
# Run main function
|
||||
main "$@"
|
||||
|
||||
286
tools/fix-migrations.sh
Executable file
286
tools/fix-migrations.sh
Executable file
@@ -0,0 +1,286 @@
|
||||
#!/bin/bash
#
# PatchMon Migration Fixer
# Standalone script to detect and fix failed Prisma migrations
# Usage: sudo bash fix-migrations.sh [instance-name]

# Abort on the first unhandled command failure
set -e

# Colors for output. The values are stored as literal backslash escapes, so
# they only take effect when printed with an escape-interpreting command
# (echo -e or printf '%b').
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
|
||||
|
||||
# Print functions
|
||||
# Print a success message in green, prefixed with a check mark.
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_status() {
    printf '%b\n' "${GREEN}✅ $1${NC}"
}
|
||||
|
||||
# Print an informational message in blue, prefixed with an info icon.
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_info() {
    printf '%b\n' "${BLUE}ℹ️ $1${NC}"
}
|
||||
|
||||
# Print an error message in red, prefixed with a cross mark.
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_error() {
    printf '%b\n' "${RED}❌ $1${NC}"
}
|
||||
|
||||
# Print a warning message in yellow, prefixed with a warning sign.
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_warning() {
    printf '%b\n' "${YELLOW}⚠️ $1${NC}"
}
|
||||
|
||||
# Check if running as root: refuse to continue for any other user and show
# the sudo command line to use instead.
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root"
    print_info "Please run: sudo bash $0"
    exit 1
fi
|
||||
|
||||
# Function to detect PatchMon installations
#
# Scans the immediate sub-directories of a base directory and reports every
# one whose backend/package.json mentions "patchmon". Directories whose name
# contains ".backup." are ignored.
#
# Arguments: $1 - base directory to scan (optional, defaults to /opt)
# Outputs:   the matching directory names on a single line, separated by
#            spaces (an empty line when nothing matches)
detect_installations() {
    local base_dir="${1:-/opt}"
    local -a installations=()
    local dir dirname

    if [ -d "$base_dir" ]; then
        for dir in "$base_dir"/*/; do
            # Directory name without the trailing slash and parent path
            dirname="${dir%/}"
            dirname="${dirname##*/}"

            # Skip backup directories
            if [[ "$dirname" =~ \.backup\. ]]; then
                continue
            fi

            # Check if it's a PatchMon installation
            if [ -f "${dir}backend/package.json" ] && grep -q "patchmon" "${dir}backend/package.json" 2>/dev/null; then
                installations+=("$dirname")
            fi
        done
    fi

    echo "${installations[@]}"
}
|
||||
|
||||
# Function to select installation
#
# Resolves which PatchMon installation to work on. The caller captures this
# function's stdout with $(...), so ONLY the chosen instance name is written
# to stdout; the menu, the prompt and every error message go to stderr so
# they stay visible on the terminal and never pollute the captured value.
#
# Arguments: $1 - instance name (optional). When given it must match one of
#                 the detected installations exactly; when empty the user is
#                 asked to choose from a numbered list (default: 1).
# Outputs:   the selected instance name on stdout
# Returns:   0 on success; exits with status 1 when nothing is installed,
#            the requested instance is unknown, or the selection is invalid
select_installation() {
    local requested="${1:-}"
    local -a installations
    local candidate selection i

    # detect_installations prints the names space-separated on one line, so
    # word splitting is intentional here.
    # shellcheck disable=SC2207
    installations=($(detect_installations))

    if [ ${#installations[@]} -eq 0 ]; then
        print_error "No PatchMon installations found in /opt" >&2
        exit 1
    fi

    if [ -n "$requested" ]; then
        # Use provided instance name (exact match against each entry)
        for candidate in "${installations[@]}"; do
            if [ "$candidate" = "$requested" ]; then
                echo "$requested"
                return 0
            fi
        done
        print_error "Instance '$requested' not found" >&2
        exit 1
    fi

    # Interactive menu: everything in this group is UI, hence stderr
    {
        print_info "Found ${#installations[@]} installation(s):"
        echo ""
        i=1
        for candidate in "${installations[@]}"; do
            printf "%2d. %s\n" "$i" "$candidate"
            i=$((i + 1))
        done
        echo ""
        echo -n -e "${BLUE}Select installation number [1]: ${NC}"
    } >&2

    # EOF on stdin is treated like pressing Enter (take the default)
    read -r selection || selection=""
    selection=${selection:-1}

    # 10# forces base 10 so inputs such as "08" are not parsed as octal
    if [[ "$selection" =~ ^[0-9]+$ ]] \
        && ((10#$selection >= 1 && 10#$selection <= ${#installations[@]})); then
        echo "${installations[$((10#$selection - 1))]}"
        return 0
    fi

    print_error "Invalid selection" >&2
    exit 1
}
|
||||
|
||||
# Run a SQL script against the database with one migration name bound as the
# psql variable "migration" (referenced in the SQL as :'migration'). The SQL
# is fed on stdin because psql does not interpolate variables in -c commands;
# ON_ERROR_STOP makes psql return non-zero as soon as a statement fails.
#
# Arguments: $1 - database name   $2 - database user   $3 - database password
#            $4 - database host   $5 - migration name  $6 - SQL text
# Returns:   psql's exit status
_fix_migrations_run_sql() {
    PGPASSWORD="$3" psql -h "$4" -U "$2" -d "$1" \
        -v ON_ERROR_STOP=1 -v migration="$5" >/dev/null 2>&1 <<< "$6"
}

# Function to check and fix failed migrations
#
# Lists the rows of _prisma_migrations that were started but never finished
# and asks the user what to do with them:
#   1 - mark them rolled back and delete them so the migration can be retried
#   2 - mark them as finished
#   3 - only display their details
#   4 - cancel
#
# Arguments: $1 - database name
#            $2 - database user
#            $3 - database password
#            $4 - database host (optional, defaults to localhost)
# Returns:   0 when there is nothing to fix, the lookup could not be done, or
#            the chosen action succeeded; 1 on cancel, invalid option, or
#            when any database statement failed
fix_failed_migrations() {
    local db_name="$1"
    local db_user="$2"
    local db_pass="$3"
    local db_host="${4:-localhost}"
    local failed_migrations migration option sql
    local failures=0

    print_info "Checking for failed migrations in database..."

    # Query for failed migrations. A failing query (for example on a database
    # that has never been migrated) must not be reported as "nothing failed".
    if ! failed_migrations=$(PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -t -A -c \
        "SELECT migration_name FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;" 2>/dev/null); then
        print_warning "Could not query _prisma_migrations - skipping failed migration check"
        return 0
    fi

    if [ -z "$failed_migrations" ]; then
        print_status "No failed migrations found"
        return 0
    fi

    print_warning "Found failed migration(s):"
    while IFS= read -r migration; do
        [ -n "$migration" ] && print_warning "  - $migration"
    done <<< "$failed_migrations"
    echo ""

    print_info "What would you like to do?"
    echo "  1. Clean and retry (delete failed records and re-run migration)"
    echo "  2. Mark as completed (if schema changes are already applied)"
    echo "  3. Show migration details only"
    echo "  4. Cancel"
    echo ""
    echo -n -e "${BLUE}Select option [1]: ${NC}"
    # EOF on stdin is treated like pressing Enter (take the default)
    read -r option || option=""

    option=${option:-1}

    case "$option" in
        1)
            print_info "Cleaning failed migrations and preparing for retry..."
            # Mark as rolled back, then delete the failed record
            sql="UPDATE _prisma_migrations SET rolled_back_at = NOW() WHERE migration_name = :'migration' AND finished_at IS NULL;
DELETE FROM _prisma_migrations WHERE migration_name = :'migration' AND finished_at IS NULL;"
            while IFS= read -r migration; do
                [ -n "$migration" ] || continue
                print_info "Processing: $migration"
                if _fix_migrations_run_sql "$db_name" "$db_user" "$db_pass" "$db_host" "$migration" "$sql"; then
                    print_status "Cleared: $migration"
                else
                    print_error "Failed to clear: $migration"
                    failures=$((failures + 1))
                fi
            done <<< "$failed_migrations"

            if [ "$failures" -gt 0 ]; then
                print_error "$failures migration(s) could not be cleared"
                return 1
            fi
            print_status "Failed migrations cleared - ready to retry"
            return 0
            ;;
        2)
            print_info "Marking migrations as completed..."
            sql="UPDATE _prisma_migrations SET finished_at = NOW(), logs = 'Manually resolved by fix-migrations.sh' WHERE migration_name = :'migration' AND finished_at IS NULL;"
            while IFS= read -r migration; do
                [ -n "$migration" ] || continue
                print_info "Marking as complete: $migration"
                if _fix_migrations_run_sql "$db_name" "$db_user" "$db_pass" "$db_host" "$migration" "$sql"; then
                    print_status "Marked complete: $migration"
                else
                    print_error "Failed to mark complete: $migration"
                    failures=$((failures + 1))
                fi
            done <<< "$failed_migrations"

            if [ "$failures" -gt 0 ]; then
                print_error "$failures migration(s) could not be marked as completed"
                return 1
            fi
            print_status "All migrations marked as completed"
            return 0
            ;;
        3)
            print_info "Migration details:"
            if ! PGPASSWORD="$db_pass" psql -h "$db_host" -U "$db_user" -d "$db_name" -c \
                "SELECT migration_name, started_at, finished_at, rolled_back_at, logs FROM _prisma_migrations WHERE finished_at IS NULL AND started_at IS NOT NULL;"; then
                print_error "Could not read migration details"
                return 1
            fi
            return 0
            ;;
        4)
            print_info "Cancelled"
            return 1
            ;;
        *)
            print_error "Invalid option"
            return 1
            ;;
    esac
}
|
||||
|
||||
# Main script
#
# Picks the PatchMon instance, loads its backend/.env, derives the database
# connection settings from DATABASE_URL, verifies the connection, shows the
# Prisma migration status, lets the user repair failed migrations and finally
# offers to run "npx prisma migrate deploy".
#
# Arguments: $1 - instance name (optional; prompts when omitted)
# Globals:   exports PGPORT so that every psql call, including the ones made
#            by fix_failed_migrations, uses the port found in DATABASE_URL
main() {
    local instance_name instance_dir run_migrate
    local url_rest credentials location host_port db_path
    local DB_USER DB_PASS DB_HOST DB_PORT DB_NAME

    echo -e "${BLUE}====================================================${NC}"
    echo -e "${BLUE}        PatchMon Migration Fixer${NC}"
    echo -e "${BLUE}====================================================${NC}"
    echo ""

    # Select instance
    instance_name=$(select_installation "${1:-}") || exit 1
    instance_dir="/opt/$instance_name"

    print_info "Selected instance: $instance_name"
    print_info "Directory: $instance_dir"
    echo ""

    # Load .env to get database credentials
    if [ ! -f "$instance_dir/backend/.env" ]; then
        print_error "Cannot find .env file at $instance_dir/backend/.env"
        exit 1
    fi

    # Source .env (set -a exports everything it defines, e.g. for npx prisma)
    set -a
    # shellcheck disable=SC1090,SC1091
    source "$instance_dir/backend/.env"
    set +a

    # Parse DATABASE_URL
    if [ -z "$DATABASE_URL" ]; then
        print_error "DATABASE_URL not found in .env file"
        exit 1
    fi

    # Expected shape: scheme://user:password@host[:port]/database[?params]
    url_rest="${DATABASE_URL#*://}"
    if [[ "$url_rest" == *@* ]]; then
        # Split at the LAST '@' so the host part is never taken from the password
        credentials="${url_rest%@*}"
        location="${url_rest##*@}"
    else
        credentials=""
        location="$url_rest"
    fi

    DB_USER="${credentials%%:*}"
    if [[ "$credentials" == *:* ]]; then
        DB_PASS="${credentials#*:}"
    else
        DB_PASS=""
    fi

    host_port="${location%%/*}"
    host_port="${host_port%%\?*}"
    if [[ "$location" == */* ]]; then
        db_path="${location#*/}"
    else
        db_path=""
    fi
    # Drop the query string (e.g. ?schema=public) from the database name
    DB_NAME="${db_path%%\?*}"

    if [[ "$host_port" == *:* ]]; then
        DB_HOST="${host_port%%:*}"
        DB_PORT="${host_port##*:}"
    else
        DB_HOST="$host_port"
        DB_PORT=""
    fi
    DB_HOST="${DB_HOST:-localhost}"
    DB_PORT="${DB_PORT:-5432}"

    if [ -z "$DB_USER" ] || [ -z "$DB_NAME" ]; then
        print_error "Could not parse DATABASE_URL (expected postgresql://user:password@host:port/database)"
        exit 1
    fi

    # psql reads the port from PGPORT; exporting it covers every later call
    export PGPORT="$DB_PORT"

    print_info "Database: $DB_NAME"
    print_info "User: $DB_USER"
    print_info "Host: $DB_HOST"
    print_info "Port: $DB_PORT"
    echo ""

    # Test database connection
    print_info "Testing database connection..."
    if ! PGPASSWORD="$DB_PASS" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "SELECT 1;" >/dev/null 2>&1; then
        print_error "Cannot connect to database"
        exit 1
    fi
    print_status "Database connection successful"
    echo ""

    # Check Prisma migration status
    print_info "Checking Prisma migration status..."
    if ! cd "$instance_dir/backend"; then
        print_error "Cannot change directory to $instance_dir/backend"
        exit 1
    fi

    echo ""
    echo -e "${YELLOW}=== Prisma Migration Status ===${NC}"
    # A non-zero status here only means migrations are pending or failed
    npx prisma migrate status 2>&1 || true
    echo -e "${YELLOW}==============================${NC}"
    echo ""

    # Check for failed migrations
    fix_failed_migrations "$DB_NAME" "$DB_USER" "$DB_PASS" "$DB_HOST"

    # Ask if user wants to run migrations now
    echo ""
    echo -n -e "${BLUE}Do you want to run 'npx prisma migrate deploy' now? [y/N]: ${NC}"
    # EOF on stdin counts as "no" instead of aborting the script
    read -r run_migrate || run_migrate=""

    if [[ "$run_migrate" =~ ^[Yy] ]]; then
        print_info "Running migrations..."
        if ! cd "$instance_dir/backend"; then
            print_error "Cannot change directory to $instance_dir/backend"
            exit 1
        fi

        if npx prisma migrate deploy; then
            print_status "Migrations completed successfully!"
        else
            print_error "Migration failed"
            print_info "You may need to run this script again or investigate further"
            exit 1
        fi
    else
        print_info "Skipped migration deployment"
        print_info "Run manually: cd $instance_dir/backend && npx prisma migrate deploy"
    fi

    echo ""
    print_status "Done!"
}
|
||||
|
||||
# Run main function, forwarding every command-line argument unchanged
main "$@"
|
||||
|
||||
Reference in New Issue
Block a user