Files
doorman/tests/test_multinode_mem_mode_validation.py
T
2025-10-08 21:44:54 -04:00

373 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Test Multi-Node MEM Mode Validation
Documents the implementation of multi-node detection to prevent rate limit bypass
"""
def test_multinode_mem_mode_validation():
"""Test multi-node MEM mode validation patterns"""
print("Multi-Node MEM Mode Validation - Implementation")
print("=" * 70)
print()
print("P0 Security Risk:")
print(" MEM mode in production with multiple nodes/threads")
print(" → Rate limiting NOT shared across workers")
print(" → Token revocation NOT shared across workers")
print(" → Attackers can bypass rate limits by spreading requests")
print(" → Revoked tokens still valid on other workers")
print()
print("=" * 70)
print()
print("Implementation Location:")
print()
print(" File: backend-services/doorman.py")
print(" Lines: 142-155 (production validation block)")
print(" Context: app_lifespan() startup validation")
print()
print("=" * 70)
print()
print("Validation Logic:")
print()
print("1. Check if MEM_OR_EXTERNAL=MEM (in-memory mode)")
print("2. Get THREADS environment variable (default: 1)")
print("3. If THREADS > 1, raise RuntimeError (multi-node deployment)")
print("4. If THREADS == 1, log warning (single-node is allowed)")
print()
print("=" * 70)
print()
print("Code Implementation:")
print()
print(" mem_or_external = os.getenv('MEM_OR_EXTERNAL', 'MEM').upper()")
print(" if mem_or_external == 'MEM':")
print(" num_threads = int(os.getenv('THREADS', 1))")
print(" if num_threads > 1:")
print(" raise RuntimeError(")
print(" 'In production with THREADS > 1, MEM_OR_EXTERNAL=MEM is unsafe. '")
print(" 'Rate limiting and token revocation are not shared across workers. '")
print(" 'Set MEM_OR_EXTERNAL=REDIS with REDIS_HOST configured.'")
print(" )")
print(" gateway_logger.warning(")
print(" 'Production deployment with MEM_OR_EXTERNAL=MEM detected. '")
print(" 'Single-node only. For multi-node HA, use REDIS or EXTERNAL mode.'")
print(" )")
print()
print("=" * 70)
print()
print("Validation Scenarios:")
print()
scenarios = [
{
'config': 'ENV=production, MEM_OR_EXTERNAL=MEM, THREADS=1',
'result': 'WARN',
'message': 'Production deployment with MEM_OR_EXTERNAL=MEM detected. Single-node only.',
'reason': 'Single-node is allowed, but warns about HA limitations'
},
{
'config': 'ENV=production, MEM_OR_EXTERNAL=MEM, THREADS=4',
'result': 'FAIL',
'message': 'In production with THREADS > 1, MEM_OR_EXTERNAL=MEM is unsafe.',
'reason': 'Multi-worker deployment requires Redis for shared state'
},
{
'config': 'ENV=production, MEM_OR_EXTERNAL=REDIS, THREADS=4',
'result': 'PASS',
'message': None,
'reason': 'Redis mode shares state across all workers'
},
{
'config': 'ENV=production, MEM_OR_EXTERNAL=EXTERNAL, THREADS=4',
'result': 'PASS',
'message': None,
'reason': 'External mode shares state across all workers'
},
{
'config': 'ENV=development, MEM_OR_EXTERNAL=MEM, THREADS=4',
'result': 'SKIP',
'message': None,
'reason': 'Validation only runs in production'
}
]
for i, scenario in enumerate(scenarios, 1):
print(f"{i}. {scenario['config']}")
print(f" Result: {scenario['result']}")
if scenario['message']:
print(f" Message: {scenario['message']}")
print(f" Reason: {scenario['reason']}")
print()
print("=" * 70)
print()
print("Attack Scenario BEFORE Fix:")
print()
print(" Configuration:")
print(" - ENV=production")
print(" - MEM_OR_EXTERNAL=MEM")
print(" - THREADS=4 (4 uvicorn workers)")
print()
print(" Rate Limit Bypass Attack:")
print(" 1. API has rate limit: 10 requests/minute per user")
print(" 2. Each worker has its own in-memory rate limit counter")
print(" 3. Attacker sends 10 requests → hits Worker 1's counter")
print(" 4. Attacker sends 10 more requests → hits Worker 2's counter")
print(" 5. Attacker sends 10 more requests → hits Worker 3's counter")
print(" 6. Attacker sends 10 more requests → hits Worker 4's counter")
print(" 7. → Total: 40 requests (4x the limit!)")
print()
print(" Token Revocation Bypass Attack:")
print(" 1. User logs out → token added to revocation list on Worker 1")
print(" 2. Attacker uses revoked token → hits Worker 2")
print(" 3. Worker 2 doesn't have token in its revocation list")
print(" 4. → Revoked token still works! Auth bypass!")
print()
print("=" * 70)
print()
print("Attack Scenario AFTER Fix:")
print()
print(" Configuration:")
print(" - ENV=production")
print(" - MEM_OR_EXTERNAL=MEM")
print(" - THREADS=4")
print()
print(" Server Startup:")
print(" 1. app_lifespan() validation runs")
print(" 2. Detects THREADS=4 with MEM mode")
print(" 3. RuntimeError raised")
print(" 4. → Server refuses to start")
print()
print(" Operator Action Required:")
print(" - Set MEM_OR_EXTERNAL=REDIS")
print(" - Set REDIS_HOST=redis.example.com")
print(" - Set REDIS_PASSWORD=strong-password")
print(" - Restart server")
print()
print(" Result with Redis:")
print(" - All workers share Redis for rate limiting")
print(" - All workers share Redis for token revocation")
print(" - Rate limits enforced correctly across all workers")
print(" - Revoked tokens blocked on all workers")
print()
print("=" * 70)
print()
print("Why MEM Mode is Unsafe in Multi-Node:")
print()
print("1. Rate Limiting:")
print(" - Each worker has separate in-memory rate limit counters")
print(" - User rate limit = configured_limit × num_workers")
print(" - Example: 10 req/min × 4 workers = 40 req/min actual")
print()
print("2. Token Revocation:")
print(" - Each worker has separate in-memory revocation list")
print(" - Token revoked on Worker 1 still valid on Workers 2-4")
print(" - Logout doesn't work reliably (token may still auth)")
print()
print("3. Throttling:")
print(" - Each worker has separate throttle state")
print(" - Throttle limits multiplied by num_workers")
print()
print("4. Circuit Breakers:")
print(" - Each worker tracks upstream failures separately")
print(" - Circuit may open on Worker 1 but closed on others")
print()
print("=" * 70)
print()
print("Environment Variable Configuration:")
print()
print("Single-Node Production (allowed):")
print(" ENV=production")
print(" MEM_OR_EXTERNAL=MEM")
print(" THREADS=1")
print(" Result: Warning logged, server starts")
print()
print("Multi-Node Production (required):")
print(" ENV=production")
print(" MEM_OR_EXTERNAL=REDIS")
print(" REDIS_HOST=redis.example.com")
print(" REDIS_PORT=6379")
print(" REDIS_PASSWORD=strong-password")
print(" THREADS=4")
print(" Result: Server starts with shared Redis state")
print()
print("Development (any configuration):")
print(" ENV=development")
print(" MEM_OR_EXTERNAL=MEM")
print(" THREADS=4")
print(" Result: Validation skipped, server starts")
print()
print("=" * 70)
print()
print("Error Messages:")
print()
print("1. Multi-node with MEM mode:")
print(" RuntimeError: In production with THREADS > 1, MEM_OR_EXTERNAL=MEM is unsafe.")
print(" Rate limiting and token revocation are not shared across workers.")
print(" Set MEM_OR_EXTERNAL=REDIS with REDIS_HOST configured.")
print()
print("2. Single-node with MEM mode (warning only):")
print(" WARNING: Production deployment with MEM_OR_EXTERNAL=MEM detected.")
print(" Single-node only. For multi-node HA, use REDIS or EXTERNAL mode.")
print()
print("=" * 70)
print()
print("Production Deployment Modes:")
print()
modes = [
{
'mode': 'Single-Node Production',
'config': 'MEM_OR_EXTERNAL=MEM, THREADS=1',
'allowed': True,
'shared_state': False,
'use_case': 'Small deployments, development staging'
},
{
'mode': 'Multi-Node Production',
'config': 'MEM_OR_EXTERNAL=REDIS, THREADS=4+',
'allowed': True,
'shared_state': True,
'use_case': 'Production HA, high availability, scalability'
},
{
'mode': 'Multi-Node with MEM (BLOCKED)',
'config': 'MEM_OR_EXTERNAL=MEM, THREADS=4+',
'allowed': False,
'shared_state': False,
'use_case': 'UNSAFE - rate limit bypass, revoked tokens work'
}
]
for mode in modes:
print(f"{mode['mode']}:")
print(f" Config: {mode['config']}")
print(f" Allowed: {'✓ Yes' if mode['allowed'] else '✗ No (RuntimeError)'}")
print(f" Shared State: {'✓ Yes' if mode['shared_state'] else '✗ No'}")
print(f" Use Case: {mode['use_case']}")
print()
print("=" * 70)
print()
print("THREADS Environment Variable:")
print()
print("What it controls:")
print(" - Number of uvicorn worker processes")
print(" - Each worker is a separate Python process")
print(" - Each worker has separate in-memory state (if MEM mode)")
print()
print("Common values:")
print(" THREADS=1 → Single worker (default)")
print(" THREADS=4 → 4 workers (typical for 4-core server)")
print(" THREADS=8 → 8 workers (for 8-core server)")
print()
print("Load balancing:")
print(" - uvicorn distributes requests across workers (round-robin)")
print(" - Each request handled by ONE worker")
print(" - Workers do NOT share memory")
print()
print("=" * 70)
print()
print("Testing Recommendations:")
print()
print("1. Test multi-node rejection:")
print(" - Set ENV=production, MEM_OR_EXTERNAL=MEM, THREADS=4")
print(" - Verify startup fails with RuntimeError")
print(" - Verify error message mentions THREADS > 1")
print()
print("2. Test single-node warning:")
print(" - Set ENV=production, MEM_OR_EXTERNAL=MEM, THREADS=1")
print(" - Verify startup succeeds with warning")
print(" - Verify warning mentions single-node only")
print()
print("3. Test Redis mode with multi-node:")
print(" - Set ENV=production, MEM_OR_EXTERNAL=REDIS, THREADS=4")
print(" - Set valid REDIS_HOST")
print(" - Verify startup succeeds (no error)")
print()
print("4. Test development mode skip:")
print(" - Set ENV=development, MEM_OR_EXTERNAL=MEM, THREADS=4")
print(" - Verify startup succeeds (validation skipped)")
print()
print("5. Integration test rate limiting:")
print(" - Start with REDIS mode, THREADS=4")
print(" - Send requests to different workers")
print(" - Verify rate limit enforced across all workers")
print()
print("=" * 70)
print()
print("Deployment Checklist for Multi-Node Production:")
print()
print("Before deploying with THREADS > 1:")
print(" 1. Set ENV=production")
print(" 2. Set MEM_OR_EXTERNAL=REDIS (or EXTERNAL)")
print(" 3. Set REDIS_HOST to valid Redis server")
print(" 4. Set REDIS_PORT (default: 6379)")
print(" 5. Set REDIS_PASSWORD for authentication")
print(" 6. Set REDIS_DB (default: 0)")
print(" 7. Verify Redis is running and accessible")
print(" 8. Set THREADS to desired worker count (e.g., 4)")
print(" 9. Test startup: python doorman.py")
print(" 10. Verify all workers connect to Redis successfully")
print()
print("=" * 70)
print()
print("Redis State Sharing:")
print()
print("What is shared across workers when using Redis:")
print(" ✓ Rate limiting counters (per user/IP)")
print(" ✓ Token revocation list (logged-out tokens)")
print(" ✓ Throttle state (per endpoint)")
print(" ✓ Cache data (user_cache, role_cache, api_cache, etc.)")
print(" ✓ Bandwidth usage tracking")
print()
print("What is NOT shared (still per-worker):")
print(" - HTTP connection pools")
print(" - Request routing state (handled by uvicorn)")
print(" - Logging buffers")
print()
print("=" * 70)
print()
print("Security Impact:")
print()
print("Prevents:")
print(" ✓ Rate limit bypass by spreading requests across workers")
print(" ✓ Revoked token bypass by hitting different workers")
print(" ✓ Throttle limit bypass in multi-node deployments")
print(" ✓ Misconfigured production deployments")
print()
print("Requires:")
print(" ✓ Redis deployment for multi-node production")
print(" ✓ Proper Redis authentication (REDIS_PASSWORD)")
print(" ✓ Shared state across all workers")
print()
print("=" * 70)
print()
print("P0 Risk Mitigated:")
print(" MEM mode in production with multiple nodes allows rate limit bypass")
print()
print("Production Impact:")
print(" ✓ Multi-node deployments require Redis (prevents bypass)")
print(" ✓ Single-node deployments allowed with warning")
print(" ✓ Rate limits enforced correctly across all workers")
print(" ✓ Token revocation works reliably in HA setups")
print()
if __name__ == '__main__':
test_multinode_mem_mode_validation()