"""
|
||
Test Load Testing Baseline
|
||
Documents the implementation of load testing infrastructure for performance validation
|
||
"""
|
||
|
||
def test_load_testing_implementation():
    """Test load testing baseline documentation"""

    print("Load Testing Baseline - Implementation")
    print("=" * 70)
    print()

    print("P2 Performance Validation:")
    print("  No documented throughput/latency benchmarks")
    print("    → Cannot detect performance regressions")
    print("    → No baseline for capacity planning")
    print("    → Unknown breaking points")
    print("    → No SLA guarantees")
    print()
    print("=" * 70)
    print()

    print("Implementation Locations:")
    print()

    locations = [
        {
            'file': 'load-tests/k6-load-test.js',
            'type': 'k6 Load Test',
            'scenarios': 4,
            'description': 'JavaScript-based load testing with k6'
        },
        {
            'file': 'load-tests/locust-load-test.py',
            'type': 'Locust Load Test',
            'scenarios': 3,
            'description': 'Python-based load testing with Locust'
        },
        {
            'file': 'load-tests/PERFORMANCE_BASELINES.md',
            'type': 'Documentation',
            'sections': 10,
            'description': 'Performance targets and test procedures'
        }
    ]

    for i, loc in enumerate(locations, 1):
        print(f"{i}. {loc['file']}")
        print(f"   Type: {loc['type']}")
        if 'scenarios' in loc:
            print(f"   Scenarios: {loc['scenarios']}")
        if 'sections' in loc:
            print(f"   Sections: {loc['sections']}")
        print(f"   Description: {loc['description']}")
        print()

    print("=" * 70)
    print()

    print("Performance Targets (p50/p95/p99):")
    print()

    targets = [
        {
            'category': 'Overall',
            'p50': '< 100ms',
            'p95': '< 500ms',
            'p99': '< 1000ms',
            'notes': 'All requests combined'
        },
        {
            'category': 'Authentication',
            'p50': '< 80ms',
            'p95': '< 400ms',
            'p99': '< 800ms',
            'notes': 'Login/token validation'
        },
        {
            'category': 'REST Gateway',
            'p50': '< 150ms',
            'p95': '< 600ms',
            'p99': '< 1200ms',
            'notes': 'API proxying overhead'
        },
        {
            'category': 'GraphQL Gateway',
            'p50': '< 200ms',
            'p95': '< 800ms',
            'p99': '< 1500ms',
            'notes': 'Query/mutation processing'
        },
        {
            'category': 'SOAP Gateway',
            'p50': '< 250ms',
            'p95': '< 1000ms',
            'p99': '< 2000ms',
            'notes': 'XML processing overhead'
        },
        {
            'category': 'Health Check',
            'p50': '< 10ms',
            'p95': '< 50ms',
            'p99': '< 100ms',
            'notes': 'No database queries'
        }
    ]

    for target in targets:
        print(f"{target['category']}:")
        print(f"  p50: {target['p50']}")
        print(f"  p95: {target['p95']}")
        print(f"  p99: {target['p99']}")
        print(f"  Notes: {target['notes']}")
        print()
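    # How percentiles like these can be computed from raw samples - a stdlib
    # sketch (latencies_ms is a hypothetical list of per-request latencies
    # gathered during a run):
    #
    #     import statistics
    #
    #     cuts = statistics.quantiles(latencies_ms, n=100)  # 99 cut points
    #     p50, p95, p99 = cuts[49], cuts[94], cuts[98]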
    print("=" * 70)
    print()

    print("k6 Load Test - Scenarios:")
    print()

    k6_scenarios = [
        {
            'name': 'Smoke Test',
            'vus': '1',
            'duration': '30s',
            'purpose': 'Verify basic functionality',
            'thresholds': 'p99 < 200ms, 100% success'
        },
        {
            'name': 'Load Test',
            'vus': '0→10→50',
            'duration': '9m',
            'purpose': 'Simulate realistic production load',
            'thresholds': 'p95 < 500ms, error rate < 1%'
        },
        {
            'name': 'Stress Test',
            'vus': '0→100→200',
            'duration': '16m',
            'purpose': 'Find system breaking point',
            'thresholds': 'p99 < 2000ms, error rate < 5%'
        },
        {
            'name': 'Spike Test',
            'vus': '10→200→10',
            'duration': '2m',
            'purpose': 'Test resilience to traffic spikes',
            'thresholds': 'No crashes, recovery < 30s'
        }
    ]

    for scenario in k6_scenarios:
        print(f"Scenario: {scenario['name']}")
        print(f"  Virtual Users: {scenario['vus']}")
        print(f"  Duration: {scenario['duration']}")
        print(f"  Purpose: {scenario['purpose']}")
        print(f"  Success Criteria: {scenario['thresholds']}")
        print()

    print("=" * 70)
    print()

    print("k6 Implementation Details:")
    print()
    print("  Custom Metrics:")
    print("    - auth_success_rate: Authentication success rate")
    print("    - rest_gateway_latency: REST proxy latency distribution")
    print("    - graphql_gateway_latency: GraphQL latency distribution")
    print("    - soap_gateway_latency: SOAP latency distribution")
    print("    - error_count: Total errors across all requests")
    print()
    print("  Built-in Metrics:")
    print("    - http_req_duration: Request latency (p50/p95/p99)")
    print("    - http_req_failed: Failed request rate")
    print("    - http_reqs: Total requests per second")
    print("    - vus: Active virtual users")
    print()
    print("  Thresholds (fail test if exceeded):")
    print("    - http_req_duration: p50<100ms, p95<500ms, p99<1000ms")
    print("    - http_req_failed: rate<0.05 (error rate < 5%)")
    print("    - auth_success_rate: rate>0.95 (95% success)")
    print("    - error_count: count<100 (max 100 errors)")
    print()
    print("=" * 70)
    print()

    print("Locust Load Test - Features:")
    print()

    locust_features = [
        {
            'feature': 'User Classes',
            'description': 'DoormanUser (realistic), StressTestUser (rapid-fire)',
            'benefit': 'Different load patterns for different scenarios'
        },
        {
            'feature': 'Task Weighting',
            'description': 'Tasks weighted by production traffic distribution',
            'benefit': 'Realistic workload simulation'
        },
        {
            'feature': 'Load Shapes',
            'description': 'Step, Spike, Wave patterns',
            'benefit': 'Custom load patterns for specific tests'
        },
        {
            'feature': 'Tag-based Execution',
            'description': 'Run specific test categories (auth, gateway, stress)',
            'benefit': 'Targeted testing of specific components'
        },
        {
            'feature': 'Web UI',
            'description': 'Real-time metrics dashboard',
            'benefit': 'Visual monitoring during tests'
        },
        {
            'feature': 'HTML Reports',
            'description': 'Comprehensive test result reports',
            'benefit': 'Easy sharing and archival'
        }
    ]

    for feature in locust_features:
        print(f"{feature['feature']}:")
        print(f"  Description: {feature['description']}")
        print(f"  Benefit: {feature['benefit']}")
        print()

    print("=" * 70)
    print()

    print("Locust Traffic Mix (Realistic Workload):")
    print()
    print("  Task Distribution:")
    print("    - 10% Authentication (login)")
    print("    - 20% List APIs")
    print("    - 15% List Users")
    print("    - 15% List Roles")
    print("    - 15% List Groups")
    print("    - 25% REST Gateway (API proxying)")
    print("    - 5% Health Check")
    print()
    print("  Based on production access patterns:")
    print("    - Gateway requests are most common (API proxying)")
    print("    - API management second (admin operations)")
    print("    - Authentication periodic (token refresh)")
    print("    - Health checks for monitoring")
    print()
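    # A minimal Locust sketch of this weighting (endpoint paths are
    # assumptions; the real tasks live in load-tests/locust-load-test.py).
    # Locust @task weights are relative, so they need not sum to exactly 100:
    #
    #     from locust import HttpUser, between, tag, task
    #
    #     class DoormanUser(HttpUser):
    #         wait_time = between(1, 3)  # think time between tasks
    #
    #         @tag('gateway')
    #         @task(25)   # heaviest: REST gateway traffic
    #         def rest_gateway(self):
    #             self.client.get('/api/rest/example')
    #
    #         @tag('authentication')
    #         @task(10)   # periodic login/token refresh
    #         def login(self):
    #             self.client.post('/auth/login',
    #                              json={'username': '...', 'password': '...'})
    #
    #         @task(5)    # lightweight monitoring probe
    #         def health(self):
    #             self.client.get('/health')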
    print("=" * 70)
    print()

    print("Running Tests:")
    print()
    print("  k6 - Basic Run:")
    print("    k6 run load-tests/k6-load-test.js")
    print()
    print("  k6 - Custom Configuration:")
    print("    k6 run --env BASE_URL=https://api.example.com \\")
    print("      --env TEST_USERNAME=admin \\")
    print("      --env TEST_PASSWORD=secret \\")
    print("      load-tests/k6-load-test.js")
    print()
    print("  k6 - Generate JSON Results:")
    print("    k6 run --out json=results.json load-tests/k6-load-test.js")
    print()
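    # Post-processing those JSON results - a sketch. With --out json, k6
    # emits newline-delimited JSON; metric samples are objects with type
    # "Point", a metric name, and the sample under data.value:
    #
    #     import json
    #     import statistics
    #
    #     durations = []
    #     with open('results.json') as f:
    #         for line in f:
    #             point = json.loads(line)
    #             if point.get('type') == 'Point' and \
    #                point.get('metric') == 'http_req_duration':
    #                 durations.append(point['data']['value'])
    #
    #     print(f"p95: {statistics.quantiles(durations, n=100)[94]:.1f} ms")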
    print("  Locust - Web UI Mode:")
    print("    locust -f load-tests/locust-load-test.py \\")
    print("      --host=http://localhost:8000")
    print("    # Open browser to http://localhost:8089")
    print()
    print("  Locust - Headless (CI/CD):")
    print("    locust -f load-tests/locust-load-test.py \\")
    print("      --host=http://localhost:8000 \\")
    print("      --users 50 --spawn-rate 5 --run-time 5m \\")
    print("      --headless --html report.html")
    print()
    print("  Locust - Specific Scenario:")
    print("    locust -f load-tests/locust-load-test.py \\")
    print("      --host=http://localhost:8000 \\")
    print("      --tags authentication")
    print()
    print("=" * 70)
    print()

    print("Load Shapes (Locust Custom Patterns):")
    print()
    print("  1. StepLoadShape:")
    print("     - Gradually increase load in steps")
    print("     - 10 users added every 60 seconds")
    print("     - Total duration: 10 minutes")
    print("     - Use: locust --shape StepLoadShape")
    print()
    print("  2. SpikeLoadShape:")
    print("     - Sudden traffic spike pattern")
    print("     - 0-60s: 10 users (normal)")
    print("     - 60-120s: 200 users (spike)")
    print("     - 120-180s: 10 users (recovery)")
    print("     - Use: locust --shape SpikeLoadShape")
    print()
    print("  3. WaveLoadShape:")
    print("     - Sine wave load pattern")
    print("     - Baseline: 25 users")
    print("     - Amplitude: 50 users")
    print("     - Period: 2-minute waves")
    print("     - Use: locust --shape WaveLoadShape")
    print()
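    # How such a shape is typically written - a sketch of the step pattern
    # (Locust polls tick() about once per second; returning None ends the run):
    #
    #     from locust import LoadTestShape
    #
    #     class StepLoadShape(LoadTestShape):
    #         step_users = 10   # users added per step
    #         step_time = 60    # seconds per step
    #         time_limit = 600  # 10 minutes total
    #
    #         def tick(self):
    #             run_time = self.get_run_time()
    #             if run_time > self.time_limit:
    #                 return None
    #             step = run_time // self.step_time
    #             # (user_count, spawn_rate)
    #             return (int(step + 1) * self.step_users, self.step_users)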
    print("=" * 70)
    print()

    print("CI/CD Integration:")
    print()
    print("  GitHub Actions Example:")
    print("    1. Start Doorman (docker-compose up -d)")
    print("    2. Run k6 load test")
    print("    3. Check performance thresholds")
    print("    4. Fail build if thresholds exceeded")
    print("    5. Upload results as artifacts")
    print()
    print("  Performance Regression Detection:")
    print("    - Save baseline results (JSON)")
    print("    - Compare with new test results")
    print("    - Fail if regression detected:")
    print("      • Latency increase > 20%")
    print("      • Error rate increase > 2%")
    print("      • Throughput decrease > 15%")
    print()
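    # Those rules, as a comparison sketch (metric names are assumptions;
    # "error rate increase" is read as percentage points):
    #
    #     def is_regression(baseline, current):
    #         return (
    #             current['p95_ms'] > baseline['p95_ms'] * 1.20             # +20% latency
    #             or current['error_rate'] > baseline['error_rate'] + 0.02  # +2 pts errors
    #             or current['rps'] < baseline['rps'] * 0.85                # -15% throughput
    #         )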
    print("  Automated Testing Schedule:")
    print("    - Daily: Smoke tests (5 minutes)")
    print("    - Weekly: Load tests (30 minutes)")
    print("    - Monthly: Stress/spike tests (1 hour)")
    print("    - Quarterly: Baseline review and update")
    print()
    print("=" * 70)
    print()

    print("Monitoring During Tests:")
    print()
    print("  Application Metrics:")
    print("    - Request latency (p50, p95, p99)")
    print("    - Request rate (RPS)")
    print("    - Error rate by status code")
    print("    - Active connections")
    print("    - Worker CPU/memory usage")
    print()
    print("  Database Metrics:")
    print("    - Connection pool usage")
    print("    - Query latency")
    print("    - Slow queries (> 100ms)")
    print("    - Lock wait time")
    print("    - Replication lag")
    print()
    print("  Cache Metrics:")
    print("    - Hit rate (target > 80%)")
    print("    - Eviction rate")
    print("    - Memory usage")
    print("    - Connection count")
    print()
    print("  Commands:")
    print("    # Monitor system resources")
    print("    htop")
    print()
    print("    # Monitor MongoDB")
    print("    mongotop --host localhost --port 27017")
    print()
    print("    # Monitor Redis")
    print("    redis-cli --stat")
    print()
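    # A tiny stdlib probe for eyeballing gateway latency alongside the tools
    # above - a sketch (the health URL is an assumption):
    #
    #     import time
    #     import urllib.request
    #
    #     while True:
    #         start = time.perf_counter()
    #         urllib.request.urlopen('http://localhost:8000/health').read()
    #         print(f"/health {(time.perf_counter() - start) * 1000:.1f} ms")
    #         time.sleep(1)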
    print("=" * 70)
    print()

    print("Performance Optimization Tips:")
    print()
    print("  Database:")
    print("    ✓ Index frequently queried fields")
    print("    ✓ Increase connection pool size")
    print("    ✓ Use secondaryPreferred for reads")
    print()
    print("  Cache:")
    print("    ✓ Enable LRU eviction policy")
    print("    ✓ Increase max memory for Redis")
    print("    ✓ Use persistent connections")
    print()
    print("  Application:")
    print("    ✓ Workers = (2 × CPU cores) + 1")
    print("    ✓ Increase HTTP connection pool size")
    print("    ✓ Use asyncio.gather() for parallel ops")
    print()
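    # What the asyncio.gather() tip looks like in practice - a sketch with a
    # hypothetical async HTTP client (an httpx.AsyncClient-style API):
    #
    #     import asyncio
    #
    #     async def fan_out(client, urls):
    #         # Fire upstream calls concurrently instead of awaiting serially
    #         return await asyncio.gather(*(client.get(url) for url in urls))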
    print("  Scaling:")
    print("    ✓ Horizontal: Add Doorman instances")
    print("    ✓ Vertical: Increase CPU/RAM")
    print("    ✓ Database: Replica set for reads")
    print()
    print("=" * 70)
    print()

    print("Alert Thresholds:")
    print()

    alerts = [
        {
            'metric': 'p99 Latency',
            'warning': '> 1000ms',
            'critical': '> 2000ms',
            'action': 'Investigate slow queries'
        },
        {
            'metric': 'Error Rate',
            'warning': '> 2%',
            'critical': '> 5%',
            'action': 'Check logs, scale up'
        },
        {
            'metric': 'CPU Usage',
            'warning': '> 70%',
            'critical': '> 85%',
            'action': 'Add workers/instances'
        },
        {
            'metric': 'Memory Usage',
            'warning': '> 75%',
            'critical': '> 90%',
            'action': 'Increase RAM'
        },
        {
            'metric': 'Cache Hit Rate',
            'warning': '< 70%',
            'critical': '< 50%',
            'action': 'Review cache strategy'
        },
        {
            'metric': 'DB Connections',
            'warning': '> 80% pool',
            'critical': '> 95% pool',
            'action': 'Increase pool size'
        }
    ]

    for alert in alerts:
        print(f"{alert['metric']}:")
        print(f"  Warning: {alert['warning']}")
        print(f"  Critical: {alert['critical']}")
        print(f"  Action: {alert['action']}")
        print()

    print("=" * 70)
    print()

    print("Benefits:")
    print()
    print("  Performance Visibility:")
    print("    ✓ Know system capacity (max throughput)")
    print("    ✓ Understand latency under load")
    print("    ✓ Identify bottlenecks early")
    print("    ✓ Detect regressions before production")
    print()
    print("  Capacity Planning:")
    print("    ✓ Data-driven scaling decisions")
    print("    ✓ Predict resource needs")
    print("    ✓ Optimize infrastructure costs")
    print("    ✓ Plan for traffic growth")
    print()
    print("  SLA Guarantees:")
    print("    ✓ Set realistic performance targets")
    print("    ✓ Validate against baselines")
    print("    ✓ Prove compliance to customers")
    print("    ✓ Track improvement over time")
    print()
    print("  Risk Mitigation:")
    print("    ✓ Identify breaking points before production")
    print("    ✓ Verify resilience to traffic spikes")
    print("    ✓ Test graceful degradation")
    print("    ✓ Validate autoscaling behavior")
    print()
    print("=" * 70)
    print()

    print("Testing Best Practices:")
    print()
    print("  1. Test realistic workloads:")
    print("     - Use production traffic distribution")
    print("     - Include authentication overhead")
    print("     - Mix read/write operations")
    print()
    print("  2. Test incrementally:")
    print("     - Start with smoke test (verify basics)")
    print("     - Ramp up gradually (find saturation point)")
    print("     - Spike test (verify resilience)")
    print()
    print("  3. Monitor holistically:")
    print("     - Application metrics (latency, errors)")
    print("     - Infrastructure metrics (CPU, memory)")
    print("     - Database metrics (connections, queries)")
    print()
    print("  4. Document everything:")
    print("     - Test configuration")
    print("     - Results and observations")
    print("     - Issues identified")
    print("     - Optimization actions")
    print()
    print("  5. Automate regression testing:")
    print("     - Run daily smoke tests")
    print("     - Compare with baselines")
    print("     - Alert on performance degradation")
    print()
    print("=" * 70)
    print()

    print("Future Enhancements:")
    print()
    print("  1. Distributed load testing:")
    print("     - Run k6 across multiple machines")
    print("     - Coordinate with k6 Cloud or custom orchestration")
    print()
    print("  2. Real user monitoring (RUM):")
    print("     - Capture actual user latency")
    print("     - Compare with synthetic tests")
    print()
    print("  3. Chaos engineering:")
    print("     - Inject failures during load tests")
    print("     - Test resilience to database outages")
    print("     - Verify graceful degradation")
    print()
    print("  4. Cost-based testing:")
    print("     - Track infrastructure cost during tests")
    print("     - Optimize cost per request")
    print()
    print("=" * 70)
    print()

    print("P2 Impact:")
    print("  No documented throughput/latency benchmarks")
    print()
    print("Production Impact:")
    print("  ✓ Established performance baselines (p50/p95/p99)")
    print("  ✓ Automated load testing with k6 and Locust")
    print("  ✓ Multiple test scenarios (smoke, load, stress, spike)")
    print("  ✓ CI/CD integration for regression detection")
    print("  ✓ Comprehensive monitoring guidance")
    print("  ✓ Data-driven capacity planning")
    print()


if __name__ == '__main__':
    test_load_testing_implementation()