mirror of
https://github.com/DRYTRIX/TimeTracker.git
synced 2026-05-19 12:50:11 -05:00
feat: add OIDC metadata utility with DNS troubleshooting support
- Add oidc_metadata.py utility module with retry logic and DNS testing - Implement fetch_oidc_metadata() with exponential backoff retry - Add test_dns_resolution() for proactive DNS diagnostics - Create TROUBLESHOOTING_OIDC_DNS.md documentation - Improves handling of DNS resolution errors in containerized environments
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
OIDC Metadata Fetcher Utility
|
||||
|
||||
Provides functions to fetch OIDC discovery documents with retry logic
|
||||
and better DNS handling to work around Python urllib3 DNS resolution issues.
|
||||
"""
|
||||
|
||||
import socket
|
||||
import time
|
||||
import logging
|
||||
from typing import Optional, Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def test_dns_resolution(hostname: str, timeout: int = 5) -> tuple[bool, Optional[str]]:
    """
    Test DNS resolution for a hostname using Python's socket library.

    ``socket.gethostbyname`` offers no timeout of its own, so the lookup runs
    in a daemon worker thread and this function waits at most ``timeout``
    seconds for it. A lookup that is still pending after that is reported as
    a failure; the abandoned thread is a daemon and will not keep the
    interpreter alive.

    Args:
        hostname: The hostname to resolve
        timeout: Maximum time in seconds to wait for the lookup
            (``None`` waits until the resolver itself gives up)

    Returns:
        Tuple of (success: bool, error_message: Optional[str]).
        ``(True, None)`` when the name resolved, ``(False, message)`` when the
        lookup failed, timed out, or raised an unexpected error. This function
        does not raise for resolution problems.
    """
    # Function-scope import: threading is not among this module's top-level imports.
    import threading

    # Filled in by the worker thread: either "ip_address" or "error".
    outcome: Dict[str, Any] = {}

    def _resolve() -> None:
        try:
            outcome["ip_address"] = socket.gethostbyname(hostname)
        except Exception as exc:  # reported to the caller below, never raised
            outcome["error"] = exc

    worker = threading.Thread(target=_resolve, name="oidc-dns-check", daemon=True)
    worker.start()
    worker.join(timeout)

    if worker.is_alive():
        # The resolver is still blocked; stop waiting instead of hanging the caller.
        error_msg = f"DNS resolution timed out for {hostname} after {timeout} seconds"
        logger.warning(error_msg)
        return False, error_msg

    if "ip_address" in outcome:
        logger.debug("DNS resolution successful for %s: %s", hostname, outcome["ip_address"])
        return True, None

    error = outcome.get("error")
    if isinstance(error, socket.gaierror):
        # Ordinary "name not found" style failures are expected in misconfigured
        # environments, so they are logged as warnings rather than errors.
        error_msg = f"DNS resolution failed for {hostname}: {str(error)}"
        logger.warning(error_msg)
        return False, error_msg

    error_msg = f"Unexpected error during DNS resolution for {hostname}: {str(error)}"
    logger.error(error_msg)
    return False, error_msg
|
||||
|
||||
|
||||
def fetch_oidc_metadata(
    issuer_url: str,
    max_retries: int = 3,
    retry_delay: int = 2,
    timeout: int = 10,
    use_dns_test: bool = True,
) -> tuple[Optional[Dict[str, Any]], Optional[str]]:
    """
    Fetch OIDC metadata from the discovery endpoint with retry logic.

    The discovery document is requested from
    ``<issuer_url>/.well-known/openid-configuration`` with ``requests.get``.
    Timeouts, connection errors (including DNS failures) and unexpected errors
    are retried with exponential backoff; HTTP error statuses and invalid
    documents are not retried.

    Args:
        issuer_url: The OIDC issuer URL (e.g., https://auth.example.com)
        max_retries: Maximum number of attempts in total (default: 3).
            Values below 1 are treated as 1 so that one request is always made.
        retry_delay: Initial delay between attempts in seconds (default: 2);
            doubled after every failed attempt
        timeout: Request timeout in seconds (default: 10); also passed to the
            DNS pre-check
        use_dns_test: Whether to test DNS resolution first (default: True).
            A failed pre-check is only logged; the fetch is attempted anyway.

    Returns:
        Tuple of (metadata_dict: Optional[Dict], error_message: Optional[str])
        Returns (None, error_message) on failure, (metadata, None) on success.
        Failures are reported through the return value rather than raised.
    """
    # Parse the issuer URL
    try:
        parsed = urlparse(issuer_url)
        # parsed.hostname strips userinfo, port and IPv6 brackets; splitting
        # netloc on ":" would mangle "[::1]:8443" and "user:pw@host".
        hostname = parsed.hostname
        if not parsed.scheme or not parsed.netloc or not hostname:
            return None, f"Invalid issuer URL format: {issuer_url}"

        metadata_url = f"{issuer_url.rstrip('/')}/.well-known/openid-configuration"
    except Exception as e:
        # Broad on purpose: this function reports bad input, it never raises.
        return None, f"Failed to parse issuer URL: {str(e)}"

    # Test DNS resolution first if requested
    if use_dns_test:
        dns_success, _ = test_dns_resolution(hostname, timeout=timeout)
        if not dns_success:
            logger.warning(
                "DNS resolution test failed for %s, but will attempt metadata fetch anyway",
                hostname,
            )
            # Continue anyway - the pre-check is diagnostic only

    # Attempt to fetch metadata with retry logic
    total_attempts = max(1, max_retries)
    attempts_made = 0
    last_error = None
    for attempt in range(1, total_attempts + 1):
        attempts_made = attempt
        try:
            logger.info(
                "Fetching OIDC metadata from %s (attempt %d/%d)",
                metadata_url,
                attempt,
                total_attempts,
            )

            response = requests.get(metadata_url, timeout=timeout)
            response.raise_for_status()

            metadata = response.json()

            # Validate that we got a proper OIDC discovery document
            if not isinstance(metadata, dict):
                raise ValueError("Metadata response is not a JSON object")

            required_fields = ["issuer", "authorization_endpoint", "token_endpoint"]
            missing_fields = [field for field in required_fields if field not in metadata]
            if missing_fields:
                raise ValueError(
                    f"Missing required fields in metadata: {', '.join(missing_fields)}"
                )

            logger.info(
                "Successfully fetched OIDC metadata from %s (issuer: %s)",
                metadata_url,
                metadata.get("issuer"),
            )
            return metadata, None

        except requests.exceptions.Timeout as e:
            last_error = f"Timeout fetching metadata from {metadata_url}: {str(e)}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        except requests.exceptions.ConnectionError as e:
            # This often includes DNS resolution errors
            error_str = str(e)
            if "NameResolutionError" in error_str or "Failed to resolve" in error_str or "[Errno -2]" in error_str:
                last_error = (
                    f"DNS resolution failed for {hostname}: {error_str}. "
                    "This may occur when Python's DNS resolver cannot resolve the domain. "
                    "Try configuring DNS servers in Docker or using container names for internal services."
                )
            else:
                last_error = f"Connection error fetching metadata from {metadata_url}: {error_str}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        except requests.exceptions.HTTPError as e:
            last_error = f"HTTP error fetching metadata from {metadata_url}: {str(e)}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)
            # Don't retry on HTTP errors (4xx, 5xx) - they're unlikely to resolve
            break

        except ValueError as e:
            # Covers both undecodable JSON and the validation errors raised above
            last_error = f"Invalid metadata response from {metadata_url}: {str(e)}"
            logger.error("%s (attempt %d/%d)", last_error, attempt, total_attempts)
            # Don't retry on validation errors
            break

        except Exception as e:
            last_error = f"Unexpected error fetching metadata from {metadata_url}: {str(e)}"
            logger.error("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        # Wait before retrying (exponential backoff)
        if attempt < total_attempts:
            delay = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
            # %s rather than %d so fractional delays are not logged truncated
            logger.info("Waiting %s seconds before retry...", delay)
            time.sleep(delay)

    # Report the number of attempts actually made: non-retryable errors leave
    # the loop early, so this can be smaller than the configured maximum.
    attempt_word = "attempt" if attempts_made == 1 else "attempts"
    error_message = (
        f"Failed to fetch OIDC metadata after {attempts_made} {attempt_word}. "
        f"Last error: {last_error}"
    )
    logger.error(error_message)
    return None, error_message
|
||||
@@ -0,0 +1,250 @@
|
||||
# Troubleshooting OIDC DNS Resolution Errors
|
||||
|
||||
## Problem Description
|
||||
|
||||
When configuring OIDC (OpenID Connect) authentication, you may encounter DNS resolution errors during application startup, even though DNS resolution works correctly from the command line (e.g., `curl` or `ping`).
|
||||
|
||||
### Common Error Messages
|
||||
|
||||
```
|
||||
Error loading metadata: HTTPSConnectionPool(host='auth.example.com', port=443):
|
||||
Max retries exceeded with url: /.well-known/openid-configuration
|
||||
(Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object>:
|
||||
Failed to resolve 'auth.example.com' ([Errno -2] Name or service not known)"))
|
||||
```
|
||||
|
||||
### Why This Happens
|
||||
|
||||
This issue occurs because Python's `urllib3` library (used by Authlib) may use a different DNS resolution mechanism than the system's DNS resolver. Even though:
|
||||
|
||||
- System DNS resolution works (curl/ping succeed)
|
||||
- Docker DNS configuration is correct
|
||||
- Containers are on the same network
|
||||
|
||||
Python's resolver may still fail to resolve the domain name.
|
||||
|
||||
## Solutions
|
||||
|
||||
### Solution 1: Configure DNS Servers in Docker/Portainer (Recommended)
|
||||
|
||||
Explicitly configure DNS servers in your Docker Compose or Portainer stack configuration.
|
||||
|
||||
#### For Docker Compose
|
||||
|
||||
Add DNS configuration to your service:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: ghcr.io/drytrix/timetracker:latest
|
||||
dns:
|
||||
- 8.8.8.8 # Google DNS
|
||||
- 8.8.4.4 # Google DNS secondary
|
||||
# OR use your internal DNS server
|
||||
- 192.168.1.1 # Your router/internal DNS
|
||||
# ... rest of configuration
|
||||
```
|
||||
|
||||
#### For Portainer Stacks
|
||||
|
||||
Edit your stack configuration and add DNS settings under the service definition:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
# ... other configuration ...
|
||||
dns:
|
||||
- 8.8.8.8
|
||||
- 8.8.4.4
|
||||
```
|
||||
|
||||
After updating, restart the container/stack.
|
||||
|
||||
### Solution 2: Use Docker Internal Networking
|
||||
|
||||
If both your OIDC provider (e.g., Authentik) and TimeTracker are running on the same Docker network, you can use Docker's internal DNS resolution by using the container/service name instead of the external domain.
|
||||
|
||||
#### Find Your OIDC Provider Container Name
|
||||
|
||||
In Portainer, check your OIDC provider stack for the service name, or use:
|
||||
|
||||
```bash
|
||||
docker network inspect <network_name>
|
||||
```
|
||||
|
||||
#### Update OIDC_ISSUER Environment Variable
|
||||
|
||||
Instead of:
|
||||
```
|
||||
OIDC_ISSUER=https://auth.example.com/application/o/time-tracker/
|
||||
```
|
||||
|
||||
Use:
|
||||
```
|
||||
OIDC_ISSUER=https://authentik:9443/application/o/time-tracker/
|
||||
```
|
||||
|
||||
Replace `authentik` with your actual Authentik service/container name and `9443` with the internal port.
|
||||
|
||||
**Note:** This only works for internal communication. External redirects (like OIDC callbacks) will still need the public domain.
|
||||
|
||||
### Solution 3: Add extra_hosts Mapping
|
||||
|
||||
Map the domain to an IP address in your Docker configuration.
|
||||
|
||||
#### For Docker Compose
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: ghcr.io/drytrix/timetracker:latest
|
||||
extra_hosts:
|
||||
- "auth.example.com:192.168.1.100" # Replace with actual OIDC provider IP
|
||||
# ... rest of configuration
|
||||
```
|
||||
|
||||
#### For Portainer Stacks
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
# ... other configuration ...
|
||||
extra_hosts:
|
||||
- "auth.example.com:192.168.1.100"
|
||||
```
|
||||
|
||||
#### To Find the IP Address
|
||||
|
||||
```bash
|
||||
# From within the TimeTracker container
|
||||
docker exec -it timetracker-app ping -c 1 auth.example.com
|
||||
|
||||
# Or from host
|
||||
ping auth.example.com
|
||||
```
|
||||
|
||||
### Solution 4: Use Lazy Metadata Loading (Automatic)
|
||||
|
||||
TimeTracker now includes automatic lazy loading of OIDC metadata. If DNS resolution fails at startup, the application will:
|
||||
|
||||
1. Start successfully (no blocking errors)
|
||||
2. Store OIDC configuration for lazy loading
|
||||
3. Attempt to fetch metadata on the first login attempt
|
||||
4. Retry with exponential backoff if DNS resolution fails
|
||||
|
||||
This means your application will start even if DNS isn't ready, and will automatically retry when a user attempts to log in.
|
||||
|
||||
#### Configuration Options
|
||||
|
||||
You can configure the retry behavior using environment variables:
|
||||
|
||||
```bash
|
||||
# Timeout for each metadata fetch attempt (default: 10 seconds)
|
||||
OIDC_METADATA_FETCH_TIMEOUT=10
|
||||
|
||||
# Number of retry attempts (default: 3)
|
||||
OIDC_METADATA_RETRY_ATTEMPTS=3
|
||||
|
||||
# Initial delay between retries in seconds; doubles after each failed attempt (default: 2)
|
||||
OIDC_METADATA_RETRY_DELAY=2
|
||||
```
|
||||
|
||||
## Verification Steps
|
||||
|
||||
### 1. Test DNS Resolution from Container
|
||||
|
||||
```bash
|
||||
# Test DNS resolution using Python
|
||||
docker exec -it <container> python -c "import socket; print(socket.gethostbyname('auth.example.com'))"
|
||||
|
||||
# Test with curl
|
||||
docker exec -it <container> curl -I https://auth.example.com/.well-known/openid-configuration
|
||||
```
|
||||
|
||||
### 2. Check Application Logs
|
||||
|
||||
Look for OIDC-related messages in your application logs:
|
||||
|
||||
```bash
|
||||
# If using Docker
|
||||
docker logs <container> | grep -i oidc
|
||||
|
||||
# Check for lazy loading messages
|
||||
docker logs <container> | grep -i "lazy\|metadata"
|
||||
```
|
||||
|
||||
### 3. Use the OIDC Debug Dashboard
|
||||
|
||||
1. Log in as an administrator
|
||||
2. Navigate to **Admin → OIDC Settings**
|
||||
3. Click **Test Configuration** to verify connectivity
|
||||
4. Review the metadata display to confirm successful connection
|
||||
|
||||
### 4. Use the Guided Setup Wizard
|
||||
|
||||
TimeTracker includes a guided OIDC setup wizard that:
|
||||
|
||||
- Tests DNS resolution before configuration
|
||||
- Validates metadata endpoint accessibility
|
||||
- Provides troubleshooting tips if connection fails
|
||||
- Generates correct configuration automatically
|
||||
|
||||
Access it via **Admin → OIDC Setup Wizard** (if available).
|
||||
|
||||
## Common Scenarios
|
||||
|
||||
### Scenario 1: Both Services on Same Docker Network
|
||||
|
||||
**Problem:** Authentik and TimeTracker are on the same Docker network but using external domains.
|
||||
|
||||
**Solution:** Use Docker internal service names (Solution 2) or ensure both services can resolve each other's external domains.
|
||||
|
||||
### Scenario 2: DNS Not Ready at Startup
|
||||
|
||||
**Problem:** DNS resolution works after container starts, but fails during startup.
|
||||
|
||||
**Solution:** Use lazy loading (Solution 4) - this is automatic and requires no configuration.
|
||||
|
||||
### Scenario 3: Custom DNS Server
|
||||
|
||||
**Problem:** Using a custom internal DNS server that Python can't access.
|
||||
|
||||
**Solution:** Configure explicit DNS servers (Solution 1) pointing to your DNS server.
|
||||
|
||||
### Scenario 4: Reverse Proxy with Different Domain
|
||||
|
||||
**Problem:** OIDC provider is behind a reverse proxy with a different domain.
|
||||
|
||||
**Solution:** Ensure the reverse proxy domain is resolvable and use that domain in `OIDC_ISSUER`.
|
||||
|
||||
## Still Having Issues?
|
||||
|
||||
If none of the above solutions work:
|
||||
|
||||
1. **Check Network Configuration**: Ensure containers are on the same network and can communicate
|
||||
2. **Verify Firewall Rules**: Check whether a firewall is blocking DNS queries
|
||||
3. **Review Provider Logs**: Check your OIDC provider logs for connection attempts
|
||||
4. **Test from Host**: Verify DNS resolution works from the Docker host
|
||||
5. **Check DNS Server**: Ensure your DNS server is responding correctly
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [OIDC Setup Guide](admin/configuration/OIDC_SETUP.md) - Complete OIDC configuration guide
|
||||
- [Docker Compose Setup](admin/configuration/DOCKER_COMPOSE_SETUP.md) - Docker deployment guide
|
||||
|
||||
## Technical Details
|
||||
|
||||
### How Lazy Loading Works
|
||||
|
||||
1. **At Startup**: If metadata fetch fails, TimeTracker stores OIDC configuration in app config
|
||||
2. **On First Login**: When a user attempts OIDC login, the application:
|
||||
- Checks if OIDC client exists
|
||||
- If not, attempts to fetch metadata using the `requests` library (better DNS handling)
|
||||
- Registers the OAuth client with fetched metadata
|
||||
- Proceeds with normal OIDC flow
|
||||
|
||||
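3. **Retry Logic**: Uses exponential backoff with a configurable number of attempts. The delay starts at 2s and doubles after each failed attempt (2s, 4s, 8s, ...); with the default of 3 attempts there are two waits, 2s and then 4s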
|
||||
|
||||
### Why requests Library Works Better
|
||||
|
||||
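TimeTracker's metadata fetcher calls the `requests` library directly and wraps it in its own retry loop. Note that `requests` is itself built on `urllib3`, so the name lookup goes through the same resolver; in practice the fetch sometimes succeeds where the startup fetch failed mainly because it runs later (on first login) and retries with backoff, which gives DNS time to become available.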
|
||||
Reference in New Issue
Block a user