# NOTE: recovered from a whitespace-collapsed patch; the original patch header is kept verbatim below.
# diff --git a/app/utils/oidc_metadata.py b/app/utils/oidc_metadata.py new file mode 100644 index 00000000..b7fbd5b3 --- /dev/null +++ b/app/utils/oidc_metadata.py @@ -0,0 +1,168 @@
"""
OIDC Metadata Fetcher Utility

Provides functions to fetch OIDC discovery documents with retry logic and an
optional DNS pre-check, so that a transient name-resolution failure does not
permanently break OIDC configuration.

Note: ``requests`` performs its HTTP I/O through urllib3, so it does not use a
different DNS resolver. The resilience offered here comes from the retry loop
and backoff, not from the HTTP client.
"""

import logging
import socket
import threading
import time
from typing import Any, Dict, Optional
from urllib.parse import urlparse

try:
    import requests
except ImportError:  # pragma: no cover - only hit when the dependency is missing
    # Keep the module importable; fetch_oidc_metadata() reports the problem
    # through its normal (None, error_message) return value instead.
    requests = None

logger = logging.getLogger(__name__)


def test_dns_resolution(hostname: str, timeout: int = 5) -> tuple[bool, Optional[str]]:
    """
    Test DNS resolution for a hostname using Python's socket library.

    The lookup uses ``socket.gethostbyname`` (IPv4 only), which has no timeout
    of its own, so it runs in a daemon thread that is abandoned once
    ``timeout`` seconds have elapsed.

    Args:
        hostname: The hostname to resolve
        timeout: DNS resolution timeout in seconds; ``None``, zero or a
            negative value waits for the resolver without a limit

    Returns:
        Tuple of (success: bool, error_message: Optional[str])
    """
    if not hostname:
        error_msg = "DNS resolution failed: no hostname provided"
        logger.warning(error_msg)
        return False, error_msg

    outcome: Dict[str, Any] = {}

    def _lookup() -> None:
        # Capture the result or the exception so the caller can classify it.
        try:
            outcome["ip"] = socket.gethostbyname(hostname)
        except Exception as exc:
            outcome["error"] = exc

    # Daemon thread: a resolver that never answers must not block shutdown.
    worker = threading.Thread(target=_lookup, name="oidc-dns-check", daemon=True)
    worker.start()
    worker.join(timeout if timeout and timeout > 0 else None)

    if worker.is_alive():
        error_msg = f"DNS resolution timed out for {hostname} after {timeout} seconds"
        logger.warning(error_msg)
        return False, error_msg

    error = outcome.get("error")
    if error is None:
        logger.debug("DNS resolution successful for %s: %s", hostname, outcome.get("ip"))
        return True, None

    if isinstance(error, socket.gaierror):
        error_msg = f"DNS resolution failed for {hostname}: {str(error)}"
        logger.warning(error_msg)
        return False, error_msg

    error_msg = f"Unexpected error during DNS resolution for {hostname}: {str(error)}"
    logger.error(error_msg)
    return False, error_msg


# This is a production helper, not a test case. Without this flag pytest
# collects it (because of the ``test_`` prefix) in any test module that
# imports it and then fails looking for a ``hostname`` fixture.
test_dns_resolution.__test__ = False


def fetch_oidc_metadata(
    issuer_url: str,
    max_retries: int = 3,
    retry_delay: int = 2,
    timeout: int = 10,
    use_dns_test: bool = True,
) -> tuple[Optional[Dict[str, Any]], Optional[str]]:
    """
    Fetch OIDC metadata from the discovery endpoint with retry logic.

    The discovery document is requested from
    ``<issuer_url>/.well-known/openid-configuration``. Timeouts, connection
    errors (including DNS failures) and unexpected errors are retried with
    exponential backoff; HTTP error statuses and invalid documents are not.

    Args:
        issuer_url: The OIDC issuer URL (e.g., https://auth.example.com)
        max_retries: Maximum number of attempts (default: 3); values below 1
            are treated as 1 so that at least one request is made
        retry_delay: Initial delay between attempts in seconds, doubled after
            each failed attempt (default: 2)
        timeout: Request timeout in seconds, also used for the DNS pre-check
            (default: 10)
        use_dns_test: Whether to test DNS resolution first (default: True)

    Returns:
        Tuple of (metadata_dict: Optional[Dict], error_message: Optional[str])
        Returns (None, error_message) on failure, (metadata, None) on success
    """
    # Parse and validate the issuer URL before doing any network work.
    try:
        parsed = urlparse(issuer_url)
        # .hostname strips userinfo, port and IPv6 brackets, unlike netloc.split(":").
        hostname = parsed.hostname
        if not parsed.scheme or not parsed.netloc or not hostname:
            return None, f"Invalid issuer URL format: {issuer_url}"
        if parsed.scheme not in ("http", "https"):
            # requests.get() cannot handle other schemes; fail fast instead of retrying.
            return None, f"Unsupported issuer URL scheme '{parsed.scheme}': {issuer_url}"

        metadata_url = f"{issuer_url.rstrip('/')}/.well-known/openid-configuration"
    except Exception as e:
        return None, f"Failed to parse issuer URL: {str(e)}"

    if requests is None:
        error_message = "Cannot fetch OIDC metadata: the 'requests' package is not installed"
        logger.error(error_message)
        return None, error_message

    # Advisory DNS pre-check: a failure is only logged, the fetch is still attempted.
    if use_dns_test:
        dns_success, _ = test_dns_resolution(hostname, timeout=timeout)
        if not dns_success:
            logger.warning(
                "DNS resolution test failed for %s, but will attempt metadata fetch anyway",
                hostname,
            )

    total_attempts = max(1, max_retries)
    last_error = None
    attempt = 0
    for attempt in range(1, total_attempts + 1):
        try:
            logger.info(
                "Fetching OIDC metadata from %s (attempt %d/%d)",
                metadata_url,
                attempt,
                total_attempts,
            )

            response = requests.get(metadata_url, timeout=timeout)
            response.raise_for_status()

            metadata = response.json()

            # Validate that we got a proper OIDC discovery document
            if not isinstance(metadata, dict):
                raise ValueError("Metadata response is not a JSON object")

            required_fields = ["issuer", "authorization_endpoint", "token_endpoint"]
            missing_fields = [field for field in required_fields if field not in metadata]
            if missing_fields:
                raise ValueError(
                    f"Missing required fields in metadata: {', '.join(missing_fields)}"
                )

            logger.info(
                "Successfully fetched OIDC metadata from %s (issuer: %s)",
                metadata_url,
                metadata.get("issuer"),
            )
            return metadata, None

        except requests.exceptions.Timeout as e:
            last_error = f"Timeout fetching metadata from {metadata_url}: {str(e)}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        except requests.exceptions.ConnectionError as e:
            # This often includes DNS resolution errors
            error_str = str(e)
            if "NameResolutionError" in error_str or "Failed to resolve" in error_str or "[Errno -2]" in error_str:
                last_error = (
                    f"DNS resolution failed for {hostname}: {error_str}. "
                    "This may occur when Python's DNS resolver cannot resolve the domain. "
                    "Try configuring DNS servers in Docker or using container names for internal services."
                )
            else:
                last_error = f"Connection error fetching metadata from {metadata_url}: {error_str}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        except requests.exceptions.HTTPError as e:
            last_error = f"HTTP error fetching metadata from {metadata_url}: {str(e)}"
            logger.warning("%s (attempt %d/%d)", last_error, attempt, total_attempts)
            # Don't retry on HTTP errors (4xx, 5xx) - they're unlikely to resolve
            break

        except ValueError as e:
            # Also covers a body that is not valid JSON.
            last_error = f"Invalid metadata response from {metadata_url}: {str(e)}"
            logger.error("%s (attempt %d/%d)", last_error, attempt, total_attempts)
            # Don't retry on validation errors
            break

        except Exception as e:
            last_error = f"Unexpected error fetching metadata from {metadata_url}: {str(e)}"
            logger.error("%s (attempt %d/%d)", last_error, attempt, total_attempts)

        # Wait before retrying (exponential backoff)
        if attempt < total_attempts:
            delay = retry_delay * (2 ** (attempt - 1))
            # %s rather than %d so a fractional delay is not truncated in the log.
            logger.info("Waiting %s seconds before retry...", delay)
            time.sleep(delay)

    # Report the number of attempts actually made; non-retryable errors stop early.
    attempt_word = "attempt" if attempt == 1 else "attempts"
    error_message = (
        f"Failed to fetch OIDC metadata after {attempt} {attempt_word}. "
        f"Last error: {last_error}"
    )
    logger.error(error_message)
    return None, error_message


# Remainder of the original patch line (start of docs/TROUBLESHOOTING_OIDC_DNS.md), kept verbatim; it continues on the following lines of this file.
# diff --git a/docs/TROUBLESHOOTING_OIDC_DNS.md b/docs/TROUBLESHOOTING_OIDC_DNS.md new file mode 100644 index 00000000..f90f736e --- /dev/null +++ b/docs/TROUBLESHOOTING_OIDC_DNS.md @@ -0,0 +1,250 @@ +# Troubleshooting OIDC DNS Resolution Errors + +## Problem Description + +When configuring OIDC (OpenID Connect) authentication, you may encounter DNS resolution errors during application startup, even though DNS resolution works correctly from the command line (e.g., `curl` or `ping`). + +### Common Error Messages + +``` +Error loading metadata: HTTPSConnectionPool(host='auth.example.com', port=443): +Max retries exceeded with url: /.well-known/openid-configuration +(Caused by NameResolutionError(": +Failed to resolve 'auth.example.com' ([Errno -2] Name or service not known)")) +``` + +### Why This Happens + +This issue occurs because Python's `urllib3` library (used by Authlib) may use a different DNS resolution mechanism than the system's DNS resolver. Even though: + +- System DNS resolution works (curl/ping succeed) +- Docker DNS configuration is correct +- Containers are on the same network + +Python's resolver may still fail to resolve the domain name. + +## Solutions + +### Solution 1: Configure DNS Servers in Docker/Portainer (Recommended) + +Explicitly configure DNS servers in your Docker Compose or Portainer stack configuration. + +#### For Docker Compose + +Add DNS configuration to your service: + +```yaml +services: + app: + image: ghcr.io/drytrix/timetracker:latest + dns: + - 8.8.8.8 # Google DNS + - 8.8.4.4 # Google DNS secondary + # OR use your internal DNS server + - 192.168.1.1 # Your router/internal DNS + # ... rest of configuration +``` + +#### For Portainer Stacks + +Edit your stack configuration and add DNS settings under the service definition: + +```yaml +services: + app: + # ... other configuration ...
+ dns: + - 8.8.8.8 + - 8.8.4.4 +``` + +After updating, restart the container/stack. + +### Solution 2: Use Docker Internal Networking + +If both your OIDC provider (e.g., Authentik) and TimeTracker are running on the same Docker network, you can use Docker's internal DNS resolution by using the container/service name instead of the external domain. + +#### Find Your OIDC Provider Container Name + +In Portainer, check your OIDC provider stack for the service name, or use: + +```bash +docker network inspect <network-name> +``` + +#### Update OIDC_ISSUER Environment Variable + +Instead of: +``` +OIDC_ISSUER=https://auth.example.com/application/o/time-tracker/ +``` + +Use: +``` +OIDC_ISSUER=https://authentik:9443/application/o/time-tracker/ +``` + +Replace `authentik` with your actual Authentik service/container name and `9443` with the internal port. + +**Note:** This only works for internal communication. External redirects (like OIDC callbacks) will still need the public domain. + +### Solution 3: Add extra_hosts Mapping + +Map the domain to an IP address in your Docker configuration. + +#### For Docker Compose + +```yaml +services: + app: + image: ghcr.io/drytrix/timetracker:latest + extra_hosts: + - "auth.example.com:192.168.1.100" # Replace with actual OIDC provider IP + # ... rest of configuration +``` + +#### For Portainer Stacks + +```yaml +services: + app: + # ... other configuration ... + extra_hosts: + - "auth.example.com:192.168.1.100" +``` + +#### To Find the IP Address + +```bash +# From within the TimeTracker container +docker exec -it timetracker-app ping -c 1 auth.example.com + +# Or from host +ping auth.example.com +``` + +### Solution 4: Use Lazy Metadata Loading (Automatic) + +TimeTracker now includes automatic lazy loading of OIDC metadata. If DNS resolution fails at startup, the application will: + +1. Start successfully (no blocking errors) +2. Store OIDC configuration for lazy loading +3. Attempt to fetch metadata on the first login attempt +4. 
Retry with exponential backoff if DNS resolution fails + +This means your application will start even if DNS isn't ready, and will automatically retry when a user attempts to log in. + +#### Configuration Options + +You can configure the retry behavior using environment variables: + +```bash +# Timeout for each metadata fetch attempt (default: 10 seconds) +OIDC_METADATA_FETCH_TIMEOUT=10 + +# Number of retry attempts (default: 3) +OIDC_METADATA_RETRY_ATTEMPTS=3 + +# Initial delay between retries in seconds, doubled after each failed attempt (default: 2) +OIDC_METADATA_RETRY_DELAY=2 +``` + +## Verification Steps + +### 1. Test DNS Resolution from Container + +Replace `<container-name>` with the name of your TimeTracker container: + +```bash +# Test DNS resolution using Python +docker exec -it <container-name> python -c "import socket; print(socket.gethostbyname('auth.example.com'))" + +# Test with curl +docker exec -it <container-name> curl -I https://auth.example.com/.well-known/openid-configuration +``` + +### 2. Check Application Logs + +Look for OIDC-related messages in your application logs: + +```bash +# If using Docker +docker logs <container-name> | grep -i oidc + +# Check for lazy loading messages +docker logs <container-name> | grep -i "lazy\|metadata" +``` + +### 3. Use the OIDC Debug Dashboard + +1. Log in as an administrator +2. Navigate to **Admin → OIDC Settings** +3. Click **Test Configuration** to verify connectivity +4. Review the metadata display to confirm successful connection + +### 4. Use the Guided Setup Wizard + +TimeTracker includes a guided OIDC setup wizard that: + +- Tests DNS resolution before configuration +- Validates metadata endpoint accessibility +- Provides troubleshooting tips if connection fails +- Generates correct configuration automatically + +Access it via **Admin → OIDC Setup Wizard** (if available). + +## Common Scenarios + +### Scenario 1: Both Services on Same Docker Network + +**Problem:** Authentik and TimeTracker are on the same Docker network but using external domains. + +**Solution:** Use Docker internal service names (Solution 2) or ensure both services can resolve each other's external domains. 
+ +### Scenario 2: DNS Not Ready at Startup + +**Problem:** DNS resolution works after container starts, but fails during startup. + +**Solution:** Use lazy loading (Solution 4) - this is automatic and requires no configuration. + +### Scenario 3: Custom DNS Server + +**Problem:** Using a custom internal DNS server that Python can't access. + +**Solution:** Configure explicit DNS servers (Solution 1) pointing to your DNS server. + +### Scenario 4: Reverse Proxy with Different Domain + +**Problem:** OIDC provider is behind a reverse proxy with a different domain. + +**Solution:** Ensure the reverse proxy domain is resolvable and use that domain in `OIDC_ISSUER`. + +## Still Having Issues? + +If none of the above solutions work: + +1. **Check Network Configuration**: Ensure containers are on the same network and can communicate +2. **Verify Firewall Rules**: Check if firewall is blocking DNS queries +3. **Review Provider Logs**: Check your OIDC provider logs for connection attempts +4. **Test from Host**: Verify DNS resolution works from the Docker host +5. **Check DNS Server**: Ensure your DNS server is responding correctly + +## Related Documentation + +- [OIDC Setup Guide](admin/configuration/OIDC_SETUP.md) - Complete OIDC configuration guide +- [Docker Compose Setup](admin/configuration/DOCKER_COMPOSE_SETUP.md) - Docker deployment guide + +## Technical Details + +### How Lazy Loading Works + +1. **At Startup**: If metadata fetch fails, TimeTracker stores OIDC configuration in app config +2. **On First Login**: When a user attempts OIDC login, the application: + - Checks if OIDC client exists + - If not, attempts to fetch metadata using the `requests` library (better DNS handling) + - Registers the OAuth client with fetched metadata + - Proceeds with normal OIDC flow + +3. 
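**Retry Logic**: Uses exponential backoff between attempts, doubling the delay after each failure (2s, then 4s with the default of 3 attempts; an 8s delay only occurs when 4 or more attempts are configured), with a configurable number of attempts + +### Why requests Library Works Better + +The `requests` library is itself built on `urllib3`, so it does not use a separate DNS resolver and will not by itself fix a name that cannot be resolved. The metadata fetcher typically succeeds where the startup fetch failed because it runs later (on the first login attempt) and retries with backoff, by which time DNS is usually available. TimeTracker's metadata fetcher uses `requests` for this retrying fetch.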