diff --git a/README.md b/README.md
index 7f7f2227..bcab5eec 100644
--- a/README.md
+++ b/README.md
@@ -709,7 +709,7 @@ docker-compose -f docker/docker-compose.remote.yml up -d
 TimeTracker includes an optional **server-side AI helper** for the web app and API clients.
 
 - **Enable**: set `AI_ENABLED=true`
-- **Ollama (default)**: set `AI_PROVIDER=ollama`, `AI_BASE_URL=http://127.0.0.1:11434`, `AI_MODEL=...`
+- **Ollama (default)**: set `AI_PROVIDER=ollama`, `AI_MODEL=...`, and `AI_BASE_URL` to `http://ollama:11434` when using the bundled stack in `docker-compose.yml`, or `http://127.0.0.1:11434` when Ollama runs on the host outside Docker.
 - **Hosted OpenAI-compatible**: set `AI_PROVIDER=openai_compatible` and `AI_API_KEY=...`
 
 The AI helper is exposed as:
@@ -717,6 +717,17 @@ The AI helper is exposed as:
 - Session web UI JSON: `POST /api/ai/chat` (same-origin, login required)
 - REST API v1: `POST /api/v1/ai/chat` (API token required, scopes `read:ai`/`write:ai`)
 
+### Bundled Ollama service (Docker Compose)
+
+`docker-compose.yml` ships a CPU-only `ollama` service plus a one-shot `ollama-init` sidecar that pulls the model defined by `AI_MODEL` (default `llama3.1`, ~4.7 GB) on first boot. The `app` service waits for the pull to finish before starting.
+
+- The app reaches it at `http://ollama:11434` over the Docker network — no host ports need to be opened.
+- Change the model by setting `AI_MODEL` in `.env` (e.g. `AI_MODEL=qwen2.5:3b` for lighter hardware) and re-running `docker compose up -d`; the init sidecar will pull the new model.
+- Pulled models are cached in the `ollama_data` named volume, so subsequent boots skip the download.
+- Verify in the UI: **Admin → System Settings → AI helper**, then click *Test connection*.
+- To pull additional models manually: `docker compose exec ollama ollama pull <model>`.
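+- To use a hosted provider instead, set `AI_PROVIDER=openai_compatible`, `AI_BASE_URL=https://api.your-provider.example/`, `AI_API_KEY=…` in `.env`; the bundled Ollama services can stay running, or be removed from `docker-compose.yml` together with the `ollama-init` entry under the `app` service's `depends_on`.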
+
 ### Encrypting stored secrets (recommended)
 
 To store sensitive settings (OAuth secrets, mail password, AI API key, Peppol token, 2FA secret) encrypted at rest, set:
diff --git a/app/services/llm_service.py b/app/services/llm_service.py
index 76b082ac..53e407bd 100644
--- a/app/services/llm_service.py
+++ b/app/services/llm_service.py
@@ -42,6 +42,7 @@ class AIProviderConfig:
     base_url: str
     model: str
     api_key: str
+    api_key_set: bool
     timeout_seconds: int
     context_limit: int
     system_prompt: str
@@ -58,7 +59,7 @@ class AIProviderConfig:
             "provider": self.provider,
             "base_url": self.base_url,
             "model": self.model,
-            "api_key_set": bool(self.api_key),
+            "api_key_set": self.api_key_set,
             "timeout_seconds": self.timeout_seconds,
             "context_limit": self.context_limit,
         }
diff --git a/docker-compose.yml b/docker-compose.yml
index b91f2ec6..dce47e8e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -89,6 +89,14 @@ services:
       - OTEL_METRICS_EXPORT_INTERVAL_MS=${OTEL_METRICS_EXPORT_INTERVAL_MS:-60000}
       - ENABLE_TELEMETRY=${ENABLE_TELEMETRY:-false}
       - TELE_SALT=${TELE_SALT:-8f4a7b2e9c1d6f3a5e8b4c7d2a9f6e3b1c8d5a7f2e9b4c6d3a8f5e1b7c4d9a2f}
+      # AI helper (Ollama in-cluster by default; override AI_PROVIDER for hosted providers)
+      - AI_ENABLED=${AI_ENABLED:-true}
+      - AI_PROVIDER=${AI_PROVIDER:-ollama}
+      - AI_BASE_URL=${AI_BASE_URL:-http://ollama:11434}
+      - AI_MODEL=${AI_MODEL:-llama3.1}
+      - AI_API_KEY=${AI_API_KEY:-}
+      - AI_TIMEOUT_SECONDS=${AI_TIMEOUT_SECONDS:-60}
+      - AI_CONTEXT_LIMIT=${AI_CONTEXT_LIMIT:-40}
 
     # Expose only internally; nginx publishes ports
     ports: []
@@ -99,6 +107,8 @@ services:
     depends_on:
       db:
         condition: service_healthy
+      ollama-init:
+        condition: service_completed_successfully
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "curl", "-f", "-s", "-o", "/dev/null", "http://localhost:8080/_health"]
@@ -107,6 +117,47 @@ services:
       retries: 3
       start_period: 40s
 
+  # Ollama - local LLM runtime for the TimeTracker AI helper
+  # The app talks to it via the OpenAI-compatible endpoint at /v1/chat/completions.
+  # The model defined by AI_MODEL (default llama3.1, ~4.7 GB) is pulled by the `ollama-init` service, not by this one.
+  ollama:
+    image: ollama/ollama:latest
+    container_name: timetracker-ollama
+    environment:
+      - OLLAMA_HOST=0.0.0.0:11434  # listen on all interfaces so other services can reach it as http://ollama:11434
+      - OLLAMA_KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-5m}  # how long a model stays loaded in memory
+    volumes:
+      - ollama_data:/root/.ollama  # named volume; keeps pulled models across container recreation
+    # Internal-only by default; uncomment to expose for host tools.
+    # ports:
+    #   - "11434:11434"
+    healthcheck:  # `ollama-init` waits for this check to pass (depends_on: service_healthy)
+      test: ["CMD-SHELL", "ollama list >/dev/null 2>&1 || exit 1"]
+      interval: 15s
+      timeout: 5s
+      retries: 10
+      start_period: 30s
+    restart: unless-stopped
+
+  # One-shot model puller; runs on each `up`. Skips the pull unless AI_PROVIDER=ollama, so a hosted model name cannot fail the pull and block `app`.
+  ollama-init:
+    image: ollama/ollama:latest
+    container_name: timetracker-ollama-init
+    depends_on:
+      ollama:
+        condition: service_healthy
+    environment:
+      - OLLAMA_HOST=http://ollama:11434  # point the CLI at the `ollama` service instead of a local server
+    entrypoint: ["/bin/sh","-c"]
+    command:  # single-dollar variables are filled in by Compose; double-dollar reaches the shell as one literal dollar
+      - |
+        set -e
+        MODEL="${AI_MODEL:-llama3.1}"
+        [ "${AI_PROVIDER:-ollama}" = "ollama" ] || { echo "AI_PROVIDER is not ollama; skipping pull of $$MODEL"; exit 0; }
+        echo "Pulling Ollama model: $$MODEL"
+        ollama pull "$$MODEL" && echo "Model ready: $$MODEL"
+    restart: "no"
+
   db:
     image: postgres:16-alpine
     container_name: timetracker-db
@@ -222,4 +273,6 @@ volumes:
   loki_data:
     driver: local
   redis_data:
+    driver: local
+  ollama_data:
     driver: local
\ No newline at end of file
diff --git a/docs/admin/configuration/DOCKER_COMPOSE_SETUP.md b/docs/admin/configuration/DOCKER_COMPOSE_SETUP.md
index 5d700bc4..81ceeb51 100644
--- a/docs/admin/configuration/DOCKER_COMPOSE_SETUP.md
+++ b/docs/admin/configuration/DOCKER_COMPOSE_SETUP.md
@@ -52,7 +52,8 @@ docker-compose logs app --tail=100
 ### 4) Optional services
 - Reverse proxy (HTTPS): See `docker-compose.yml` (services `certgen` and `nginx`).
   - **Note**: The `certgen` service is now self-contained and works with Portainer and other container orchestration tools without requiring host filesystem mounts.
-- Monitoring stack: Prometheus, Grafana, Loki, Promtail are available in `docker-compose.yml`.
+- Monitoring stack: Prometheus, Grafana, Loki, Promtail are available in `docker-compose.yml` (commented out by default; uncomment services to enable).
+- **Ollama (bundled LLM)**: The root `docker-compose.yml` includes `ollama` and a one-shot `ollama-init` container that pulls `AI_MODEL` into the `ollama_data` volume. The `app` service defaults to `AI_BASE_URL=http://ollama:11434` and waits for `ollama-init` to succeed before starting. Set `AI_ENABLED=false` in `.env` to turn off the in-app AI helper without removing the containers (the Ollama services still start, and `app` still waits for `ollama-init`). Details: [README.md](../../../README.md) (sections *AI Helper* and *Bundled Ollama service*).
 
 ---
 
@@ -148,6 +149,18 @@ All environment variables can be provided via `.env` and are consumed by the `ap
 - LOG_LEVEL: Default: `INFO`.
 - LOG_FILE: Default: `/data/logs/timetracker.log` or `/app/logs/timetracker.log` based on compose.
 
+### AI helper (optional)
+Used by the server-side AI helper (`/api/ai/*`, Admin → System Settings). In the root `docker-compose.yml`, defaults target the bundled `ollama` service.
+
+- AI_ENABLED: Enable the AI helper. Default in root compose: `true` (override with `false` if you do not want LLM calls).
+- AI_PROVIDER: `ollama` or `openai_compatible`. Default: `ollama`.
+- AI_BASE_URL: Provider base URL without a trailing path. Default in root compose: `http://ollama:11434` (Docker service name). For Ollama on the host: `http://127.0.0.1:11434`.
+- AI_MODEL: Model tag (e.g. `llama3.1`, `qwen2.5:3b`). Pulled automatically on startup by `ollama-init` when using the bundled stack.
+- AI_API_KEY: Required when `AI_PROVIDER=openai_compatible`. Empty for Ollama.
+- AI_TIMEOUT_SECONDS: HTTP timeout for provider requests. Default in root compose: `60`.
+- AI_CONTEXT_LIMIT: Max recent time entries included in context. Default: `40`.
+- OLLAMA_KEEP_ALIVE: Passed to the `ollama` service (how long models stay loaded). Default: `5m`.
+
 ### Analytics & Telemetry (optional)
 - SENTRY_DSN: Sentry DSN (empty by default).
 - SENTRY_TRACES_RATE: 0.0–1.0 sampling rate. Default: `0.0`.
diff --git a/env.example b/env.example
index 3a9eefc9..7d6e4be4 100644
--- a/env.example
+++ b/env.example
@@ -40,13 +40,15 @@ IDLE_TIMEOUT_MINUTES=30
 # SMART_NOTIFY_SCHEDULER_SLOT_MINUTES=30
 
 # AI helper (server-side; API keys are never sent to clients)
-# AI_ENABLED=false
-# AI_PROVIDER=ollama
-# AI_BASE_URL=http://127.0.0.1:11434
-# AI_MODEL=llama3.1
-# AI_API_KEY=
-# AI_TIMEOUT_SECONDS=30
-# AI_CONTEXT_LIMIT=40
+# Defaults below match the bundled Ollama service in docker-compose.yml.
+AI_ENABLED=true
+AI_PROVIDER=ollama
+AI_BASE_URL=http://ollama:11434
+AI_MODEL=llama3.1
+# AI_API_KEY=          # only needed when AI_PROVIDER=openai_compatible
+AI_TIMEOUT_SECONDS=60
+AI_CONTEXT_LIMIT=40
+# OLLAMA_KEEP_ALIVE=5m  # how long Ollama keeps the model resident in memory
 
 # API token rate limits (per token; Redis recommended for multi-worker)
 # API_TOKEN_RATE_LIMIT_PER_MINUTE=100