From 98e5291afc0c75697e253d893f264f2dcfd1e456 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 22 Jul 2025 16:31:04 +0200
Subject: [PATCH] feat: refactor build process, drop embedded backends (#5875)
* feat: split remaining backends and drop embedded backends
- Drop silero-vad, huggingface, and stores backend from embedded
binaries
- Refactor Makefile and Dockerfile to avoid building grpc backends
- Drop golang code that was used to embed backends
- Simplify building by using goreleaser
Signed-off-by: Ettore Di Giacinto
* chore(gallery): be specific with llama-cpp backend templates
Signed-off-by: Ettore Di Giacinto
* chore(docs): update
Signed-off-by: Ettore Di Giacinto
* chore(ci): minor fixes
Signed-off-by: Ettore Di Giacinto
* chore: drop all ffmpeg references
Signed-off-by: Ettore Di Giacinto
* fix: run protogen-go
Signed-off-by: Ettore Di Giacinto
* Always enable p2p mode
Signed-off-by: Ettore Di Giacinto
* Update goreleaser file
Signed-off-by: Ettore Di Giacinto
* fix(stores): do not always load
Signed-off-by: Ettore Di Giacinto
* Fix linting issues
Signed-off-by: Ettore Di Giacinto
* Simplify
Signed-off-by: Ettore Di Giacinto
* Mac OS fixup
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
.devcontainer-scripts/poststart.sh | 3 -
.devcontainer/docker-compose-devcontainer.yml | 3 -
.env | 7 -
.github/workflows/backend.yml | 169 +++++---
.github/workflows/build-test.yaml | 23 +
.github/workflows/deploy-explorer.yaml | 2 +-
.github/workflows/image-pr.yml | 11 +-
.github/workflows/image.yml | 11 -
.github/workflows/image_build.yml | 6 -
.github/workflows/notify-models.yaml | 2 +-
.github/workflows/release.yaml | 399 +-----------------
.github/workflows/test.yml | 9 +-
.gitignore | 2 +
.goreleaser.yaml | 33 ++
.vscode/launch.json | 2 +-
Dockerfile | 10 +-
Earthfile | 5 -
Makefile | 221 ++++------
assets.go | 15 -
backend/cpp/llama-cpp/run.sh | 2 +-
backend/go/huggingface/Makefile | 9 +
.../langchain => huggingface}/langchain.go | 0
.../go/{llm/langchain => huggingface}/main.go | 0
backend/go/huggingface/package.sh | 12 +
backend/go/huggingface/run.sh | 6 +
backend/go/local-store/Makefile | 9 +
backend/go/{stores => local-store}/debug.go | 0
backend/go/{stores => local-store}/main.go | 0
backend/go/local-store/package.sh | 12 +
.../go/{stores => local-store}/production.go | 0
backend/go/local-store/run.sh | 6 +
backend/go/{stores => local-store}/store.go | 6 +-
backend/go/silero-vad/Makefile | 47 +++
backend/go/{vad/silero => silero-vad}/main.go | 0
backend/go/silero-vad/package.sh | 53 +++
backend/go/silero-vad/run.sh | 14 +
backend/go/{vad/silero => silero-vad}/vad.go | 0
backend/index.yaml | 50 ++-
core/application/startup.go | 19 -
core/backend/options.go | 1 -
core/backend/stores.go | 10 +-
core/cli/context/context.go | 7 -
core/cli/run.go | 5 -
core/cli/soundgeneration.go | 5 +-
core/cli/transcript.go | 18 +-
core/cli/tts.go | 16 +-
core/cli/worker/worker.go | 2 +-
core/cli/worker/worker_llamacpp.go | 11 +-
core/cli/worker/worker_nop2p.go | 16 -
core/cli/worker/worker_p2p.go | 15 +-
core/config/application_config.go | 23 -
core/gallery/models.go | 3 +-
core/http/app_test.go | 31 +-
core/http/endpoints/localai/stores.go | 8 +-
core/http/endpoints/localai/system.go | 5 +-
core/http/endpoints/localai/welcome.go | 2 -
core/http/routes/localai.go | 7 +-
core/http/routes/ui.go | 69 ++-
core/http/routes/ui_backend_gallery.go | 2 -
core/http/routes/ui_gallery.go | 5 +-
core/http/views/explorer.html | 2 +-
core/http/views/p2p.html | 4 +-
core/http/views/partials/navbar.html | 4 -
core/p2p/federated_server.go | 3 -
core/p2p/p2p.go | 7 -
core/p2p/p2p_disabled.go | 35 --
core/schema/localai.go | 7 +
docker-compose.yaml | 2 +-
docs/content/docs/advanced/advanced-usage.md | 28 +-
.../content/docs/features/GPU-acceleration.md | 8 +-
docs/content/docs/features/embeddings.md | 1 -
.../docs/getting-started/container-images.md | 2 -
.../docs/getting-started/customize-model.md | 4 +-
gallery/alpaca.yaml | 1 +
gallery/arch-function.yaml | 1 +
gallery/chatml-hercules.yaml | 1 +
gallery/chatml.yaml | 1 +
gallery/command-r.yaml | 1 +
gallery/deephermes.yaml | 1 +
gallery/deepseek-r1.yaml | 1 +
gallery/deepseek.yaml | 1 +
gallery/falcon3.yaml | 1 +
gallery/gemma.yaml | 1 +
gallery/granite.yaml | 1 +
gallery/granite3-2.yaml | 1 +
gallery/hermes-2-pro-mistral.yaml | 1 +
gallery/llama3-instruct.yaml | 1 +
gallery/llama3.1-instruct-grammar.yaml | 1 +
gallery/llama3.1-instruct.yaml | 1 +
gallery/llama3.1-reflective.yaml | 1 +
gallery/llama3.2-fcall.yaml | 1 +
gallery/llama3.2-quantized.yaml | 1 +
gallery/mathstral.yaml | 1 +
gallery/mistral-0.3.yaml | 1 +
gallery/moondream.yaml | 1 +
gallery/mudler.yaml | 1 +
gallery/phi-2-chat.yaml | 1 +
gallery/phi-2-orange.yaml | 1 +
gallery/phi-3-chat.yaml | 1 +
gallery/phi-4-chat-fcall.yaml | 1 +
gallery/phi-4-chat.yaml | 1 +
gallery/qwen-fcall.yaml | 1 +
gallery/qwen3-openbuddy.yaml | 1 +
gallery/qwen3.yaml | 1 +
gallery/rwkv.yaml | 1 +
gallery/smolvlm.yaml | 1 +
gallery/tuluv2.yaml | 1 +
gallery/vllm.yaml | 1 +
gallery/wizardlm2.yaml | 1 +
go.mod | 21 +-
go.sum | 39 --
main.go | 3 -
pkg/assets/extract.go | 64 ---
pkg/assets/list.go | 27 --
pkg/library/dynaload.go | 86 ----
pkg/model/initializers.go | 128 +-----
pkg/model/loader_options.go | 7 -
tests/integration/stores_test.go | 13 -
118 files changed, 631 insertions(+), 1339 deletions(-)
create mode 100644 .github/workflows/build-test.yaml
create mode 100644 .goreleaser.yaml
delete mode 100644 Earthfile
delete mode 100644 assets.go
create mode 100644 backend/go/huggingface/Makefile
rename backend/go/{llm/langchain => huggingface}/langchain.go (100%)
rename backend/go/{llm/langchain => huggingface}/main.go (100%)
create mode 100755 backend/go/huggingface/package.sh
create mode 100755 backend/go/huggingface/run.sh
create mode 100644 backend/go/local-store/Makefile
rename backend/go/{stores => local-store}/debug.go (100%)
rename backend/go/{stores => local-store}/main.go (100%)
create mode 100755 backend/go/local-store/package.sh
rename backend/go/{stores => local-store}/production.go (100%)
create mode 100755 backend/go/local-store/run.sh
rename backend/go/{stores => local-store}/store.go (99%)
create mode 100644 backend/go/silero-vad/Makefile
rename backend/go/{vad/silero => silero-vad}/main.go (100%)
create mode 100755 backend/go/silero-vad/package.sh
create mode 100755 backend/go/silero-vad/run.sh
rename backend/go/{vad/silero => silero-vad}/vad.go (100%)
delete mode 100644 core/cli/worker/worker_nop2p.go
delete mode 100644 core/p2p/p2p_disabled.go
delete mode 100644 pkg/assets/extract.go
delete mode 100644 pkg/assets/list.go
delete mode 100644 pkg/library/dynaload.go
diff --git a/.devcontainer-scripts/poststart.sh b/.devcontainer-scripts/poststart.sh
index 196e821db..7e65b4c7f 100644
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@@ -2,9 +2,6 @@
cd /workspace
-# Grab the pre-stashed backend assets to avoid build issues
-cp -r /build/backend-assets /workspace/backend-assets
-
# Ensures generated source files are present upon load
make prepare
diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml
index 65e9b5c1b..81610ade5 100644
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -4,9 +4,6 @@ services:
context: ..
dockerfile: Dockerfile
target: devcontainer
- args:
- - FFMPEG=true
- - GO_TAGS=p2p tts
env_file:
- ../.env
ports:
diff --git a/.env b/.env
index b0d1a2ad2..53d796bc1 100644
--- a/.env
+++ b/.env
@@ -41,13 +41,6 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
-## Enable go tags, available: p2p, tts
-## p2p: enable distributed inferencing
-## tts: enables text-to-speech with go-piper
-## (requires REBUILD=true)
-#
-# GO_TAGS=p2p
-
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images
diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index cf7536e81..fe08deb2c 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -43,7 +43,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -55,7 +55,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -67,7 +67,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -79,7 +79,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -91,7 +91,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -104,7 +104,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -116,7 +116,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -128,7 +128,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -140,7 +140,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -152,7 +152,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -165,7 +165,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -177,7 +177,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -189,7 +189,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -201,7 +201,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -213,7 +213,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -226,7 +226,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -238,7 +238,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -250,7 +250,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -262,7 +262,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -274,7 +274,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -287,7 +287,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -299,7 +299,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -311,7 +311,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -323,7 +323,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -335,7 +335,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -348,7 +348,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -360,7 +360,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -372,7 +372,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -384,7 +384,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -397,7 +397,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -409,7 +409,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -421,7 +421,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -433,7 +433,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -445,7 +445,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -457,7 +457,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -469,7 +469,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -481,7 +481,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -493,7 +493,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -506,7 +506,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -518,7 +518,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -530,7 +530,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -542,7 +542,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -554,7 +554,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -566,7 +566,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -578,7 +578,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -590,7 +590,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -603,7 +603,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-piper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -616,7 +616,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -628,7 +628,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-cpu-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -652,7 +652,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -665,7 +665,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-cpu-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -677,7 +677,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -689,7 +689,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -701,7 +701,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -713,7 +713,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -725,7 +725,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -749,8 +749,8 @@ jobs:
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
- platforms: 'linux/amd64'
- tag-latest: 'true'
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -762,7 +762,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -774,7 +774,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -786,7 +786,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -798,7 +798,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -810,7 +810,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -842,6 +842,45 @@ jobs:
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
+ #silero-vad
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-cpu-silero-vad'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "silero-vad"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
+ # local-store
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-cpu-local-store'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "local-store"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
+ # huggingface
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-huggingface'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "huggingface"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@@ -866,7 +905,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
@@ -954,7 +993,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
export PLATFORMARCH=darwin/amd64
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
diff --git a/.github/workflows/build-test.yaml b/.github/workflows/build-test.yaml
new file mode 100644
index 000000000..095b41822
--- /dev/null
+++ b/.github/workflows/build-test.yaml
@@ -0,0 +1,23 @@
+name: Build test
+
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+
+jobs:
+ build-test:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ - name: Set up Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: 1.23
+ - name: Run GoReleaser
+ run: |
+ make dev-dist
diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml
index 9a6d729d9..c2063247f 100644
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -31,7 +31,7 @@ jobs:
make protogen-go
- name: Build api
run: |
- CGO_ENABLED=0 make build-api
+ CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.2
with:
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 0a3ed2708..262412237 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -14,7 +14,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,8 +39,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
- ffmpeg: 'true'
+ tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
@@ -49,7 +47,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
- ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -59,15 +56,13 @@ jobs:
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
- tag-suffix: 'sycl-f16-ffmpeg'
- ffmpeg: 'true'
+ tag-suffix: 'sycl-f16'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-vulkan-ffmpeg-core'
- ffmpeg: 'true'
+ tag-suffix: '-vulkan-core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 7339038c3..f97fda6a5 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -18,7 +18,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,7 +39,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
- ffmpeg: 'true'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -52,7 +50,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -76,7 +73,6 @@ jobs:
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: ''
- ffmpeg: 'true'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
@@ -88,7 +84,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
@@ -100,7 +95,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -110,7 +104,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-vulkan'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -122,7 +115,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f16'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f16"
@@ -132,7 +124,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
@@ -142,7 +133,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -167,7 +157,6 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
- ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index fe021823b..4e1e19c42 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -37,10 +37,6 @@ on:
description: 'Tag suffix'
default: ''
type: string
- ffmpeg:
- description: 'FFMPEG'
- default: ''
- type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
@@ -236,7 +232,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
- FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@@ -264,7 +259,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
- FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml
index f54b4852d..56846cc86 100644
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -96,7 +96,7 @@ jobs:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
- docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
+ docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 366f330ed..96495a1bf 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,399 +1,26 @@
-name: Build and Release
+name: goreleaser
on:
push:
- branches:
- - master
tags:
- 'v*'
- pull_request:
-
-env:
- GRPC_VERSION: v1.65.0
-
-permissions:
- contents: write
-
-concurrency:
- group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
- cancel-in-progress: true
jobs:
-
- # TODO: temporary disable linux-arm64 build
- # build-linux-arm:
- # runs-on: ubuntu-24.04-arm
- # steps:
- # - name: Free Disk Space (Ubuntu)
- # uses: jlumbroso/free-disk-space@main
- # with:
- # # this might remove tools that are actually needed,
- # # if set to "true" but frees about 6 GB
- # tool-cache: true
- # # all of these default to true, but feel free to set to
- # # "false" if necessary for your workflow
- # android: true
- # dotnet: true
- # haskell: true
- # large-packages: true
- # docker-images: true
- # swap-storage: true
-
- # - name: Release space from worker
- # run: |
- # echo "Listing top largest packages"
- # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- # head -n 30 <<< "${pkgs}"
- # echo
- # df -h
- # echo
- # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
- # sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
- # sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
- # sudo rm -rf /usr/local/lib/android
- # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
- # sudo rm -rf /usr/share/dotnet
- # sudo apt-get remove -y '^mono-.*' || true
- # sudo apt-get remove -y '^ghc-.*' || true
- # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
- # sudo apt-get remove -y 'php.*' || true
- # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
- # sudo apt-get remove -y '^google-.*' || true
- # sudo apt-get remove -y azure-cli || true
- # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
- # sudo apt-get remove -y '^gfortran-.*' || true
- # sudo apt-get remove -y microsoft-edge-stable || true
- # sudo apt-get remove -y firefox || true
- # sudo apt-get remove -y powershell || true
- # sudo apt-get remove -y r-base-core || true
- # sudo apt-get autoremove -y
- # sudo apt-get clean
- # echo
- # echo "Listing top largest packages"
- # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- # head -n 30 <<< "${pkgs}"
- # echo
- # sudo rm -rfv build || true
- # sudo rm -rf /usr/share/dotnet || true
- # sudo rm -rf /opt/ghc || true
- # sudo rm -rf "/usr/local/share/boost" || true
- # sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
- # df -h
-
- # - name: Force Install GIT latest
- # run: |
- # sudo apt-get update \
- # && sudo apt-get install -y software-properties-common \
- # && sudo apt-get update \
- # && sudo add-apt-repository -y ppa:git-core/ppa \
- # && sudo apt-get update \
- # && sudo apt-get install -y git
- # - name: Clone
- # uses: actions/checkout@v4
- # with:
- # submodules: true
- # - uses: actions/setup-go@v5
- # with:
- # go-version: '1.21.x'
- # cache: false
- # - name: Dependencies
- # run: |
- # sudo apt-get update
- # sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
- # make install-go-tools
- # - name: Install CUDA Dependencies
- # run: |
- # curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
- # sudo dpkg -i cuda-keyring_1.1-1_all.deb
- # sudo apt-get update
- # sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
- # env:
- # CUDA_VERSION: 12-5
- # - name: Cache grpc
- # id: cache-grpc
- # uses: actions/cache@v4
- # with:
- # path: grpc
- # key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
- # - name: Build grpc
- # if: steps.cache-grpc.outputs.cache-hit != 'true'
- # run: |
- # git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- # cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
- # cd cmake/build && cmake -DgRPC_INSTALL=ON \
- # -DgRPC_BUILD_TESTS=OFF \
- # ../.. && sudo make --jobs 5 --output-sync=target
- # - name: Install gRPC
- # run: |
- # cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- # # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- # - name: Build
- # id: build
- # run: |
- # go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- # go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- # export PATH=$PATH:$GOPATH/bin
- # export PATH=/usr/local/cuda/bin:$PATH
- # sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
- # BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
- # make -j4 dist
- # - uses: actions/upload-artifact@v4
- # with:
- # name: LocalAI-linux-arm64
- # path: release/
- # - name: Release
- # uses: softprops/action-gh-release@v2
- # if: startsWith(github.ref, 'refs/tags/')
- # with:
- # files: |
- # release/*
- # - name: Setup tmate session if tests fail
- # if: ${{ failure() }}
- # uses: mxschmitt/action-tmate@v3.22
- # with:
- # detached: true
- # connect-timeout-seconds: 180
- # limit-access-to-actor: true
- build-linux:
+ goreleaser:
runs-on: ubuntu-latest
steps:
- - name: Free Disk Space (Ubuntu)
- uses: jlumbroso/free-disk-space@main
- with:
- # this might remove tools that are actually needed,
- # if set to "true" but frees about 6 GB
- tool-cache: true
- # all of these default to true, but feel free to set to
- # "false" if necessary for your workflow
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- docker-images: true
- swap-storage: true
-
- - name: Release space from worker
- run: |
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- df -h
- echo
- sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
- sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
- sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
- sudo rm -rf /usr/local/lib/android
- sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
- sudo rm -rf /usr/share/dotnet
- sudo apt-get remove -y '^mono-.*' || true
- sudo apt-get remove -y '^ghc-.*' || true
- sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
- sudo apt-get remove -y 'php.*' || true
- sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
- sudo apt-get remove -y '^google-.*' || true
- sudo apt-get remove -y azure-cli || true
- sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
- sudo apt-get remove -y '^gfortran-.*' || true
- sudo apt-get remove -y microsoft-edge-stable || true
- sudo apt-get remove -y firefox || true
- sudo apt-get remove -y powershell || true
- sudo apt-get remove -y r-base-core || true
- sudo apt-get autoremove -y
- sudo apt-get clean
- echo
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- sudo rm -rfv build || true
- sudo rm -rf /usr/share/dotnet || true
- sudo rm -rf /opt/ghc || true
- sudo rm -rf "/usr/local/share/boost" || true
- sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
- df -h
-
- - name: Force Install GIT latest
- run: |
- sudo apt-get update \
- && sudo apt-get install -y software-properties-common \
- && sudo apt-get update \
- && sudo add-apt-repository -y ppa:git-core/ppa \
- && sudo apt-get update \
- && sudo apt-get install -y git
- - name: Clone
+ - name: Checkout
uses: actions/checkout@v4
with:
- submodules: true
- - uses: actions/setup-go@v5
+ fetch-depth: 0
+ - name: Set up Go
+ uses: actions/setup-go@v5
with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
- make install-go-tools
- - name: Intel Dependencies
- run: |
- wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
- echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
- sudo apt update
- sudo apt install -y intel-basekit
- - name: Install CUDA Dependencies
- run: |
- curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
- sudo dpkg -i cuda-keyring_1.1-1_all.deb
- sudo apt-get update
- sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+          go-version: "1.23"
+ - name: Run GoReleaser
+ uses: goreleaser/goreleaser-action@v6
+ with:
+ version: v2.11.0
+ args: release --clean
env:
- CUDA_VERSION: 12-5
- - name: "Install Hipblas"
- env:
- ROCM_VERSION: "6.1"
- AMDGPU_VERSION: "6.1"
- run: |
- set -ex
-
- sudo apt-get update
- sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
-
- sudo apt update
- wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
- sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
- sudo apt update
-
- sudo amdgpu-install --usecase=rocm
-
- sudo apt-get clean
- sudo rm -rf /var/lib/apt/lists/*
- sudo ldconfig
- - name: Cache grpc
- id: cache-grpc
- uses: actions/cache@v4
- with:
- path: grpc
- key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- - name: Build grpc
- if: steps.cache-grpc.outputs.cache-hit != 'true'
- run: |
- git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
- cd cmake/build && cmake -DgRPC_INSTALL=ON \
- -DgRPC_BUILD_TESTS=OFF \
- ../.. && sudo make --jobs 5 --output-sync=target
- - name: Install gRPC
- run: |
- cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- - name: Build
- id: build
- run: |
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- export PATH=$PATH:$GOPATH/bin
- export PATH=/usr/local/cuda/bin:$PATH
- export PATH=/opt/rocm/bin:$PATH
- source /opt/intel/oneapi/setvars.sh
- sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
- make -j4 dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-linux
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
-
-
- build-macOS-x86_64:
- runs-on: macos-13
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- brew install protobuf grpc
- make install-go-tools
- - name: Build
- id: build
- run: |
- export C_INCLUDE_PATH=/usr/local/include
- export CPLUS_INCLUDE_PATH=/usr/local/include
- export PATH=$PATH:$GOPATH/bin
- export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
- make dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-MacOS-x86_64
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
-
- build-macOS-arm64:
- runs-on: macos-14
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- brew install protobuf grpc libomp llvm
- make install-go-tools
- - name: Build
- id: build
- run: |
- export C_INCLUDE_PATH=/usr/local/include
- export CPLUS_INCLUDE_PATH=/usr/local/include
- export PATH=$PATH:$GOPATH/bin
- export CC=/opt/homebrew/opt/llvm/bin/clang
- make dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-MacOS-arm64
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index cc6ef333d..8a3f89871 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -75,7 +75,6 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Dependencies
run: |
@@ -103,7 +102,7 @@ jobs:
make -C backend/python/transformers
- make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
+ make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@@ -164,11 +163,10 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
- PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+ PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
@@ -199,11 +197,10 @@ jobs:
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
- go install github.com/GeertJohan/go.rice/rice@latest
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
diff --git a/.gitignore b/.gitignore
index 1f160c8ce..f6d83447e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ prepare-sources
/backend/cpp/llama-*
!backend/cpp/llama-cpp
/backends
+/backend-images
+/result.yaml
*.log
diff --git a/.goreleaser.yaml b/.goreleaser.yaml
new file mode 100644
index 000000000..5bd6aa0bc
--- /dev/null
+++ b/.goreleaser.yaml
@@ -0,0 +1,33 @@
+version: 2
+before:
+ hooks:
+ - make protogen-go
+ - go mod tidy
+dist: release
+source:
+ enabled: true
+ name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
+builds:
+ -
+ env:
+ - CGO_ENABLED=0
+ ldflags:
+ - -s -w
+ - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
+ - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
+ goos:
+ - linux
+ - darwin
+ #- windows
+ goarch:
+ - amd64
+ - arm64
+archives:
+ - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
+ name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
+checksum:
+ name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
+snapshot:
+ version_template: "{{ .Tag }}-next"
+changelog:
+ use: github-native
diff --git a/.vscode/launch.json b/.vscode/launch.json
index f5e91508e..55da767b4 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
- "buildFlags": ["-tags", "p2p tts", "-v"],
+ "buildFlags": ["-tags", "", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}
diff --git a/Dockerfile b/Dockerfile
index 91e8aea5a..4e8e29cb2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -142,10 +142,9 @@ EOT
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
-# Install grpc compilers and rice
+# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
- go install github.com/GeertJohan/go.rice/rice@latest
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@@ -194,7 +193,7 @@ RUN apt-get update && \
FROM build-requirements AS builder-base
-ARG GO_TAGS="p2p"
+ARG GO_TAGS=""
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
@@ -249,8 +248,7 @@ COPY ./pkg/utils ./pkg/utils
COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
-RUN make backend-assets
-RUN make grpcs
+RUN make protogen-go
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
diff --git a/Earthfile b/Earthfile
deleted file mode 100644
index 218768c9a..000000000
--- a/Earthfile
+++ /dev/null
@@ -1,5 +0,0 @@
-VERSION 0.7
-
-build:
- FROM DOCKERFILE -f Dockerfile .
- SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
diff --git a/Makefile b/Makefile
index 6f2be04d7..6f7f4fcb2 100644
--- a/Makefile
+++ b/Makefile
@@ -3,9 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
+GORELEASER?=
export BUILD_TYPE?=
@@ -35,77 +33,33 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
-UPX?=
-# check if upx exists
-ifeq (, $(shell which upx))
- UPX=
-else
- UPX=$(shell which upx)
-endif
-
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
- ONNX_ARCH=aarch64
-endif
ifeq ($(OS),Darwin)
- ONNX_OS=osx
- ifneq (,$(findstring aarch64,$(shell uname -m)))
- ONNX_ARCH=arm64
- else ifneq (,$(findstring arm64,$(shell uname -m)))
- ONNX_ARCH=arm64
- else
- ONNX_ARCH=x86_64
- endif
-
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
endif
-ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
-ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
-# Use filter-out to remove the specified backends
-ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
+# check if goreleaser exists
+ifeq (, $(shell which goreleaser))
+ GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
+else
+ GORELEASER=$(shell which goreleaser)
+endif
-GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
-# If empty, then we build all
-ifeq ($(GRPC_BACKENDS),)
- GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
-endif
-
-ifeq ($(BUILD_API_ONLY),true)
- GRPC_BACKENDS=
-endif
.PHONY: all test build vendor
all: help
-sources/onnxruntime:
- mkdir -p sources/onnxruntime
- curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
- cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
- cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
- cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
- mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
- mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
@@ -116,58 +70,33 @@ clean: ## Remove build related file
rm -f prepare
rm -rf $(BINARY_NAME)
rm -rf release/
- rm -rf backend-assets/*
- $(MAKE) -C backend/cpp/grpc clean
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
clean-tests:
rm -rf test-models
rm -rf test-dir
- rm -rf core/http/backend-assets
-
-clean-dc: clean
- cp -r /build/backend-assets /workspace/backend-assets
## Install Go tools
install-go-tools:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- go install github.com/GeertJohan/go.rice/rice@latest
## Build:
-build: backend-assets grpcs install-go-tools ## Build the project
+build: protogen-go install-go-tools ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
-ifneq ($(BACKEND_LIBS),)
- $(MAKE) backend-assets/lib
- cp -f $(BACKEND_LIBS) backend-assets/lib/
-endif
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
- rice append --exec $(BINARY_NAME)
-build-api:
- BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
-
-backend-assets/lib:
- mkdir -p backend-assets/lib
+dev-dist:
+ $(GORELEASER) build --snapshot --clean
dist:
- GO_TAGS="p2p" $(MAKE) build
- GO_TAGS="p2p" STATIC=true $(MAKE) build
- mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
- cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
- shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
-else
- cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
- shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
-endif
+ $(GORELEASER) build --clean
osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@@ -185,8 +114,7 @@ test-models/testmodel.ggml:
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
cp tests/models_fixtures/* test-models
-prepare-test: grpcs
- cp -rf backend-assets core/http
+prepare-test: protogen-go
cp tests/models_fixtures/* test-models
########################################################
@@ -194,7 +122,7 @@ prepare-test: grpcs
########################################################
## Test targets
-test: test-models/testmodel.ggml grpcs
+test: test-models/testmodel.ggml protogen-go
@echo 'Running tests'
export GO_TAGS="debug"
$(MAKE) prepare-test
@@ -204,17 +132,26 @@ test: test-models/testmodel.ggml grpcs
$(MAKE) test-tts
$(MAKE) test-stablediffusion
-backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api
+backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
-backends/piper: docker-build-piper docker-save-piper build-api
+backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
-backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
+backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
-backends/whisper: docker-build-whisper docker-save-whisper build-api
+backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
+
+backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
+
+backends/local-store: docker-build-local-store docker-save-local-store build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
+
+backends/huggingface: docker-build-huggingface docker-save-huggingface build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
########################################################
## AIO tests
@@ -243,7 +180,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
- docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
+ docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -275,9 +212,7 @@ test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-test-stores: backend-assets/grpc/local-store
- mkdir -p tests/integration/backend-assets/grpc
- cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
+test-stores:
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
test-container:
@@ -310,10 +245,42 @@ protogen: protogen-go protogen-python
.PHONY: protogen-clean
protogen-clean: protogen-go-clean protogen-python-clean
+protoc:
+ @OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
+ ARCH_NAME=$$(uname -m); \
+ if [ "$$OS_NAME" = "darwin" ]; then \
+ if [ "$$ARCH_NAME" = "arm64" ]; then \
+ FILE=protoc-31.1-osx-aarch_64.zip; \
+ elif [ "$$ARCH_NAME" = "x86_64" ]; then \
+ FILE=protoc-31.1-osx-x86_64.zip; \
+ else \
+ echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
+ fi; \
+ elif [ "$$OS_NAME" = "linux" ]; then \
+ if [ "$$ARCH_NAME" = "x86_64" ]; then \
+ FILE=protoc-31.1-linux-x86_64.zip; \
+ elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
+ FILE=protoc-31.1-linux-aarch_64.zip; \
+ elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
+ FILE=protoc-31.1-linux-ppcle_64.zip; \
+ elif [ "$$ARCH_NAME" = "s390x" ]; then \
+ FILE=protoc-31.1-linux-s390_64.zip; \
+ elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
+ FILE=protoc-31.1-linux-x86_32.zip; \
+ else \
+ echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
+ fi; \
+ else \
+ echo "Unsupported OS: $$OS_NAME"; exit 1; \
+ fi; \
+ URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
+ curl -L -s $$URL -o protoc.zip && \
+ unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
+
.PHONY: protogen-go
-protogen-go: install-go-tools
+protogen-go: protoc install-go-tools
mkdir -p pkg/grpc/proto
- protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
+ ./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
.PHONY: protogen-go-clean
@@ -407,19 +374,6 @@ vllm-protogen:
vllm-protogen-clean:
$(MAKE) -C backend/python/vllm protogen-clean
-## GRPC
-# Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments: protogen-python
- $(MAKE) -C backend/python/bark
- $(MAKE) -C backend/python/coqui
- $(MAKE) -C backend/python/diffusers
- $(MAKE) -C backend/python/chatterbox
- $(MAKE) -C backend/python/faster-whisper
- $(MAKE) -C backend/python/vllm
- $(MAKE) -C backend/python/rerankers
- $(MAKE) -C backend/python/transformers
- $(MAKE) -C backend/python/kokoro
- $(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
@@ -433,37 +387,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
-backend-assets:
- mkdir -p backend-assets
-ifeq ($(BUILD_API_ONLY),true)
- touch backend-assets/keep
-endif
-
-
-backend-assets/grpc:
- mkdir -p backend-assets/grpc
-
-backend-assets/grpc/huggingface: protogen-go backend-assets/grpc
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/huggingface
-endif
-
-backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
- CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/silero-vad
-endif
-
-backend-assets/grpc/local-store: backend-assets/grpc protogen-go
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/local-store
-endif
-
-grpcs: protogen-go $(GRPC_BACKENDS)
-
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
@@ -506,7 +429,6 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
- --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
@@ -515,7 +437,6 @@ docker-image-intel-xpu:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
- --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
########################################################
@@ -534,6 +455,24 @@ docker-build-bark-cpp:
docker-build-piper:
docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
+docker-build-local-store:
+ docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
+
+docker-build-huggingface:
+ docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
+
+docker-save-huggingface: backend-images
+ docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
+
+docker-save-local-store: backend-images
+ docker save local-ai-backend:local-store -o backend-images/local-store.tar
+
+docker-build-silero-vad:
+ docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
+
+docker-save-silero-vad: backend-images
+ docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
+
docker-save-piper: backend-images
docker save local-ai-backend:piper -o backend-images/piper.tar
diff --git a/assets.go b/assets.go
deleted file mode 100644
index b3c813871..000000000
--- a/assets.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package main
-
-import (
- rice "github.com/GeertJohan/go.rice"
-)
-
-var backendAssets *rice.Box
-
-func init() {
- var err error
- backendAssets, err = rice.FindBox("backend-assets")
- if err != nil {
- panic(err)
- }
-}
diff --git a/backend/cpp/llama-cpp/run.sh b/backend/cpp/llama-cpp/run.sh
index 17a1d0df9..dde3161fa 100755
--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@@ -44,7 +44,7 @@ fi
if [ "$(uname)" == "Darwin" ]; then
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else
- LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
# If there is a lib/ld.so, use it
diff --git a/backend/go/huggingface/Makefile b/backend/go/huggingface/Makefile
new file mode 100644
index 000000000..77b6c82ed
--- /dev/null
+++ b/backend/go/huggingface/Makefile
@@ -0,0 +1,9 @@
+GOCMD=go
+
+huggingface:
+ CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
+
+package:
+ bash package.sh
+
+build: huggingface package
\ No newline at end of file
diff --git a/backend/go/llm/langchain/langchain.go b/backend/go/huggingface/langchain.go
similarity index 100%
rename from backend/go/llm/langchain/langchain.go
rename to backend/go/huggingface/langchain.go
diff --git a/backend/go/llm/langchain/main.go b/backend/go/huggingface/main.go
similarity index 100%
rename from backend/go/llm/langchain/main.go
rename to backend/go/huggingface/main.go
diff --git a/backend/go/huggingface/package.sh b/backend/go/huggingface/package.sh
new file mode 100755
index 000000000..6218a65f6
--- /dev/null
+++ b/backend/go/huggingface/package.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Script to package the huggingface backend binary and its run script
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+mkdir -p $CURDIR/package
+cp -avrf $CURDIR/huggingface $CURDIR/package/
+cp -rfv $CURDIR/run.sh $CURDIR/package/
\ No newline at end of file
diff --git a/backend/go/huggingface/run.sh b/backend/go/huggingface/run.sh
new file mode 100755
index 000000000..08972b5d2
--- /dev/null
+++ b/backend/go/huggingface/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+exec $CURDIR/huggingface "$@"
\ No newline at end of file
diff --git a/backend/go/local-store/Makefile b/backend/go/local-store/Makefile
new file mode 100644
index 000000000..6cde84b00
--- /dev/null
+++ b/backend/go/local-store/Makefile
@@ -0,0 +1,9 @@
+GOCMD=go
+
+local-store:
+ CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
+
+package:
+ bash package.sh
+
+build: local-store package
\ No newline at end of file
diff --git a/backend/go/stores/debug.go b/backend/go/local-store/debug.go
similarity index 100%
rename from backend/go/stores/debug.go
rename to backend/go/local-store/debug.go
diff --git a/backend/go/stores/main.go b/backend/go/local-store/main.go
similarity index 100%
rename from backend/go/stores/main.go
rename to backend/go/local-store/main.go
diff --git a/backend/go/local-store/package.sh b/backend/go/local-store/package.sh
new file mode 100755
index 000000000..af94e0ee7
--- /dev/null
+++ b/backend/go/local-store/package.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Script to package the local-store backend binary and its run script
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+mkdir -p $CURDIR/package
+cp -avrf $CURDIR/local-store $CURDIR/package/
+cp -rfv $CURDIR/run.sh $CURDIR/package/
\ No newline at end of file
diff --git a/backend/go/stores/production.go b/backend/go/local-store/production.go
similarity index 100%
rename from backend/go/stores/production.go
rename to backend/go/local-store/production.go
diff --git a/backend/go/local-store/run.sh b/backend/go/local-store/run.sh
new file mode 100755
index 000000000..479f3b486
--- /dev/null
+++ b/backend/go/local-store/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+exec $CURDIR/local-store "$@"
\ No newline at end of file
diff --git a/backend/go/stores/store.go b/backend/go/local-store/store.go
similarity index 99%
rename from backend/go/stores/store.go
rename to backend/go/local-store/store.go
index c8788a9c7..1fa0b2ef6 100644
--- a/backend/go/stores/store.go
+++ b/backend/go/local-store/store.go
@@ -4,6 +4,7 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
+ "errors"
"fmt"
"math"
"slices"
@@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
}
func (s *Store) Load(opts *pb.ModelOptions) error {
+ if opts.Model != "" {
+ return errors.New("not implemented")
+ }
return nil
}
@@ -315,7 +319,7 @@ func isNormalized(k []float32) bool {
for _, v := range k {
v64 := float64(v)
- sum += v64*v64
+ sum += v64 * v64
}
s := math.Sqrt(sum)
diff --git a/backend/go/silero-vad/Makefile b/backend/go/silero-vad/Makefile
new file mode 100644
index 000000000..93fd6b4c9
--- /dev/null
+++ b/backend/go/silero-vad/Makefile
@@ -0,0 +1,47 @@
+
+CURRENT_DIR=$(abspath ./)
+GOCMD=go
+
+ONNX_VERSION?=1.20.0
+ONNX_ARCH?=x64
+ONNX_OS?=linux
+
+# Detect if we are running on arm64
+ifneq (,$(findstring aarch64,$(shell uname -m)))
+ ONNX_ARCH=aarch64
+endif
+
+ifeq ($(OS),Darwin)
+ ONNX_OS=osx
+ ifneq (,$(findstring aarch64,$(shell uname -m)))
+ ONNX_ARCH=arm64
+ else ifneq (,$(findstring arm64,$(shell uname -m)))
+ ONNX_ARCH=arm64
+ else
+ ONNX_ARCH=x86_64
+ endif
+endif
+
+sources/onnxruntime:
+ mkdir -p sources/onnxruntime
+ curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+ cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+ cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
+
+backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
+ mkdir -p backend-assets/lib
+ cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
+ifeq ($(OS),Darwin)
+ mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
+else
+ mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
+endif
+
+silero-vad: backend-assets/lib/libonnxruntime.so.1
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
+
+package:
+ bash package.sh
+
+build: silero-vad package
\ No newline at end of file
diff --git a/backend/go/vad/silero/main.go b/backend/go/silero-vad/main.go
similarity index 100%
rename from backend/go/vad/silero/main.go
rename to backend/go/silero-vad/main.go
diff --git a/backend/go/silero-vad/package.sh b/backend/go/silero-vad/package.sh
new file mode 100755
index 000000000..1c524000c
--- /dev/null
+++ b/backend/go/silero-vad/package.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Script to copy the appropriate libraries based on architecture
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+# Create lib directory
+mkdir -p $CURDIR/package/lib
+
+cp -avrf $CURDIR/silero-vad $CURDIR/package/
+cp -avrf $CURDIR/run.sh $CURDIR/package/
+cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
+
+# Detect architecture and copy appropriate libraries
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+ # x86_64 architecture
+ echo "Detected x86_64 architecture, copying x86_64 libraries..."
+ cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
+ cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+ cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+ # ARM64 architecture
+ echo "Detected ARM64 architecture, copying ARM64 libraries..."
+ cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
+ cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+ cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+else
+ echo "Error: Could not detect architecture"
+ exit 1
+fi
+
+echo "Packaging completed successfully"
+ls -liah $CURDIR/package/
+ls -liah $CURDIR/package/lib/
\ No newline at end of file
diff --git a/backend/go/silero-vad/run.sh b/backend/go/silero-vad/run.sh
new file mode 100755
index 000000000..72658908a
--- /dev/null
+++ b/backend/go/silero-vad/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+
+# If there is a lib/ld.so, use it
+if [ -f $CURDIR/lib/ld.so ]; then
+ echo "Using lib/ld.so"
+ exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
+fi
+
+exec $CURDIR/silero-vad "$@"
\ No newline at end of file
diff --git a/backend/go/vad/silero/vad.go b/backend/go/silero-vad/vad.go
similarity index 100%
rename from backend/go/vad/silero/vad.go
rename to backend/go/silero-vad/vad.go
diff --git a/backend/index.yaml b/backend/index.yaml
index 6451bfd77..608854605 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -68,7 +68,7 @@
default: "cpu-stablediffusion-ggml"
nvidia: "cuda12-stablediffusion-ggml"
intel: "intel-sycl-f16-stablediffusion-ggml"
- #amd: "rocm-stablediffusion-ggml"
+ # amd: "rocm-stablediffusion-ggml"
vulkan: "vulkan-stablediffusion-ggml"
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
# metal: "metal-stablediffusion-ggml"
@@ -285,6 +285,54 @@
tags:
- text-to-speech
- TTS
+- &silero-vad
+ name: "silero-vad"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
+ icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
+ urls:
+ - https://github.com/snakers4/silero-vad
+ description: |
+ Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
+ Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not.
+ tags:
+ - voice-activity-detection
+ - VAD
+ - silero-vad
+ - CPU
+- &local-store
+ name: "local-store"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
+ urls:
+ - https://github.com/mudler/LocalAI
+ description: |
+ Local Store is a local-first, self-hosted, and open-source vector database.
+ tags:
+ - vector-database
+ - local-first
+ - open-source
+ - CPU
+ license: MIT
+- &huggingface
+ name: "huggingface"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
+ icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
+ urls:
+ - https://huggingface.co/docs/hub/en/api
+ description: |
+    HuggingFace is a backend that uses the HuggingFace API to run models.
+ tags:
+ - LLM
+ - huggingface
+ license: MIT
+- !!merge <<: *huggingface
+ name: "huggingface-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
+- !!merge <<: *local-store
+ name: "local-store-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
+- !!merge <<: *silero-vad
+ name: "silero-vad-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
- !!merge <<: *piper
name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper"
diff --git a/core/application/startup.go b/core/application/startup.go
index 1fdd1ad50..59003799b 100644
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -9,9 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -103,23 +101,6 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
- if options.AssetsDestination != "" {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
- log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
- }
-
- if options.LibPath != "" {
- // If there is a lib directory, set LD_LIBRARY_PATH to include it
- err := library.LoadExternal(options.LibPath)
- if err != nil {
- log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
- }
- }
-
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
diff --git a/core/backend/options.go b/core/backend/options.go
index 7f4623c2d..cfe7b35e4 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -20,7 +20,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
- model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}
diff --git a/core/backend/stores.go b/core/backend/stores.go
index f5ee9166d..78257180e 100644
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -7,14 +7,12 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)
-func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
- if storeName == "" {
- storeName = "default"
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
+ if backend == "" {
+ backend = model.LocalStoreBackend
}
-
sc := []model.Option{
- model.WithBackendString(model.LocalStoreBackend),
- model.WithAssetDir(appConfig.AssetsDestination),
+ model.WithBackendString(backend),
model.WithModel(storeName),
}
diff --git a/core/cli/context/context.go b/core/cli/context/context.go
index 34242e971..061d27503 100644
--- a/core/cli/context/context.go
+++ b/core/cli/context/context.go
@@ -1,13 +1,6 @@
package cliContext
-import (
- rice "github.com/GeertJohan/go.rice"
-)
-
type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
-
- // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
- BackendAssets *rice.Box `kong:"-"`
}
diff --git a/core/cli/run.go b/core/cli/run.go
index 481d89448..47e765dd8 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -23,7 +23,6 @@ type RunCMD struct {
ExternalBackends []string `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
@@ -46,7 +45,6 @@ type RunCMD struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
- LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
@@ -99,10 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),
- config.WithLibPath(r.LibraryPath),
config.WithThreads(r.Threads),
- config.WithBackendAssets(ctx.BackendAssets),
- config.WithBackendAssetsOutput(r.BackendAssetsPath),
config.WithUploadLimitMB(r.UploadLimit),
config.WithApiKeys(r.APIKeys),
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go
index b7c1d0fe6..1193b329f 100644
--- a/core/cli/soundgeneration.go
+++ b/core/cli/soundgeneration.go
@@ -27,7 +27,6 @@ type SoundGenerationCMD struct {
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}
@@ -51,11 +50,10 @@ func parseToInt32Ptr(input string) *int32 {
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
- outputDir := t.BackendAssetsPath
+ outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
-
text := strings.Join(t.Text, " ")
externalBackends := make(map[string]string)
@@ -71,7 +69,6 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
- AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
index 67b5ed1da..3e5ee6d44 100644
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -15,20 +15,18 @@ import (
type TranscriptCMD struct {
Filename string `arg:""`
- Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
- Model string `short:"m" required:"" help:"Model name to run the TTS"`
- Language string `short:"l" help:"Language of the audio file"`
- Translate bool `short:"c" help:"Translate the transcription to english"`
- Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
- ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
+ Model string `short:"m" required:"" help:"Model name to run the TTS"`
+ Language string `short:"l" help:"Language of the audio file"`
+ Translate bool `short:"c" help:"Translate the transcription to english"`
+ Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
opts := &config.ApplicationConfig{
- ModelPath: t.ModelsPath,
- Context: context.Background(),
- AssetsDestination: t.BackendAssetsPath,
+ ModelPath: t.ModelsPath,
+ Context: context.Background(),
}
cl := config.NewBackendConfigLoader(t.ModelsPath)
diff --git a/core/cli/tts.go b/core/cli/tts.go
index 074487e62..552fdf018 100644
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -17,18 +17,17 @@ import (
type TTSCMD struct {
Text []string `arg:""`
- Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
- Model string `short:"m" required:"" help:"Model name to run the TTS"`
- Voice string `short:"v" help:"Voice name to run the TTS"`
- Language string `short:"l" help:"Language to use with the TTS"`
- OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
- ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
+ Model string `short:"m" required:"" help:"Model name to run the TTS"`
+ Voice string `short:"v" help:"Voice name to run the TTS"`
+ Language string `short:"l" help:"Language to use with the TTS"`
+ OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TTSCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
- outputDir := t.BackendAssetsPath
+ outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
@@ -39,7 +38,6 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
- AssetsDestination: t.BackendAssetsPath,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go
index a5d065773..33813db06 100644
--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@@ -1,7 +1,7 @@
package worker
type WorkerFlags struct {
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 3ea3cd426..d9fe8e4db 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -9,8 +9,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/gallery"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/rs/zerolog/log"
)
@@ -47,24 +45,17 @@ func findLLamaCPPBackend(backendSystemPath string) (string, error) {
}
func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
- log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
if len(os.Args) < 4 {
return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- ")
}
- grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+ grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
return err
}
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
- args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
args = append([]string{grpcProcess}, args...)
return syscall.Exec(
diff --git a/core/cli/worker/worker_nop2p.go b/core/cli/worker/worker_nop2p.go
deleted file mode 100644
index fc3f095d7..000000000
--- a/core/cli/worker/worker_nop2p.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package worker
-
-import (
- "fmt"
-
- cliContext "github.com/mudler/LocalAI/core/cli/context"
-)
-
-type P2P struct{}
-
-func (r *P2P) Run(ctx *cliContext.Context) error {
- return fmt.Errorf("p2p mode is not enabled in this build")
-}
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index 4fb1b5825..1533de4e5 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package worker
import (
@@ -13,8 +10,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
)
@@ -29,12 +24,6 @@ type P2P struct {
}
func (r *P2P) Run(ctx *cliContext.Context) error {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
- log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
// Check if the token is set
// as we always need it.
@@ -71,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
- grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+ grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
return
@@ -85,8 +74,6 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
- args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
cmd := exec.Command(
grpcProcess, args...,
)
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 662bddc6a..4f5f878d1 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -6,7 +6,6 @@ import (
"regexp"
"time"
- rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
@@ -17,7 +16,6 @@ type ApplicationConfig struct {
ModelPath string
BackendsPath string
ExternalBackends []string
- LibPath string
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug bool
@@ -50,9 +48,6 @@ type ApplicationConfig struct {
Galleries []Gallery
BackendGalleries []Gallery
- BackendAssets *rice.Box
- AssetsDestination string
-
ExternalGRPCBackends map[string]string
AutoloadGalleries, AutoloadBackendGalleries bool
@@ -140,12 +135,6 @@ func WithP2PToken(s string) AppOption {
}
}
-func WithLibPath(path string) AppOption {
- return func(o *ApplicationConfig) {
- o.LibPath = path
- }
-}
-
var EnableWatchDog = func(o *ApplicationConfig) {
o.WatchDog = true
}
@@ -211,18 +200,6 @@ func WithCorsAllowOrigins(b string) AppOption {
}
}
-func WithBackendAssetsOutput(out string) AppOption {
- return func(o *ApplicationConfig) {
- o.AssetsDestination = out
- }
-}
-
-func WithBackendAssets(f *rice.Box) AppOption {
- return func(o *ApplicationConfig) {
- o.BackendAssets = f
- }
-}
-
func WithStringGalleries(galls string) AppOption {
return func(o *ApplicationConfig) {
if galls == "" {
diff --git a/core/gallery/models.go b/core/gallery/models.go
index a1c8a4b75..30ec2908e 100644
--- a/core/gallery/models.go
+++ b/core/gallery/models.go
@@ -126,8 +126,9 @@ func InstallModelFromGallery(
if err != nil {
return err
}
-
+ log.Debug().Msgf("Installed model %q", installedModel.Name)
if automaticallyInstallBackend && installedModel.Backend != "" {
+ log.Debug().Msgf("Installing backend %q", installedModel.Backend)
systemState, err := system.GetSystemState()
if err != nil {
return err
diff --git a/core/http/app_test.go b/core/http/app_test.go
index b4eadbe73..03aaf8a4c 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -23,7 +23,6 @@ import (
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
- rice "github.com/GeertJohan/go.rice"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
@@ -264,16 +263,6 @@ func getRequest(url string, header http.Header) (error, int, []byte) {
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
-var backendAssets *rice.Box
-
-func init() {
- var err error
- backendAssets, err = rice.FindBox("backend-assets")
- if err != nil {
- panic(err)
- }
-}
-
var _ = Describe("API test", func() {
var app *fiber.App
@@ -300,9 +289,6 @@ var _ = Describe("API test", func() {
modelDir = filepath.Join(tmpdir, "models")
err = os.Mkdir(modelDir, 0750)
Expect(err).ToNot(HaveOccurred())
- backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
- err = os.Mkdir(backendAssetsDir, 0750)
- Expect(err).ToNot(HaveOccurred())
c, cancel = context.WithCancel(context.Background())
@@ -341,8 +327,7 @@ var _ = Describe("API test", func() {
config.WithModelPath(modelDir),
config.WithBackendsPath(backendPath),
config.WithApiKeys([]string{apiKey}),
- config.WithBackendAssets(backendAssets),
- config.WithBackendAssetsOutput(backendAssetsDir))...)
+ )...)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@@ -545,8 +530,7 @@ var _ = Describe("API test", func() {
config.WithBackendsPath(backendPath),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
- config.WithBackendAssets(backendAssets),
- config.WithBackendAssetsOutput(tmpdir))...,
+ )...,
)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@@ -803,6 +787,10 @@ var _ = Describe("API test", func() {
})
It("shows the external backend", func() {
+ // Only run on linux
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
// do an http request to the /system endpoint
resp, err := http.Get("http://127.0.0.1:9090/system")
Expect(err).ToNot(HaveOccurred())
@@ -888,6 +876,13 @@ var _ = Describe("API test", func() {
// See tests/integration/stores_test
Context("Stores", Label("stores"), func() {
+ BeforeEach(func() {
+ // Only run on linux
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
+ })
+
It("sets, gets, finds and deletes entries", func() {
ks := [][]float32{
{0.1, 0.2, 0.3},
diff --git a/core/http/endpoints/localai/stores.go b/core/http/endpoints/localai/stores.go
index dd8df8b18..303d943f6 100644
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -17,7 +17,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -45,7 +45,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -67,7 +67,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go
index ea01a6702..64b1d111b 100644
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -13,10 +13,7 @@ import (
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
- if err != nil {
- return err
- }
+ availableBackends := []string{}
loadedModels := ml.ListModels()
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go
index 07bc92c63..ba291536e 100644
--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -5,7 +5,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -37,7 +36,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": modelsWithoutConfig,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index e0217be3e..39f22ca61 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -6,7 +6,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/middleware"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
@@ -80,10 +79,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
- if p2p.IsP2PEnabled() {
- router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
- router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
- }
+ router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
+ router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
router.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 6a59ad1ab..11b2ab485 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -25,38 +25,39 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
- if p2p.IsP2PEnabled() {
- app.Get("/p2p", func(c *fiber.Ctx) error {
- summary := fiber.Map{
- "Title": "LocalAI - P2P dashboard",
- "BaseURL": utils.BaseURL(c),
- "Version": internal.PrintableVersion(),
- //"Nodes": p2p.GetAvailableNodes(""),
- //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
- "P2PToken": appConfig.P2PToken,
- "NetworkID": appConfig.P2PNetworkID,
- }
+ // P2P
+ app.Get("/p2p", func(c *fiber.Ctx) error {
+ summary := fiber.Map{
+ "Title": "LocalAI - P2P dashboard",
+ "BaseURL": utils.BaseURL(c),
+ "Version": internal.PrintableVersion(),
+ //"Nodes": p2p.GetAvailableNodes(""),
+ //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
- // Render index
- return c.Render("views/p2p", summary)
- })
+ "P2PToken": appConfig.P2PToken,
+ "NetworkID": appConfig.P2PNetworkID,
+ }
- /* show nodes live! */
- app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
- })
- app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
- })
+ // Render index
+ return c.Render("views/p2p", summary)
+ })
- app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
- })
- app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
- })
- }
+ /* show nodes live! */
+ app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+ })
+ app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+ })
+
+ app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+ })
+ app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+ })
+
+ // End P2P
if !appConfig.DisableGalleryEndpoint {
registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps)
@@ -76,8 +77,8 @@ func RegisterUIRoutes(app *fiber.App,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0],
- "IsP2PEnabled": p2p.IsP2PEnabled(),
- "Version": internal.PrintableVersion(),
+
+ "Version": internal.PrintableVersion(),
}
// Render index
@@ -121,7 +122,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -151,7 +151,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -169,7 +168,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -203,7 +201,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -221,7 +218,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -253,7 +249,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}
diff --git a/core/http/routes/ui_backend_gallery.go b/core/http/routes/ui_backend_gallery.go
index 6b6ba40e3..d16cdb026 100644
--- a/core/http/routes/ui_backend_gallery.go
+++ b/core/http/routes/ui_backend_gallery.go
@@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@@ -71,7 +70,6 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
"ProcessingBackends": processingBackendsData,
"AvailableBackends": len(backends),
"TaskTypes": taskTypes,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
if page == "" {
diff --git a/core/http/routes/ui_gallery.go b/core/http/routes/ui_gallery.go
index d9b0c43d6..6a0e1d7dd 100644
--- a/core/http/routes/ui_gallery.go
+++ b/core/http/routes/ui_gallery.go
@@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@@ -70,9 +69,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
-
- "TaskTypes": taskTypes,
+ "TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}
diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html
index cfcfbe319..b3339e876 100644
--- a/core/http/views/explorer.html
+++ b/core/http/views/explorer.html
@@ -268,7 +268,7 @@
Command to connect (click to copy):
- docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master-ffmpeg-core federated --debug
+ docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master federated --debug
or via CLI:
diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html
index 6e9024851..bd6324bf6 100644
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -49,11 +49,11 @@
- {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+ {{ if eq .P2PToken "" }}
-
Warning: P2P mode is disabled or no token was specified
+ Warning: P2P token was not specified
You have to enable P2P mode by starting LocalAI with --p2p. Please restart the server with --p2p to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with export TOKEN=".."
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index 229d7bf29..4ecfab872 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -40,11 +40,9 @@
Talk
- {{ if .IsP2PEnabled }}
Swarm
- {{ end }}
API
@@ -75,11 +73,9 @@
Talk
- {{ if .IsP2PEnabled }}
Swarm
- {{ end }}
API
diff --git a/core/p2p/federated_server.go b/core/p2p/federated_server.go
index d80af082c..e382576ba 100644
--- a/core/p2p/federated_server.go
+++ b/core/p2p/federated_server.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package p2p
import (
diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go
index b4b5886b5..b5cd1f831 100644
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package p2p
import (
@@ -65,10 +62,6 @@ func GenerateToken(DHTInterval, OTPInterval int) string {
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
-func IsP2PEnabled() bool {
- return true
-}
-
func nodeID(s string) string {
hostname, _ := os.Hostname()
return fmt.Sprintf("%s-%s", hostname, s)
diff --git a/core/p2p/p2p_disabled.go b/core/p2p/p2p_disabled.go
deleted file mode 100644
index c5ba98fda..000000000
--- a/core/p2p/p2p_disabled.go
+++ /dev/null
@@ -1,35 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package p2p
-
-import (
- "context"
- "fmt"
-
- "github.com/mudler/edgevpn/pkg/node"
-)
-
-func GenerateToken(DHTInterval, OTPInterval int) string {
- return "not implemented"
-}
-
-func (f *FederatedServer) Start(ctx context.Context) error {
- return fmt.Errorf("not implemented")
-}
-
-func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData), allocate bool) error {
- return fmt.Errorf("not implemented")
-}
-
-func ExposeService(ctx context.Context, host, port, token, servicesID string) (*node.Node, error) {
- return nil, fmt.Errorf("not implemented")
-}
-
-func IsP2PEnabled() bool {
- return false
-}
-
-func NewNode(token string) (*node.Node, error) {
- return nil, fmt.Errorf("not implemented")
-}
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 734314a2b..4e819238a 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -63,23 +63,29 @@ type VADResponse struct {
Segments []VADSegment `json:"segments" yaml:"segments"`
}
+type StoreCommon struct {
+ Backend string `json:"backend,omitempty" yaml:"backend,omitempty"`
+}
type StoresSet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
Values []string `json:"values" yaml:"values"`
+ StoreCommon
}
type StoresDelete struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys"`
+ StoreCommon
}
type StoresGet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
+ StoreCommon
}
type StoresGetResponse struct {
@@ -92,6 +98,7 @@ type StoresFind struct {
Key []float32 `json:"key" yaml:"key"`
Topk int `json:"topk" yaml:"topk"`
+ StoreCommon
}
type StoresFindResponse struct {
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 2306c28f3..b9880352a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -5,7 +5,7 @@ services:
# Available images with CUDA, ROCm, SYCL
# Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
# Image list (dockerhub): https://hub.docker.com/r/localai/localai
- image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+ image: quay.io/go-skynet/local-ai:master
build:
context: .
dockerfile: Dockerfile
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index 5c52ed4ca..68bb53adf 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -579,38 +579,14 @@ You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to r
### Extra backends
-LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends.
-
-If you wish to build a custom container image with extra backends, you can use the core images and build only the backends you are interested into or prepare the environment on startup by using the `EXTRA_BACKENDS` environment variable. For instance, to use the diffusers backend:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-```
-
-Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-
-ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
-```
-
-{{% alert note %}}
-
-You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
-
-{{% /alert %}}
+LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. See the [backend section](https://localai.io/backends/) for more details on how to install and build new backends for LocalAI.
#### In runtime
When using the `-core` container image it is possible to prepare the python backends you are interested into by using the `EXTRA_BACKENDS` variable, for instance:
```bash
-docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
+docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master
```
### Concurrent requests
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index 51bce71fb..12eba2946 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -73,8 +73,6 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
-- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
-- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
@@ -259,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
### Container images
-To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
+To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32`, `{{< version >}}-gpu-intel-f16`, ...
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
@@ -268,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
```
### Notes
@@ -276,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -
In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
```
Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
diff --git a/docs/content/docs/features/embeddings.md b/docs/content/docs/features/embeddings.md
index 92c41eb64..7e0f3abf4 100644
--- a/docs/content/docs/features/embeddings.md
+++ b/docs/content/docs/features/embeddings.md
@@ -44,7 +44,6 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
{{% alert note %}}
- The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use.
-- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
- For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable.
- Example: `EXTERNAL_GRPC_BACKENDS="sentencetransformers:/path/to/LocalAI/backend/python/sentencetransformers/sentencetransformers.py"`
- The `sentencetransformers` backend does support only embeddings of text, and not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`.
diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 38cafeca1..a3b39f0a3 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -18,8 +18,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration.
-- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
-- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
{{% /alert %}}
diff --git a/docs/content/docs/getting-started/customize-model.md b/docs/content/docs/getting-started/customize-model.md
index e8440cd39..eff83ebd2 100644
--- a/docs/content/docs/getting-started/customize-model.md
+++ b/docs/content/docs/getting-started/customize-model.md
@@ -23,7 +23,7 @@ MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branc
Here's an example to initiate the **phi-2** model:
```bash
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
```
You can also check all the embedded models configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
@@ -64,7 +64,7 @@ Then, launch LocalAI using your gist's URL:
```bash
## Important! Substitute with your gist's URL!
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/xxxx/phi-2.yaml
```
## Next Steps
diff --git a/gallery/alpaca.yaml b/gallery/alpaca.yaml
index b647d2f64..18512de77 100644
--- a/gallery/alpaca.yaml
+++ b/gallery/alpaca.yaml
@@ -2,6 +2,7 @@
name: "alpaca"
config_file: |
+ backend: "llama-cpp"
context_size: 4096
f16: true
mmap: true
diff --git a/gallery/arch-function.yaml b/gallery/arch-function.yaml
index a527d0f79..c7e7775ce 100644
--- a/gallery/arch-function.yaml
+++ b/gallery/arch-function.yaml
@@ -2,6 +2,7 @@
name: "chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/chatml-hercules.yaml b/gallery/chatml-hercules.yaml
index c10367896..36b478a1a 100644
--- a/gallery/chatml-hercules.yaml
+++ b/gallery/chatml-hercules.yaml
@@ -2,6 +2,7 @@
name: "chatml-hercules"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
# disable injecting the "answer" tool
diff --git a/gallery/chatml.yaml b/gallery/chatml.yaml
index abaf3209f..7e8e63a62 100644
--- a/gallery/chatml.yaml
+++ b/gallery/chatml.yaml
@@ -2,6 +2,7 @@
name: "chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/command-r.yaml b/gallery/command-r.yaml
index 81a24fb19..0c1636f3e 100644
--- a/gallery/command-r.yaml
+++ b/gallery/command-r.yaml
@@ -2,6 +2,7 @@
name: "command-r"
config_file: |
+ backend: "llama-cpp"
context_size: 131072
stopwords:
- "<|END_OF_TURN_TOKEN|>"
diff --git a/gallery/deephermes.yaml b/gallery/deephermes.yaml
index 93d5c7939..3805b57ec 100644
--- a/gallery/deephermes.yaml
+++ b/gallery/deephermes.yaml
@@ -2,6 +2,7 @@
name: "deephermes"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:
diff --git a/gallery/deepseek-r1.yaml b/gallery/deepseek-r1.yaml
index 29ca9db12..d03073534 100644
--- a/gallery/deepseek-r1.yaml
+++ b/gallery/deepseek-r1.yaml
@@ -2,6 +2,7 @@
name: "deepseek-r1"
config_file: |
+ backend: "llama-cpp"
context_size: 131072
mmap: true
f16: true
diff --git a/gallery/deepseek.yaml b/gallery/deepseek.yaml
index fa8870a1d..d8f926739 100644
--- a/gallery/deepseek.yaml
+++ b/gallery/deepseek.yaml
@@ -2,6 +2,7 @@
name: "deepseek"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
template:
diff --git a/gallery/falcon3.yaml b/gallery/falcon3.yaml
index b6593f4bb..5f2fc8c59 100644
--- a/gallery/falcon3.yaml
+++ b/gallery/falcon3.yaml
@@ -2,6 +2,7 @@
name: "falcon3"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml
index ed69795f0..d6a1eab06 100644
--- a/gallery/gemma.yaml
+++ b/gallery/gemma.yaml
@@ -2,6 +2,7 @@
name: "gemma"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
template:
diff --git a/gallery/granite.yaml b/gallery/granite.yaml
index 465cca186..8b94b4703 100644
--- a/gallery/granite.yaml
+++ b/gallery/granite.yaml
@@ -2,6 +2,7 @@
name: "granite"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/granite3-2.yaml b/gallery/granite3-2.yaml
index 8a4a9b88d..ec07fca9e 100644
--- a/gallery/granite3-2.yaml
+++ b/gallery/granite3-2.yaml
@@ -2,6 +2,7 @@
name: "granite-3.2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
index 22a5fb3a6..040927e09 100644
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -2,6 +2,7 @@
name: "hermes-2-pro-mistral"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:
diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml
index 5dc54b0e8..c2ef37e87 100644
--- a/gallery/llama3-instruct.yaml
+++ b/gallery/llama3-instruct.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/llama3.1-instruct-grammar.yaml b/gallery/llama3.1-instruct-grammar.yaml
index 30237af35..b91834937 100644
--- a/gallery/llama3.1-instruct-grammar.yaml
+++ b/gallery/llama3.1-instruct-grammar.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct-grammar"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml
index 4a2b4db13..1d078f2b0 100644
--- a/gallery/llama3.1-instruct.yaml
+++ b/gallery/llama3.1-instruct.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/llama3.1-reflective.yaml b/gallery/llama3.1-reflective.yaml
index 86a91d8b1..75f6edf2f 100644
--- a/gallery/llama3.1-reflective.yaml
+++ b/gallery/llama3.1-reflective.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
cutstrings:
- (.*?)
diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml
index 73f370a8f..fc8dc1240 100644
--- a/gallery/llama3.2-fcall.yaml
+++ b/gallery/llama3.2-fcall.yaml
@@ -2,6 +2,7 @@
name: "llama3.2-fcall"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/llama3.2-quantized.yaml b/gallery/llama3.2-quantized.yaml
index 7e1d26305..2407b22da 100644
--- a/gallery/llama3.2-quantized.yaml
+++ b/gallery/llama3.2-quantized.yaml
@@ -2,6 +2,7 @@
name: "llama3.2-quantized"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/mathstral.yaml b/gallery/mathstral.yaml
index a1c686b45..1ed503396 100644
--- a/gallery/mathstral.yaml
+++ b/gallery/mathstral.yaml
@@ -2,6 +2,7 @@
name: "mathstral"
config_file: |
+ backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:
diff --git a/gallery/mistral-0.3.yaml b/gallery/mistral-0.3.yaml
index 502e7a5af..1f45728d1 100644
--- a/gallery/mistral-0.3.yaml
+++ b/gallery/mistral-0.3.yaml
@@ -2,6 +2,7 @@
name: "mistral-0.3"
config_file: |
+ backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:
diff --git a/gallery/moondream.yaml b/gallery/moondream.yaml
index d3511f20b..5ff871cac 100644
--- a/gallery/moondream.yaml
+++ b/gallery/moondream.yaml
@@ -3,6 +3,7 @@ name: "moondream2"
config_file: |
+ backend: "llama-cpp"
context_size: 2046
roles:
user: "\nQuestion: "
diff --git a/gallery/mudler.yaml b/gallery/mudler.yaml
index 77bdc8eb4..fa85b9730 100644
--- a/gallery/mudler.yaml
+++ b/gallery/mudler.yaml
@@ -2,6 +2,7 @@
name: localai
config_file: |-
+ backend: "llama-cpp"
context_size: 8192
stopwords:
- <|im_end|>
diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml
index 5e1fb702d..cd161fa27 100644
--- a/gallery/phi-2-chat.yaml
+++ b/gallery/phi-2-chat.yaml
@@ -2,6 +2,7 @@
name: "phi-2-chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml
index 89971b4d5..22642ac50 100644
--- a/gallery/phi-2-orange.yaml
+++ b/gallery/phi-2-orange.yaml
@@ -2,6 +2,7 @@
name: "phi-2-orange"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml
index 98a3f3854..ce3f21116 100644
--- a/gallery/phi-3-chat.yaml
+++ b/gallery/phi-3-chat.yaml
@@ -2,6 +2,7 @@
name: "phi-3-chat"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-4-chat-fcall.yaml b/gallery/phi-4-chat-fcall.yaml
index 23c2e53db..c73f993e3 100644
--- a/gallery/phi-4-chat-fcall.yaml
+++ b/gallery/phi-4-chat-fcall.yaml
@@ -2,6 +2,7 @@
name: "phi-4-chat"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/phi-4-chat.yaml b/gallery/phi-4-chat.yaml
index 1ff0b14af..6b8de6dbf 100644
--- a/gallery/phi-4-chat.yaml
+++ b/gallery/phi-4-chat.yaml
@@ -3,6 +3,7 @@ name: "phi-4-chat"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{ .RoleName }}<|im_sep|>
diff --git a/gallery/qwen-fcall.yaml b/gallery/qwen-fcall.yaml
index f168c7fe0..dc8fb47ec 100644
--- a/gallery/qwen-fcall.yaml
+++ b/gallery/qwen-fcall.yaml
@@ -2,6 +2,7 @@
name: "qwen-fcall"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/qwen3-openbuddy.yaml b/gallery/qwen3-openbuddy.yaml
index 754d730d7..1af782a2c 100644
--- a/gallery/qwen3-openbuddy.yaml
+++ b/gallery/qwen3-openbuddy.yaml
@@ -3,6 +3,7 @@ name: "qwen3-openbuddy"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|role|>{{ .RoleName }}<|says|>
diff --git a/gallery/qwen3.yaml b/gallery/qwen3.yaml
index aef6c109c..1d2eb05dc 100644
--- a/gallery/qwen3.yaml
+++ b/gallery/qwen3.yaml
@@ -3,6 +3,7 @@ name: "qwen3"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{ .RoleName }}
diff --git a/gallery/rwkv.yaml b/gallery/rwkv.yaml
index 686937997..3750db974 100644
--- a/gallery/rwkv.yaml
+++ b/gallery/rwkv.yaml
@@ -2,6 +2,7 @@
name: "rwkv"
config_file: |
+ backend: "llama-cpp"
parameters:
top_k: 80
temperature: 0.9
diff --git a/gallery/smolvlm.yaml b/gallery/smolvlm.yaml
index 2c4ef47e3..a3fddcc6c 100644
--- a/gallery/smolvlm.yaml
+++ b/gallery/smolvlm.yaml
@@ -2,6 +2,7 @@
name: smolvlm
# yamllint disable-line rule:trailing-spaces
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/tuluv2.yaml b/gallery/tuluv2.yaml
index ca2785a23..d716879a9 100644
--- a/gallery/tuluv2.yaml
+++ b/gallery/tuluv2.yaml
@@ -2,6 +2,7 @@
name: "tuluv2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/vllm.yaml b/gallery/vllm.yaml
index f0b797cc7..852db148c 100644
--- a/gallery/vllm.yaml
+++ b/gallery/vllm.yaml
@@ -2,6 +2,7 @@
name: "vllm"
config_file: |
+ backend: vllm
context_size: 8192
parameters:
max_tokens: 8192
diff --git a/gallery/wizardlm2.yaml b/gallery/wizardlm2.yaml
index 6c2c14115..6c074b783 100644
--- a/gallery/wizardlm2.yaml
+++ b/gallery/wizardlm2.yaml
@@ -2,6 +2,7 @@
name: "wizardlm2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |-
diff --git a/go.mod b/go.mod
index ef137346a..a6da77492 100644
--- a/go.mod
+++ b/go.mod
@@ -6,16 +6,12 @@ toolchain go1.23.1
require (
dario.cat/mergo v1.0.1
- github.com/GeertJohan/go.rice v1.0.3
github.com/Masterminds/sprig/v3 v3.3.0
github.com/alecthomas/kong v0.9.0
- github.com/census-instrumentation/opencensus-proto v0.4.1
github.com/charmbracelet/glamour v0.7.0
github.com/chasefleming/elem-go v0.26.0
- github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20
github.com/containerd/containerd v1.7.19
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
- github.com/elliotchance/orderedmap/v2 v2.2.0
github.com/fsnotify/fsnotify v1.7.0
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad
github.com/go-audio/wav v1.1.0
@@ -25,11 +21,9 @@ require (
github.com/gofiber/template/html/v2 v2.1.2
github.com/gofiber/websocket/v2 v2.2.1
github.com/gofrs/flock v0.12.1
- github.com/golang/protobuf v1.5.4
github.com/google/go-containerregistry v0.19.2
github.com/google/uuid v1.6.0
github.com/gpustack/gguf-parser-go v0.17.0
- github.com/grpc-ecosystem/grpc-gateway v1.5.0
github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5
github.com/jaypipes/ghw v0.12.0
@@ -43,7 +37,6 @@ require (
github.com/nikolalohinski/gonja/v2 v2.3.2
github.com/onsi/ginkgo/v2 v2.22.2
github.com/onsi/gomega v1.36.2
- github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
github.com/otiai10/openaigo v1.7.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.20.5
@@ -62,7 +55,6 @@ require (
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
go.opentelemetry.io/otel/metric v1.34.0
go.opentelemetry.io/otel/sdk/metric v1.28.0
- google.golang.org/api v0.180.0
google.golang.org/grpc v1.67.1
google.golang.org/protobuf v1.36.5
gopkg.in/yaml.v2 v2.4.0
@@ -71,22 +63,13 @@ require (
)
require (
- cel.dev/expr v0.16.0 // indirect
- cloud.google.com/go/auth v0.4.1 // indirect
- cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
- cloud.google.com/go/compute/metadata v0.5.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
- github.com/daaku/go.zipexe v1.0.2 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
- github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
github.com/fasthttp/websocket v1.5.8 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
- github.com/google/s2a-go v0.1.7 // indirect
- github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
- github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
@@ -125,9 +108,7 @@ require (
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect
go.uber.org/mock v0.5.0 // indirect
- golang.org/x/oauth2 v0.24.0 // indirect
golang.org/x/time v0.8.0 // indirect
- google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect
)
require (
@@ -268,7 +249,7 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
- github.com/pkg/errors v0.9.1 // indirect
+ github.com/pkg/errors v0.9.1
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
diff --git a/go.sum b/go.sum
index 935126b8e..1ba732dcd 100644
--- a/go.sum
+++ b/go.sum
@@ -1,15 +1,7 @@
-cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y=
-cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo=
-cloud.google.com/go/auth v0.4.1 h1:Z7YNIhlWRtrnKlZke7z3GMqzvuYzdc2z98F9D1NV5Hg=
-cloud.google.com/go/auth v0.4.1/go.mod h1:QVBuVEKpCn4Zp58hzRGvL0tjRGU0YqdRTdCHM1IHnro=
-cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4=
-cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q=
-cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY=
-cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
@@ -23,9 +15,6 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
-github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0=
-github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZSmI=
-github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
@@ -42,7 +31,6 @@ github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZ
github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
-github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c=
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
@@ -73,8 +61,6 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
-github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g=
-github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
@@ -84,8 +70,6 @@ github.com/chasefleming/elem-go v0.26.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
-github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg=
-github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE=
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
@@ -116,8 +100,6 @@ github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0
github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
-github.com/daaku/go.zipexe v1.0.2 h1:Zg55YLYTr7M9wjKn8SY/WcpuuEi+kR2u4E8RhvpyXmk=
-github.com/daaku/go.zipexe v1.0.2/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -155,14 +137,10 @@ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+m
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
-github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk=
-github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
-github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM=
-github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=
github.com/fasthttp/websocket v1.5.8 h1:k5DpirKkftIF/w1R8ZzjSgARJrs54Je9YJK37DL/Ah8=
github.com/fasthttp/websocket v1.5.8/go.mod h1:d08g8WaT6nnyvg9uMm8K9zMYyDjfKyj3170AtPRuVU0=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
@@ -252,8 +230,6 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
-github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
-github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@@ -282,18 +258,12 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc=
github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
-github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
-github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs=
-github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY=
github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg=
-github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg=
-github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
@@ -545,7 +515,6 @@ github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJE
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY=
github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg=
-github.com/nkovacs/streamquote v1.0.0/go.mod h1:BN+NaZ2CmdKqUuTUXUEm9j95B2TRbpOWpxbJYzzgUsc=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
@@ -568,8 +537,6 @@ github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
-github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
-github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg=
@@ -785,7 +752,6 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
-github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
@@ -929,8 +895,6 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
-golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -1050,8 +1014,6 @@ gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o
google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
-google.golang.org/api v0.180.0 h1:M2D87Yo0rGBPWpo1orwfCLehUUL6E7/TYe5gvMQWDh4=
-google.golang.org/api v0.180.0/go.mod h1:51AiyoEg1MJPSZ9zvklA8VnRILPXxn1iVen9v25XHAE=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
@@ -1064,7 +1026,6 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw=
-google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw=
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 h1:T6rh4haD3GVYsgEfWExoCZA2o2FmbNyKpTuAxbEFPTg=
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9/go.mod h1:wp2WsuBYj6j8wUdo3ToZsdxxixbvQNAHqVJrTgi5E5M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 h1:QCqS/PdaHTSWGvupk2F/ehwHtGc0/GYkT+3GAcR1CCc=
diff --git a/main.go b/main.go
index 380e8f5b1..3c8615952 100644
--- a/main.go
+++ b/main.go
@@ -112,9 +112,6 @@ Version: ${version}
log.Trace().Msg("Setting logging to trace")
}
- // Populate the application with the embedded backend assets
- cli.CLI.Context.BackendAssets = backendAssets
-
// Run the thing!
err = ctx.Run(&cli.CLI.Context)
if err != nil {
diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go
deleted file mode 100644
index 8c1a6be68..000000000
--- a/pkg/assets/extract.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package assets
-
-import (
- "fmt"
- "os"
- "path/filepath"
-
- rice "github.com/GeertJohan/go.rice"
- "github.com/mudler/LocalAI/pkg/library"
-)
-
-const backendAssetsDir = "backend-assets"
-
-func ResolvePath(dir string, paths ...string) string {
- return filepath.Join(append([]string{dir, backendAssetsDir}, paths...)...)
-}
-
-func ExtractFiles(content *rice.Box, extractDir string) error {
- // Create the target directory with backend-assets subdirectory
- backendAssetsDir := filepath.Join(extractDir, backendAssetsDir)
- err := os.MkdirAll(backendAssetsDir, 0750)
- if err != nil {
- return fmt.Errorf("failed to create directory: %v", err)
- }
-
- // Walk through the rice box and extract files
- err = content.Walk("", func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- // Reconstruct the directory structure in the target directory
- targetFile := filepath.Join(backendAssetsDir, path)
- if info.IsDir() {
- // Create the directory in the target directory
- err := os.MkdirAll(targetFile, 0750)
- if err != nil {
- return fmt.Errorf("failed to create directory: %v", err)
- }
- return nil
- }
-
- // Read the file from the rice box
- fileData, err := content.Bytes(path)
- if err != nil {
- return fmt.Errorf("failed to read file: %v", err)
- }
-
- // Create the file in the target directory
- err = os.WriteFile(targetFile, fileData, 0700)
- if err != nil {
- return fmt.Errorf("failed to write file: %v", err)
- }
-
- return nil
- })
-
- // If there is a lib directory, set LD_LIBRARY_PATH to include it
- // we might use this mechanism to carry over e.g. Nvidia CUDA libraries
- // from the embedded FS to the target directory
- library.LoadExtractedLibs(backendAssetsDir)
-
- return err
-}
diff --git a/pkg/assets/list.go b/pkg/assets/list.go
deleted file mode 100644
index edfdf4985..000000000
--- a/pkg/assets/list.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package assets
-
-import (
- "os"
-
- rice "github.com/GeertJohan/go.rice"
- "github.com/rs/zerolog/log"
-)
-
-func ListFiles(content *rice.Box) (files []string) {
- err := content.Walk("", func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- if info.IsDir() {
- return nil
- }
-
- files = append(files, path)
- return nil
- })
- if err != nil {
- log.Error().Err(err).Msg("error walking the rice box")
- }
- return
-}
diff --git a/pkg/library/dynaload.go b/pkg/library/dynaload.go
deleted file mode 100644
index 878cdc881..000000000
--- a/pkg/library/dynaload.go
+++ /dev/null
@@ -1,86 +0,0 @@
-package library
-
-import (
- "errors"
- "fmt"
- "os"
- "path/filepath"
- "runtime"
-
- "github.com/rs/zerolog/log"
-)
-
-/*
- This file contains functions to load libraries from the asset directory to keep the business logic clean.
-*/
-
-// skipLibraryPath checks if LOCALAI_SKIP_LIBRARY_PATH is set
-var skipLibraryPath = os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != ""
-
-// LoadExtractedLibs loads the extracted libraries from the asset dir
-func LoadExtractedLibs(dir string) error {
- // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if skipLibraryPath {
- return nil
- }
-
- var err error = nil
- for _, libDir := range []string{filepath.Join(dir, "lib"), filepath.Join(dir, "lib")} {
- err = errors.Join(err, LoadExternal(libDir))
- }
- return err
-}
-
-// LoadLDSO checks if there is a ld.so in the asset dir and if so, prefixes the grpc process with it.
-// In linux, if we find a ld.so in the asset dir we prefix it to run with the libs exposed in
-// LD_LIBRARY_PATH for more compatibility
-// If we don't do this, we might run into stack smash
-// See also: https://stackoverflow.com/questions/847179/multiple-glibc-libraries-on-a-single-host/851229#851229
-// In this case, we expect a ld.so in the lib asset dir.
-// If that's present, we use it to run the grpc backends as supposedly built against
-// that specific version of ld.so
-func LoadLDSO(assetDir string, args []string, grpcProcess string) ([]string, string) {
- if skipLibraryPath {
- return args, grpcProcess
- }
-
- if runtime.GOOS != "linux" {
- return args, grpcProcess
- }
-
- // Check if there is a ld.so file in the assetDir, if it does, we need to run the grpc process with it
- ldPath := filepath.Join(assetDir, "backend-assets", "lib", "ld.so")
- if _, err := os.Stat(ldPath); err == nil {
- log.Debug().Msgf("ld.so found")
- // We need to run the grpc process with the ld.so
- args = append([]string{grpcProcess}, args...)
- grpcProcess = ldPath
- }
-
- return args, grpcProcess
-}
-
-// LoadExternal sets the LD_LIBRARY_PATH to include the given directory
-func LoadExternal(dir string) error {
- // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if skipLibraryPath {
- return nil
- }
-
- lpathVar := "LD_LIBRARY_PATH"
- if runtime.GOOS == "darwin" {
- lpathVar = "DYLD_FALLBACK_LIBRARY_PATH" // should it be DYLD_LIBRARY_PATH ?
- }
-
- var setErr error = nil
- if _, err := os.Stat(dir); err == nil {
- ldLibraryPath := os.Getenv(lpathVar)
- if ldLibraryPath == "" {
- ldLibraryPath = dir
- } else {
- ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, dir)
- }
- setErr = errors.Join(setErr, os.Setenv(lpathVar, ldLibraryPath))
- }
- return setErr
-}
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index c54fbdcc3..dc60f98d2 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -5,18 +5,12 @@ import (
"errors"
"fmt"
"os"
- "path/filepath"
- "slices"
"strings"
"time"
grpc "github.com/mudler/LocalAI/pkg/grpc"
- "github.com/mudler/LocalAI/pkg/library"
- "github.com/mudler/LocalAI/pkg/utils"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
-
- "github.com/elliotchance/orderedmap/v2"
)
const (
@@ -51,79 +45,6 @@ const (
LocalStoreBackend = "local-store"
)
-func backendPath(assetDir, backend string) string {
- return filepath.Join(assetDir, "backend-assets", "grpc", backend)
-}
-
-// backendsInAssetDir returns the list of backends in the asset directory
-// that should be loaded
-func backendsInAssetDir(assetDir string) (map[string][]string, error) {
- // Exclude backends from automatic loading
- excludeBackends := []string{LocalStoreBackend}
- entry, err := os.ReadDir(backendPath(assetDir, ""))
- if err != nil {
- return nil, err
- }
- backends := make(map[string][]string)
-ENTRY:
- for _, e := range entry {
- for _, exclude := range excludeBackends {
- if e.Name() == exclude {
- continue ENTRY
- }
- }
- if e.IsDir() {
- continue
- }
- if strings.HasSuffix(e.Name(), ".log") {
- continue
- }
-
- backends[e.Name()] = []string{}
- }
-
- return backends, nil
-}
-
-func orderBackends(backends map[string][]string) ([]string, error) {
- // order backends from the asset directory.
- // as we scan for backends, we want to keep some order which backends are tried of.
- // for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last.
-
- // sets a priority list - first has more priority
- priorityList := []string{}
-
- toTheEnd := []string{
- // last has to be huggingface
- LCHuggingFaceBackend,
- }
-
- // create an ordered map
- orderedBackends := orderedmap.NewOrderedMap[string, any]()
- // add priorityList first
- for _, p := range priorityList {
- if _, ok := backends[p]; ok {
- orderedBackends.Set(p, backends[p])
- }
- }
-
- for k, v := range backends {
- if !slices.Contains(toTheEnd, k) {
- if _, ok := orderedBackends.Get(k); !ok {
- orderedBackends.Set(k, v)
- }
- }
- }
-
- for _, t := range toTheEnd {
- if _, ok := backends[t]; ok {
- orderedBackends.Set(t, backends[t])
- }
- }
-
- return orderedBackends.Keys(), nil
-}
-
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string, string) (*Model, error) {
@@ -177,35 +98,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
client = NewModel(modelID, uri, nil)
}
} else {
- grpcProcess := backendPath(o.assetDir, backend)
- if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
- return nil, fmt.Errorf("referring to a backend not in asset dir: %s", err.Error())
- }
-
- // Check if the file exists
- if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
- return nil, fmt.Errorf("backend not found: %s", grpcProcess)
- }
-
- serverAddress, err := getFreeAddress()
- if err != nil {
- return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
- }
-
- args := []string{}
-
- // Load the ld.so if it exists
- args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
-
- // Make sure the process is executable in any circumstance
- process, err := ml.startProcess(grpcProcess, modelID, serverAddress, args...)
- if err != nil {
- return nil, err
- }
-
- log.Debug().Msgf("GRPC Service Started")
-
- client = NewModel(modelID, serverAddress, process)
+ return nil, fmt.Errorf("backend not found: %s", backend)
}
log.Debug().Msgf("Wait for the service to start up")
@@ -259,14 +152,6 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
}
}
-func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) {
- backends, err := backendsInAssetDir(assetdir)
- if err != nil {
- return nil, err
- }
- return orderBackends(backends)
-}
-
func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err error) {
o := NewOptions(opts...)
@@ -346,17 +231,18 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
var err error
// get backends embedded in the binary
- autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
- if err != nil {
- ml.Close() // we failed, release the lock
- return nil, err
- }
+ autoLoadBackends := []string{}
// append externalBackends supplied by the user via the CLI
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}
+ if len(autoLoadBackends) == 0 {
+ log.Error().Msg("No backends found")
+ return nil, fmt.Errorf("no backends found")
+ }
+
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.modelID, autoLoadBackends)
diff --git a/pkg/model/loader_options.go b/pkg/model/loader_options.go
index 28a7c598f..16df2b9bd 100644
--- a/pkg/model/loader_options.go
+++ b/pkg/model/loader_options.go
@@ -10,7 +10,6 @@ type Options struct {
backendString string
model string
modelID string
- assetDir string
context context.Context
gRPCOptions *pb.ModelOptions
@@ -75,12 +74,6 @@ func WithLoadGRPCLoadModelOpts(opts *pb.ModelOptions) Option {
}
}
-func WithAssetDir(assetDir string) Option {
- return func(o *Options) {
- o.assetDir = assetDir
- }
-}
-
func WithContext(ctx context.Context) Option {
return func(o *Options) {
o.context = ctx
diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go
index 5484a79ca..dfe992c1d 100644
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -2,11 +2,9 @@ package integration_test
import (
"context"
- "embed"
"math"
"math/rand"
"os"
- "path/filepath"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -14,15 +12,11 @@ import (
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
- "github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/store"
)
-//go:embed backend-assets/*
-var backendAssets embed.FS
-
func normalize(vecs [][]float32) {
for i, k := range vecs {
norm := float64(0)
@@ -49,12 +43,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
- backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
- err = os.Mkdir(backendAssetsDir, 0750)
- Expect(err).ToNot(HaveOccurred())
-
- err = assets.ExtractFiles(backendAssets, backendAssetsDir)
- Expect(err).ToNot(HaveOccurred())
debug := true
@@ -66,7 +54,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
storeOpts := []model.Option{
model.WithBackendString(bc.Backend),
- model.WithAssetDir(backendAssetsDir),
model.WithModel("test"),
}