From 98e5291afc0c75697e253d893f264f2dcfd1e456 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 22 Jul 2025 16:31:04 +0200
Subject: [PATCH] feat: refactor build process, drop embedded backends (#5875)
* feat: split remaining backends and drop embedded backends
- Drop silero-vad, huggingface, and stores backend from embedded
binaries
- Refactor Makefile and Dockerfile to avoid building grpc backends
- Drop golang code that was used to embed backends
- Simplify building by using goreleaser
Signed-off-by: Ettore Di Giacinto
* chore(gallery): be specific with llama-cpp backend templates
Signed-off-by: Ettore Di Giacinto
* chore(docs): update
Signed-off-by: Ettore Di Giacinto
* chore(ci): minor fixes
Signed-off-by: Ettore Di Giacinto
* chore: drop all ffmpeg references
Signed-off-by: Ettore Di Giacinto
* fix: run protogen-go
Signed-off-by: Ettore Di Giacinto
* Always enable p2p mode
Signed-off-by: Ettore Di Giacinto
* Update goreleaser file
Signed-off-by: Ettore Di Giacinto
* fix(stores): do not always load
Signed-off-by: Ettore Di Giacinto
* Fix linting issues
Signed-off-by: Ettore Di Giacinto
* Simplify
Signed-off-by: Ettore Di Giacinto
* Mac OS fixup
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
.devcontainer-scripts/poststart.sh | 3 -
.devcontainer/docker-compose-devcontainer.yml | 3 -
.env | 7 -
.github/workflows/backend.yml | 169 +++++---
.github/workflows/build-test.yaml | 23 +
.github/workflows/deploy-explorer.yaml | 2 +-
.github/workflows/image-pr.yml | 11 +-
.github/workflows/image.yml | 11 -
.github/workflows/image_build.yml | 6 -
.github/workflows/notify-models.yaml | 2 +-
.github/workflows/release.yaml | 399 +-----------------
.github/workflows/test.yml | 9 +-
.gitignore | 2 +
.goreleaser.yaml | 33 ++
.vscode/launch.json | 2 +-
Dockerfile | 10 +-
Earthfile | 5 -
Makefile | 221 ++++------
assets.go | 15 -
backend/cpp/llama-cpp/run.sh | 2 +-
backend/go/huggingface/Makefile | 9 +
.../langchain => huggingface}/langchain.go | 0
.../go/{llm/langchain => huggingface}/main.go | 0
backend/go/huggingface/package.sh | 12 +
backend/go/huggingface/run.sh | 6 +
backend/go/local-store/Makefile | 9 +
backend/go/{stores => local-store}/debug.go | 0
backend/go/{stores => local-store}/main.go | 0
backend/go/local-store/package.sh | 12 +
.../go/{stores => local-store}/production.go | 0
backend/go/local-store/run.sh | 6 +
backend/go/{stores => local-store}/store.go | 6 +-
backend/go/silero-vad/Makefile | 47 +++
backend/go/{vad/silero => silero-vad}/main.go | 0
backend/go/silero-vad/package.sh | 53 +++
backend/go/silero-vad/run.sh | 14 +
backend/go/{vad/silero => silero-vad}/vad.go | 0
backend/index.yaml | 50 ++-
core/application/startup.go | 19 -
core/backend/options.go | 1 -
core/backend/stores.go | 10 +-
core/cli/context/context.go | 7 -
core/cli/run.go | 5 -
core/cli/soundgeneration.go | 5 +-
core/cli/transcript.go | 18 +-
core/cli/tts.go | 16 +-
core/cli/worker/worker.go | 2 +-
core/cli/worker/worker_llamacpp.go | 11 +-
core/cli/worker/worker_nop2p.go | 16 -
core/cli/worker/worker_p2p.go | 15 +-
core/config/application_config.go | 23 -
core/gallery/models.go | 3 +-
core/http/app_test.go | 31 +-
core/http/endpoints/localai/stores.go | 8 +-
core/http/endpoints/localai/system.go | 5 +-
core/http/endpoints/localai/welcome.go | 2 -
core/http/routes/localai.go | 7 +-
core/http/routes/ui.go | 69 ++-
core/http/routes/ui_backend_gallery.go | 2 -
core/http/routes/ui_gallery.go | 5 +-
core/http/views/explorer.html | 2 +-
core/http/views/p2p.html | 4 +-
core/http/views/partials/navbar.html | 4 -
core/p2p/federated_server.go | 3 -
core/p2p/p2p.go | 7 -
core/p2p/p2p_disabled.go | 35 --
core/schema/localai.go | 7 +
docker-compose.yaml | 2 +-
docs/content/docs/advanced/advanced-usage.md | 28 +-
.../content/docs/features/GPU-acceleration.md | 8 +-
docs/content/docs/features/embeddings.md | 1 -
.../docs/getting-started/container-images.md | 2 -
.../docs/getting-started/customize-model.md | 4 +-
gallery/alpaca.yaml | 1 +
gallery/arch-function.yaml | 1 +
gallery/chatml-hercules.yaml | 1 +
gallery/chatml.yaml | 1 +
gallery/command-r.yaml | 1 +
gallery/deephermes.yaml | 1 +
gallery/deepseek-r1.yaml | 1 +
gallery/deepseek.yaml | 1 +
gallery/falcon3.yaml | 1 +
gallery/gemma.yaml | 1 +
gallery/granite.yaml | 1 +
gallery/granite3-2.yaml | 1 +
gallery/hermes-2-pro-mistral.yaml | 1 +
gallery/llama3-instruct.yaml | 1 +
gallery/llama3.1-instruct-grammar.yaml | 1 +
gallery/llama3.1-instruct.yaml | 1 +
gallery/llama3.1-reflective.yaml | 1 +
gallery/llama3.2-fcall.yaml | 1 +
gallery/llama3.2-quantized.yaml | 1 +
gallery/mathstral.yaml | 1 +
gallery/mistral-0.3.yaml | 1 +
gallery/moondream.yaml | 1 +
gallery/mudler.yaml | 1 +
gallery/phi-2-chat.yaml | 1 +
gallery/phi-2-orange.yaml | 1 +
gallery/phi-3-chat.yaml | 1 +
gallery/phi-4-chat-fcall.yaml | 1 +
gallery/phi-4-chat.yaml | 1 +
gallery/qwen-fcall.yaml | 1 +
gallery/qwen3-openbuddy.yaml | 1 +
gallery/qwen3.yaml | 1 +
gallery/rwkv.yaml | 1 +
gallery/smolvlm.yaml | 1 +
gallery/tuluv2.yaml | 1 +
gallery/vllm.yaml | 1 +
gallery/wizardlm2.yaml | 1 +
go.mod | 21 +-
go.sum | 39 --
main.go | 3 -
pkg/assets/extract.go | 64 ---
pkg/assets/list.go | 27 --
pkg/library/dynaload.go | 86 ----
pkg/model/initializers.go | 128 +-----
pkg/model/loader_options.go | 7 -
tests/integration/stores_test.go | 13 -
118 files changed, 631 insertions(+), 1339 deletions(-)
create mode 100644 .github/workflows/build-test.yaml
create mode 100644 .goreleaser.yaml
delete mode 100644 Earthfile
delete mode 100644 assets.go
create mode 100644 backend/go/huggingface/Makefile
rename backend/go/{llm/langchain => huggingface}/langchain.go (100%)
rename backend/go/{llm/langchain => huggingface}/main.go (100%)
create mode 100755 backend/go/huggingface/package.sh
create mode 100755 backend/go/huggingface/run.sh
create mode 100644 backend/go/local-store/Makefile
rename backend/go/{stores => local-store}/debug.go (100%)
rename backend/go/{stores => local-store}/main.go (100%)
create mode 100755 backend/go/local-store/package.sh
rename backend/go/{stores => local-store}/production.go (100%)
create mode 100755 backend/go/local-store/run.sh
rename backend/go/{stores => local-store}/store.go (99%)
create mode 100644 backend/go/silero-vad/Makefile
rename backend/go/{vad/silero => silero-vad}/main.go (100%)
create mode 100755 backend/go/silero-vad/package.sh
create mode 100755 backend/go/silero-vad/run.sh
rename backend/go/{vad/silero => silero-vad}/vad.go (100%)
delete mode 100644 core/cli/worker/worker_nop2p.go
delete mode 100644 core/p2p/p2p_disabled.go
delete mode 100644 pkg/assets/extract.go
delete mode 100644 pkg/assets/list.go
delete mode 100644 pkg/library/dynaload.go
diff --git a/.devcontainer-scripts/poststart.sh b/.devcontainer-scripts/poststart.sh
index 196e821db..7e65b4c7f 100644
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@@ -2,9 +2,6 @@
cd /workspace
-# Grab the pre-stashed backend assets to avoid build issues
-cp -r /build/backend-assets /workspace/backend-assets
-
# Ensures generated source files are present upon load
make prepare
diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml
index 65e9b5c1b..81610ade5 100644
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -4,9 +4,6 @@ services:
context: ..
dockerfile: Dockerfile
target: devcontainer
- args:
- - FFMPEG=true
- - GO_TAGS=p2p tts
env_file:
- ../.env
ports:
diff --git a/.env b/.env
index b0d1a2ad2..53d796bc1 100644
--- a/.env
+++ b/.env
@@ -41,13 +41,6 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
-## Enable go tags, available: p2p, tts
-## p2p: enable distributed inferencing
-## tts: enables text-to-speech with go-piper
-## (requires REBUILD=true)
-#
-# GO_TAGS=p2p
-
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images
diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index cf7536e81..fe08deb2c 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -43,7 +43,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -55,7 +55,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -67,7 +67,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -79,7 +79,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -91,7 +91,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -104,7 +104,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -116,7 +116,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -128,7 +128,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -140,7 +140,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -152,7 +152,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -165,7 +165,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -177,7 +177,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -189,7 +189,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -201,7 +201,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -213,7 +213,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -226,7 +226,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -238,7 +238,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -250,7 +250,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -262,7 +262,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -274,7 +274,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -287,7 +287,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -299,7 +299,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -311,7 +311,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -323,7 +323,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -335,7 +335,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -348,7 +348,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -360,7 +360,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -372,7 +372,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -384,7 +384,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -397,7 +397,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -409,7 +409,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -421,7 +421,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -433,7 +433,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -445,7 +445,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -457,7 +457,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -469,7 +469,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -481,7 +481,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -493,7 +493,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -506,7 +506,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -518,7 +518,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -530,7 +530,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -542,7 +542,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -554,7 +554,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -566,7 +566,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -578,7 +578,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -590,7 +590,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -603,7 +603,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-piper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -616,7 +616,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -628,7 +628,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-cpu-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -652,7 +652,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -665,7 +665,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-cpu-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -677,7 +677,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -689,7 +689,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -701,7 +701,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -713,7 +713,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -725,7 +725,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -749,8 +749,8 @@ jobs:
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
- platforms: 'linux/amd64'
- tag-latest: 'true'
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -762,7 +762,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -774,7 +774,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -786,7 +786,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -798,7 +798,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -810,7 +810,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
- tag-latest: 'true'
+ tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -842,6 +842,45 @@ jobs:
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
+ #silero-vad
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-cpu-silero-vad'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "silero-vad"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
+ # local-store
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-cpu-local-store'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "local-store"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
+ # huggingface
+ - build-type: ''
+ cuda-major-version: ""
+ cuda-minor-version: ""
+ platforms: 'linux/amd64,linux/arm64'
+ tag-latest: 'auto'
+ tag-suffix: '-huggingface'
+ runs-on: 'ubuntu-latest'
+ base-image: "ubuntu:22.04"
+ skip-drivers: 'false'
+ backend: "huggingface"
+ dockerfile: "./backend/Dockerfile.go"
+ context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@@ -866,7 +905,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
@@ -954,7 +993,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
export PLATFORMARCH=darwin/amd64
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
diff --git a/.github/workflows/build-test.yaml b/.github/workflows/build-test.yaml
new file mode 100644
index 000000000..095b41822
--- /dev/null
+++ b/.github/workflows/build-test.yaml
@@ -0,0 +1,23 @@
+name: Build test
+
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+
+jobs:
+ build-test:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ - name: Set up Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: 1.23
+ - name: Run GoReleaser
+ run: |
+ make dev-dist
diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml
index 9a6d729d9..c2063247f 100644
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -31,7 +31,7 @@ jobs:
make protogen-go
- name: Build api
run: |
- CGO_ENABLED=0 make build-api
+ CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.2
with:
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 0a3ed2708..262412237 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -14,7 +14,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,8 +39,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
- ffmpeg: 'true'
+ tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
@@ -49,7 +47,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
- ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -59,15 +56,13 @@ jobs:
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
- tag-suffix: 'sycl-f16-ffmpeg'
- ffmpeg: 'true'
+ tag-suffix: 'sycl-f16'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-vulkan-ffmpeg-core'
- ffmpeg: 'true'
+ tag-suffix: '-vulkan-core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 7339038c3..f97fda6a5 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -18,7 +18,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,7 +39,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
- ffmpeg: 'true'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -52,7 +50,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -76,7 +73,6 @@ jobs:
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: ''
- ffmpeg: 'true'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
@@ -88,7 +84,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
@@ -100,7 +95,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -110,7 +104,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-vulkan'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -122,7 +115,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f16'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f16"
@@ -132,7 +124,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
- ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
@@ -142,7 +133,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
- ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -167,7 +157,6 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
- ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index fe021823b..4e1e19c42 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -37,10 +37,6 @@ on:
description: 'Tag suffix'
default: ''
type: string
- ffmpeg:
- description: 'FFMPEG'
- default: ''
- type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
@@ -236,7 +232,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
- FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@@ -264,7 +259,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
- FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml
index f54b4852d..56846cc86 100644
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -96,7 +96,7 @@ jobs:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
- docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
+ docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 366f330ed..96495a1bf 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,399 +1,26 @@
-name: Build and Release
+name: goreleaser
on:
push:
- branches:
- - master
tags:
- 'v*'
- pull_request:
-
-env:
- GRPC_VERSION: v1.65.0
-
-permissions:
- contents: write
-
-concurrency:
- group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
- cancel-in-progress: true
jobs:
-
- # TODO: temporary disable linux-arm64 build
- # build-linux-arm:
- # runs-on: ubuntu-24.04-arm
- # steps:
- # - name: Free Disk Space (Ubuntu)
- # uses: jlumbroso/free-disk-space@main
- # with:
- # # this might remove tools that are actually needed,
- # # if set to "true" but frees about 6 GB
- # tool-cache: true
- # # all of these default to true, but feel free to set to
- # # "false" if necessary for your workflow
- # android: true
- # dotnet: true
- # haskell: true
- # large-packages: true
- # docker-images: true
- # swap-storage: true
-
- # - name: Release space from worker
- # run: |
- # echo "Listing top largest packages"
- # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- # head -n 30 <<< "${pkgs}"
- # echo
- # df -h
- # echo
- # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
- # sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
- # sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
- # sudo rm -rf /usr/local/lib/android
- # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
- # sudo rm -rf /usr/share/dotnet
- # sudo apt-get remove -y '^mono-.*' || true
- # sudo apt-get remove -y '^ghc-.*' || true
- # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
- # sudo apt-get remove -y 'php.*' || true
- # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
- # sudo apt-get remove -y '^google-.*' || true
- # sudo apt-get remove -y azure-cli || true
- # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
- # sudo apt-get remove -y '^gfortran-.*' || true
- # sudo apt-get remove -y microsoft-edge-stable || true
- # sudo apt-get remove -y firefox || true
- # sudo apt-get remove -y powershell || true
- # sudo apt-get remove -y r-base-core || true
- # sudo apt-get autoremove -y
- # sudo apt-get clean
- # echo
- # echo "Listing top largest packages"
- # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- # head -n 30 <<< "${pkgs}"
- # echo
- # sudo rm -rfv build || true
- # sudo rm -rf /usr/share/dotnet || true
- # sudo rm -rf /opt/ghc || true
- # sudo rm -rf "/usr/local/share/boost" || true
- # sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
- # df -h
-
- # - name: Force Install GIT latest
- # run: |
- # sudo apt-get update \
- # && sudo apt-get install -y software-properties-common \
- # && sudo apt-get update \
- # && sudo add-apt-repository -y ppa:git-core/ppa \
- # && sudo apt-get update \
- # && sudo apt-get install -y git
- # - name: Clone
- # uses: actions/checkout@v4
- # with:
- # submodules: true
- # - uses: actions/setup-go@v5
- # with:
- # go-version: '1.21.x'
- # cache: false
- # - name: Dependencies
- # run: |
- # sudo apt-get update
- # sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
- # make install-go-tools
- # - name: Install CUDA Dependencies
- # run: |
- # curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
- # sudo dpkg -i cuda-keyring_1.1-1_all.deb
- # sudo apt-get update
- # sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
- # env:
- # CUDA_VERSION: 12-5
- # - name: Cache grpc
- # id: cache-grpc
- # uses: actions/cache@v4
- # with:
- # path: grpc
- # key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
- # - name: Build grpc
- # if: steps.cache-grpc.outputs.cache-hit != 'true'
- # run: |
- # git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- # cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
- # cd cmake/build && cmake -DgRPC_INSTALL=ON \
- # -DgRPC_BUILD_TESTS=OFF \
- # ../.. && sudo make --jobs 5 --output-sync=target
- # - name: Install gRPC
- # run: |
- # cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- # # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- # - name: Build
- # id: build
- # run: |
- # go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- # go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- # export PATH=$PATH:$GOPATH/bin
- # export PATH=/usr/local/cuda/bin:$PATH
- # sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
- # BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
- # make -j4 dist
- # - uses: actions/upload-artifact@v4
- # with:
- # name: LocalAI-linux-arm64
- # path: release/
- # - name: Release
- # uses: softprops/action-gh-release@v2
- # if: startsWith(github.ref, 'refs/tags/')
- # with:
- # files: |
- # release/*
- # - name: Setup tmate session if tests fail
- # if: ${{ failure() }}
- # uses: mxschmitt/action-tmate@v3.22
- # with:
- # detached: true
- # connect-timeout-seconds: 180
- # limit-access-to-actor: true
- build-linux:
+ goreleaser:
runs-on: ubuntu-latest
steps:
- - name: Free Disk Space (Ubuntu)
- uses: jlumbroso/free-disk-space@main
- with:
- # this might remove tools that are actually needed,
- # if set to "true" but frees about 6 GB
- tool-cache: true
- # all of these default to true, but feel free to set to
- # "false" if necessary for your workflow
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- docker-images: true
- swap-storage: true
-
- - name: Release space from worker
- run: |
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- df -h
- echo
- sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
- sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
- sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
- sudo rm -rf /usr/local/lib/android
- sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
- sudo rm -rf /usr/share/dotnet
- sudo apt-get remove -y '^mono-.*' || true
- sudo apt-get remove -y '^ghc-.*' || true
- sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
- sudo apt-get remove -y 'php.*' || true
- sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
- sudo apt-get remove -y '^google-.*' || true
- sudo apt-get remove -y azure-cli || true
- sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
- sudo apt-get remove -y '^gfortran-.*' || true
- sudo apt-get remove -y microsoft-edge-stable || true
- sudo apt-get remove -y firefox || true
- sudo apt-get remove -y powershell || true
- sudo apt-get remove -y r-base-core || true
- sudo apt-get autoremove -y
- sudo apt-get clean
- echo
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- sudo rm -rfv build || true
- sudo rm -rf /usr/share/dotnet || true
- sudo rm -rf /opt/ghc || true
- sudo rm -rf "/usr/local/share/boost" || true
- sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
- df -h
-
- - name: Force Install GIT latest
- run: |
- sudo apt-get update \
- && sudo apt-get install -y software-properties-common \
- && sudo apt-get update \
- && sudo add-apt-repository -y ppa:git-core/ppa \
- && sudo apt-get update \
- && sudo apt-get install -y git
- - name: Clone
+ - name: Checkout
uses: actions/checkout@v4
with:
- submodules: true
- - uses: actions/setup-go@v5
+ fetch-depth: 0
+ - name: Set up Go
+ uses: actions/setup-go@v5
with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
- make install-go-tools
- - name: Intel Dependencies
- run: |
- wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
- echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
- sudo apt update
- sudo apt install -y intel-basekit
- - name: Install CUDA Dependencies
- run: |
- curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
- sudo dpkg -i cuda-keyring_1.1-1_all.deb
- sudo apt-get update
- sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+          go-version: "1.23"
+ - name: Run GoReleaser
+ uses: goreleaser/goreleaser-action@v6
+ with:
+ version: v2.11.0
+ args: release --clean
env:
- CUDA_VERSION: 12-5
- - name: "Install Hipblas"
- env:
- ROCM_VERSION: "6.1"
- AMDGPU_VERSION: "6.1"
- run: |
- set -ex
-
- sudo apt-get update
- sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
-
- sudo apt update
- wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
- sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
- sudo apt update
-
- sudo amdgpu-install --usecase=rocm
-
- sudo apt-get clean
- sudo rm -rf /var/lib/apt/lists/*
- sudo ldconfig
- - name: Cache grpc
- id: cache-grpc
- uses: actions/cache@v4
- with:
- path: grpc
- key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- - name: Build grpc
- if: steps.cache-grpc.outputs.cache-hit != 'true'
- run: |
- git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
- cd cmake/build && cmake -DgRPC_INSTALL=ON \
- -DgRPC_BUILD_TESTS=OFF \
- ../.. && sudo make --jobs 5 --output-sync=target
- - name: Install gRPC
- run: |
- cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- - name: Build
- id: build
- run: |
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- export PATH=$PATH:$GOPATH/bin
- export PATH=/usr/local/cuda/bin:$PATH
- export PATH=/opt/rocm/bin:$PATH
- source /opt/intel/oneapi/setvars.sh
- sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
- make -j4 dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-linux
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
-
-
- build-macOS-x86_64:
- runs-on: macos-13
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- brew install protobuf grpc
- make install-go-tools
- - name: Build
- id: build
- run: |
- export C_INCLUDE_PATH=/usr/local/include
- export CPLUS_INCLUDE_PATH=/usr/local/include
- export PATH=$PATH:$GOPATH/bin
- export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
- make dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-MacOS-x86_64
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
-
- build-macOS-arm64:
- runs-on: macos-14
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- brew install protobuf grpc libomp llvm
- make install-go-tools
- - name: Build
- id: build
- run: |
- export C_INCLUDE_PATH=/usr/local/include
- export CPLUS_INCLUDE_PATH=/usr/local/include
- export PATH=$PATH:$GOPATH/bin
- export CC=/opt/homebrew/opt/llvm/bin/clang
- make dist
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-MacOS-arm64
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index cc6ef333d..8a3f89871 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -75,7 +75,6 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Dependencies
run: |
@@ -103,7 +102,7 @@ jobs:
make -C backend/python/transformers
- make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
+ make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@@ -164,11 +163,10 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
- PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+ PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
@@ -199,11 +197,10 @@ jobs:
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
- go install github.com/GeertJohan/go.rice/rice@latest
- name: Build llama-cpp-darwin
run: |
make protogen-go
- make build-api
+ make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
diff --git a/.gitignore b/.gitignore
index 1f160c8ce..f6d83447e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ prepare-sources
/backend/cpp/llama-*
!backend/cpp/llama-cpp
/backends
+/backend-images
+/result.yaml
*.log
diff --git a/.goreleaser.yaml b/.goreleaser.yaml
new file mode 100644
index 000000000..5bd6aa0bc
--- /dev/null
+++ b/.goreleaser.yaml
@@ -0,0 +1,33 @@
+version: 2
+before:
+ hooks:
+ - make protogen-go
+ - go mod tidy
+dist: release
+source:
+ enabled: true
+ name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
+builds:
+ -
+ env:
+ - CGO_ENABLED=0
+ ldflags:
+ - -s -w
+ - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
+ - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
+ goos:
+ - linux
+ - darwin
+ #- windows
+ goarch:
+ - amd64
+ - arm64
+archives:
+ - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
+ name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
+checksum:
+ name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
+snapshot:
+ version_template: "{{ .Tag }}-next"
+changelog:
+ use: github-native
diff --git a/.vscode/launch.json b/.vscode/launch.json
index f5e91508e..55da767b4 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
- "buildFlags": ["-tags", "p2p tts", "-v"],
+ "buildFlags": ["-tags", "", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}
diff --git a/Dockerfile b/Dockerfile
index 91e8aea5a..4e8e29cb2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -142,10 +142,9 @@ EOT
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
-# Install grpc compilers and rice
+# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
- go install github.com/GeertJohan/go.rice/rice@latest
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@@ -194,7 +193,7 @@ RUN apt-get update && \
FROM build-requirements AS builder-base
-ARG GO_TAGS="p2p"
+ARG GO_TAGS=""
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
@@ -249,8 +248,7 @@ COPY ./pkg/utils ./pkg/utils
COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
-RUN make backend-assets
-RUN make grpcs
+RUN make protogen-go
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
diff --git a/Earthfile b/Earthfile
deleted file mode 100644
index 218768c9a..000000000
--- a/Earthfile
+++ /dev/null
@@ -1,5 +0,0 @@
-VERSION 0.7
-
-build:
- FROM DOCKERFILE -f Dockerfile .
- SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
diff --git a/Makefile b/Makefile
index 6f2be04d7..6f7f4fcb2 100644
--- a/Makefile
+++ b/Makefile
@@ -3,9 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
+GORELEASER?=
export BUILD_TYPE?=
@@ -35,77 +33,33 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
-UPX?=
-# check if upx exists
-ifeq (, $(shell which upx))
- UPX=
-else
- UPX=$(shell which upx)
-endif
-
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
- ONNX_ARCH=aarch64
-endif
ifeq ($(OS),Darwin)
- ONNX_OS=osx
- ifneq (,$(findstring aarch64,$(shell uname -m)))
- ONNX_ARCH=arm64
- else ifneq (,$(findstring arm64,$(shell uname -m)))
- ONNX_ARCH=arm64
- else
- ONNX_ARCH=x86_64
- endif
-
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
endif
-ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
-ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
-# Use filter-out to remove the specified backends
-ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
+# check if goreleaser exists
+ifeq (, $(shell which goreleaser))
+ GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
+else
+ GORELEASER=$(shell which goreleaser)
+endif
-GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
-# If empty, then we build all
-ifeq ($(GRPC_BACKENDS),)
- GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
-endif
-
-ifeq ($(BUILD_API_ONLY),true)
- GRPC_BACKENDS=
-endif
.PHONY: all test build vendor
all: help
-sources/onnxruntime:
- mkdir -p sources/onnxruntime
- curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
- cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
- cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
- cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
- mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
- mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
@@ -116,58 +70,33 @@ clean: ## Remove build related file
rm -f prepare
rm -rf $(BINARY_NAME)
rm -rf release/
- rm -rf backend-assets/*
- $(MAKE) -C backend/cpp/grpc clean
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
clean-tests:
rm -rf test-models
rm -rf test-dir
- rm -rf core/http/backend-assets
-
-clean-dc: clean
- cp -r /build/backend-assets /workspace/backend-assets
## Install Go tools
install-go-tools:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- go install github.com/GeertJohan/go.rice/rice@latest
## Build:
-build: backend-assets grpcs install-go-tools ## Build the project
+build: protogen-go install-go-tools ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
-ifneq ($(BACKEND_LIBS),)
- $(MAKE) backend-assets/lib
- cp -f $(BACKEND_LIBS) backend-assets/lib/
-endif
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
- rice append --exec $(BINARY_NAME)
-build-api:
- BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
-
-backend-assets/lib:
- mkdir -p backend-assets/lib
+dev-dist:
+ $(GORELEASER) build --snapshot --clean
dist:
- GO_TAGS="p2p" $(MAKE) build
- GO_TAGS="p2p" STATIC=true $(MAKE) build
- mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
- cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
- shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
-else
- cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
- shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
-endif
+ $(GORELEASER) build --clean
osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@@ -185,8 +114,7 @@ test-models/testmodel.ggml:
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
cp tests/models_fixtures/* test-models
-prepare-test: grpcs
- cp -rf backend-assets core/http
+prepare-test: protogen-go
cp tests/models_fixtures/* test-models
########################################################
@@ -194,7 +122,7 @@ prepare-test: grpcs
########################################################
## Test targets
-test: test-models/testmodel.ggml grpcs
+test: test-models/testmodel.ggml protogen-go
@echo 'Running tests'
export GO_TAGS="debug"
$(MAKE) prepare-test
@@ -204,17 +132,26 @@ test: test-models/testmodel.ggml grpcs
$(MAKE) test-tts
$(MAKE) test-stablediffusion
-backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api
+backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
-backends/piper: docker-build-piper docker-save-piper build-api
+backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
-backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api
+backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
-backends/whisper: docker-build-whisper docker-save-whisper build-api
+backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
+
+backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
+
+backends/local-store: docker-build-local-store docker-save-local-store build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
+
+backends/huggingface: docker-build-huggingface docker-save-huggingface build
+ ./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
########################################################
## AIO tests
@@ -243,7 +180,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
- docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
+ docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -275,9 +212,7 @@ test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
-test-stores: backend-assets/grpc/local-store
- mkdir -p tests/integration/backend-assets/grpc
- cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
+test-stores:
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
test-container:
@@ -310,10 +245,42 @@ protogen: protogen-go protogen-python
.PHONY: protogen-clean
protogen-clean: protogen-go-clean protogen-python-clean
+protoc:
+ @OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
+ ARCH_NAME=$$(uname -m); \
+ if [ "$$OS_NAME" = "darwin" ]; then \
+ if [ "$$ARCH_NAME" = "arm64" ]; then \
+ FILE=protoc-31.1-osx-aarch_64.zip; \
+ elif [ "$$ARCH_NAME" = "x86_64" ]; then \
+ FILE=protoc-31.1-osx-x86_64.zip; \
+ else \
+ echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
+ fi; \
+ elif [ "$$OS_NAME" = "linux" ]; then \
+ if [ "$$ARCH_NAME" = "x86_64" ]; then \
+ FILE=protoc-31.1-linux-x86_64.zip; \
+ elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
+ FILE=protoc-31.1-linux-aarch_64.zip; \
+ elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
+ FILE=protoc-31.1-linux-ppcle_64.zip; \
+ elif [ "$$ARCH_NAME" = "s390x" ]; then \
+ FILE=protoc-31.1-linux-s390_64.zip; \
+ elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
+ FILE=protoc-31.1-linux-x86_32.zip; \
+ else \
+ echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
+ fi; \
+ else \
+ echo "Unsupported OS: $$OS_NAME"; exit 1; \
+ fi; \
+ URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
+ curl -L -s $$URL -o protoc.zip && \
+ unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
+
.PHONY: protogen-go
-protogen-go: install-go-tools
+protogen-go: protoc install-go-tools
mkdir -p pkg/grpc/proto
- protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
+ ./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
.PHONY: protogen-go-clean
@@ -407,19 +374,6 @@ vllm-protogen:
vllm-protogen-clean:
$(MAKE) -C backend/python/vllm protogen-clean
-## GRPC
-# Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments: protogen-python
- $(MAKE) -C backend/python/bark
- $(MAKE) -C backend/python/coqui
- $(MAKE) -C backend/python/diffusers
- $(MAKE) -C backend/python/chatterbox
- $(MAKE) -C backend/python/faster-whisper
- $(MAKE) -C backend/python/vllm
- $(MAKE) -C backend/python/rerankers
- $(MAKE) -C backend/python/transformers
- $(MAKE) -C backend/python/kokoro
- $(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
@@ -433,37 +387,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
-backend-assets:
- mkdir -p backend-assets
-ifeq ($(BUILD_API_ONLY),true)
- touch backend-assets/keep
-endif
-
-
-backend-assets/grpc:
- mkdir -p backend-assets/grpc
-
-backend-assets/grpc/huggingface: protogen-go backend-assets/grpc
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/huggingface
-endif
-
-backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
- CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/silero-vad
-endif
-
-backend-assets/grpc/local-store: backend-assets/grpc protogen-go
- $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
-ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/local-store
-endif
-
-grpcs: protogen-go $(GRPC_BACKENDS)
-
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
@@ -506,7 +429,6 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
- --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
@@ -515,7 +437,6 @@ docker-image-intel-xpu:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
- --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
########################################################
@@ -534,6 +455,24 @@ docker-build-bark-cpp:
docker-build-piper:
docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
+docker-build-local-store:
+ docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
+
+docker-build-huggingface:
+ docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
+
+docker-save-huggingface: backend-images
+ docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
+
+docker-save-local-store: backend-images
+ docker save local-ai-backend:local-store -o backend-images/local-store.tar
+
+docker-build-silero-vad:
+ docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
+
+docker-save-silero-vad: backend-images
+ docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
+
docker-save-piper: backend-images
docker save local-ai-backend:piper -o backend-images/piper.tar
diff --git a/assets.go b/assets.go
deleted file mode 100644
index b3c813871..000000000
--- a/assets.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package main
-
-import (
- rice "github.com/GeertJohan/go.rice"
-)
-
-var backendAssets *rice.Box
-
-func init() {
- var err error
- backendAssets, err = rice.FindBox("backend-assets")
- if err != nil {
- panic(err)
- }
-}
diff --git a/backend/cpp/llama-cpp/run.sh b/backend/cpp/llama-cpp/run.sh
index 17a1d0df9..dde3161fa 100755
--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@@ -44,7 +44,7 @@ fi
if [ "$(uname)" == "Darwin" ]; then
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else
- LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
# If there is a lib/ld.so, use it
diff --git a/backend/go/huggingface/Makefile b/backend/go/huggingface/Makefile
new file mode 100644
index 000000000..77b6c82ed
--- /dev/null
+++ b/backend/go/huggingface/Makefile
@@ -0,0 +1,9 @@
+GOCMD=go
+
+huggingface:
+ CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
+
+package:
+ bash package.sh
+
+build: huggingface package
\ No newline at end of file
diff --git a/backend/go/llm/langchain/langchain.go b/backend/go/huggingface/langchain.go
similarity index 100%
rename from backend/go/llm/langchain/langchain.go
rename to backend/go/huggingface/langchain.go
diff --git a/backend/go/llm/langchain/main.go b/backend/go/huggingface/main.go
similarity index 100%
rename from backend/go/llm/langchain/main.go
rename to backend/go/huggingface/main.go
diff --git a/backend/go/huggingface/package.sh b/backend/go/huggingface/package.sh
new file mode 100755
index 000000000..6218a65f6
--- /dev/null
+++ b/backend/go/huggingface/package.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Script to package the huggingface backend binary and its run script
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+mkdir -p $CURDIR/package
+cp -avrf $CURDIR/huggingface $CURDIR/package/
+cp -rfv $CURDIR/run.sh $CURDIR/package/
\ No newline at end of file
diff --git a/backend/go/huggingface/run.sh b/backend/go/huggingface/run.sh
new file mode 100755
index 000000000..08972b5d2
--- /dev/null
+++ b/backend/go/huggingface/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+exec $CURDIR/huggingface "$@"
\ No newline at end of file
diff --git a/backend/go/local-store/Makefile b/backend/go/local-store/Makefile
new file mode 100644
index 000000000..6cde84b00
--- /dev/null
+++ b/backend/go/local-store/Makefile
@@ -0,0 +1,9 @@
+GOCMD=go
+
+local-store:
+ CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
+
+package:
+ bash package.sh
+
+build: local-store package
\ No newline at end of file
diff --git a/backend/go/stores/debug.go b/backend/go/local-store/debug.go
similarity index 100%
rename from backend/go/stores/debug.go
rename to backend/go/local-store/debug.go
diff --git a/backend/go/stores/main.go b/backend/go/local-store/main.go
similarity index 100%
rename from backend/go/stores/main.go
rename to backend/go/local-store/main.go
diff --git a/backend/go/local-store/package.sh b/backend/go/local-store/package.sh
new file mode 100755
index 000000000..af94e0ee7
--- /dev/null
+++ b/backend/go/local-store/package.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Script to package the local-store backend binary and its run script
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+mkdir -p $CURDIR/package
+cp -avrf $CURDIR/local-store $CURDIR/package/
+cp -rfv $CURDIR/run.sh $CURDIR/package/
\ No newline at end of file
diff --git a/backend/go/stores/production.go b/backend/go/local-store/production.go
similarity index 100%
rename from backend/go/stores/production.go
rename to backend/go/local-store/production.go
diff --git a/backend/go/local-store/run.sh b/backend/go/local-store/run.sh
new file mode 100755
index 000000000..479f3b486
--- /dev/null
+++ b/backend/go/local-store/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+exec $CURDIR/local-store "$@"
\ No newline at end of file
diff --git a/backend/go/stores/store.go b/backend/go/local-store/store.go
similarity index 99%
rename from backend/go/stores/store.go
rename to backend/go/local-store/store.go
index c8788a9c7..1fa0b2ef6 100644
--- a/backend/go/stores/store.go
+++ b/backend/go/local-store/store.go
@@ -4,6 +4,7 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
+ "errors"
"fmt"
"math"
"slices"
@@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
}
func (s *Store) Load(opts *pb.ModelOptions) error {
+ if opts.Model != "" {
+ return errors.New("not implemented")
+ }
return nil
}
@@ -315,7 +319,7 @@ func isNormalized(k []float32) bool {
for _, v := range k {
v64 := float64(v)
- sum += v64*v64
+ sum += v64 * v64
}
s := math.Sqrt(sum)
diff --git a/backend/go/silero-vad/Makefile b/backend/go/silero-vad/Makefile
new file mode 100644
index 000000000..93fd6b4c9
--- /dev/null
+++ b/backend/go/silero-vad/Makefile
@@ -0,0 +1,47 @@
+
+CURRENT_DIR=$(abspath ./)
+GOCMD=go
+
+ONNX_VERSION?=1.20.0
+ONNX_ARCH?=x64
+ONNX_OS?=linux
+
+# Detect if we are running on arm64
+ifneq (,$(findstring aarch64,$(shell uname -m)))
+ ONNX_ARCH=aarch64
+endif
+
+ifeq ($(OS),Darwin)
+ ONNX_OS=osx
+ ifneq (,$(findstring aarch64,$(shell uname -m)))
+ ONNX_ARCH=arm64
+ else ifneq (,$(findstring arm64,$(shell uname -m)))
+ ONNX_ARCH=arm64
+ else
+ ONNX_ARCH=x86_64
+ endif
+endif
+
+sources/onnxruntime:
+ mkdir -p sources/onnxruntime
+ curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+ cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+ cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
+
+backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
+ mkdir -p backend-assets/lib
+ cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
+ifeq ($(OS),Darwin)
+ mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
+else
+ mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
+endif
+
+silero-vad: backend-assets/lib/libonnxruntime.so.1
+ CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
+ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
+
+package:
+ bash package.sh
+
+build: silero-vad package
\ No newline at end of file
diff --git a/backend/go/vad/silero/main.go b/backend/go/silero-vad/main.go
similarity index 100%
rename from backend/go/vad/silero/main.go
rename to backend/go/silero-vad/main.go
diff --git a/backend/go/silero-vad/package.sh b/backend/go/silero-vad/package.sh
new file mode 100755
index 000000000..1c524000c
--- /dev/null
+++ b/backend/go/silero-vad/package.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Script to copy the appropriate libraries based on architecture
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+
+# Create lib directory
+mkdir -p $CURDIR/package/lib
+
+cp -avrf $CURDIR/silero-vad $CURDIR/package/
+cp -avrf $CURDIR/run.sh $CURDIR/package/
+cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
+
+# Detect architecture and copy appropriate libraries
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+ # x86_64 architecture
+ echo "Detected x86_64 architecture, copying x86_64 libraries..."
+ cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
+ cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+ cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+ cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+ # ARM64 architecture
+ echo "Detected ARM64 architecture, copying ARM64 libraries..."
+ cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
+ cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+ cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+ cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+ cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+else
+ echo "Error: Could not detect architecture"
+ exit 1
+fi
+
+echo "Packaging completed successfully"
+ls -liah $CURDIR/package/
+ls -liah $CURDIR/package/lib/
\ No newline at end of file
diff --git a/backend/go/silero-vad/run.sh b/backend/go/silero-vad/run.sh
new file mode 100755
index 000000000..72658908a
--- /dev/null
+++ b/backend/go/silero-vad/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath $0)")
+
+export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+
+# If there is a lib/ld.so, use it
+if [ -f $CURDIR/lib/ld.so ]; then
+ echo "Using lib/ld.so"
+ exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
+fi
+
+exec $CURDIR/silero-vad "$@"
\ No newline at end of file
diff --git a/backend/go/vad/silero/vad.go b/backend/go/silero-vad/vad.go
similarity index 100%
rename from backend/go/vad/silero/vad.go
rename to backend/go/silero-vad/vad.go
diff --git a/backend/index.yaml b/backend/index.yaml
index 6451bfd77..608854605 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -68,7 +68,7 @@
default: "cpu-stablediffusion-ggml"
nvidia: "cuda12-stablediffusion-ggml"
intel: "intel-sycl-f16-stablediffusion-ggml"
- #amd: "rocm-stablediffusion-ggml"
+ # amd: "rocm-stablediffusion-ggml"
vulkan: "vulkan-stablediffusion-ggml"
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
# metal: "metal-stablediffusion-ggml"
@@ -285,6 +285,54 @@
tags:
- text-to-speech
- TTS
+- &silero-vad
+ name: "silero-vad"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
+ icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
+ urls:
+ - https://github.com/snakers4/silero-vad
+ description: |
+ Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
+ Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not.
+ tags:
+ - voice-activity-detection
+ - VAD
+ - silero-vad
+ - CPU
+- &local-store
+ name: "local-store"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
+ urls:
+ - https://github.com/mudler/LocalAI
+ description: |
+ Local Store is a local-first, self-hosted, and open-source vector database.
+ tags:
+ - vector-database
+ - local-first
+ - open-source
+ - CPU
+ license: MIT
+- &huggingface
+ name: "huggingface"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
+ icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
+ urls:
+ - https://huggingface.co/docs/hub/en/api
+ description: |
+    HuggingFace is a backend that uses the HuggingFace API to run models.
+ tags:
+ - LLM
+ - huggingface
+ license: MIT
+- !!merge <<: *huggingface
+ name: "huggingface-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
+- !!merge <<: *local-store
+ name: "local-store-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
+- !!merge <<: *silero-vad
+ name: "silero-vad-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
- !!merge <<: *piper
name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper"
diff --git a/core/application/startup.go b/core/application/startup.go
index 1fdd1ad50..59003799b 100644
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -9,9 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -103,23 +101,6 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
- if options.AssetsDestination != "" {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
- log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
- }
-
- if options.LibPath != "" {
- // If there is a lib directory, set LD_LIBRARY_PATH to include it
- err := library.LoadExternal(options.LibPath)
- if err != nil {
- log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
- }
- }
-
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
diff --git a/core/backend/options.go b/core/backend/options.go
index 7f4623c2d..cfe7b35e4 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -20,7 +20,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
- model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}
diff --git a/core/backend/stores.go b/core/backend/stores.go
index f5ee9166d..78257180e 100644
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -7,14 +7,12 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)
-func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
- if storeName == "" {
- storeName = "default"
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
+ if backend == "" {
+ backend = model.LocalStoreBackend
}
-
sc := []model.Option{
- model.WithBackendString(model.LocalStoreBackend),
- model.WithAssetDir(appConfig.AssetsDestination),
+ model.WithBackendString(backend),
model.WithModel(storeName),
}
diff --git a/core/cli/context/context.go b/core/cli/context/context.go
index 34242e971..061d27503 100644
--- a/core/cli/context/context.go
+++ b/core/cli/context/context.go
@@ -1,13 +1,6 @@
package cliContext
-import (
- rice "github.com/GeertJohan/go.rice"
-)
-
type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
-
- // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
- BackendAssets *rice.Box `kong:"-"`
}
diff --git a/core/cli/run.go b/core/cli/run.go
index 481d89448..47e765dd8 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -23,7 +23,6 @@ type RunCMD struct {
ExternalBackends []string `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"`
BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
@@ -46,7 +45,6 @@ type RunCMD struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
- LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
@@ -99,10 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),
- config.WithLibPath(r.LibraryPath),
config.WithThreads(r.Threads),
- config.WithBackendAssets(ctx.BackendAssets),
- config.WithBackendAssetsOutput(r.BackendAssetsPath),
config.WithUploadLimitMB(r.UploadLimit),
config.WithApiKeys(r.APIKeys),
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go
index b7c1d0fe6..1193b329f 100644
--- a/core/cli/soundgeneration.go
+++ b/core/cli/soundgeneration.go
@@ -27,7 +27,6 @@ type SoundGenerationCMD struct {
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}
@@ -51,11 +50,10 @@ func parseToInt32Ptr(input string) *int32 {
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
- outputDir := t.BackendAssetsPath
+ outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
-
text := strings.Join(t.Text, " ")
externalBackends := make(map[string]string)
@@ -71,7 +69,6 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
- AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
diff --git a/core/cli/transcript.go b/core/cli/transcript.go
index 67b5ed1da..3e5ee6d44 100644
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -15,20 +15,18 @@ import (
type TranscriptCMD struct {
Filename string `arg:""`
- Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
- Model string `short:"m" required:"" help:"Model name to run the TTS"`
- Language string `short:"l" help:"Language of the audio file"`
- Translate bool `short:"c" help:"Translate the transcription to english"`
- Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
- ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
+ Model string `short:"m" required:"" help:"Model name to run the TTS"`
+ Language string `short:"l" help:"Language of the audio file"`
+ Translate bool `short:"c" help:"Translate the transcription to english"`
+ Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
opts := &config.ApplicationConfig{
- ModelPath: t.ModelsPath,
- Context: context.Background(),
- AssetsDestination: t.BackendAssetsPath,
+ ModelPath: t.ModelsPath,
+ Context: context.Background(),
}
cl := config.NewBackendConfigLoader(t.ModelsPath)
diff --git a/core/cli/tts.go b/core/cli/tts.go
index 074487e62..552fdf018 100644
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -17,18 +17,17 @@ import (
type TTSCMD struct {
Text []string `arg:""`
- Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
- Model string `short:"m" required:"" help:"Model name to run the TTS"`
- Voice string `short:"v" help:"Voice name to run the TTS"`
- Language string `short:"l" help:"Language to use with the TTS"`
- OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
- ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
+ Model string `short:"m" required:"" help:"Model name to run the TTS"`
+ Voice string `short:"v" help:"Voice name to run the TTS"`
+ Language string `short:"l" help:"Language to use with the TTS"`
+ OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
+ ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (t *TTSCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
- outputDir := t.BackendAssetsPath
+ outputDir := os.TempDir()
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
@@ -39,7 +38,6 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
ModelPath: t.ModelsPath,
Context: context.Background(),
GeneratedContentDir: outputDir,
- AssetsDestination: t.BackendAssetsPath,
}
ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go
index a5d065773..33813db06 100644
--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@@ -1,7 +1,7 @@
package worker
type WorkerFlags struct {
- BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+ BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"`
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 3ea3cd426..d9fe8e4db 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -9,8 +9,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/gallery"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/rs/zerolog/log"
)
@@ -47,24 +45,17 @@ func findLLamaCPPBackend(backendSystemPath string) (string, error) {
}
func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
- log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
if len(os.Args) < 4 {
return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- ")
}
- grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+ grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
return err
}
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
- args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
args = append([]string{grpcProcess}, args...)
return syscall.Exec(
diff --git a/core/cli/worker/worker_nop2p.go b/core/cli/worker/worker_nop2p.go
deleted file mode 100644
index fc3f095d7..000000000
--- a/core/cli/worker/worker_nop2p.go
+++ /dev/null
@@ -1,16 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package worker
-
-import (
- "fmt"
-
- cliContext "github.com/mudler/LocalAI/core/cli/context"
-)
-
-type P2P struct{}
-
-func (r *P2P) Run(ctx *cliContext.Context) error {
- return fmt.Errorf("p2p mode is not enabled in this build")
-}
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index 4fb1b5825..1533de4e5 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package worker
import (
@@ -13,8 +10,6 @@ import (
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
- "github.com/mudler/LocalAI/pkg/assets"
- "github.com/mudler/LocalAI/pkg/library"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
)
@@ -29,12 +24,6 @@ type P2P struct {
}
func (r *P2P) Run(ctx *cliContext.Context) error {
- // Extract files from the embedded FS
- err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
- log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
- if err != nil {
- log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
- }
// Check if the token is set
// as we always need it.
@@ -71,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
- grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
+ grpcProcess, err := findLLamaCPPBackend(r.BackendsPath)
if err != nil {
log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
return
@@ -85,8 +74,6 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
- args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
cmd := exec.Command(
grpcProcess, args...,
)
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 662bddc6a..4f5f878d1 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -6,7 +6,6 @@ import (
"regexp"
"time"
- rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
@@ -17,7 +16,6 @@ type ApplicationConfig struct {
ModelPath string
BackendsPath string
ExternalBackends []string
- LibPath string
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug bool
@@ -50,9 +48,6 @@ type ApplicationConfig struct {
Galleries []Gallery
BackendGalleries []Gallery
- BackendAssets *rice.Box
- AssetsDestination string
-
ExternalGRPCBackends map[string]string
AutoloadGalleries, AutoloadBackendGalleries bool
@@ -140,12 +135,6 @@ func WithP2PToken(s string) AppOption {
}
}
-func WithLibPath(path string) AppOption {
- return func(o *ApplicationConfig) {
- o.LibPath = path
- }
-}
-
var EnableWatchDog = func(o *ApplicationConfig) {
o.WatchDog = true
}
@@ -211,18 +200,6 @@ func WithCorsAllowOrigins(b string) AppOption {
}
}
-func WithBackendAssetsOutput(out string) AppOption {
- return func(o *ApplicationConfig) {
- o.AssetsDestination = out
- }
-}
-
-func WithBackendAssets(f *rice.Box) AppOption {
- return func(o *ApplicationConfig) {
- o.BackendAssets = f
- }
-}
-
func WithStringGalleries(galls string) AppOption {
return func(o *ApplicationConfig) {
if galls == "" {
diff --git a/core/gallery/models.go b/core/gallery/models.go
index a1c8a4b75..30ec2908e 100644
--- a/core/gallery/models.go
+++ b/core/gallery/models.go
@@ -126,8 +126,9 @@ func InstallModelFromGallery(
if err != nil {
return err
}
-
+ log.Debug().Msgf("Installed model %q", installedModel.Name)
if automaticallyInstallBackend && installedModel.Backend != "" {
+ log.Debug().Msgf("Installing backend %q", installedModel.Backend)
systemState, err := system.GetSystemState()
if err != nil {
return err
diff --git a/core/http/app_test.go b/core/http/app_test.go
index b4eadbe73..03aaf8a4c 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -23,7 +23,6 @@ import (
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
- rice "github.com/GeertJohan/go.rice"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
@@ -264,16 +263,6 @@ func getRequest(url string, header http.Header) (error, int, []byte) {
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
-var backendAssets *rice.Box
-
-func init() {
- var err error
- backendAssets, err = rice.FindBox("backend-assets")
- if err != nil {
- panic(err)
- }
-}
-
var _ = Describe("API test", func() {
var app *fiber.App
@@ -300,9 +289,6 @@ var _ = Describe("API test", func() {
modelDir = filepath.Join(tmpdir, "models")
err = os.Mkdir(modelDir, 0750)
Expect(err).ToNot(HaveOccurred())
- backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
- err = os.Mkdir(backendAssetsDir, 0750)
- Expect(err).ToNot(HaveOccurred())
c, cancel = context.WithCancel(context.Background())
@@ -341,8 +327,7 @@ var _ = Describe("API test", func() {
config.WithModelPath(modelDir),
config.WithBackendsPath(backendPath),
config.WithApiKeys([]string{apiKey}),
- config.WithBackendAssets(backendAssets),
- config.WithBackendAssetsOutput(backendAssetsDir))...)
+ )...)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@@ -545,8 +530,7 @@ var _ = Describe("API test", func() {
config.WithBackendsPath(backendPath),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
- config.WithBackendAssets(backendAssets),
- config.WithBackendAssetsOutput(tmpdir))...,
+ )...,
)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
@@ -803,6 +787,10 @@ var _ = Describe("API test", func() {
})
It("shows the external backend", func() {
+ // Only run on linux
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
// do an http request to the /system endpoint
resp, err := http.Get("http://127.0.0.1:9090/system")
Expect(err).ToNot(HaveOccurred())
@@ -888,6 +876,13 @@ var _ = Describe("API test", func() {
// See tests/integration/stores_test
Context("Stores", Label("stores"), func() {
+ BeforeEach(func() {
+ // Only run on linux
+ if runtime.GOOS != "linux" {
+ Skip("test supported only on linux")
+ }
+ })
+
It("sets, gets, finds and deletes entries", func() {
ks := [][]float32{
{0.1, 0.2, 0.3},
diff --git a/core/http/endpoints/localai/stores.go b/core/http/endpoints/localai/stores.go
index dd8df8b18..303d943f6 100644
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -17,7 +17,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -45,7 +45,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -67,7 +67,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
return err
}
- sb, err := backend.StoreBackend(sl, appConfig, input.Store)
+ sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend)
if err != nil {
return err
}
diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go
index ea01a6702..64b1d111b 100644
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -13,10 +13,7 @@ import (
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
- availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
- if err != nil {
- return err
- }
+ availableBackends := []string{}
loadedModels := ml.ListModels()
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go
index 07bc92c63..ba291536e 100644
--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -5,7 +5,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -37,7 +36,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": modelsWithoutConfig,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index e0217be3e..39f22ca61 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -6,7 +6,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/middleware"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
@@ -80,10 +79,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
- if p2p.IsP2PEnabled() {
- router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
- router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
- }
+ router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
+ router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
router.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 6a59ad1ab..11b2ab485 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -25,38 +25,39 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
- if p2p.IsP2PEnabled() {
- app.Get("/p2p", func(c *fiber.Ctx) error {
- summary := fiber.Map{
- "Title": "LocalAI - P2P dashboard",
- "BaseURL": utils.BaseURL(c),
- "Version": internal.PrintableVersion(),
- //"Nodes": p2p.GetAvailableNodes(""),
- //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
- "P2PToken": appConfig.P2PToken,
- "NetworkID": appConfig.P2PNetworkID,
- }
+ // P2P
+ app.Get("/p2p", func(c *fiber.Ctx) error {
+ summary := fiber.Map{
+ "Title": "LocalAI - P2P dashboard",
+ "BaseURL": utils.BaseURL(c),
+ "Version": internal.PrintableVersion(),
+ //"Nodes": p2p.GetAvailableNodes(""),
+ //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
- // Render index
- return c.Render("views/p2p", summary)
- })
+ "P2PToken": appConfig.P2PToken,
+ "NetworkID": appConfig.P2PNetworkID,
+ }
- /* show nodes live! */
- app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
- })
- app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
- })
+ // Render index
+ return c.Render("views/p2p", summary)
+ })
- app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
- })
- app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
- return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
- })
- }
+ /* show nodes live! */
+ app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+ })
+ app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+ })
+
+ app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
+ })
+ app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
+ return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
+ })
+
+ // End P2P
if !appConfig.DisableGalleryEndpoint {
registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps)
@@ -76,8 +77,8 @@ func RegisterUIRoutes(app *fiber.App,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0],
- "IsP2PEnabled": p2p.IsP2PEnabled(),
- "Version": internal.PrintableVersion(),
+
+ "Version": internal.PrintableVersion(),
}
// Render index
@@ -121,7 +122,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -151,7 +151,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -169,7 +168,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -203,7 +201,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -221,7 +218,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -253,7 +249,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}
diff --git a/core/http/routes/ui_backend_gallery.go b/core/http/routes/ui_backend_gallery.go
index 6b6ba40e3..d16cdb026 100644
--- a/core/http/routes/ui_backend_gallery.go
+++ b/core/http/routes/ui_backend_gallery.go
@@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@@ -71,7 +70,6 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC
"ProcessingBackends": processingBackendsData,
"AvailableBackends": len(backends),
"TaskTypes": taskTypes,
- "IsP2PEnabled": p2p.IsP2PEnabled(),
}
if page == "" {
diff --git a/core/http/routes/ui_gallery.go b/core/http/routes/ui_gallery.go
index d9b0c43d6..6a0e1d7dd 100644
--- a/core/http/routes/ui_gallery.go
+++ b/core/http/routes/ui_gallery.go
@@ -15,7 +15,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/utils"
- "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/rs/zerolog/log"
@@ -70,9 +69,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
- "IsP2PEnabled": p2p.IsP2PEnabled(),
-
- "TaskTypes": taskTypes,
+ "TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}
diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html
index cfcfbe319..b3339e876 100644
--- a/core/http/views/explorer.html
+++ b/core/http/views/explorer.html
@@ -268,7 +268,7 @@
Command to connect (click to copy):
- docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master-ffmpeg-core federated --debug
+ docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master federated --debug
or via CLI:
diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html
index 6e9024851..bd6324bf6 100644
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -49,11 +49,11 @@
- {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+ {{ if eq .P2PToken "" }}
-
Warning: P2P mode is disabled or no token was specified
+ Warning: P2P token was not specified
You have to enable P2P mode by starting LocalAI with --p2p. Please restart the server with --p2p to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with export TOKEN=".."
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index 229d7bf29..4ecfab872 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -40,11 +40,9 @@
Talk
- {{ if .IsP2PEnabled }}
Swarm
- {{ end }}
API
@@ -75,11 +73,9 @@
Talk
- {{ if .IsP2PEnabled }}
Swarm
- {{ end }}
API
diff --git a/core/p2p/federated_server.go b/core/p2p/federated_server.go
index d80af082c..e382576ba 100644
--- a/core/p2p/federated_server.go
+++ b/core/p2p/federated_server.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package p2p
import (
diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go
index b4b5886b5..b5cd1f831 100644
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@@ -1,6 +1,3 @@
-//go:build p2p
-// +build p2p
-
package p2p
import (
@@ -65,10 +62,6 @@ func GenerateToken(DHTInterval, OTPInterval int) string {
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
-func IsP2PEnabled() bool {
- return true
-}
-
func nodeID(s string) string {
hostname, _ := os.Hostname()
return fmt.Sprintf("%s-%s", hostname, s)
diff --git a/core/p2p/p2p_disabled.go b/core/p2p/p2p_disabled.go
deleted file mode 100644
index c5ba98fda..000000000
--- a/core/p2p/p2p_disabled.go
+++ /dev/null
@@ -1,35 +0,0 @@
-//go:build !p2p
-// +build !p2p
-
-package p2p
-
-import (
- "context"
- "fmt"
-
- "github.com/mudler/edgevpn/pkg/node"
-)
-
-func GenerateToken(DHTInterval, OTPInterval int) string {
- return "not implemented"
-}
-
-func (f *FederatedServer) Start(ctx context.Context) error {
- return fmt.Errorf("not implemented")
-}
-
-func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData), allocate bool) error {
- return fmt.Errorf("not implemented")
-}
-
-func ExposeService(ctx context.Context, host, port, token, servicesID string) (*node.Node, error) {
- return nil, fmt.Errorf("not implemented")
-}
-
-func IsP2PEnabled() bool {
- return false
-}
-
-func NewNode(token string) (*node.Node, error) {
- return nil, fmt.Errorf("not implemented")
-}
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 734314a2b..4e819238a 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -63,23 +63,29 @@ type VADResponse struct {
Segments []VADSegment `json:"segments" yaml:"segments"`
}
+type StoreCommon struct {
+ Backend string `json:"backend,omitempty" yaml:"backend,omitempty"`
+}
type StoresSet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
Values []string `json:"values" yaml:"values"`
+ StoreCommon
}
type StoresDelete struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys"`
+ StoreCommon
}
type StoresGet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
+ StoreCommon
}
type StoresGetResponse struct {
@@ -92,6 +98,7 @@ type StoresFind struct {
Key []float32 `json:"key" yaml:"key"`
Topk int `json:"topk" yaml:"topk"`
+ StoreCommon
}
type StoresFindResponse struct {
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 2306c28f3..b9880352a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -5,7 +5,7 @@ services:
# Available images with CUDA, ROCm, SYCL
# Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
# Image list (dockerhub): https://hub.docker.com/r/localai/localai
- image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+ image: quay.io/go-skynet/local-ai:master
build:
context: .
dockerfile: Dockerfile
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index 5c52ed4ca..68bb53adf 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -579,38 +579,14 @@ You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to r
### Extra backends
-LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends.
-
-If you wish to build a custom container image with extra backends, you can use the core images and build only the backends you are interested into or prepare the environment on startup by using the `EXTRA_BACKENDS` environment variable. For instance, to use the diffusers backend:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-```
-
-Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
-
-```Dockerfile
-FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-
-RUN make -C backend/python/diffusers
-
-ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
-```
-
-{{% alert note %}}
-
-You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
-
-{{% /alert %}}
+LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. See the [backend section](https://localai.io/backends/) for more details on how to install and build new backends for LocalAI.
#### In runtime
When using the `-core` container image it is possible to prepare the python backends you are interested into by using the `EXTRA_BACKENDS` variable, for instance:
```bash
-docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
+docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master
```
### Concurrent requests
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index 51bce71fb..12eba2946 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -73,8 +73,6 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
-- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
-- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
@@ -259,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
### Container images
-To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
+To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32`, `{{< version >}}-gpu-intel-f16`, ...
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
@@ -268,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
```
### Notes
@@ -276,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -
In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
```
Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
diff --git a/docs/content/docs/features/embeddings.md b/docs/content/docs/features/embeddings.md
index 92c41eb64..7e0f3abf4 100644
--- a/docs/content/docs/features/embeddings.md
+++ b/docs/content/docs/features/embeddings.md
@@ -44,7 +44,6 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
{{% alert note %}}
- The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use.
-- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
- For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable.
- Example: `EXTERNAL_GRPC_BACKENDS="sentencetransformers:/path/to/LocalAI/backend/python/sentencetransformers/sentencetransformers.py"`
- The `sentencetransformers` backend does support only embeddings of text, and not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`.
diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 38cafeca1..a3b39f0a3 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -18,8 +18,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration.
-- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
-- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
{{% /alert %}}
diff --git a/docs/content/docs/getting-started/customize-model.md b/docs/content/docs/getting-started/customize-model.md
index e8440cd39..eff83ebd2 100644
--- a/docs/content/docs/getting-started/customize-model.md
+++ b/docs/content/docs/getting-started/customize-model.md
@@ -23,7 +23,7 @@ MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branc
Here's an example to initiate the **phi-2** model:
```bash
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
```
You can also check all the embedded models configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
@@ -64,7 +64,7 @@ Then, launch LocalAI using your gist's URL:
```bash
## Important! Substitute with your gist's URL!
-docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml
+docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/xxxx/phi-2.yaml
```
## Next Steps
diff --git a/gallery/alpaca.yaml b/gallery/alpaca.yaml
index b647d2f64..18512de77 100644
--- a/gallery/alpaca.yaml
+++ b/gallery/alpaca.yaml
@@ -2,6 +2,7 @@
name: "alpaca"
config_file: |
+ backend: "llama-cpp"
context_size: 4096
f16: true
mmap: true
diff --git a/gallery/arch-function.yaml b/gallery/arch-function.yaml
index a527d0f79..c7e7775ce 100644
--- a/gallery/arch-function.yaml
+++ b/gallery/arch-function.yaml
@@ -2,6 +2,7 @@
name: "chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/chatml-hercules.yaml b/gallery/chatml-hercules.yaml
index c10367896..36b478a1a 100644
--- a/gallery/chatml-hercules.yaml
+++ b/gallery/chatml-hercules.yaml
@@ -2,6 +2,7 @@
name: "chatml-hercules"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
# disable injecting the "answer" tool
diff --git a/gallery/chatml.yaml b/gallery/chatml.yaml
index abaf3209f..7e8e63a62 100644
--- a/gallery/chatml.yaml
+++ b/gallery/chatml.yaml
@@ -2,6 +2,7 @@
name: "chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/command-r.yaml b/gallery/command-r.yaml
index 81a24fb19..0c1636f3e 100644
--- a/gallery/command-r.yaml
+++ b/gallery/command-r.yaml
@@ -2,6 +2,7 @@
name: "command-r"
config_file: |
+ backend: "llama-cpp"
context_size: 131072
stopwords:
- "<|END_OF_TURN_TOKEN|>"
diff --git a/gallery/deephermes.yaml b/gallery/deephermes.yaml
index 93d5c7939..3805b57ec 100644
--- a/gallery/deephermes.yaml
+++ b/gallery/deephermes.yaml
@@ -2,6 +2,7 @@
name: "deephermes"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:
diff --git a/gallery/deepseek-r1.yaml b/gallery/deepseek-r1.yaml
index 29ca9db12..d03073534 100644
--- a/gallery/deepseek-r1.yaml
+++ b/gallery/deepseek-r1.yaml
@@ -2,6 +2,7 @@
name: "deepseek-r1"
config_file: |
+ backend: "llama-cpp"
context_size: 131072
mmap: true
f16: true
diff --git a/gallery/deepseek.yaml b/gallery/deepseek.yaml
index fa8870a1d..d8f926739 100644
--- a/gallery/deepseek.yaml
+++ b/gallery/deepseek.yaml
@@ -2,6 +2,7 @@
name: "deepseek"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
template:
diff --git a/gallery/falcon3.yaml b/gallery/falcon3.yaml
index b6593f4bb..5f2fc8c59 100644
--- a/gallery/falcon3.yaml
+++ b/gallery/falcon3.yaml
@@ -2,6 +2,7 @@
name: "falcon3"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml
index ed69795f0..d6a1eab06 100644
--- a/gallery/gemma.yaml
+++ b/gallery/gemma.yaml
@@ -2,6 +2,7 @@
name: "gemma"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
template:
diff --git a/gallery/granite.yaml b/gallery/granite.yaml
index 465cca186..8b94b4703 100644
--- a/gallery/granite.yaml
+++ b/gallery/granite.yaml
@@ -2,6 +2,7 @@
name: "granite"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/granite3-2.yaml b/gallery/granite3-2.yaml
index 8a4a9b88d..ec07fca9e 100644
--- a/gallery/granite3-2.yaml
+++ b/gallery/granite3-2.yaml
@@ -2,6 +2,7 @@
name: "granite-3.2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
index 22a5fb3a6..040927e09 100644
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -2,6 +2,7 @@
name: "hermes-2-pro-mistral"
config_file: |
+ backend: "llama-cpp"
mmap: true
context_size: 8192
stopwords:
diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml
index 5dc54b0e8..c2ef37e87 100644
--- a/gallery/llama3-instruct.yaml
+++ b/gallery/llama3-instruct.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/llama3.1-instruct-grammar.yaml b/gallery/llama3.1-instruct-grammar.yaml
index 30237af35..b91834937 100644
--- a/gallery/llama3.1-instruct-grammar.yaml
+++ b/gallery/llama3.1-instruct-grammar.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct-grammar"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml
index 4a2b4db13..1d078f2b0 100644
--- a/gallery/llama3.1-instruct.yaml
+++ b/gallery/llama3.1-instruct.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/llama3.1-reflective.yaml b/gallery/llama3.1-reflective.yaml
index 86a91d8b1..75f6edf2f 100644
--- a/gallery/llama3.1-reflective.yaml
+++ b/gallery/llama3.1-reflective.yaml
@@ -2,6 +2,7 @@
name: "llama3-instruct"
config_file: |
+ backend: "llama-cpp"
mmap: true
cutstrings:
- (.*?)
diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml
index 73f370a8f..fc8dc1240 100644
--- a/gallery/llama3.2-fcall.yaml
+++ b/gallery/llama3.2-fcall.yaml
@@ -2,6 +2,7 @@
name: "llama3.2-fcall"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/llama3.2-quantized.yaml b/gallery/llama3.2-quantized.yaml
index 7e1d26305..2407b22da 100644
--- a/gallery/llama3.2-quantized.yaml
+++ b/gallery/llama3.2-quantized.yaml
@@ -2,6 +2,7 @@
name: "llama3.2-quantized"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
disable_no_action: true
diff --git a/gallery/mathstral.yaml b/gallery/mathstral.yaml
index a1c686b45..1ed503396 100644
--- a/gallery/mathstral.yaml
+++ b/gallery/mathstral.yaml
@@ -2,6 +2,7 @@
name: "mathstral"
config_file: |
+ backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:
diff --git a/gallery/mistral-0.3.yaml b/gallery/mistral-0.3.yaml
index 502e7a5af..1f45728d1 100644
--- a/gallery/mistral-0.3.yaml
+++ b/gallery/mistral-0.3.yaml
@@ -2,6 +2,7 @@
name: "mistral-0.3"
config_file: |
+ backend: "llama-cpp"
context_size: 8192
mmap: true
stopwords:
diff --git a/gallery/moondream.yaml b/gallery/moondream.yaml
index d3511f20b..5ff871cac 100644
--- a/gallery/moondream.yaml
+++ b/gallery/moondream.yaml
@@ -3,6 +3,7 @@ name: "moondream2"
config_file: |
+ backend: "llama-cpp"
context_size: 2046
roles:
user: "\nQuestion: "
diff --git a/gallery/mudler.yaml b/gallery/mudler.yaml
index 77bdc8eb4..fa85b9730 100644
--- a/gallery/mudler.yaml
+++ b/gallery/mudler.yaml
@@ -2,6 +2,7 @@
name: localai
config_file: |-
+ backend: "llama-cpp"
context_size: 8192
stopwords:
- <|im_end|>
diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml
index 5e1fb702d..cd161fa27 100644
--- a/gallery/phi-2-chat.yaml
+++ b/gallery/phi-2-chat.yaml
@@ -2,6 +2,7 @@
name: "phi-2-chatml"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml
index 89971b4d5..22642ac50 100644
--- a/gallery/phi-2-orange.yaml
+++ b/gallery/phi-2-orange.yaml
@@ -2,6 +2,7 @@
name: "phi-2-orange"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml
index 98a3f3854..ce3f21116 100644
--- a/gallery/phi-3-chat.yaml
+++ b/gallery/phi-3-chat.yaml
@@ -2,6 +2,7 @@
name: "phi-3-chat"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/phi-4-chat-fcall.yaml b/gallery/phi-4-chat-fcall.yaml
index 23c2e53db..c73f993e3 100644
--- a/gallery/phi-4-chat-fcall.yaml
+++ b/gallery/phi-4-chat-fcall.yaml
@@ -2,6 +2,7 @@
name: "phi-4-chat"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/phi-4-chat.yaml b/gallery/phi-4-chat.yaml
index 1ff0b14af..6b8de6dbf 100644
--- a/gallery/phi-4-chat.yaml
+++ b/gallery/phi-4-chat.yaml
@@ -3,6 +3,7 @@ name: "phi-4-chat"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{ .RoleName }}<|im_sep|>
diff --git a/gallery/qwen-fcall.yaml b/gallery/qwen-fcall.yaml
index f168c7fe0..dc8fb47ec 100644
--- a/gallery/qwen-fcall.yaml
+++ b/gallery/qwen-fcall.yaml
@@ -2,6 +2,7 @@
name: "qwen-fcall"
config_file: |
+ backend: "llama-cpp"
mmap: true
function:
json_regex_match:
diff --git a/gallery/qwen3-openbuddy.yaml b/gallery/qwen3-openbuddy.yaml
index 754d730d7..1af782a2c 100644
--- a/gallery/qwen3-openbuddy.yaml
+++ b/gallery/qwen3-openbuddy.yaml
@@ -3,6 +3,7 @@ name: "qwen3-openbuddy"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|role|>{{ .RoleName }}<|says|>
diff --git a/gallery/qwen3.yaml b/gallery/qwen3.yaml
index aef6c109c..1d2eb05dc 100644
--- a/gallery/qwen3.yaml
+++ b/gallery/qwen3.yaml
@@ -3,6 +3,7 @@ name: "qwen3"
config_file: |
mmap: true
+ backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{ .RoleName }}
diff --git a/gallery/rwkv.yaml b/gallery/rwkv.yaml
index 686937997..3750db974 100644
--- a/gallery/rwkv.yaml
+++ b/gallery/rwkv.yaml
@@ -2,6 +2,7 @@
name: "rwkv"
config_file: |
+ backend: "llama-cpp"
parameters:
top_k: 80
temperature: 0.9
diff --git a/gallery/smolvlm.yaml b/gallery/smolvlm.yaml
index 2c4ef47e3..a3fddcc6c 100644
--- a/gallery/smolvlm.yaml
+++ b/gallery/smolvlm.yaml
@@ -2,6 +2,7 @@
name: smolvlm
# yamllint disable-line rule:trailing-spaces
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/tuluv2.yaml b/gallery/tuluv2.yaml
index ca2785a23..d716879a9 100644
--- a/gallery/tuluv2.yaml
+++ b/gallery/tuluv2.yaml
@@ -2,6 +2,7 @@
name: "tuluv2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |
diff --git a/gallery/vllm.yaml b/gallery/vllm.yaml
index f0b797cc7..852db148c 100644
--- a/gallery/vllm.yaml
+++ b/gallery/vllm.yaml
@@ -2,6 +2,7 @@
name: "vllm"
config_file: |
+ backend: vllm
context_size: 8192
parameters:
max_tokens: 8192
diff --git a/gallery/wizardlm2.yaml b/gallery/wizardlm2.yaml
index 6c2c14115..6c074b783 100644
--- a/gallery/wizardlm2.yaml
+++ b/gallery/wizardlm2.yaml
@@ -2,6 +2,7 @@
name: "wizardlm2"
config_file: |
+ backend: "llama-cpp"
mmap: true
template:
chat_message: |-
diff --git a/go.mod b/go.mod
index ef137346a..a6da77492 100644
--- a/go.mod
+++ b/go.mod
@@ -6,16 +6,12 @@ toolchain go1.23.1
require (
dario.cat/mergo v1.0.1
- github.com/GeertJohan/go.rice v1.0.3
github.com/Masterminds/sprig/v3 v3.3.0
github.com/alecthomas/kong v0.9.0
- github.com/census-instrumentation/opencensus-proto v0.4.1
github.com/charmbracelet/glamour v0.7.0
github.com/chasefleming/elem-go v0.26.0
- github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20
github.com/containerd/containerd v1.7.19
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
- github.com/elliotchance/orderedmap/v2 v2.2.0
github.com/fsnotify/fsnotify v1.7.0
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad
github.com/go-audio/wav v1.1.0
@@ -25,11 +21,9 @@ require (
github.com/gofiber/template/html/v2 v2.1.2
github.com/gofiber/websocket/v2 v2.2.1
github.com/gofrs/flock v0.12.1
- github.com/golang/protobuf v1.5.4
github.com/google/go-containerregistry v0.19.2
github.com/google/uuid v1.6.0
github.com/gpustack/gguf-parser-go v0.17.0
- github.com/grpc-ecosystem/grpc-gateway v1.5.0
github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5
github.com/jaypipes/ghw v0.12.0
@@ -43,7 +37,6 @@ require (
github.com/nikolalohinski/gonja/v2 v2.3.2
github.com/onsi/ginkgo/v2 v2.22.2
github.com/onsi/gomega v1.36.2
- github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
github.com/otiai10/openaigo v1.7.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.20.5
@@ -62,7 +55,6 @@ require (
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
go.opentelemetry.io/otel/metric v1.34.0
go.opentelemetry.io/otel/sdk/metric v1.28.0
- google.golang.org/api v0.180.0
google.golang.org/grpc v1.67.1
google.golang.org/protobuf v1.36.5
gopkg.in/yaml.v2 v2.4.0
@@ -71,22 +63,13 @@ require (
)
require (
- cel.dev/expr v0.16.0 // indirect
- cloud.google.com/go/auth v0.4.1 // indirect
- cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
- cloud.google.com/go/compute/metadata v0.5.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
- github.com/daaku/go.zipexe v1.0.2 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
- github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
github.com/fasthttp/websocket v1.5.8 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
- github.com/google/s2a-go v0.1.7 // indirect
- github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
- github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
@@ -125,9 +108,7 @@ require (
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect
go.uber.org/mock v0.5.0 // indirect
- golang.org/x/oauth2 v0.24.0 // indirect
golang.org/x/time v0.8.0 // indirect
- google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect
)
require (
@@ -268,7 +249,7 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
- github.com/pkg/errors v0.9.1 // indirect
+ github.com/pkg/errors v0.9.1
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
diff --git a/go.sum b/go.sum
index 935126b8e..1ba732dcd 100644
--- a/go.sum
+++ b/go.sum
@@ -1,15 +1,7 @@
-cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y=
-cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo=
-cloud.google.com/go/auth v0.4.1 h1:Z7YNIhlWRtrnKlZke7z3GMqzvuYzdc2z98F9D1NV5Hg=
-cloud.google.com/go/auth v0.4.1/go.mod h1:QVBuVEKpCn4Zp58hzRGvL0tjRGU0YqdRTdCHM1IHnro=
-cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4=
-cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q=
-cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY=
-cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
@@ -23,9 +15,6 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
-github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0=
-github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZSmI=
-github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
@@ -42,7 +31,6 @@ github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZ
github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
-github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c=
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
@@ -73,8 +61,6 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
-github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g=
-github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
@@ -84,8 +70,6 @@ github.com/chasefleming/elem-go v0.26.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
-github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg=
-github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE=
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
@@ -116,8 +100,6 @@ github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0
github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
-github.com/daaku/go.zipexe v1.0.2 h1:Zg55YLYTr7M9wjKn8SY/WcpuuEi+kR2u4E8RhvpyXmk=
-github.com/daaku/go.zipexe v1.0.2/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -155,14 +137,10 @@ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+m
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
-github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk=
-github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
-github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM=
-github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=
github.com/fasthttp/websocket v1.5.8 h1:k5DpirKkftIF/w1R8ZzjSgARJrs54Je9YJK37DL/Ah8=
github.com/fasthttp/websocket v1.5.8/go.mod h1:d08g8WaT6nnyvg9uMm8K9zMYyDjfKyj3170AtPRuVU0=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
@@ -252,8 +230,6 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
-github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
-github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@@ -282,18 +258,12 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc=
github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
-github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
-github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs=
-github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY=
github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg=
-github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg=
-github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
@@ -545,7 +515,6 @@ github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJE
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY=
github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg=
-github.com/nkovacs/streamquote v1.0.0/go.mod h1:BN+NaZ2CmdKqUuTUXUEm9j95B2TRbpOWpxbJYzzgUsc=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
@@ -568,8 +537,6 @@ github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
-github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
-github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg=
@@ -785,7 +752,6 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
-github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
@@ -929,8 +895,6 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
-golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -1050,8 +1014,6 @@ gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o
google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
-google.golang.org/api v0.180.0 h1:M2D87Yo0rGBPWpo1orwfCLehUUL6E7/TYe5gvMQWDh4=
-google.golang.org/api v0.180.0/go.mod h1:51AiyoEg1MJPSZ9zvklA8VnRILPXxn1iVen9v25XHAE=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
@@ -1064,7 +1026,6 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw=
-google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw=
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 h1:T6rh4haD3GVYsgEfWExoCZA2o2FmbNyKpTuAxbEFPTg=
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9/go.mod h1:wp2WsuBYj6j8wUdo3ToZsdxxixbvQNAHqVJrTgi5E5M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 h1:QCqS/PdaHTSWGvupk2F/ehwHtGc0/GYkT+3GAcR1CCc=
diff --git a/main.go b/main.go
index 380e8f5b1..3c8615952 100644
--- a/main.go
+++ b/main.go
@@ -112,9 +112,6 @@ Version: ${version}
log.Trace().Msg("Setting logging to trace")
}
- // Populate the application with the embedded backend assets
- cli.CLI.Context.BackendAssets = backendAssets
-
// Run the thing!
err = ctx.Run(&cli.CLI.Context)
if err != nil {
diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go
deleted file mode 100644
index 8c1a6be68..000000000
--- a/pkg/assets/extract.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package assets
-
-import (
- "fmt"
- "os"
- "path/filepath"
-
- rice "github.com/GeertJohan/go.rice"
- "github.com/mudler/LocalAI/pkg/library"
-)
-
-const backendAssetsDir = "backend-assets"
-
-func ResolvePath(dir string, paths ...string) string {
- return filepath.Join(append([]string{dir, backendAssetsDir}, paths...)...)
-}
-
-func ExtractFiles(content *rice.Box, extractDir string) error {
- // Create the target directory with backend-assets subdirectory
- backendAssetsDir := filepath.Join(extractDir, backendAssetsDir)
- err := os.MkdirAll(backendAssetsDir, 0750)
- if err != nil {
- return fmt.Errorf("failed to create directory: %v", err)
- }
-
- // Walk through the rice box and extract files
- err = content.Walk("", func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- // Reconstruct the directory structure in the target directory
- targetFile := filepath.Join(backendAssetsDir, path)
- if info.IsDir() {
- // Create the directory in the target directory
- err := os.MkdirAll(targetFile, 0750)
- if err != nil {
- return fmt.Errorf("failed to create directory: %v", err)
- }
- return nil
- }
-
- // Read the file from the rice box
- fileData, err := content.Bytes(path)
- if err != nil {
- return fmt.Errorf("failed to read file: %v", err)
- }
-
- // Create the file in the target directory
- err = os.WriteFile(targetFile, fileData, 0700)
- if err != nil {
- return fmt.Errorf("failed to write file: %v", err)
- }
-
- return nil
- })
-
- // If there is a lib directory, set LD_LIBRARY_PATH to include it
- // we might use this mechanism to carry over e.g. Nvidia CUDA libraries
- // from the embedded FS to the target directory
- library.LoadExtractedLibs(backendAssetsDir)
-
- return err
-}
diff --git a/pkg/assets/list.go b/pkg/assets/list.go
deleted file mode 100644
index edfdf4985..000000000
--- a/pkg/assets/list.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package assets
-
-import (
- "os"
-
- rice "github.com/GeertJohan/go.rice"
- "github.com/rs/zerolog/log"
-)
-
-func ListFiles(content *rice.Box) (files []string) {
- err := content.Walk("", func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- if info.IsDir() {
- return nil
- }
-
- files = append(files, path)
- return nil
- })
- if err != nil {
- log.Error().Err(err).Msg("error walking the rice box")
- }
- return
-}
diff --git a/pkg/library/dynaload.go b/pkg/library/dynaload.go
deleted file mode 100644
index 878cdc881..000000000
--- a/pkg/library/dynaload.go
+++ /dev/null
@@ -1,86 +0,0 @@
-package library
-
-import (
- "errors"
- "fmt"
- "os"
- "path/filepath"
- "runtime"
-
- "github.com/rs/zerolog/log"
-)
-
-/*
- This file contains functions to load libraries from the asset directory to keep the business logic clean.
-*/
-
-// skipLibraryPath checks if LOCALAI_SKIP_LIBRARY_PATH is set
-var skipLibraryPath = os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != ""
-
-// LoadExtractedLibs loads the extracted libraries from the asset dir
-func LoadExtractedLibs(dir string) error {
- // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if skipLibraryPath {
- return nil
- }
-
- var err error = nil
- for _, libDir := range []string{filepath.Join(dir, "lib"), filepath.Join(dir, "lib")} {
- err = errors.Join(err, LoadExternal(libDir))
- }
- return err
-}
-
-// LoadLDSO checks if there is a ld.so in the asset dir and if so, prefixes the grpc process with it.
-// In linux, if we find a ld.so in the asset dir we prefix it to run with the libs exposed in
-// LD_LIBRARY_PATH for more compatibility
-// If we don't do this, we might run into stack smash
-// See also: https://stackoverflow.com/questions/847179/multiple-glibc-libraries-on-a-single-host/851229#851229
-// In this case, we expect a ld.so in the lib asset dir.
-// If that's present, we use it to run the grpc backends as supposedly built against
-// that specific version of ld.so
-func LoadLDSO(assetDir string, args []string, grpcProcess string) ([]string, string) {
- if skipLibraryPath {
- return args, grpcProcess
- }
-
- if runtime.GOOS != "linux" {
- return args, grpcProcess
- }
-
- // Check if there is a ld.so file in the assetDir, if it does, we need to run the grpc process with it
- ldPath := filepath.Join(assetDir, "backend-assets", "lib", "ld.so")
- if _, err := os.Stat(ldPath); err == nil {
- log.Debug().Msgf("ld.so found")
- // We need to run the grpc process with the ld.so
- args = append([]string{grpcProcess}, args...)
- grpcProcess = ldPath
- }
-
- return args, grpcProcess
-}
-
-// LoadExternal sets the LD_LIBRARY_PATH to include the given directory
-func LoadExternal(dir string) error {
- // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
- if skipLibraryPath {
- return nil
- }
-
- lpathVar := "LD_LIBRARY_PATH"
- if runtime.GOOS == "darwin" {
- lpathVar = "DYLD_FALLBACK_LIBRARY_PATH" // should it be DYLD_LIBRARY_PATH ?
- }
-
- var setErr error = nil
- if _, err := os.Stat(dir); err == nil {
- ldLibraryPath := os.Getenv(lpathVar)
- if ldLibraryPath == "" {
- ldLibraryPath = dir
- } else {
- ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, dir)
- }
- setErr = errors.Join(setErr, os.Setenv(lpathVar, ldLibraryPath))
- }
- return setErr
-}
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index c54fbdcc3..dc60f98d2 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -5,18 +5,12 @@ import (
"errors"
"fmt"
"os"
- "path/filepath"
- "slices"
"strings"
"time"
grpc "github.com/mudler/LocalAI/pkg/grpc"
- "github.com/mudler/LocalAI/pkg/library"
- "github.com/mudler/LocalAI/pkg/utils"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
-
- "github.com/elliotchance/orderedmap/v2"
)
const (
@@ -51,79 +45,6 @@ const (
LocalStoreBackend = "local-store"
)
-func backendPath(assetDir, backend string) string {
- return filepath.Join(assetDir, "backend-assets", "grpc", backend)
-}
-
-// backendsInAssetDir returns the list of backends in the asset directory
-// that should be loaded
-func backendsInAssetDir(assetDir string) (map[string][]string, error) {
- // Exclude backends from automatic loading
- excludeBackends := []string{LocalStoreBackend}
- entry, err := os.ReadDir(backendPath(assetDir, ""))
- if err != nil {
- return nil, err
- }
- backends := make(map[string][]string)
-ENTRY:
- for _, e := range entry {
- for _, exclude := range excludeBackends {
- if e.Name() == exclude {
- continue ENTRY
- }
- }
- if e.IsDir() {
- continue
- }
- if strings.HasSuffix(e.Name(), ".log") {
- continue
- }
-
- backends[e.Name()] = []string{}
- }
-
- return backends, nil
-}
-
-func orderBackends(backends map[string][]string) ([]string, error) {
- // order backends from the asset directory.
- // as we scan for backends, we want to keep some order which backends are tried of.
- // for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last.
-
- // sets a priority list - first has more priority
- priorityList := []string{}
-
- toTheEnd := []string{
- // last has to be huggingface
- LCHuggingFaceBackend,
- }
-
- // create an ordered map
- orderedBackends := orderedmap.NewOrderedMap[string, any]()
- // add priorityList first
- for _, p := range priorityList {
- if _, ok := backends[p]; ok {
- orderedBackends.Set(p, backends[p])
- }
- }
-
- for k, v := range backends {
- if !slices.Contains(toTheEnd, k) {
- if _, ok := orderedBackends.Get(k); !ok {
- orderedBackends.Set(k, v)
- }
- }
- }
-
- for _, t := range toTheEnd {
- if _, ok := backends[t]; ok {
- orderedBackends.Set(t, backends[t])
- }
- }
-
- return orderedBackends.Keys(), nil
-}
-
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string, string) (*Model, error) {
@@ -177,35 +98,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
client = NewModel(modelID, uri, nil)
}
} else {
- grpcProcess := backendPath(o.assetDir, backend)
- if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
- return nil, fmt.Errorf("referring to a backend not in asset dir: %s", err.Error())
- }
-
- // Check if the file exists
- if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
- return nil, fmt.Errorf("backend not found: %s", grpcProcess)
- }
-
- serverAddress, err := getFreeAddress()
- if err != nil {
- return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
- }
-
- args := []string{}
-
- // Load the ld.so if it exists
- args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
-
- // Make sure the process is executable in any circumstance
- process, err := ml.startProcess(grpcProcess, modelID, serverAddress, args...)
- if err != nil {
- return nil, err
- }
-
- log.Debug().Msgf("GRPC Service Started")
-
- client = NewModel(modelID, serverAddress, process)
+ return nil, fmt.Errorf("backend not found: %s", backend)
}
log.Debug().Msgf("Wait for the service to start up")
@@ -259,14 +152,6 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
}
}
-func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) {
- backends, err := backendsInAssetDir(assetdir)
- if err != nil {
- return nil, err
- }
- return orderBackends(backends)
-}
-
func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err error) {
o := NewOptions(opts...)
@@ -346,17 +231,18 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
var err error
// get backends embedded in the binary
- autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
- if err != nil {
- ml.Close() // we failed, release the lock
- return nil, err
- }
+ autoLoadBackends := []string{}
// append externalBackends supplied by the user via the CLI
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}
+ if len(autoLoadBackends) == 0 {
+ log.Error().Msg("No backends found")
+ return nil, fmt.Errorf("no backends found")
+ }
+
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.modelID, autoLoadBackends)
diff --git a/pkg/model/loader_options.go b/pkg/model/loader_options.go
index 28a7c598f..16df2b9bd 100644
--- a/pkg/model/loader_options.go
+++ b/pkg/model/loader_options.go
@@ -10,7 +10,6 @@ type Options struct {
backendString string
model string
modelID string
- assetDir string
context context.Context
gRPCOptions *pb.ModelOptions
@@ -75,12 +74,6 @@ func WithLoadGRPCLoadModelOpts(opts *pb.ModelOptions) Option {
}
}
-func WithAssetDir(assetDir string) Option {
- return func(o *Options) {
- o.assetDir = assetDir
- }
-}
-
func WithContext(ctx context.Context) Option {
return func(o *Options) {
o.context = ctx
diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go
index 5484a79ca..dfe992c1d 100644
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -2,11 +2,9 @@ package integration_test
import (
"context"
- "embed"
"math"
"math/rand"
"os"
- "path/filepath"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -14,15 +12,11 @@ import (
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
- "github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/store"
)
-//go:embed backend-assets/*
-var backendAssets embed.FS
-
func normalize(vecs [][]float32) {
for i, k := range vecs {
norm := float64(0)
@@ -49,12 +43,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
- backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
- err = os.Mkdir(backendAssetsDir, 0750)
- Expect(err).ToNot(HaveOccurred())
-
- err = assets.ExtractFiles(backendAssets, backendAssetsDir)
- Expect(err).ToNot(HaveOccurred())
debug := true
@@ -66,7 +54,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
storeOpts := []model.Option{
model.WithBackendString(bc.Backend),
- model.WithAssetDir(backendAssetsDir),
model.WithModel("test"),
}