From 98e5291afc0c75697e253d893f264f2dcfd1e456 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 22 Jul 2025 16:31:04 +0200 Subject: [PATCH] feat: refactor build process, drop embedded backends (#5875) * feat: split remaining backends and drop embedded backends - Drop silero-vad, huggingface, and stores backend from embedded binaries - Refactor Makefile and Dockerfile to avoid building grpc backends - Drop golang code that was used to embed backends - Simplify building by using goreleaser Signed-off-by: Ettore Di Giacinto * chore(gallery): be specific with llama-cpp backend templates Signed-off-by: Ettore Di Giacinto * chore(docs): update Signed-off-by: Ettore Di Giacinto * chore(ci): minor fixes Signed-off-by: Ettore Di Giacinto * chore: drop all ffmpeg references Signed-off-by: Ettore Di Giacinto * fix: run protogen-go Signed-off-by: Ettore Di Giacinto * Always enable p2p mode Signed-off-by: Ettore Di Giacinto * Update gorelease file Signed-off-by: Ettore Di Giacinto * fix(stores): do not always load Signed-off-by: Ettore Di Giacinto * Fix linting issues Signed-off-by: Ettore Di Giacinto * Simplify Signed-off-by: Ettore Di Giacinto * Mac OS fixup Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .devcontainer-scripts/poststart.sh | 3 - .devcontainer/docker-compose-devcontainer.yml | 3 - .env | 7 - .github/workflows/backend.yml | 169 +++++--- .github/workflows/build-test.yaml | 23 + .github/workflows/deploy-explorer.yaml | 2 +- .github/workflows/image-pr.yml | 11 +- .github/workflows/image.yml | 11 - .github/workflows/image_build.yml | 6 - .github/workflows/notify-models.yaml | 2 +- .github/workflows/release.yaml | 399 +----------------- .github/workflows/test.yml | 9 +- .gitignore | 2 + .goreleaser.yaml | 33 ++ .vscode/launch.json | 2 +- Dockerfile | 10 +- Earthfile | 5 - Makefile | 221 ++++------ assets.go | 15 - backend/cpp/llama-cpp/run.sh | 2 +- backend/go/huggingface/Makefile | 9 + .../langchain => huggingface}/langchain.go | 0 .../go/{llm/langchain => huggingface}/main.go | 0 backend/go/huggingface/package.sh | 12 + backend/go/huggingface/run.sh | 6 + backend/go/local-store/Makefile | 9 + backend/go/{stores => local-store}/debug.go | 0 backend/go/{stores => local-store}/main.go | 0 backend/go/local-store/package.sh | 12 + .../go/{stores => local-store}/production.go | 0 backend/go/local-store/run.sh | 6 + backend/go/{stores => local-store}/store.go | 6 +- backend/go/silero-vad/Makefile | 47 +++ backend/go/{vad/silero => silero-vad}/main.go | 0 backend/go/silero-vad/package.sh | 53 +++ backend/go/silero-vad/run.sh | 14 + backend/go/{vad/silero => silero-vad}/vad.go | 0 backend/index.yaml | 50 ++- core/application/startup.go | 19 - core/backend/options.go | 1 - core/backend/stores.go | 10 +- core/cli/context/context.go | 7 - core/cli/run.go | 5 - core/cli/soundgeneration.go | 5 +- core/cli/transcript.go | 18 +- core/cli/tts.go | 16 +- core/cli/worker/worker.go | 2 +- core/cli/worker/worker_llamacpp.go | 11 +- core/cli/worker/worker_nop2p.go | 16 - core/cli/worker/worker_p2p.go | 15 +- core/config/application_config.go | 23 - core/gallery/models.go | 3 +- core/http/app_test.go | 31 +- core/http/endpoints/localai/stores.go | 8 +- core/http/endpoints/localai/system.go | 5 +- core/http/endpoints/localai/welcome.go | 2 - core/http/routes/localai.go | 7 +- core/http/routes/ui.go | 69 ++- core/http/routes/ui_backend_gallery.go | 2 - core/http/routes/ui_gallery.go | 5 +- core/http/views/explorer.html | 2 +- core/http/views/p2p.html | 4 +- core/http/views/partials/navbar.html | 4 - core/p2p/federated_server.go | 3 - core/p2p/p2p.go | 7 - core/p2p/p2p_disabled.go | 35 -- core/schema/localai.go | 7 + docker-compose.yaml | 2 +- docs/content/docs/advanced/advanced-usage.md | 28 +- .../content/docs/features/GPU-acceleration.md | 8 +- docs/content/docs/features/embeddings.md | 1 - .../docs/getting-started/container-images.md | 2 - .../docs/getting-started/customize-model.md | 4 +- gallery/alpaca.yaml | 1 + gallery/arch-function.yaml | 1 + gallery/chatml-hercules.yaml | 1 + gallery/chatml.yaml | 1 + gallery/command-r.yaml | 1 + gallery/deephermes.yaml | 1 + gallery/deepseek-r1.yaml | 1 + gallery/deepseek.yaml | 1 + gallery/falcon3.yaml | 1 + gallery/gemma.yaml | 1 + gallery/granite.yaml | 1 + gallery/granite3-2.yaml | 1 + gallery/hermes-2-pro-mistral.yaml | 1 + gallery/llama3-instruct.yaml | 1 + gallery/llama3.1-instruct-grammar.yaml | 1 + gallery/llama3.1-instruct.yaml | 1 + gallery/llama3.1-reflective.yaml | 1 + gallery/llama3.2-fcall.yaml | 1 + gallery/llama3.2-quantized.yaml | 1 + gallery/mathstral.yaml | 1 + gallery/mistral-0.3.yaml | 1 + gallery/moondream.yaml | 1 + gallery/mudler.yaml | 1 + gallery/phi-2-chat.yaml | 1 + gallery/phi-2-orange.yaml | 1 + gallery/phi-3-chat.yaml | 1 + gallery/phi-4-chat-fcall.yaml | 1 + gallery/phi-4-chat.yaml | 1 + gallery/qwen-fcall.yaml | 1 + gallery/qwen3-openbuddy.yaml | 1 + gallery/qwen3.yaml | 1 + gallery/rwkv.yaml | 1 + gallery/smolvlm.yaml | 1 + gallery/tuluv2.yaml | 1 + gallery/vllm.yaml | 1 + gallery/wizardlm2.yaml | 1 + go.mod | 21 +- go.sum | 39 -- main.go | 3 - pkg/assets/extract.go | 64 --- pkg/assets/list.go | 27 -- pkg/library/dynaload.go | 86 ---- pkg/model/initializers.go | 128 +----- pkg/model/loader_options.go | 7 - tests/integration/stores_test.go | 13 - 118 files changed, 631 insertions(+), 1339 deletions(-) create mode 100644 .github/workflows/build-test.yaml create mode 100644 .goreleaser.yaml delete mode 100644 Earthfile delete mode 100644 assets.go create mode 100644 backend/go/huggingface/Makefile rename backend/go/{llm/langchain => huggingface}/langchain.go (100%) rename backend/go/{llm/langchain => huggingface}/main.go (100%) create mode 100755 backend/go/huggingface/package.sh create mode 100755 backend/go/huggingface/run.sh create mode 100644 backend/go/local-store/Makefile rename backend/go/{stores => local-store}/debug.go (100%) rename backend/go/{stores => local-store}/main.go (100%) create mode 100755 backend/go/local-store/package.sh rename backend/go/{stores => local-store}/production.go (100%) create mode 100755 backend/go/local-store/run.sh rename backend/go/{stores => local-store}/store.go (99%) create mode 100644 backend/go/silero-vad/Makefile rename backend/go/{vad/silero => silero-vad}/main.go (100%) create mode 100755 backend/go/silero-vad/package.sh create mode 100755 backend/go/silero-vad/run.sh rename backend/go/{vad/silero => silero-vad}/vad.go (100%) delete mode 100644 core/cli/worker/worker_nop2p.go delete mode 100644 core/p2p/p2p_disabled.go delete mode 100644 pkg/assets/extract.go delete mode 100644 pkg/assets/list.go delete mode 100644 pkg/library/dynaload.go diff --git a/.devcontainer-scripts/poststart.sh b/.devcontainer-scripts/poststart.sh index 196e821db..7e65b4c7f 100644 --- a/.devcontainer-scripts/poststart.sh +++ b/.devcontainer-scripts/poststart.sh @@ -2,9 +2,6 @@ cd /workspace -# Grab the pre-stashed backend assets to avoid build issues -cp -r /build/backend-assets /workspace/backend-assets - # Ensures generated source files are present upon load make prepare diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml index 65e9b5c1b..81610ade5 100644 --- a/.devcontainer/docker-compose-devcontainer.yml +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -4,9 +4,6 @@ services: context: .. dockerfile: Dockerfile target: devcontainer - args: - - FFMPEG=true - - GO_TAGS=p2p tts env_file: - ../.env ports: diff --git a/.env b/.env index b0d1a2ad2..53d796bc1 100644 --- a/.env +++ b/.env @@ -41,13 +41,6 @@ ## Uncomment and set to true to enable rebuilding from source # REBUILD=true -## Enable go tags, available: p2p, tts -## p2p: enable distributed inferencing -## tts: enables text-to-speech with go-piper -## (requires REBUILD=true) -# -# GO_TAGS=p2p - ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index cf7536e81..fe08deb2c 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -43,7 +43,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-rerankers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -55,7 +55,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -67,7 +67,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-vllm' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -79,7 +79,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-transformers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -91,7 +91,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-diffusers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -104,7 +104,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-kokoro' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -116,7 +116,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -128,7 +128,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-coqui' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -140,7 +140,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-bark' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -152,7 +152,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -165,7 +165,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -177,7 +177,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -189,7 +189,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -201,7 +201,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -213,7 +213,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -226,7 +226,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -238,7 +238,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -250,7 +250,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -262,7 +262,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -274,7 +274,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -287,7 +287,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -299,7 +299,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -311,7 +311,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -323,7 +323,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -335,7 +335,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -348,7 +348,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -360,7 +360,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -372,7 +372,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -384,7 +384,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'ubuntu-latest' base-image: "rocm/dev-ubuntu-22.04:6.1" @@ -397,7 +397,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-rerankers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -409,7 +409,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-rerankers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -421,7 +421,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -433,7 +433,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -445,7 +445,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-vllm' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -457,7 +457,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-vllm' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -469,7 +469,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-transformers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -481,7 +481,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-transformers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -493,7 +493,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-diffusers' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -506,7 +506,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-kokoro' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -518,7 +518,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-kokoro' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -530,7 +530,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-faster-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -542,7 +542,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-faster-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -554,7 +554,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-coqui' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -566,7 +566,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-coqui' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -578,7 +578,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-bark' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -590,7 +590,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-bark' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -603,7 +603,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -616,7 +616,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -628,7 +628,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64,linux/arm64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -652,7 +652,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -665,7 +665,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -677,7 +677,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -689,7 +689,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -701,7 +701,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -713,7 +713,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -725,7 +725,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -749,8 +749,8 @@ jobs: - build-type: '' cuda-major-version: "" cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'true' + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -762,7 +762,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "0" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -774,7 +774,7 @@ jobs: cuda-major-version: "11" cuda-minor-version: "7" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-11-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -786,7 +786,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -798,7 +798,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-whisper' runs-on: 'ubuntu-latest' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" @@ -810,7 +810,7 @@ jobs: cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' - tag-latest: 'true' + tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" @@ -842,6 +842,45 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.go" context: "./" + #silero-vad + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-silero-vad' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "silero-vad" + dockerfile: "./backend/Dockerfile.go" + context: "./" + # local-store + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-local-store' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "local-store" + dockerfile: "./backend/Dockerfile.go" + context: "./" + # huggingface + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-huggingface' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" + skip-drivers: 'false' + backend: "huggingface" + dockerfile: "./backend/Dockerfile.go" + context: "./" llama-cpp-darwin: runs-on: macOS-14 strategy: @@ -866,7 +905,7 @@ jobs: - name: Build llama-cpp-darwin run: | make protogen-go - make build-api + make build bash scripts/build-llama-cpp-darwin.sh ls -la build/darwin.tar mv build/darwin.tar build/llama-cpp.tar @@ -954,7 +993,7 @@ jobs: - name: Build llama-cpp-darwin run: | make protogen-go - make build-api + make build export PLATFORMARCH=darwin/amd64 bash scripts/build-llama-cpp-darwin.sh ls -la build/darwin.tar diff --git a/.github/workflows/build-test.yaml b/.github/workflows/build-test.yaml new file mode 100644 index 000000000..095b41822 --- /dev/null +++ b/.github/workflows/build-test.yaml @@ -0,0 +1,23 @@ +name: Build test + +on: + push: + branches: + - master + pull_request: + +jobs: + build-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: 1.23 + - name: Run GoReleaser + run: | + make dev-dist diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index 9a6d729d9..c2063247f 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -31,7 +31,7 @@ jobs: make protogen-go - name: Build api run: | - CGO_ENABLED=0 make build-api + CGO_ENABLED=0 make build - name: rm uses: appleboy/ssh-action@v1.2.2 with: diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 0a3ed2708..262412237 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -14,7 +14,6 @@ jobs: with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} @@ -40,8 +39,7 @@ jobs: cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: '-gpu-nvidia-cuda12-ffmpeg' - ffmpeg: 'true' + tag-suffix: '-gpu-nvidia-cuda12' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" @@ -49,7 +47,6 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - ffmpeg: 'false' base-image: "rocm/dev-ubuntu-22.04:6.1" grpc-base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' @@ -59,15 +56,13 @@ jobs: tag-latest: 'false' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" grpc-base-image: "ubuntu:22.04" - tag-suffix: 'sycl-f16-ffmpeg' - ffmpeg: 'true' + tag-suffix: 'sycl-f16' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'false' - tag-suffix: '-vulkan-ffmpeg-core' - ffmpeg: 'true' + tag-suffix: '-vulkan-core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7339038c3..f97fda6a5 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -18,7 +18,6 @@ jobs: with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} @@ -40,7 +39,6 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - ffmpeg: 'true' base-image: "rocm/dev-ubuntu-22.04:6.1" grpc-base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' @@ -52,7 +50,6 @@ jobs: with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} @@ -76,7 +73,6 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' - ffmpeg: 'true' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" @@ -88,7 +84,6 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda11' - ffmpeg: 'true' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" @@ -100,7 +95,6 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda12' - ffmpeg: 'true' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' @@ -110,7 +104,6 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-vulkan' - ffmpeg: 'true' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" skip-drivers: 'false' @@ -122,7 +115,6 @@ jobs: base-image: "quay.io/go-skynet/intel-oneapi-base:latest" grpc-base-image: "ubuntu:22.04" tag-suffix: '-gpu-intel-f16' - ffmpeg: 'true' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel-f16" @@ -132,7 +124,6 @@ jobs: base-image: "quay.io/go-skynet/intel-oneapi-base:latest" grpc-base-image: "ubuntu:22.04" tag-suffix: '-gpu-intel-f32' - ffmpeg: 'true' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel-f32" @@ -142,7 +133,6 @@ jobs: with: tag-latest: ${{ matrix.tag-latest }} tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} build-type: ${{ matrix.build-type }} cuda-major-version: ${{ matrix.cuda-major-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }} @@ -167,7 +157,6 @@ jobs: platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' - ffmpeg: 'true' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index fe021823b..4e1e19c42 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -37,10 +37,6 @@ on: description: 'Tag suffix' default: '' type: string - ffmpeg: - description: 'FFMPEG' - default: '' - type: string skip-drivers: description: 'Skip drivers by default' default: 'false' @@ -236,7 +232,6 @@ jobs: BUILD_TYPE=${{ inputs.build-type }} CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} - FFMPEG=${{ inputs.ffmpeg }} BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target @@ -264,7 +259,6 @@ jobs: BUILD_TYPE=${{ inputs.build-type }} CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} - FFMPEG=${{ inputs.ffmpeg }} BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml index f54b4852d..56846cc86 100644 --- a/.github/workflows/notify-models.yaml +++ b/.github/workflows/notify-models.yaml @@ -96,7 +96,7 @@ jobs: - name: Start LocalAI run: | echo "Starting LocalAI..." - docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME + docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done # Check the PR diff using the current branch and the base branch of the PR - uses: GrantBirki/git-diff-action@v2.8.1 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 366f330ed..96495a1bf 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,399 +1,26 @@ -name: Build and Release +name: goreleaser on: push: - branches: - - master tags: - 'v*' - pull_request: - -env: - GRPC_VERSION: v1.65.0 - -permissions: - contents: write - -concurrency: - group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }} - cancel-in-progress: true jobs: - - # TODO: temporary disable linux-arm64 build - # build-linux-arm: - # runs-on: ubuntu-24.04-arm - # steps: - # - name: Free Disk Space (Ubuntu) - # uses: jlumbroso/free-disk-space@main - # with: - # # this might remove tools that are actually needed, - # # if set to "true" but frees about 6 GB - # tool-cache: true - # # all of these default to true, but feel free to set to - # # "false" if necessary for your workflow - # android: true - # dotnet: true - # haskell: true - # large-packages: true - # docker-images: true - # swap-storage: true - - # - name: Release space from worker - # run: | - # echo "Listing top largest packages" - # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - # head -n 30 <<< "${pkgs}" - # echo - # df -h - # echo - # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - # sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true - # sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true - # sudo rm -rf /usr/local/lib/android - # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - # sudo rm -rf /usr/share/dotnet - # sudo apt-get remove -y '^mono-.*' || true - # sudo apt-get remove -y '^ghc-.*' || true - # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - # sudo apt-get remove -y 'php.*' || true - # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - # sudo apt-get remove -y '^google-.*' || true - # sudo apt-get remove -y azure-cli || true - # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - # sudo apt-get remove -y '^gfortran-.*' || true - # sudo apt-get remove -y microsoft-edge-stable || true - # sudo apt-get remove -y firefox || true - # sudo apt-get remove -y powershell || true - # sudo apt-get remove -y r-base-core || true - # sudo apt-get autoremove -y - # sudo apt-get clean - # echo - # echo "Listing top largest packages" - # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - # head -n 30 <<< "${pkgs}" - # echo - # sudo rm -rfv build || true - # sudo rm -rf /usr/share/dotnet || true - # sudo rm -rf /opt/ghc || true - # sudo rm -rf "/usr/local/share/boost" || true - # sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true - # df -h - - # - name: Force Install GIT latest - # run: | - # sudo apt-get update \ - # && sudo apt-get install -y software-properties-common \ - # && sudo apt-get update \ - # && sudo add-apt-repository -y ppa:git-core/ppa \ - # && sudo apt-get update \ - # && sudo apt-get install -y git - # - name: Clone - # uses: actions/checkout@v4 - # with: - # submodules: true - # - uses: actions/setup-go@v5 - # with: - # go-version: '1.21.x' - # cache: false - # - name: Dependencies - # run: | - # sudo apt-get update - # sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev - # make install-go-tools - # - name: Install CUDA Dependencies - # run: | - # curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb - # sudo dpkg -i cuda-keyring_1.1-1_all.deb - # sudo apt-get update - # sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} - # env: - # CUDA_VERSION: 12-5 - # - name: Cache grpc - # id: cache-grpc - # uses: actions/cache@v4 - # with: - # path: grpc - # key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }} - # - name: Build grpc - # if: steps.cache-grpc.outputs.cache-hit != 'true' - # run: | - # git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - # cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ - # cd cmake/build && cmake -DgRPC_INSTALL=ON \ - # -DgRPC_BUILD_TESTS=OFF \ - # ../.. && sudo make --jobs 5 --output-sync=target - # - name: Install gRPC - # run: | - # cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install - # # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so - # - name: Build - # id: build - # run: | - # go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - # go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - # export PATH=$PATH:$GOPATH/bin - # export PATH=/usr/local/cuda/bin:$PATH - # sudo cp /lib64/ld-linux-aarch64.so.1 ld.so - # BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \ - # make -j4 dist - # - uses: actions/upload-artifact@v4 - # with: - # name: LocalAI-linux-arm64 - # path: release/ - # - name: Release - # uses: softprops/action-gh-release@v2 - # if: startsWith(github.ref, 'refs/tags/') - # with: - # files: | - # release/* - # - name: Setup tmate session if tests fail - # if: ${{ failure() }} - # uses: mxschmitt/action-tmate@v3.22 - # with: - # detached: true - # connect-timeout-seconds: 180 - # limit-access-to-actor: true - build-linux: + goreleaser: runs-on: ubuntu-latest steps: - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: true - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Release space from worker - run: | - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - df -h - echo - sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true - sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true - sudo rm -rf /usr/local/lib/android - sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - sudo rm -rf /usr/share/dotnet - sudo apt-get remove -y '^mono-.*' || true - sudo apt-get remove -y '^ghc-.*' || true - sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - sudo apt-get remove -y 'php.*' || true - sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - sudo apt-get remove -y '^google-.*' || true - sudo apt-get remove -y azure-cli || true - sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - sudo apt-get remove -y '^gfortran-.*' || true - sudo apt-get remove -y microsoft-edge-stable || true - sudo apt-get remove -y firefox || true - sudo apt-get remove -y powershell || true - sudo apt-get remove -y r-base-core || true - sudo apt-get autoremove -y - sudo apt-get clean - echo - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - sudo rm -rfv build || true - sudo rm -rf /usr/share/dotnet || true - sudo rm -rf /opt/ghc || true - sudo rm -rf "/usr/local/share/boost" || true - sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true - df -h - - - name: Force Install GIT latest - run: | - sudo apt-get update \ - && sudo apt-get install -y software-properties-common \ - && sudo apt-get update \ - && sudo add-apt-repository -y ppa:git-core/ppa \ - && sudo apt-get update \ - && sudo apt-get install -y git - - name: Clone + - name: Checkout uses: actions/checkout@v4 with: - submodules: true - - uses: actions/setup-go@v5 + fetch-depth: 0 + - name: Set up Go + uses: actions/setup-go@v5 with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev - make install-go-tools - - name: Intel Dependencies - run: | - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list - sudo apt update - sudo apt install -y intel-basekit - - name: Install CUDA Dependencies - run: | - curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb - sudo dpkg -i cuda-keyring_1.1-1_all.deb - sudo apt-get update - sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} + go-version: 1.23 + - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 + with: + version: v2.11.0 + args: release --clean env: - CUDA_VERSION: 12-5 - - name: "Install Hipblas" - env: - ROCM_VERSION: "6.1" - AMDGPU_VERSION: "6.1" - run: | - set -ex - - sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg - - sudo apt update - wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb - sudo apt install ./amdgpu-install_6.4.60401-1_all.deb - sudo apt update - - sudo amdgpu-install --usecase=rocm - - sudo apt-get clean - sudo rm -rf /var/lib/apt/lists/* - sudo ldconfig - - name: Cache grpc - id: cache-grpc - uses: actions/cache@v4 - with: - path: grpc - key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }} - - name: Build grpc - if: steps.cache-grpc.outputs.cache-hit != 'true' - run: | - git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ - cd cmake/build && cmake -DgRPC_INSTALL=ON \ - -DgRPC_BUILD_TESTS=OFF \ - ../.. && sudo make --jobs 5 --output-sync=target - - name: Install gRPC - run: | - cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install - # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so - - name: Build - id: build - run: | - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - export PATH=$PATH:$GOPATH/bin - export PATH=/usr/local/cuda/bin:$PATH - export PATH=/opt/rocm/bin:$PATH - source /opt/intel/oneapi/setvars.sh - sudo cp /lib64/ld-linux-x86-64.so.2 ld.so - make -j4 dist - - uses: actions/upload-artifact@v4 - with: - name: LocalAI-linux - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* - - name: Setup tmate session if tests fail - if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.22 - with: - detached: true - connect-timeout-seconds: 180 - limit-access-to-actor: true - - - build-macOS-x86_64: - runs-on: macos-13 - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - brew install protobuf grpc - make install-go-tools - - name: Build - id: build - run: | - export C_INCLUDE_PATH=/usr/local/include - export CPLUS_INCLUDE_PATH=/usr/local/include - export PATH=$PATH:$GOPATH/bin - export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper - make dist - - uses: actions/upload-artifact@v4 - with: - name: LocalAI-MacOS-x86_64 - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* - - name: Setup tmate session if tests fail - if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.22 - with: - detached: true - connect-timeout-seconds: 180 - limit-access-to-actor: true - - build-macOS-arm64: - runs-on: macos-14 - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - brew install protobuf grpc libomp llvm - make install-go-tools - - name: Build - id: build - run: | - export C_INCLUDE_PATH=/usr/local/include - export CPLUS_INCLUDE_PATH=/usr/local/include - export PATH=$PATH:$GOPATH/bin - export CC=/opt/homebrew/opt/llvm/bin/clang - make dist - - uses: actions/upload-artifact@v4 - with: - name: LocalAI-MacOS-arm64 - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* - - name: Setup tmate session if tests fail - if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.22 - with: - detached: true - connect-timeout-seconds: 180 - limit-access-to-actor: true + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cc6ef333d..8a3f89871 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -75,7 +75,6 @@ jobs: rm protoc.zip go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install github.com/GeertJohan/go.rice/rice@latest PATH="$PATH:$HOME/go/bin" make protogen-go - name: Dependencies run: | @@ -103,7 +102,7 @@ jobs: make -C backend/python/transformers - make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml + make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml env: CUDA_VERSION: 12-4 - name: Test @@ -164,11 +163,10 @@ jobs: rm protoc.zip go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install github.com/GeertJohan/go.rice/rice@latest PATH="$PATH:$HOME/go/bin" make protogen-go - name: Test run: | - PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio + PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.22 @@ -199,11 +197,10 @@ jobs: run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0 - go install github.com/GeertJohan/go.rice/rice@latest - name: Build llama-cpp-darwin run: | make protogen-go - make build-api + make build bash scripts/build-llama-cpp-darwin.sh ls -la build/darwin.tar mv build/darwin.tar build/llama-cpp.tar diff --git a/.gitignore b/.gitignore index 1f160c8ce..f6d83447e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ prepare-sources /backend/cpp/llama-* !backend/cpp/llama-cpp /backends +/backend-images +/result.yaml *.log diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 000000000..5bd6aa0bc --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,33 @@ +version: 2 +before: + hooks: + - make protogen-go + - go mod tidy +dist: release +source: + enabled: true + name_template: '{{ .ProjectName }}-{{ .Tag }}-source' +builds: + - + env: + - CGO_ENABLED=0 + ldflags: + - -s -w + - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}" + - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}" + goos: + - linux + - darwin + #- windows + goarch: + - amd64 + - arm64 +archives: + - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone + name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }} +checksum: + name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt' +snapshot: + version_template: "{{ .Tag }}-next" +changelog: + use: github-native diff --git a/.vscode/launch.json b/.vscode/launch.json index f5e91508e..55da767b4 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "LOCALAI_P2P": "true", "LOCALAI_FEDERATED": "true" }, - "buildFlags": ["-tags", "p2p tts", "-v"], + "buildFlags": ["-tags", "", "-v"], "envFile": "${workspaceFolder}/.env", "cwd": "${workspaceRoot}" } diff --git a/Dockerfile b/Dockerfile index 91e8aea5a..4e8e29cb2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -142,10 +142,9 @@ EOT RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin -# Install grpc compilers and rice +# Install grpc compilers RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \ - go install github.com/GeertJohan/go.rice/rice@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -194,7 +193,7 @@ RUN apt-get update && \ FROM build-requirements AS builder-base -ARG GO_TAGS="p2p" +ARG GO_TAGS="" ARG GRPC_BACKENDS ARG MAKEFLAGS ARG LD_FLAGS="-s -w" @@ -249,8 +248,7 @@ COPY ./pkg/utils ./pkg/utils COPY ./pkg/langchain ./pkg/langchain RUN ls -l ./ -RUN make backend-assets -RUN make grpcs +RUN make protogen-go # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry. # Adjustments to the build process should likely be made here. diff --git a/Earthfile b/Earthfile deleted file mode 100644 index 218768c9a..000000000 --- a/Earthfile +++ /dev/null @@ -1,5 +0,0 @@ -VERSION 0.7 - -build: - FROM DOCKERFILE -f Dockerfile . - SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai diff --git a/Makefile b/Makefile index 6f2be04d7..6f7f4fcb2 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -ONNX_VERSION?=1.20.0 -ONNX_ARCH?=x64 -ONNX_OS?=linux +GORELEASER?= export BUILD_TYPE?= @@ -35,77 +33,33 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) -UPX?= -# check if upx exists -ifeq (, $(shell which upx)) - UPX= -else - UPX=$(shell which upx) -endif - # Default Docker bridge IP E2E_BRIDGE_IP?=172.17.0.1 ifndef UNAME_S UNAME_S := $(shell uname -s) endif -# Detect if we are running on arm64 -ifneq (,$(findstring aarch64,$(shell uname -m))) - ONNX_ARCH=aarch64 -endif ifeq ($(OS),Darwin) - ONNX_OS=osx - ifneq (,$(findstring aarch64,$(shell uname -m))) - ONNX_ARCH=arm64 - else ifneq (,$(findstring arm64,$(shell uname -m))) - ONNX_ARCH=arm64 - else - ONNX_ARCH=x86_64 - endif - ifeq ($(OSX_SIGNING_IDENTITY),) OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') endif endif -ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface -ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store -ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad -ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) -# Use filter-out to remove the specified backends -ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS)) +# check if goreleaser exists +ifeq (, $(shell which goreleaser)) + GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s -- +else + GORELEASER=$(shell which goreleaser) +endif -GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) TEST_PATHS?=./api/... ./pkg/... ./core/... -# If empty, then we build all -ifeq ($(GRPC_BACKENDS),) - GRPC_BACKENDS=$(ALL_GRPC_BACKENDS) -endif - -ifeq ($(BUILD_API_ONLY),true) - GRPC_BACKENDS= -endif .PHONY: all test build vendor all: help -sources/onnxruntime: - mkdir -p sources/onnxruntime - curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz - cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz - cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./ - -backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime - cp -rfv sources/onnxruntime/lib/* backend-assets/lib/ -ifeq ($(OS),Darwin) - mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib -else - mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1 -endif - ## GENERIC rebuild: ## Rebuilds the project $(GOCMD) clean -cache @@ -116,58 +70,33 @@ clean: ## Remove build related file rm -f prepare rm -rf $(BINARY_NAME) rm -rf release/ - rm -rf backend-assets/* - $(MAKE) -C backend/cpp/grpc clean $(MAKE) protogen-clean rmdir pkg/grpc/proto || true clean-tests: rm -rf test-models rm -rf test-dir - rm -rf core/http/backend-assets - -clean-dc: clean - cp -r /build/backend-assets /workspace/backend-assets ## Install Go tools install-go-tools: go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - go install github.com/GeertJohan/go.rice/rice@latest ## Build: -build: backend-assets grpcs install-go-tools ## Build the project +build: protogen-go install-go-tools ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET}) $(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET}) -ifneq ($(BACKEND_LIBS),) - $(MAKE) backend-assets/lib - cp -f $(BACKEND_LIBS) backend-assets/lib/ -endif rm -rf $(BINARY_NAME) || true CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ - rice append --exec $(BINARY_NAME) -build-api: - BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build - -backend-assets/lib: - mkdir -p backend-assets/lib +dev-dist: + $(GORELEASER) build --snapshot --clean dist: - GO_TAGS="p2p" $(MAKE) build - GO_TAGS="p2p" STATIC=true $(MAKE) build - mkdir -p release -# if BUILD_ID is empty, then we don't append it to the binary name -ifeq ($(BUILD_ID),) - cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH) - shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256 -else - cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) - shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256 -endif + $(GORELEASER) build --clean osx-signed: build codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)" @@ -185,8 +114,7 @@ test-models/testmodel.ggml: wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav cp tests/models_fixtures/* test-models -prepare-test: grpcs - cp -rf backend-assets core/http +prepare-test: protogen-go cp tests/models_fixtures/* test-models ######################################################## @@ -194,7 +122,7 @@ prepare-test: grpcs ######################################################## ## Test targets -test: test-models/testmodel.ggml grpcs +test: test-models/testmodel.ggml protogen-go @echo 'Running tests' export GO_TAGS="debug" $(MAKE) prepare-test @@ -204,17 +132,26 @@ test: test-models/testmodel.ggml grpcs $(MAKE) test-tts $(MAKE) test-stablediffusion -backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api +backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build ./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)" -backends/piper: docker-build-piper docker-save-piper build-api +backends/piper: docker-build-piper docker-save-piper build ./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)" -backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build-api +backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build ./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)" -backends/whisper: docker-build-whisper docker-save-whisper build-api +backends/whisper: docker-build-whisper docker-save-whisper build ./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)" + +backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build + ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)" + +backends/local-store: docker-build-local-store docker-save-local-store build + ./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)" + +backends/huggingface: docker-build-huggingface docker-save-huggingface build + ./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)" ######################################################## ## AIO tests @@ -243,7 +180,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests . + docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) @@ -275,9 +212,7 @@ test-stablediffusion: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) -test-stores: backend-assets/grpc/local-store - mkdir -p tests/integration/backend-assets/grpc - cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/ +test-stores: $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration test-container: @@ -310,10 +245,42 @@ protogen: protogen-go protogen-python .PHONY: protogen-clean protogen-clean: protogen-go-clean protogen-python-clean +protoc: + @OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \ + ARCH_NAME=$$(uname -m); \ + if [ "$$OS_NAME" = "darwin" ]; then \ + if [ "$$ARCH_NAME" = "arm64" ]; then \ + FILE=protoc-31.1-osx-aarch_64.zip; \ + elif [ "$$ARCH_NAME" = "x86_64" ]; then \ + FILE=protoc-31.1-osx-x86_64.zip; \ + else \ + echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \ + fi; \ + elif [ "$$OS_NAME" = "linux" ]; then \ + if [ "$$ARCH_NAME" = "x86_64" ]; then \ + FILE=protoc-31.1-linux-x86_64.zip; \ + elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \ + FILE=protoc-31.1-linux-aarch_64.zip; \ + elif [ "$$ARCH_NAME" = "ppc64le" ]; then \ + FILE=protoc-31.1-linux-ppcle_64.zip; \ + elif [ "$$ARCH_NAME" = "s390x" ]; then \ + FILE=protoc-31.1-linux-s390_64.zip; \ + elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \ + FILE=protoc-31.1-linux-x86_32.zip; \ + else \ + echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \ + fi; \ + else \ + echo "Unsupported OS: $$OS_NAME"; exit 1; \ + fi; \ + URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \ + curl -L -s $$URL -o protoc.zip && \ + unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip + .PHONY: protogen-go -protogen-go: install-go-tools +protogen-go: protoc install-go-tools mkdir -p pkg/grpc/proto - protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ + ./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ backend/backend.proto .PHONY: protogen-go-clean @@ -407,19 +374,6 @@ vllm-protogen: vllm-protogen-clean: $(MAKE) -C backend/python/vllm protogen-clean -## GRPC -# Note: it is duplicated in the Dockerfile -prepare-extra-conda-environments: protogen-python - $(MAKE) -C backend/python/bark - $(MAKE) -C backend/python/coqui - $(MAKE) -C backend/python/diffusers - $(MAKE) -C backend/python/chatterbox - $(MAKE) -C backend/python/faster-whisper - $(MAKE) -C backend/python/vllm - $(MAKE) -C backend/python/rerankers - $(MAKE) -C backend/python/transformers - $(MAKE) -C backend/python/kokoro - $(MAKE) -C backend/python/exllama2 prepare-test-extra: protogen-python $(MAKE) -C backend/python/transformers @@ -433,37 +387,6 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/chatterbox test $(MAKE) -C backend/python/vllm test -backend-assets: - mkdir -p backend-assets -ifeq ($(BUILD_API_ONLY),true) - touch backend-assets/keep -endif - - -backend-assets/grpc: - mkdir -p backend-assets/grpc - -backend-assets/grpc/huggingface: protogen-go backend-assets/grpc - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/ -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/huggingface -endif - -backend-assets/grpc/silero-vad: protogen-go backend-assets/grpc backend-assets/lib/libonnxruntime.so.1 - CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/silero-vad -endif - -backend-assets/grpc/local-store: backend-assets/grpc protogen-go - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/ -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/local-store -endif - -grpcs: protogen-go $(GRPC_BACKENDS) - DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core @@ -506,7 +429,6 @@ docker-image-intel: --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ - --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . docker-image-intel-xpu: @@ -515,7 +437,6 @@ docker-image-intel-xpu: --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ - --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . ######################################################## @@ -534,6 +455,24 @@ docker-build-bark-cpp: docker-build-piper: docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper . +docker-build-local-store: + docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store . + +docker-build-huggingface: + docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface . + +docker-save-huggingface: backend-images + docker save local-ai-backend:huggingface -o backend-images/huggingface.tar + +docker-save-local-store: backend-images + docker save local-ai-backend:local-store -o backend-images/local-store.tar + +docker-build-silero-vad: + docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad . + +docker-save-silero-vad: backend-images + docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar + docker-save-piper: backend-images docker save local-ai-backend:piper -o backend-images/piper.tar diff --git a/assets.go b/assets.go deleted file mode 100644 index b3c813871..000000000 --- a/assets.go +++ /dev/null @@ -1,15 +0,0 @@ -package main - -import ( - rice "github.com/GeertJohan/go.rice" -) - -var backendAssets *rice.Box - -func init() { - var err error - backendAssets, err = rice.FindBox("backend-assets") - if err != nil { - panic(err) - } -} diff --git a/backend/cpp/llama-cpp/run.sh b/backend/cpp/llama-cpp/run.sh index 17a1d0df9..dde3161fa 100755 --- a/backend/cpp/llama-cpp/run.sh +++ b/backend/cpp/llama-cpp/run.sh @@ -44,7 +44,7 @@ fi if [ "$(uname)" == "Darwin" ]; then DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH else - LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH fi # If there is a lib/ld.so, use it diff --git a/backend/go/huggingface/Makefile b/backend/go/huggingface/Makefile new file mode 100644 index 000000000..77b6c82ed --- /dev/null +++ b/backend/go/huggingface/Makefile @@ -0,0 +1,9 @@ +GOCMD=go + +huggingface: + CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./ + +package: + bash package.sh + +build: huggingface package \ No newline at end of file diff --git a/backend/go/llm/langchain/langchain.go b/backend/go/huggingface/langchain.go similarity index 100% rename from backend/go/llm/langchain/langchain.go rename to backend/go/huggingface/langchain.go diff --git a/backend/go/llm/langchain/main.go b/backend/go/huggingface/main.go similarity index 100% rename from backend/go/llm/langchain/main.go rename to backend/go/huggingface/main.go diff --git a/backend/go/huggingface/package.sh b/backend/go/huggingface/package.sh new file mode 100755 index 000000000..6218a65f6 --- /dev/null +++ b/backend/go/huggingface/package.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture +# This script is used in the final stage of the Dockerfile + +set -e + +CURDIR=$(dirname "$(realpath $0)") + +mkdir -p $CURDIR/package +cp -avrf $CURDIR/huggingface $CURDIR/package/ +cp -rfv $CURDIR/run.sh $CURDIR/package/ \ No newline at end of file diff --git a/backend/go/huggingface/run.sh b/backend/go/huggingface/run.sh new file mode 100755 index 000000000..08972b5d2 --- /dev/null +++ b/backend/go/huggingface/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -ex + +CURDIR=$(dirname "$(realpath $0)") + +exec $CURDIR/huggingface "$@" \ No newline at end of file diff --git a/backend/go/local-store/Makefile b/backend/go/local-store/Makefile new file mode 100644 index 000000000..6cde84b00 --- /dev/null +++ b/backend/go/local-store/Makefile @@ -0,0 +1,9 @@ +GOCMD=go + +local-store: + CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./ + +package: + bash package.sh + +build: local-store package \ No newline at end of file diff --git a/backend/go/stores/debug.go b/backend/go/local-store/debug.go similarity index 100% rename from backend/go/stores/debug.go rename to backend/go/local-store/debug.go diff --git a/backend/go/stores/main.go b/backend/go/local-store/main.go similarity index 100% rename from backend/go/stores/main.go rename to backend/go/local-store/main.go diff --git a/backend/go/local-store/package.sh b/backend/go/local-store/package.sh new file mode 100755 index 000000000..af94e0ee7 --- /dev/null +++ b/backend/go/local-store/package.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture +# This script is used in the final stage of the Dockerfile + +set -e + +CURDIR=$(dirname "$(realpath $0)") + +mkdir -p $CURDIR/package +cp -avrf $CURDIR/local-store $CURDIR/package/ +cp -rfv $CURDIR/run.sh $CURDIR/package/ \ No newline at end of file diff --git a/backend/go/stores/production.go b/backend/go/local-store/production.go similarity index 100% rename from backend/go/stores/production.go rename to backend/go/local-store/production.go diff --git a/backend/go/local-store/run.sh b/backend/go/local-store/run.sh new file mode 100755 index 000000000..479f3b486 --- /dev/null +++ b/backend/go/local-store/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -ex + +CURDIR=$(dirname "$(realpath $0)") + +exec $CURDIR/local-store "$@" \ No newline at end of file diff --git a/backend/go/stores/store.go b/backend/go/local-store/store.go similarity index 99% rename from backend/go/stores/store.go rename to backend/go/local-store/store.go index c8788a9c7..1fa0b2ef6 100644 --- a/backend/go/stores/store.go +++ b/backend/go/local-store/store.go @@ -4,6 +4,7 @@ package main // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) import ( "container/heap" + "errors" "fmt" "math" "slices" @@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 { } func (s *Store) Load(opts *pb.ModelOptions) error { + if opts.Model != "" { + return errors.New("not implemented") + } return nil } @@ -315,7 +319,7 @@ func isNormalized(k []float32) bool { for _, v := range k { v64 := float64(v) - sum += v64*v64 + sum += v64 * v64 } s := math.Sqrt(sum) diff --git a/backend/go/silero-vad/Makefile b/backend/go/silero-vad/Makefile new file mode 100644 index 000000000..93fd6b4c9 --- /dev/null +++ b/backend/go/silero-vad/Makefile @@ -0,0 +1,47 @@ + +CURRENT_DIR=$(abspath ./) +GOCMD=go + +ONNX_VERSION?=1.20.0 +ONNX_ARCH?=x64 +ONNX_OS?=linux + +# Detect if we are running on arm64 +ifneq (,$(findstring aarch64,$(shell uname -m))) + ONNX_ARCH=aarch64 +endif + +ifeq ($(OS),Darwin) + ONNX_OS=osx + ifneq (,$(findstring aarch64,$(shell uname -m))) + ONNX_ARCH=arm64 + else ifneq (,$(findstring arm64,$(shell uname -m))) + ONNX_ARCH=arm64 + else + ONNX_ARCH=x86_64 + endif +endif + +sources/onnxruntime: + mkdir -p sources/onnxruntime + curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz + cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz + cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./ + +backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime + mkdir -p backend-assets/lib + cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/ +ifeq ($(OS),Darwin) + mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib +else + mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1 +endif + +silero-vad: backend-assets/lib/libonnxruntime.so.1 + CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./ + +package: + bash package.sh + +build: silero-vad package \ No newline at end of file diff --git a/backend/go/vad/silero/main.go b/backend/go/silero-vad/main.go similarity index 100% rename from backend/go/vad/silero/main.go rename to backend/go/silero-vad/main.go diff --git a/backend/go/silero-vad/package.sh b/backend/go/silero-vad/package.sh new file mode 100755 index 000000000..1c524000c --- /dev/null +++ b/backend/go/silero-vad/package.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture +# This script is used in the final stage of the Dockerfile + +set -e + +CURDIR=$(dirname "$(realpath $0)") + +# Create lib directory +mkdir -p $CURDIR/package/lib + +cp -avrf $CURDIR/silero-vad $CURDIR/package/ +cp -avrf $CURDIR/run.sh $CURDIR/package/ +cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/ + +# Detect architecture and copy appropriate libraries +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + # x86_64 architecture + echo "Detected x86_64 architecture, copying x86_64 libraries..." + cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so + cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + # ARM64 architecture + echo "Detected ARM64 architecture, copying ARM64 libraries..." + cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so + cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +else + echo "Error: Could not detect architecture" + exit 1 +fi + +echo "Packaging completed successfully" +ls -liah $CURDIR/package/ +ls -liah $CURDIR/package/lib/ \ No newline at end of file diff --git a/backend/go/silero-vad/run.sh b/backend/go/silero-vad/run.sh new file mode 100755 index 000000000..72658908a --- /dev/null +++ b/backend/go/silero-vad/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -ex + +CURDIR=$(dirname "$(realpath $0)") + +export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH + +# If there is a lib/ld.so, use it +if [ -f $CURDIR/lib/ld.so ]; then + echo "Using lib/ld.so" + exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@" +fi + +exec $CURDIR/silero-vad "$@" \ No newline at end of file diff --git a/backend/go/vad/silero/vad.go b/backend/go/silero-vad/vad.go similarity index 100% rename from backend/go/vad/silero/vad.go rename to backend/go/silero-vad/vad.go diff --git a/backend/index.yaml b/backend/index.yaml index 6451bfd77..608854605 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -68,7 +68,7 @@ default: "cpu-stablediffusion-ggml" nvidia: "cuda12-stablediffusion-ggml" intel: "intel-sycl-f16-stablediffusion-ggml" - #amd: "rocm-stablediffusion-ggml" + # amd: "rocm-stablediffusion-ggml" vulkan: "vulkan-stablediffusion-ggml" nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml" # metal: "metal-stablediffusion-ggml" @@ -285,6 +285,54 @@ tags: - text-to-speech - TTS +- &silero-vad + name: "silero-vad" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad" + icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png + urls: + - https://github.com/snakers4/silero-vad + description: | + Silero VAD: pre-trained enterprise-grade Voice Activity Detector. + Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not. + tags: + - voice-activity-detection + - VAD + - silero-vad + - CPU +- &local-store + name: "local-store" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store" + urls: + - https://github.com/mudler/LocalAI + description: | + Local Store is a local-first, self-hosted, and open-source vector database. + tags: + - vector-database + - local-first + - open-source + - CPU + license: MIT +- &huggingface + name: "huggingface" + uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface" + icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg + urls: + - https://huggingface.co/docs/hub/en/api + description: | + HuggingFace is a backend which uses the huggingface API to run models. + tags: + - LLM + - huggingface + license: MIT +- !!merge <<: *huggingface + name: "huggingface-development" + uri: "quay.io/go-skynet/local-ai-backends:master-huggingface" +- !!merge <<: *local-store + name: "local-store-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store" +- !!merge <<: *silero-vad + name: "silero-vad-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad" - !!merge <<: *piper name: "piper-development" uri: "quay.io/go-skynet/local-ai-backends:master-piper" diff --git a/core/application/startup.go b/core/application/startup.go index 1fdd1ad50..59003799b 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -9,9 +9,7 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" "github.com/mudler/LocalAI/pkg/model" pkgStartup "github.com/mudler/LocalAI/pkg/startup" "github.com/mudler/LocalAI/pkg/xsysinfo" @@ -103,23 +101,6 @@ func New(opts ...config.AppOption) (*Application, error) { } } - if options.AssetsDestination != "" { - // Extract files from the embedded FS - err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) - log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } - } - - if options.LibPath != "" { - // If there is a lib directory, set LD_LIBRARY_PATH to include it - err := library.LoadExternal(options.LibPath) - if err != nil { - log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") - } - } - // turn off any process that was started by GRPC if the context is canceled go func() { <-options.Context.Done() diff --git a/core/backend/options.go b/core/backend/options.go index 7f4623c2d..cfe7b35e4 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -20,7 +20,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ... defOpts := []model.Option{ model.WithBackendString(c.Backend), model.WithModel(c.Model), - model.WithAssetDir(so.AssetsDestination), model.WithContext(so.Context), model.WithModelID(name), } diff --git a/core/backend/stores.go b/core/backend/stores.go index f5ee9166d..78257180e 100644 --- a/core/backend/stores.go +++ b/core/backend/stores.go @@ -7,14 +7,12 @@ import ( "github.com/mudler/LocalAI/pkg/model" ) -func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) { - if storeName == "" { - storeName = "default" +func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) { + if backend == "" { + backend = model.LocalStoreBackend } - sc := []model.Option{ - model.WithBackendString(model.LocalStoreBackend), - model.WithAssetDir(appConfig.AssetsDestination), + model.WithBackendString(backend), model.WithModel(storeName), } diff --git a/core/cli/context/context.go b/core/cli/context/context.go index 34242e971..061d27503 100644 --- a/core/cli/context/context.go +++ b/core/cli/context/context.go @@ -1,13 +1,6 @@ package cliContext -import ( - rice "github.com/GeertJohan/go.rice" -) - type Context struct { Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"` LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"` - - // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI - BackendAssets *rice.Box `kong:"-"` } diff --git a/core/cli/run.go b/core/cli/run.go index 481d89448..47e765dd8 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -23,7 +23,6 @@ type RunCMD struct { ExternalBackends []string `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"` BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"` UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"` @@ -46,7 +45,6 @@ type RunCMD struct { Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` - LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"` CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` @@ -99,10 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCsrf(r.CSRF), - config.WithLibPath(r.LibraryPath), config.WithThreads(r.Threads), - config.WithBackendAssets(ctx.BackendAssets), - config.WithBackendAssetsOutput(r.BackendAssetsPath), config.WithUploadLimitMB(r.UploadLimit), config.WithApiKeys(r.APIKeys), config.WithModelsURL(append(r.Models, r.ModelArgs...)...), diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go index b7c1d0fe6..1193b329f 100644 --- a/core/cli/soundgeneration.go +++ b/core/cli/soundgeneration.go @@ -27,7 +27,6 @@ type SoundGenerationCMD struct { DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."` OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` } @@ -51,11 +50,10 @@ func parseToInt32Ptr(input string) *int32 { func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error { outputFile := t.OutputFile - outputDir := t.BackendAssetsPath + outputDir := os.TempDir() if outputFile != "" { outputDir = filepath.Dir(outputFile) } - text := strings.Join(t.Text, " ") externalBackends := make(map[string]string) @@ -71,7 +69,6 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error { ModelPath: t.ModelsPath, Context: context.Background(), GeneratedContentDir: outputDir, - AssetsDestination: t.BackendAssetsPath, ExternalGRPCBackends: externalBackends, } ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend) diff --git a/core/cli/transcript.go b/core/cli/transcript.go index 67b5ed1da..3e5ee6d44 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -15,20 +15,18 @@ import ( type TranscriptCMD struct { Filename string `arg:""` - Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"` - Model string `short:"m" required:"" help:"Model name to run the TTS"` - Language string `short:"l" help:"Language of the audio file"` - Translate bool `short:"c" help:"Translate the transcription to english"` - Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"` - ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"` + Model string `short:"m" required:"" help:"Model name to run the TTS"` + Language string `short:"l" help:"Language of the audio file"` + Translate bool `short:"c" help:"Translate the transcription to english"` + Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` } func (t *TranscriptCMD) Run(ctx *cliContext.Context) error { opts := &config.ApplicationConfig{ - ModelPath: t.ModelsPath, - Context: context.Background(), - AssetsDestination: t.BackendAssetsPath, + ModelPath: t.ModelsPath, + Context: context.Background(), } cl := config.NewBackendConfigLoader(t.ModelsPath) diff --git a/core/cli/tts.go b/core/cli/tts.go index 074487e62..552fdf018 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -17,18 +17,17 @@ import ( type TTSCMD struct { Text []string `arg:""` - Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"` - Model string `short:"m" required:"" help:"Model name to run the TTS"` - Voice string `short:"v" help:"Voice name to run the TTS"` - Language string `short:"l" help:"Language to use with the TTS"` - OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` - ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"` + Model string `short:"m" required:"" help:"Model name to run the TTS"` + Voice string `short:"v" help:"Voice name to run the TTS"` + Language string `short:"l" help:"Language to use with the TTS"` + OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` } func (t *TTSCMD) Run(ctx *cliContext.Context) error { outputFile := t.OutputFile - outputDir := t.BackendAssetsPath + outputDir := os.TempDir() if outputFile != "" { outputDir = filepath.Dir(outputFile) } @@ -39,7 +38,6 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error { ModelPath: t.ModelsPath, Context: context.Background(), GeneratedContentDir: outputDir, - AssetsDestination: t.BackendAssetsPath, } ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend) diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go index a5d065773..33813db06 100644 --- a/core/cli/worker/worker.go +++ b/core/cli/worker/worker.go @@ -1,7 +1,7 @@ package worker type WorkerFlags struct { - BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` + BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"` ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` } diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go index 3ea3cd426..d9fe8e4db 100644 --- a/core/cli/worker/worker_llamacpp.go +++ b/core/cli/worker/worker_llamacpp.go @@ -9,8 +9,6 @@ import ( cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" "github.com/rs/zerolog/log" ) @@ -47,24 +45,17 @@ func findLLamaCPPBackend(backendSystemPath string) (string, error) { } func (r *LLamaCPP) Run(ctx *cliContext.Context) error { - // Extract files from the embedded FS - err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) - log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } if len(os.Args) < 4 { return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- ") } - grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath) + grpcProcess, err := findLLamaCPPBackend(r.BackendsPath) if err != nil { return err } args := strings.Split(r.ExtraLLamaCPPArgs, " ") - args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess) args = append([]string{grpcProcess}, args...) return syscall.Exec( diff --git a/core/cli/worker/worker_nop2p.go b/core/cli/worker/worker_nop2p.go deleted file mode 100644 index fc3f095d7..000000000 --- a/core/cli/worker/worker_nop2p.go +++ /dev/null @@ -1,16 +0,0 @@ -//go:build !p2p -// +build !p2p - -package worker - -import ( - "fmt" - - cliContext "github.com/mudler/LocalAI/core/cli/context" -) - -type P2P struct{} - -func (r *P2P) Run(ctx *cliContext.Context) error { - return fmt.Errorf("p2p mode is not enabled in this build") -} diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 4fb1b5825..1533de4e5 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -1,6 +1,3 @@ -//go:build p2p -// +build p2p - package worker import ( @@ -13,8 +10,6 @@ import ( cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" "github.com/phayes/freeport" "github.com/rs/zerolog/log" ) @@ -29,12 +24,6 @@ type P2P struct { } func (r *P2P) Run(ctx *cliContext.Context) error { - // Extract files from the embedded FS - err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) - log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } // Check if the token is set // as we always need it. @@ -71,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { for { log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port) - grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath) + grpcProcess, err := findLLamaCPPBackend(r.BackendsPath) if err != nil { log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server") return @@ -85,8 +74,6 @@ func (r *P2P) Run(ctx *cliContext.Context) error { args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...) log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args)) - args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess) - cmd := exec.Command( grpcProcess, args..., ) diff --git a/core/config/application_config.go b/core/config/application_config.go index 662bddc6a..4f5f878d1 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -6,7 +6,6 @@ import ( "regexp" "time" - rice "github.com/GeertJohan/go.rice" "github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -17,7 +16,6 @@ type ApplicationConfig struct { ModelPath string BackendsPath string ExternalBackends []string - LibPath string UploadLimitMB, Threads, ContextSize int F16 bool Debug bool @@ -50,9 +48,6 @@ type ApplicationConfig struct { Galleries []Gallery BackendGalleries []Gallery - BackendAssets *rice.Box - AssetsDestination string - ExternalGRPCBackends map[string]string AutoloadGalleries, AutoloadBackendGalleries bool @@ -140,12 +135,6 @@ func WithP2PToken(s string) AppOption { } } -func WithLibPath(path string) AppOption { - return func(o *ApplicationConfig) { - o.LibPath = path - } -} - var EnableWatchDog = func(o *ApplicationConfig) { o.WatchDog = true } @@ -211,18 +200,6 @@ func WithCorsAllowOrigins(b string) AppOption { } } -func WithBackendAssetsOutput(out string) AppOption { - return func(o *ApplicationConfig) { - o.AssetsDestination = out - } -} - -func WithBackendAssets(f *rice.Box) AppOption { - return func(o *ApplicationConfig) { - o.BackendAssets = f - } -} - func WithStringGalleries(galls string) AppOption { return func(o *ApplicationConfig) { if galls == "" { diff --git a/core/gallery/models.go b/core/gallery/models.go index a1c8a4b75..30ec2908e 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -126,8 +126,9 @@ func InstallModelFromGallery( if err != nil { return err } - + log.Debug().Msgf("Installed model %q", installedModel.Name) if automaticallyInstallBackend && installedModel.Backend != "" { + log.Debug().Msgf("Installing backend %q", installedModel.Backend) systemState, err := system.GetSystemState() if err != nil { return err diff --git a/core/http/app_test.go b/core/http/app_test.go index b4eadbe73..03aaf8a4c 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -23,7 +23,6 @@ import ( . "github.com/onsi/gomega" "gopkg.in/yaml.v3" - rice "github.com/GeertJohan/go.rice" openaigo "github.com/otiai10/openaigo" "github.com/sashabaranov/go-openai" "github.com/sashabaranov/go-openai/jsonschema" @@ -264,16 +263,6 @@ func getRequest(url string, header http.Header) (error, int, []byte) { const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` -var backendAssets *rice.Box - -func init() { - var err error - backendAssets, err = rice.FindBox("backend-assets") - if err != nil { - panic(err) - } -} - var _ = Describe("API test", func() { var app *fiber.App @@ -300,9 +289,6 @@ var _ = Describe("API test", func() { modelDir = filepath.Join(tmpdir, "models") err = os.Mkdir(modelDir, 0750) Expect(err).ToNot(HaveOccurred()) - backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0750) - Expect(err).ToNot(HaveOccurred()) c, cancel = context.WithCancel(context.Background()) @@ -341,8 +327,7 @@ var _ = Describe("API test", func() { config.WithModelPath(modelDir), config.WithBackendsPath(backendPath), config.WithApiKeys([]string{apiKey}), - config.WithBackendAssets(backendAssets), - config.WithBackendAssetsOutput(backendAssetsDir))...) + )...) Expect(err).ToNot(HaveOccurred()) app, err = API(application) @@ -545,8 +530,7 @@ var _ = Describe("API test", func() { config.WithBackendsPath(backendPath), config.WithGalleries(galleries), config.WithModelPath(modelDir), - config.WithBackendAssets(backendAssets), - config.WithBackendAssetsOutput(tmpdir))..., + )..., ) Expect(err).ToNot(HaveOccurred()) app, err = API(application) @@ -803,6 +787,10 @@ var _ = Describe("API test", func() { }) It("shows the external backend", func() { + // Only run on linux + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } // do an http request to the /system endpoint resp, err := http.Get("http://127.0.0.1:9090/system") Expect(err).ToNot(HaveOccurred()) @@ -888,6 +876,13 @@ var _ = Describe("API test", func() { // See tests/integration/stores_test Context("Stores", Label("stores"), func() { + BeforeEach(func() { + // Only run on linux + if runtime.GOOS != "linux" { + Skip("test supported only on linux") + } + }) + It("sets, gets, finds and deletes entries", func() { ks := [][]float32{ {0.1, 0.2, 0.3}, diff --git a/core/http/endpoints/localai/stores.go b/core/http/endpoints/localai/stores.go index dd8df8b18..303d943f6 100644 --- a/core/http/endpoints/localai/stores.go +++ b/core/http/endpoints/localai/stores.go @@ -17,7 +17,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi return err } - sb, err := backend.StoreBackend(sl, appConfig, input.Store) + sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend) if err != nil { return err } @@ -45,7 +45,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo return err } - sb, err := backend.StoreBackend(sl, appConfig, input.Store) + sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend) if err != nil { return err } @@ -67,7 +67,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi return err } - sb, err := backend.StoreBackend(sl, appConfig, input.Store) + sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend) if err != nil { return err } @@ -99,7 +99,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf return err } - sb, err := backend.StoreBackend(sl, appConfig, input.Store) + sb, err := backend.StoreBackend(sl, appConfig, input.Store, input.Backend) if err != nil { return err } diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go index ea01a6702..64b1d111b 100644 --- a/core/http/endpoints/localai/system.go +++ b/core/http/endpoints/localai/system.go @@ -13,10 +13,7 @@ import ( // @Router /system [get] func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination) - if err != nil { - return err - } + availableBackends := []string{} loadedModels := ml.ListModels() for b := range appConfig.ExternalGRPCBackends { availableBackends = append(availableBackends, b) diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 07bc92c63..ba291536e 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -5,7 +5,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/utils" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" @@ -37,7 +36,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, "Models": modelsWithoutConfig, "ModelsConfig": backendConfigs, "GalleryConfig": galleryConfigs, - "IsP2PEnabled": p2p.IsP2PEnabled(), "ApplicationConfig": appConfig, "ProcessingModels": processingModels, "TaskTypes": taskTypes, diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index e0217be3e..39f22ca61 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -6,7 +6,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/middleware" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" @@ -80,10 +79,8 @@ func RegisterLocalAIRoutes(router *fiber.App, router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) // p2p - if p2p.IsP2PEnabled() { - router.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) - router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) - } + router.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) + router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) router.Get("/version", func(c *fiber.Ctx) error { return c.JSON(struct { diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 6a59ad1ab..11b2ab485 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -25,38 +25,39 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps)) - if p2p.IsP2PEnabled() { - app.Get("/p2p", func(c *fiber.Ctx) error { - summary := fiber.Map{ - "Title": "LocalAI - P2P dashboard", - "BaseURL": utils.BaseURL(c), - "Version": internal.PrintableVersion(), - //"Nodes": p2p.GetAvailableNodes(""), - //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), - "IsP2PEnabled": p2p.IsP2PEnabled(), - "P2PToken": appConfig.P2PToken, - "NetworkID": appConfig.P2PNetworkID, - } + // P2P + app.Get("/p2p", func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI - P2P dashboard", + "BaseURL": utils.BaseURL(c), + "Version": internal.PrintableVersion(), + //"Nodes": p2p.GetAvailableNodes(""), + //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), - // Render index - return c.Render("views/p2p", summary) - }) + "P2PToken": appConfig.P2PToken, + "NetworkID": appConfig.P2PNetworkID, + } - /* show nodes live! */ - app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) - }) - app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) - }) + // Render index + return c.Render("views/p2p", summary) + }) - app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) - }) - app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) - }) - } + /* show nodes live! */ + app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error { + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) + }) + app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error { + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) + }) + + app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error { + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) + }) + app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error { + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) + }) + + // End P2P if !appConfig.DisableGalleryEndpoint { registerGalleryRoutes(app, cl, appConfig, galleryService, processingOps) @@ -76,8 +77,8 @@ func RegisterUIRoutes(app *fiber.App, "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0], - "IsP2PEnabled": p2p.IsP2PEnabled(), - "Version": internal.PrintableVersion(), + + "Version": internal.PrintableVersion(), } // Render index @@ -121,7 +122,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsConfig": backendConfigs, "Model": modelThatCanBeUsed, "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -151,7 +151,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsWithoutConfig": modelsWithoutConfig, "Model": c.Params("model"), "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -169,7 +168,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsWithoutConfig": modelsWithoutConfig, "Model": c.Params("model"), "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -203,7 +201,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsWithoutConfig": modelsWithoutConfig, "Model": modelThatCanBeUsed, "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -221,7 +218,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsWithoutConfig": modelsWithoutConfig, "Model": c.Params("model"), "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -253,7 +249,6 @@ func RegisterUIRoutes(app *fiber.App, "ModelsConfig": backendConfigs, "ModelsWithoutConfig": modelsWithoutConfig, "Model": modelThatCanBeUsed, - "IsP2PEnabled": p2p.IsP2PEnabled(), "Version": internal.PrintableVersion(), } diff --git a/core/http/routes/ui_backend_gallery.go b/core/http/routes/ui_backend_gallery.go index 6b6ba40e3..d16cdb026 100644 --- a/core/http/routes/ui_backend_gallery.go +++ b/core/http/routes/ui_backend_gallery.go @@ -15,7 +15,6 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/utils" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/rs/zerolog/log" @@ -71,7 +70,6 @@ func registerBackendGalleryRoutes(app *fiber.App, appConfig *config.ApplicationC "ProcessingBackends": processingBackendsData, "AvailableBackends": len(backends), "TaskTypes": taskTypes, - "IsP2PEnabled": p2p.IsP2PEnabled(), } if page == "" { diff --git a/core/http/routes/ui_gallery.go b/core/http/routes/ui_gallery.go index d9b0c43d6..6a0e1d7dd 100644 --- a/core/http/routes/ui_gallery.go +++ b/core/http/routes/ui_gallery.go @@ -15,7 +15,6 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/utils" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/rs/zerolog/log" @@ -70,9 +69,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo "AllTags": tags, "ProcessingModels": processingModelsData, "AvailableModels": len(models), - "IsP2PEnabled": p2p.IsP2PEnabled(), - - "TaskTypes": taskTypes, + "TaskTypes": taskTypes, // "ApplicationConfig": appConfig, } diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html index cfcfbe319..b3339e876 100644 --- a/core/http/views/explorer.html +++ b/core/http/views/explorer.html @@ -268,7 +268,7 @@ Command to connect (click to copy):

- docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master-ffmpeg-core federated --debug + docker run -d --restart=always -e ADDRESS=":80" -e LOCALAI_P2P_NETWORK_ID= -e LOCALAI_P2P_LOGLEVEL=debug --name local-ai -e TOKEN="" --net host -ti localai/localai:master federated --debug or via CLI: diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html index 6e9024851..bd6324bf6 100644 --- a/core/http/views/p2p.html +++ b/core/http/views/p2p.html @@ -49,11 +49,11 @@ - {{ if and .IsP2PEnabled (eq .P2PToken "") }} + {{ if eq .P2PToken "" }}
-

Warning: P2P mode is disabled or no token was specified

+

Warning: P2P token was not specified

You have to enable P2P mode by starting LocalAI with --p2p. Please restart the server with --p2p to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with export TOKEN=".." diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 229d7bf29..4ecfab872 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -40,11 +40,9 @@ Talk - {{ if .IsP2PEnabled }} Swarm - {{ end }} API @@ -75,11 +73,9 @@ Talk - {{ if .IsP2PEnabled }} Swarm - {{ end }} API diff --git a/core/p2p/federated_server.go b/core/p2p/federated_server.go index d80af082c..e382576ba 100644 --- a/core/p2p/federated_server.go +++ b/core/p2p/federated_server.go @@ -1,6 +1,3 @@ -//go:build p2p -// +build p2p - package p2p import ( diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go index b4b5886b5..b5cd1f831 100644 --- a/core/p2p/p2p.go +++ b/core/p2p/p2p.go @@ -1,6 +1,3 @@ -//go:build p2p -// +build p2p - package p2p import ( @@ -65,10 +62,6 @@ func GenerateToken(DHTInterval, OTPInterval int) string { return generateNewConnectionData(DHTInterval, OTPInterval).Base64() } -func IsP2PEnabled() bool { - return true -} - func nodeID(s string) string { hostname, _ := os.Hostname() return fmt.Sprintf("%s-%s", hostname, s) diff --git a/core/p2p/p2p_disabled.go b/core/p2p/p2p_disabled.go deleted file mode 100644 index c5ba98fda..000000000 --- a/core/p2p/p2p_disabled.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build !p2p -// +build !p2p - -package p2p - -import ( - "context" - "fmt" - - "github.com/mudler/edgevpn/pkg/node" -) - -func GenerateToken(DHTInterval, OTPInterval int) string { - return "not implemented" -} - -func (f *FederatedServer) Start(ctx context.Context) error { - return fmt.Errorf("not implemented") -} - -func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData), allocate bool) error { - return fmt.Errorf("not implemented") -} - -func ExposeService(ctx context.Context, host, port, token, servicesID string) (*node.Node, error) { - return nil, fmt.Errorf("not implemented") -} - -func IsP2PEnabled() bool { - return false -} - -func NewNode(token string) (*node.Node, error) { - return nil, fmt.Errorf("not implemented") -} diff --git a/core/schema/localai.go b/core/schema/localai.go index 734314a2b..4e819238a 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -63,23 +63,29 @@ type VADResponse struct { Segments []VADSegment `json:"segments" yaml:"segments"` } +type StoreCommon struct { + Backend string `json:"backend,omitempty" yaml:"backend,omitempty"` +} type StoresSet struct { Store string `json:"store,omitempty" yaml:"store,omitempty"` Keys [][]float32 `json:"keys" yaml:"keys"` Values []string `json:"values" yaml:"values"` + StoreCommon } type StoresDelete struct { Store string `json:"store,omitempty" yaml:"store,omitempty"` Keys [][]float32 `json:"keys"` + StoreCommon } type StoresGet struct { Store string `json:"store,omitempty" yaml:"store,omitempty"` Keys [][]float32 `json:"keys" yaml:"keys"` + StoreCommon } type StoresGetResponse struct { @@ -92,6 +98,7 @@ type StoresFind struct { Key []float32 `json:"key" yaml:"key"` Topk int `json:"topk" yaml:"topk"` + StoreCommon } type StoresFindResponse struct { diff --git a/docker-compose.yaml b/docker-compose.yaml index 2306c28f3..b9880352a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -5,7 +5,7 @@ services: # Available images with CUDA, ROCm, SYCL # Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags # Image list (dockerhub): https://hub.docker.com/r/localai/localai - image: quay.io/go-skynet/local-ai:master-ffmpeg-core + image: quay.io/go-skynet/local-ai:master build: context: . dockerfile: Dockerfile diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 5c52ed4ca..68bb53adf 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -579,38 +579,14 @@ You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to r ### Extra backends -LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends. - -If you wish to build a custom container image with extra backends, you can use the core images and build only the backends you are interested into or prepare the environment on startup by using the `EXTRA_BACKENDS` environment variable. For instance, to use the diffusers backend: - -```Dockerfile -FROM quay.io/go-skynet/local-ai:master-ffmpeg-core - -RUN make -C backend/python/diffusers -``` - -Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers: - -```Dockerfile -FROM quay.io/go-skynet/local-ai:master-ffmpeg-core - -RUN make -C backend/python/diffusers - -ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh" -``` - -{{% alert note %}} - -You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`. - -{{% /alert %}} +LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. See the [backend section](https://localai.io/backends/) for more details on how to install and build new backends for LocalAI. #### In runtime When using the `-core` container image it is possible to prepare the python backends you are interested into by using the `EXTRA_BACKENDS` variable, for instance: ```bash -docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core +docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master ``` ### Concurrent requests diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md index 51bce71fb..12eba2946 100644 --- a/docs/content/docs/features/GPU-acceleration.md +++ b/docs/content/docs/features/GPU-acceleration.md @@ -73,8 +73,6 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta - CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ... - CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ... -- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ... -- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ... In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example: @@ -259,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/ ### Container images -To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ... +To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16`, ... The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags). @@ -268,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example: ```bash -docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2 +docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2 ``` ### Notes @@ -276,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 - In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example: ```bash -docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core +docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16 ``` Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled. diff --git a/docs/content/docs/features/embeddings.md b/docs/content/docs/features/embeddings.md index 92c41eb64..7e0f3abf4 100644 --- a/docs/content/docs/features/embeddings.md +++ b/docs/content/docs/features/embeddings.md @@ -44,7 +44,6 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g {{% alert note %}} - The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use. -- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed. - For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable. - Example: `EXTERNAL_GRPC_BACKENDS="sentencetransformers:/path/to/LocalAI/backend/python/sentencetransformers/sentencetransformers.py"` - The `sentencetransformers` backend does support only embeddings of text, and not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`. diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index 38cafeca1..a3b39f0a3 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -18,8 +18,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA - Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration. -- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features. -- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}). {{% /alert %}} diff --git a/docs/content/docs/getting-started/customize-model.md b/docs/content/docs/getting-started/customize-model.md index e8440cd39..eff83ebd2 100644 --- a/docs/content/docs/getting-started/customize-model.md +++ b/docs/content/docs/getting-started/customize-model.md @@ -23,7 +23,7 @@ MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branc Here's an example to initiate the **phi-2** model: ```bash -docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml +docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml ``` You can also check all the embedded models configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models). @@ -64,7 +64,7 @@ Then, launch LocalAI using your gist's URL: ```bash ## Important! Substitute with your gist's URL! -docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml +docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubusercontent.com/xxxx/phi-2.yaml ``` ## Next Steps diff --git a/gallery/alpaca.yaml b/gallery/alpaca.yaml index b647d2f64..18512de77 100644 --- a/gallery/alpaca.yaml +++ b/gallery/alpaca.yaml @@ -2,6 +2,7 @@ name: "alpaca" config_file: | + backend: "llama-cpp" context_size: 4096 f16: true mmap: true diff --git a/gallery/arch-function.yaml b/gallery/arch-function.yaml index a527d0f79..c7e7775ce 100644 --- a/gallery/arch-function.yaml +++ b/gallery/arch-function.yaml @@ -2,6 +2,7 @@ name: "chatml" config_file: | + backend: "llama-cpp" mmap: true function: disable_no_action: true diff --git a/gallery/chatml-hercules.yaml b/gallery/chatml-hercules.yaml index c10367896..36b478a1a 100644 --- a/gallery/chatml-hercules.yaml +++ b/gallery/chatml-hercules.yaml @@ -2,6 +2,7 @@ name: "chatml-hercules" config_file: | + backend: "llama-cpp" mmap: true function: # disable injecting the "answer" tool diff --git a/gallery/chatml.yaml b/gallery/chatml.yaml index abaf3209f..7e8e63a62 100644 --- a/gallery/chatml.yaml +++ b/gallery/chatml.yaml @@ -2,6 +2,7 @@ name: "chatml" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/command-r.yaml b/gallery/command-r.yaml index 81a24fb19..0c1636f3e 100644 --- a/gallery/command-r.yaml +++ b/gallery/command-r.yaml @@ -2,6 +2,7 @@ name: "command-r" config_file: | + backend: "llama-cpp" context_size: 131072 stopwords: - "<|END_OF_TURN_TOKEN|>" diff --git a/gallery/deephermes.yaml b/gallery/deephermes.yaml index 93d5c7939..3805b57ec 100644 --- a/gallery/deephermes.yaml +++ b/gallery/deephermes.yaml @@ -2,6 +2,7 @@ name: "deephermes" config_file: | + backend: "llama-cpp" mmap: true context_size: 8192 stopwords: diff --git a/gallery/deepseek-r1.yaml b/gallery/deepseek-r1.yaml index 29ca9db12..d03073534 100644 --- a/gallery/deepseek-r1.yaml +++ b/gallery/deepseek-r1.yaml @@ -2,6 +2,7 @@ name: "deepseek-r1" config_file: | + backend: "llama-cpp" context_size: 131072 mmap: true f16: true diff --git a/gallery/deepseek.yaml b/gallery/deepseek.yaml index fa8870a1d..d8f926739 100644 --- a/gallery/deepseek.yaml +++ b/gallery/deepseek.yaml @@ -2,6 +2,7 @@ name: "deepseek" config_file: | + backend: "llama-cpp" mmap: true context_size: 8192 template: diff --git a/gallery/falcon3.yaml b/gallery/falcon3.yaml index b6593f4bb..5f2fc8c59 100644 --- a/gallery/falcon3.yaml +++ b/gallery/falcon3.yaml @@ -2,6 +2,7 @@ name: "falcon3" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml index ed69795f0..d6a1eab06 100644 --- a/gallery/gemma.yaml +++ b/gallery/gemma.yaml @@ -2,6 +2,7 @@ name: "gemma" config_file: | + backend: "llama-cpp" mmap: true context_size: 8192 template: diff --git a/gallery/granite.yaml b/gallery/granite.yaml index 465cca186..8b94b4703 100644 --- a/gallery/granite.yaml +++ b/gallery/granite.yaml @@ -2,6 +2,7 @@ name: "granite" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/granite3-2.yaml b/gallery/granite3-2.yaml index 8a4a9b88d..ec07fca9e 100644 --- a/gallery/granite3-2.yaml +++ b/gallery/granite3-2.yaml @@ -2,6 +2,7 @@ name: "granite-3.2" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml index 22a5fb3a6..040927e09 100644 --- a/gallery/hermes-2-pro-mistral.yaml +++ b/gallery/hermes-2-pro-mistral.yaml @@ -2,6 +2,7 @@ name: "hermes-2-pro-mistral" config_file: | + backend: "llama-cpp" mmap: true context_size: 8192 stopwords: diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 5dc54b0e8..c2ef37e87 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -2,6 +2,7 @@ name: "llama3-instruct" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/llama3.1-instruct-grammar.yaml b/gallery/llama3.1-instruct-grammar.yaml index 30237af35..b91834937 100644 --- a/gallery/llama3.1-instruct-grammar.yaml +++ b/gallery/llama3.1-instruct-grammar.yaml @@ -2,6 +2,7 @@ name: "llama3-instruct-grammar" config_file: | + backend: "llama-cpp" mmap: true function: disable_no_action: true diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml index 4a2b4db13..1d078f2b0 100644 --- a/gallery/llama3.1-instruct.yaml +++ b/gallery/llama3.1-instruct.yaml @@ -2,6 +2,7 @@ name: "llama3-instruct" config_file: | + backend: "llama-cpp" mmap: true function: disable_no_action: true diff --git a/gallery/llama3.1-reflective.yaml b/gallery/llama3.1-reflective.yaml index 86a91d8b1..75f6edf2f 100644 --- a/gallery/llama3.1-reflective.yaml +++ b/gallery/llama3.1-reflective.yaml @@ -2,6 +2,7 @@ name: "llama3-instruct" config_file: | + backend: "llama-cpp" mmap: true cutstrings: - (.*?) diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml index 73f370a8f..fc8dc1240 100644 --- a/gallery/llama3.2-fcall.yaml +++ b/gallery/llama3.2-fcall.yaml @@ -2,6 +2,7 @@ name: "llama3.2-fcall" config_file: | + backend: "llama-cpp" mmap: true function: json_regex_match: diff --git a/gallery/llama3.2-quantized.yaml b/gallery/llama3.2-quantized.yaml index 7e1d26305..2407b22da 100644 --- a/gallery/llama3.2-quantized.yaml +++ b/gallery/llama3.2-quantized.yaml @@ -2,6 +2,7 @@ name: "llama3.2-quantized" config_file: | + backend: "llama-cpp" mmap: true function: disable_no_action: true diff --git a/gallery/mathstral.yaml b/gallery/mathstral.yaml index a1c686b45..1ed503396 100644 --- a/gallery/mathstral.yaml +++ b/gallery/mathstral.yaml @@ -2,6 +2,7 @@ name: "mathstral" config_file: | + backend: "llama-cpp" context_size: 8192 mmap: true stopwords: diff --git a/gallery/mistral-0.3.yaml b/gallery/mistral-0.3.yaml index 502e7a5af..1f45728d1 100644 --- a/gallery/mistral-0.3.yaml +++ b/gallery/mistral-0.3.yaml @@ -2,6 +2,7 @@ name: "mistral-0.3" config_file: | + backend: "llama-cpp" context_size: 8192 mmap: true stopwords: diff --git a/gallery/moondream.yaml b/gallery/moondream.yaml index d3511f20b..5ff871cac 100644 --- a/gallery/moondream.yaml +++ b/gallery/moondream.yaml @@ -3,6 +3,7 @@ name: "moondream2" config_file: | + backend: "llama-cpp" context_size: 2046 roles: user: "\nQuestion: " diff --git a/gallery/mudler.yaml b/gallery/mudler.yaml index 77bdc8eb4..fa85b9730 100644 --- a/gallery/mudler.yaml +++ b/gallery/mudler.yaml @@ -2,6 +2,7 @@ name: localai config_file: |- + backend: "llama-cpp" context_size: 8192 stopwords: - <|im_end|> diff --git a/gallery/phi-2-chat.yaml b/gallery/phi-2-chat.yaml index 5e1fb702d..cd161fa27 100644 --- a/gallery/phi-2-chat.yaml +++ b/gallery/phi-2-chat.yaml @@ -2,6 +2,7 @@ name: "phi-2-chatml" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/phi-2-orange.yaml b/gallery/phi-2-orange.yaml index 89971b4d5..22642ac50 100644 --- a/gallery/phi-2-orange.yaml +++ b/gallery/phi-2-orange.yaml @@ -2,6 +2,7 @@ name: "phi-2-orange" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index 98a3f3854..ce3f21116 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -2,6 +2,7 @@ name: "phi-3-chat" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/phi-4-chat-fcall.yaml b/gallery/phi-4-chat-fcall.yaml index 23c2e53db..c73f993e3 100644 --- a/gallery/phi-4-chat-fcall.yaml +++ b/gallery/phi-4-chat-fcall.yaml @@ -2,6 +2,7 @@ name: "phi-4-chat" config_file: | + backend: "llama-cpp" mmap: true function: json_regex_match: diff --git a/gallery/phi-4-chat.yaml b/gallery/phi-4-chat.yaml index 1ff0b14af..6b8de6dbf 100644 --- a/gallery/phi-4-chat.yaml +++ b/gallery/phi-4-chat.yaml @@ -3,6 +3,7 @@ name: "phi-4-chat" config_file: | mmap: true + backend: "llama-cpp" template: chat_message: | <|im_start|>{{ .RoleName }}<|im_sep|> diff --git a/gallery/qwen-fcall.yaml b/gallery/qwen-fcall.yaml index f168c7fe0..dc8fb47ec 100644 --- a/gallery/qwen-fcall.yaml +++ b/gallery/qwen-fcall.yaml @@ -2,6 +2,7 @@ name: "qwen-fcall" config_file: | + backend: "llama-cpp" mmap: true function: json_regex_match: diff --git a/gallery/qwen3-openbuddy.yaml b/gallery/qwen3-openbuddy.yaml index 754d730d7..1af782a2c 100644 --- a/gallery/qwen3-openbuddy.yaml +++ b/gallery/qwen3-openbuddy.yaml @@ -3,6 +3,7 @@ name: "qwen3-openbuddy" config_file: | mmap: true + backend: "llama-cpp" template: chat_message: | <|role|>{{ .RoleName }}<|says|> diff --git a/gallery/qwen3.yaml b/gallery/qwen3.yaml index aef6c109c..1d2eb05dc 100644 --- a/gallery/qwen3.yaml +++ b/gallery/qwen3.yaml @@ -3,6 +3,7 @@ name: "qwen3" config_file: | mmap: true + backend: "llama-cpp" template: chat_message: | <|im_start|>{{ .RoleName }} diff --git a/gallery/rwkv.yaml b/gallery/rwkv.yaml index 686937997..3750db974 100644 --- a/gallery/rwkv.yaml +++ b/gallery/rwkv.yaml @@ -2,6 +2,7 @@ name: "rwkv" config_file: | + backend: "llama-cpp" parameters: top_k: 80 temperature: 0.9 diff --git a/gallery/smolvlm.yaml b/gallery/smolvlm.yaml index 2c4ef47e3..a3fddcc6c 100644 --- a/gallery/smolvlm.yaml +++ b/gallery/smolvlm.yaml @@ -2,6 +2,7 @@ name: smolvlm # yamllint disable-line rule:trailing-spaces config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/tuluv2.yaml b/gallery/tuluv2.yaml index ca2785a23..d716879a9 100644 --- a/gallery/tuluv2.yaml +++ b/gallery/tuluv2.yaml @@ -2,6 +2,7 @@ name: "tuluv2" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: | diff --git a/gallery/vllm.yaml b/gallery/vllm.yaml index f0b797cc7..852db148c 100644 --- a/gallery/vllm.yaml +++ b/gallery/vllm.yaml @@ -2,6 +2,7 @@ name: "vllm" config_file: | + backend: vllm context_size: 8192 parameters: max_tokens: 8192 diff --git a/gallery/wizardlm2.yaml b/gallery/wizardlm2.yaml index 6c2c14115..6c074b783 100644 --- a/gallery/wizardlm2.yaml +++ b/gallery/wizardlm2.yaml @@ -2,6 +2,7 @@ name: "wizardlm2" config_file: | + backend: "llama-cpp" mmap: true template: chat_message: |- diff --git a/go.mod b/go.mod index ef137346a..a6da77492 100644 --- a/go.mod +++ b/go.mod @@ -6,16 +6,12 @@ toolchain go1.23.1 require ( dario.cat/mergo v1.0.1 - github.com/GeertJohan/go.rice v1.0.3 github.com/Masterminds/sprig/v3 v3.3.0 github.com/alecthomas/kong v0.9.0 - github.com/census-instrumentation/opencensus-proto v0.4.1 github.com/charmbracelet/glamour v0.7.0 github.com/chasefleming/elem-go v0.26.0 - github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 github.com/containerd/containerd v1.7.19 github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 - github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad github.com/go-audio/wav v1.1.0 @@ -25,11 +21,9 @@ require ( github.com/gofiber/template/html/v2 v2.1.2 github.com/gofiber/websocket/v2 v2.2.1 github.com/gofrs/flock v0.12.1 - github.com/golang/protobuf v1.5.4 github.com/google/go-containerregistry v0.19.2 github.com/google/uuid v1.6.0 github.com/gpustack/gguf-parser-go v0.17.0 - github.com/grpc-ecosystem/grpc-gateway v1.5.0 github.com/hpcloud/tail v1.0.0 github.com/ipfs/go-log v1.0.5 github.com/jaypipes/ghw v0.12.0 @@ -43,7 +37,6 @@ require ( github.com/nikolalohinski/gonja/v2 v2.3.2 github.com/onsi/ginkgo/v2 v2.22.2 github.com/onsi/gomega v1.36.2 - github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e github.com/otiai10/openaigo v1.7.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.20.5 @@ -62,7 +55,6 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.50.0 go.opentelemetry.io/otel/metric v1.34.0 go.opentelemetry.io/otel/sdk/metric v1.28.0 - google.golang.org/api v0.180.0 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.36.5 gopkg.in/yaml.v2 v2.4.0 @@ -71,22 +63,13 @@ require ( ) require ( - cel.dev/expr v0.16.0 // indirect - cloud.google.com/go/auth v0.4.1 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect github.com/containerd/platforms v0.2.1 // indirect github.com/cpuguy83/dockercfg v0.3.2 // indirect - github.com/daaku/go.zipexe v1.0.2 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect github.com/fasthttp/websocket v1.5.8 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect - github.com/google/s2a-go v0.1.7 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.12.4 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect @@ -125,9 +108,7 @@ require ( go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect go.uber.org/mock v0.5.0 // indirect - golang.org/x/oauth2 v0.24.0 // indirect golang.org/x/time v0.8.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect ) require ( @@ -268,7 +249,7 @@ require ( github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/philhofer/fwd v1.1.2 // indirect github.com/pierrec/lz4/v4 v4.1.2 // indirect - github.com/pkg/errors v0.9.1 // indirect + github.com/pkg/errors v0.9.1 github.com/pkoukk/tiktoken-go v0.1.6 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.89.0 // indirect diff --git a/go.sum b/go.sum index 935126b8e..1ba732dcd 100644 --- a/go.sum +++ b/go.sum @@ -1,15 +1,7 @@ -cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y= -cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo= -cloud.google.com/go/auth v0.4.1 h1:Z7YNIhlWRtrnKlZke7z3GMqzvuYzdc2z98F9D1NV5Hg= -cloud.google.com/go/auth v0.4.1/go.mod h1:QVBuVEKpCn4Zp58hzRGvL0tjRGU0YqdRTdCHM1IHnro= -cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= -cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= -cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU= @@ -23,9 +15,6 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= -github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0= -github.com/GeertJohan/go.rice v1.0.3 h1:k5viR+xGtIhF61125vCE1cmJ5957RQGXG6dmbaWZSmI= -github.com/GeertJohan/go.rice v1.0.3/go.mod h1:XVdrU4pW00M4ikZed5q56tPf1v2KwnIKeIdc9CBYNt4= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= @@ -42,7 +31,6 @@ github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZ github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= -github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c= github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264= @@ -73,8 +61,6 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= -github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= @@ -84,8 +70,6 @@ github.com/chasefleming/elem-go v0.26.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= -github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= @@ -116,8 +100,6 @@ github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0 github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/daaku/go.zipexe v1.0.2 h1:Zg55YLYTr7M9wjKn8SY/WcpuuEi+kR2u4E8RhvpyXmk= -github.com/daaku/go.zipexe v1.0.2/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8= github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0= github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -155,14 +137,10 @@ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+m github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= -github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk= -github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= -github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= github.com/fasthttp/websocket v1.5.8 h1:k5DpirKkftIF/w1R8ZzjSgARJrs54Je9YJK37DL/Ah8= github.com/fasthttp/websocket v1.5.8/go.mod h1:d08g8WaT6nnyvg9uMm8K9zMYyDjfKyj3170AtPRuVU0= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -252,8 +230,6 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -282,18 +258,12 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc= github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= -github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= -github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= -github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= -github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -545,7 +515,6 @@ github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJE github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY= github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg= -github.com/nkovacs/streamquote v1.0.0/go.mod h1:BN+NaZ2CmdKqUuTUXUEm9j95B2TRbpOWpxbJYzzgUsc= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= @@ -568,8 +537,6 @@ github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/ github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= -github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw= -github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg= @@ -785,7 +752,6 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8= github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM= -github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= @@ -929,8 +895,6 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= -golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1050,8 +1014,6 @@ gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y= -google.golang.org/api v0.180.0 h1:M2D87Yo0rGBPWpo1orwfCLehUUL6E7/TYe5gvMQWDh4= -google.golang.org/api v0.180.0/go.mod h1:51AiyoEg1MJPSZ9zvklA8VnRILPXxn1iVen9v25XHAE= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1064,7 +1026,6 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= -google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 h1:T6rh4haD3GVYsgEfWExoCZA2o2FmbNyKpTuAxbEFPTg= google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9/go.mod h1:wp2WsuBYj6j8wUdo3ToZsdxxixbvQNAHqVJrTgi5E5M= google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 h1:QCqS/PdaHTSWGvupk2F/ehwHtGc0/GYkT+3GAcR1CCc= diff --git a/main.go b/main.go index 380e8f5b1..3c8615952 100644 --- a/main.go +++ b/main.go @@ -112,9 +112,6 @@ Version: ${version} log.Trace().Msg("Setting logging to trace") } - // Populate the application with the embedded backend assets - cli.CLI.Context.BackendAssets = backendAssets - // Run the thing! err = ctx.Run(&cli.CLI.Context) if err != nil { diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go deleted file mode 100644 index 8c1a6be68..000000000 --- a/pkg/assets/extract.go +++ /dev/null @@ -1,64 +0,0 @@ -package assets - -import ( - "fmt" - "os" - "path/filepath" - - rice "github.com/GeertJohan/go.rice" - "github.com/mudler/LocalAI/pkg/library" -) - -const backendAssetsDir = "backend-assets" - -func ResolvePath(dir string, paths ...string) string { - return filepath.Join(append([]string{dir, backendAssetsDir}, paths...)...) -} - -func ExtractFiles(content *rice.Box, extractDir string) error { - // Create the target directory with backend-assets subdirectory - backendAssetsDir := filepath.Join(extractDir, backendAssetsDir) - err := os.MkdirAll(backendAssetsDir, 0750) - if err != nil { - return fmt.Errorf("failed to create directory: %v", err) - } - - // Walk through the rice box and extract files - err = content.Walk("", func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - // Reconstruct the directory structure in the target directory - targetFile := filepath.Join(backendAssetsDir, path) - if info.IsDir() { - // Create the directory in the target directory - err := os.MkdirAll(targetFile, 0750) - if err != nil { - return fmt.Errorf("failed to create directory: %v", err) - } - return nil - } - - // Read the file from the rice box - fileData, err := content.Bytes(path) - if err != nil { - return fmt.Errorf("failed to read file: %v", err) - } - - // Create the file in the target directory - err = os.WriteFile(targetFile, fileData, 0700) - if err != nil { - return fmt.Errorf("failed to write file: %v", err) - } - - return nil - }) - - // If there is a lib directory, set LD_LIBRARY_PATH to include it - // we might use this mechanism to carry over e.g. Nvidia CUDA libraries - // from the embedded FS to the target directory - library.LoadExtractedLibs(backendAssetsDir) - - return err -} diff --git a/pkg/assets/list.go b/pkg/assets/list.go deleted file mode 100644 index edfdf4985..000000000 --- a/pkg/assets/list.go +++ /dev/null @@ -1,27 +0,0 @@ -package assets - -import ( - "os" - - rice "github.com/GeertJohan/go.rice" - "github.com/rs/zerolog/log" -) - -func ListFiles(content *rice.Box) (files []string) { - err := content.Walk("", func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - - files = append(files, path) - return nil - }) - if err != nil { - log.Error().Err(err).Msg("error walking the rice box") - } - return -} diff --git a/pkg/library/dynaload.go b/pkg/library/dynaload.go deleted file mode 100644 index 878cdc881..000000000 --- a/pkg/library/dynaload.go +++ /dev/null @@ -1,86 +0,0 @@ -package library - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "runtime" - - "github.com/rs/zerolog/log" -) - -/* - This file contains functions to load libraries from the asset directory to keep the business logic clean. -*/ - -// skipLibraryPath checks if LOCALAI_SKIP_LIBRARY_PATH is set -var skipLibraryPath = os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != "" - -// LoadExtractedLibs loads the extracted libraries from the asset dir -func LoadExtractedLibs(dir string) error { - // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set - if skipLibraryPath { - return nil - } - - var err error = nil - for _, libDir := range []string{filepath.Join(dir, "lib"), filepath.Join(dir, "lib")} { - err = errors.Join(err, LoadExternal(libDir)) - } - return err -} - -// LoadLDSO checks if there is a ld.so in the asset dir and if so, prefixes the grpc process with it. -// In linux, if we find a ld.so in the asset dir we prefix it to run with the libs exposed in -// LD_LIBRARY_PATH for more compatibility -// If we don't do this, we might run into stack smash -// See also: https://stackoverflow.com/questions/847179/multiple-glibc-libraries-on-a-single-host/851229#851229 -// In this case, we expect a ld.so in the lib asset dir. -// If that's present, we use it to run the grpc backends as supposedly built against -// that specific version of ld.so -func LoadLDSO(assetDir string, args []string, grpcProcess string) ([]string, string) { - if skipLibraryPath { - return args, grpcProcess - } - - if runtime.GOOS != "linux" { - return args, grpcProcess - } - - // Check if there is a ld.so file in the assetDir, if it does, we need to run the grpc process with it - ldPath := filepath.Join(assetDir, "backend-assets", "lib", "ld.so") - if _, err := os.Stat(ldPath); err == nil { - log.Debug().Msgf("ld.so found") - // We need to run the grpc process with the ld.so - args = append([]string{grpcProcess}, args...) - grpcProcess = ldPath - } - - return args, grpcProcess -} - -// LoadExternal sets the LD_LIBRARY_PATH to include the given directory -func LoadExternal(dir string) error { - // Skip this if LOCALAI_SKIP_LIBRARY_PATH is set - if skipLibraryPath { - return nil - } - - lpathVar := "LD_LIBRARY_PATH" - if runtime.GOOS == "darwin" { - lpathVar = "DYLD_FALLBACK_LIBRARY_PATH" // should it be DYLD_LIBRARY_PATH ? - } - - var setErr error = nil - if _, err := os.Stat(dir); err == nil { - ldLibraryPath := os.Getenv(lpathVar) - if ldLibraryPath == "" { - ldLibraryPath = dir - } else { - ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, dir) - } - setErr = errors.Join(setErr, os.Setenv(lpathVar, ldLibraryPath)) - } - return setErr -} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index c54fbdcc3..dc60f98d2 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -5,18 +5,12 @@ import ( "errors" "fmt" "os" - "path/filepath" - "slices" "strings" "time" grpc "github.com/mudler/LocalAI/pkg/grpc" - "github.com/mudler/LocalAI/pkg/library" - "github.com/mudler/LocalAI/pkg/utils" "github.com/phayes/freeport" "github.com/rs/zerolog/log" - - "github.com/elliotchance/orderedmap/v2" ) const ( @@ -51,79 +45,6 @@ const ( LocalStoreBackend = "local-store" ) -func backendPath(assetDir, backend string) string { - return filepath.Join(assetDir, "backend-assets", "grpc", backend) -} - -// backendsInAssetDir returns the list of backends in the asset directory -// that should be loaded -func backendsInAssetDir(assetDir string) (map[string][]string, error) { - // Exclude backends from automatic loading - excludeBackends := []string{LocalStoreBackend} - entry, err := os.ReadDir(backendPath(assetDir, "")) - if err != nil { - return nil, err - } - backends := make(map[string][]string) -ENTRY: - for _, e := range entry { - for _, exclude := range excludeBackends { - if e.Name() == exclude { - continue ENTRY - } - } - if e.IsDir() { - continue - } - if strings.HasSuffix(e.Name(), ".log") { - continue - } - - backends[e.Name()] = []string{} - } - - return backends, nil -} - -func orderBackends(backends map[string][]string) ([]string, error) { - // order backends from the asset directory. - // as we scan for backends, we want to keep some order which backends are tried of. - // for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last. - - // sets a priority list - first has more priority - priorityList := []string{} - - toTheEnd := []string{ - // last has to be huggingface - LCHuggingFaceBackend, - } - - // create an ordered map - orderedBackends := orderedmap.NewOrderedMap[string, any]() - // add priorityList first - for _, p := range priorityList { - if _, ok := backends[p]; ok { - orderedBackends.Set(p, backends[p]) - } - } - - for k, v := range backends { - if !slices.Contains(toTheEnd, k) { - if _, ok := orderedBackends.Get(k); !ok { - orderedBackends.Set(k, v) - } - } - } - - for _, t := range toTheEnd { - if _, ok := backends[t]; ok { - orderedBackends.Set(t, backends[t]) - } - } - - return orderedBackends.Keys(), nil -} - // starts the grpcModelProcess for the backend, and returns a grpc client // It also loads the model func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string, string) (*Model, error) { @@ -177,35 +98,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string client = NewModel(modelID, uri, nil) } } else { - grpcProcess := backendPath(o.assetDir, backend) - if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil { - return nil, fmt.Errorf("referring to a backend not in asset dir: %s", err.Error()) - } - - // Check if the file exists - if _, err := os.Stat(grpcProcess); os.IsNotExist(err) { - return nil, fmt.Errorf("backend not found: %s", grpcProcess) - } - - serverAddress, err := getFreeAddress() - if err != nil { - return nil, fmt.Errorf("failed allocating free ports: %s", err.Error()) - } - - args := []string{} - - // Load the ld.so if it exists - args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess) - - // Make sure the process is executable in any circumstance - process, err := ml.startProcess(grpcProcess, modelID, serverAddress, args...) - if err != nil { - return nil, err - } - - log.Debug().Msgf("GRPC Service Started") - - client = NewModel(modelID, serverAddress, process) + return nil, fmt.Errorf("backend not found: %s", backend) } log.Debug().Msgf("Wait for the service to start up") @@ -259,14 +152,6 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string } } -func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) { - backends, err := backendsInAssetDir(assetdir) - if err != nil { - return nil, err - } - return orderBackends(backends) -} - func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err error) { o := NewOptions(opts...) @@ -346,17 +231,18 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) { var err error // get backends embedded in the binary - autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir) - if err != nil { - ml.Close() // we failed, release the lock - return nil, err - } + autoLoadBackends := []string{} // append externalBackends supplied by the user via the CLI for b := range ml.GetAllExternalBackends(o) { autoLoadBackends = append(autoLoadBackends, b) } + if len(autoLoadBackends) == 0 { + log.Error().Msg("No backends found") + return nil, fmt.Errorf("no backends found") + } + log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends) log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.modelID, autoLoadBackends) diff --git a/pkg/model/loader_options.go b/pkg/model/loader_options.go index 28a7c598f..16df2b9bd 100644 --- a/pkg/model/loader_options.go +++ b/pkg/model/loader_options.go @@ -10,7 +10,6 @@ type Options struct { backendString string model string modelID string - assetDir string context context.Context gRPCOptions *pb.ModelOptions @@ -75,12 +74,6 @@ func WithLoadGRPCLoadModelOpts(opts *pb.ModelOptions) Option { } } -func WithAssetDir(assetDir string) Option { - return func(o *Options) { - o.assetDir = assetDir - } -} - func WithContext(ctx context.Context) Option { return func(o *Options) { o.context = ctx diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go index 5484a79ca..dfe992c1d 100644 --- a/tests/integration/stores_test.go +++ b/tests/integration/stores_test.go @@ -2,11 +2,9 @@ package integration_test import ( "context" - "embed" "math" "math/rand" "os" - "path/filepath" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -14,15 +12,11 @@ import ( "github.com/rs/zerolog/log" "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/pkg/assets" "github.com/mudler/LocalAI/pkg/grpc" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/store" ) -//go:embed backend-assets/* -var backendAssets embed.FS - func normalize(vecs [][]float32) { for i, k := range vecs { norm := float64(0) @@ -49,12 +43,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs" tmpdir, err = os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) - backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0750) - Expect(err).ToNot(HaveOccurred()) - - err = assets.ExtractFiles(backendAssets, backendAssetsDir) - Expect(err).ToNot(HaveOccurred()) debug := true @@ -66,7 +54,6 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs" storeOpts := []model.Option{ model.WithBackendString(bc.Backend), - model.WithAssetDir(backendAssetsDir), model.WithModel("test"), }