feat: do not bundle llama-cpp anymore (#5790)
* Build llama.cpp separately
* WIP
* WIP
* WIP
* Start to try to attach some tests
* Add git and small fixups
* fix: correctly autoload external backends
* Try to run AIO tests
* Slightly update the Makefile helps
* Adapt auto-bumper
* Try to run linux test
* Add llama-cpp into build pipelines
* Add default capability (for cpu)
* Drop llama-cpp specific logic from the backend loader
* drop grpc install in ci for tests
* fixups
* Pass by backends path for tests
* Build protogen at start
* fix(tests): set backends path consistently
* Correctly configure the backends path
* Try to build for darwin
* WIP
* Compile for metal on arm64/darwin
* Try to run build off from cross-arch
* Add to the backend index nvidia-l4t and cpu's llama-cpp backends
* Build also darwin-x86 for llama-cpp
* Disable arm64 builds temporary
* Test backend build on PR
* Fixup build backend reusable workflow
* pass by skip drivers
* Use crane
* Skip drivers
* Fixups
* x86 darwin
* Add packaging step for llama.cpp
* fixups
* Fix leftover from bark-cpp extraction
* Try to fix hipblas build

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Committed by: GitHub
Parent: 932f6b01a6
Commit: 294f7022f3
@@ -3,7 +3,9 @@
.vscode
.devcontainer
models
backends
examples/chatbot-ui/models
backend/go/image/stablediffusion-ggml/build/
examples/rwkv/models
examples/**/models
Dockerfile*
@@ -14,4 +16,4 @@ __pycache__

# backend virtual environments
**/venv
backend/python/**/source
backend/python/**/source
9 .github/bump_deps.sh (vendored)
@@ -3,15 +3,20 @@ set -xe
REPO=$1
BRANCH=$2
VAR=$3
FILE=$4

if [ -z "$FILE" ]; then
FILE="Makefile"
fi

LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" $FILE | cut -d'=' -f2)"
set -e

sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
sed -i $FILE -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"

if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
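The optional fourth argument lets the bumper edit version pins that no longer live in the top-level Makefile. A minimal sketch of how the CI matrix below is expected to call it (the llama.cpp entry uses the LLAMA_VERSION variable and the new backend/cpp/llama-cpp/Makefile introduced by this change; omitting the argument keeps the old behaviour of editing Makefile):

# Sketch: bump the llama.cpp pin tracked by the backend's own Makefile
bash .github/bump_deps.sh ggml-org/llama.cpp master LLAMA_VERSION backend/cpp/llama-cpp/Makefile
# Without a fourth argument the script falls back to the top-level Makefile
bash .github/bump_deps.sh ggml-org/whisper.cpp master WHISPER_CPP_VERSION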
313 .github/workflows/backend.yml (vendored)
@@ -7,7 +7,6 @@ on:
- master
tags:
- '*'
#pull_request:

concurrency:
group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -26,8 +25,9 @@ jobs:
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
backend: ${{ matrix.backend }}
dockerfile: $${ matrix.dockerfile }}
context: $${ matrix.context }}
dockerfile: ${{ matrix.dockerfile }}
skip-drivers: ${{ matrix.skip-drivers }}
context: ${{ matrix.context }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -47,9 +47,22 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'true'
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
@@ -58,6 +71,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "vllm"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -69,6 +83,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -80,6 +95,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -92,6 +108,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -103,6 +120,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -114,6 +132,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "coqui"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -125,6 +144,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -136,6 +156,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "chatterbox"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -148,9 +169,22 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'true'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
@@ -159,6 +193,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "vllm"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -170,6 +205,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -181,6 +217,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -193,6 +230,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -204,6 +242,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -215,6 +254,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "coqui"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -226,6 +266,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -237,6 +278,7 @@ jobs:
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "chatterbox"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -249,9 +291,22 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'true'
|
||||
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -260,6 +315,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-vllm'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "vllm"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -271,6 +327,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -282,6 +339,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-diffusers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -294,6 +352,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -305,6 +364,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -316,6 +376,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-coqui'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "coqui"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -327,6 +388,7 @@ jobs:
|
||||
tag-suffix: '-gpu-rocm-hipblas-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -339,6 +401,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -350,9 +413,34 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "rerankers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'true'
|
||||
tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'true'
|
||||
tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -361,6 +449,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-vllm'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "vllm"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -372,6 +461,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-vllm'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "vllm"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -383,6 +473,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -394,6 +485,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-transformers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "transformers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -405,6 +497,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "diffusers"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -417,6 +510,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -428,6 +522,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "kokoro"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -439,6 +534,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -450,6 +546,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -461,6 +558,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-coqui'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "coqui"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -472,6 +570,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-coqui'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "coqui"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -483,6 +582,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f32-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -494,6 +594,7 @@ jobs:
|
||||
tag-suffix: '-gpu-intel-sycl-f16-bark'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
skip-drivers: 'false'
|
||||
backend: "bark"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./backend"
|
||||
@@ -506,6 +607,208 @@ jobs:
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "bark"
skip-drivers: 'false'
backend: "bark-cpp"
dockerfile: "./backend/Dockerfile.go"
context: "./"
context: "./"
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'true'
tag-suffix: '-cpu-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp"
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp"
context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
matrix:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
- name: Upload llama-cpp.tar
uses: actions/upload-artifact@v4
with:
name: llama-cpp-tar
path: build/llama-cpp.tar
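The tarball produced here is the same artifact the macOS test job later in this change installs directly into a locally built binary. A rough local equivalent of this job, assuming the Homebrew dependencies listed above are present:

# Sketch of reproducing this job locally on an Apple Silicon machine
make protogen-go
make build-api
bash scripts/build-llama-cpp-darwin.sh    # writes build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
# Install the packaged backend into the freshly built binary (same command the macOS test job uses)
./local-ai backends install "ocifile://$PWD/build/llama-cpp.tar"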
llama-cpp-darwin-publish:
|
||||
needs: llama-cpp-darwin
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download llama-cpp.tar
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: llama-cpp-tar
|
||||
path: .
|
||||
- name: Install crane
|
||||
run: |
|
||||
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
|
||||
sudo mv crane /usr/local/bin/
|
||||
- name: Log in to DockerHub
|
||||
run: |
|
||||
echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
|
||||
- name: Log in to quay.io
|
||||
run: |
|
||||
echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
localai/localai-backends
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=auto
|
||||
suffix=metal-darwin-arm64-llama-cpp,onlatest=true
|
||||
- name: Docker meta
|
||||
id: quaymeta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
quay.io/go-skynet/local-ai-backends
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=auto
|
||||
suffix=metal-darwin-arm64-llama-cpp,onlatest=true
|
||||
- name: Push Docker image (DockerHub)
|
||||
run: |
|
||||
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
|
||||
crane push llama-cpp.tar $tag
|
||||
done
|
||||
- name: Push Docker image (Quay)
|
||||
run: |
|
||||
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
|
||||
crane push llama-cpp.tar $tag
|
||||
done
|
||||
llama-cpp-darwin-x86:
|
||||
runs-on: macos-13
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
cache: false
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||
- name: Build llama-cpp-darwin
|
||||
run: |
|
||||
make protogen-go
|
||||
make build-api
|
||||
export PLATFORMARCH=darwin/amd64
|
||||
bash scripts/build-llama-cpp-darwin.sh
|
||||
ls -la build/darwin.tar
|
||||
mv build/darwin.tar build/llama-cpp.tar
|
||||
- name: Upload llama-cpp.tar
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: llama-cpp-tar-x86
|
||||
path: build/llama-cpp.tar
|
||||
llama-cpp-darwin-x86-publish:
|
||||
needs: llama-cpp-darwin-x86
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download llama-cpp.tar
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: llama-cpp-tar-x86
|
||||
path: .
|
||||
- name: Install crane
|
||||
run: |
|
||||
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
|
||||
sudo mv crane /usr/local/bin/
|
||||
- name: Log in to DockerHub
|
||||
run: |
|
||||
echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
|
||||
- name: Log in to quay.io
|
||||
run: |
|
||||
echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
localai/localai-backends
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=auto
|
||||
suffix=darwin-x86-llama-cpp,onlatest=true
|
||||
- name: Docker meta
|
||||
id: quaymeta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
quay.io/go-skynet/local-ai-backends
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=auto
|
||||
suffix=darwin-x86-llama-cpp,onlatest=true
|
||||
- name: Push Docker image (DockerHub)
|
||||
run: |
|
||||
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
|
||||
crane push llama-cpp.tar $tag
|
||||
done
|
||||
- name: Push Docker image (Quay)
|
||||
run: |
|
||||
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
|
||||
crane push llama-cpp.tar $tag
|
||||
done
|
||||
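The publish jobs above push the darwin tarballs to localai/localai-backends and quay.io/go-skynet/local-ai-backends with crane, using suffixes such as metal-darwin-arm64-llama-cpp and darwin-x86-llama-cpp. In the other direction a published backend can presumably be fetched with crane pull and fed to the ocifile installer; the tag below is illustrative only (real tags come from the metadata-action output) and this assumes the installer accepts the pulled tarball format:

# Sketch: fetch a published llama-cpp backend and install it into a local binary
# Tag name is illustrative; check the registry for the tags actually published
crane pull quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp llama-cpp.tar
./local-ai backends install "ocifile://$PWD/llama-cpp.tar"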
14 .github/workflows/backend_build.yml (vendored)
@@ -49,6 +49,10 @@ on:
description: 'Build Dockerfile'
required: true
type: string
skip-drivers:
description: 'Skip drivers'
default: 'false'
type: string
secrets:
dockerUsername:
required: true
@@ -197,12 +201,13 @@ jobs:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
context: ./backend
file: ./backend/Dockerfile.python
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
@@ -216,12 +221,13 @@ jobs:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
context: ./backend
file: ./backend/Dockerfile.python
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: true
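The reusable workflow now forwards dockerfile, context and the new skip-drivers input instead of hard-coding the Python backend paths. A hedged sketch of building one of the matrix entries by hand with the same build args (the image tag is illustrative):

# Sketch: local equivalent of the cublas/CUDA 12 llama-cpp matrix entry
docker build \
  --build-arg BUILD_TYPE=cublas \
  --build-arg CUDA_MAJOR_VERSION=12 \
  --build-arg CUDA_MINOR_VERSION=0 \
  --build-arg BASE_IMAGE=ubuntu:22.04 \
  --build-arg SKIP_DRIVERS=false \
  --build-arg BACKEND=llama-cpp \
  -f backend/Dockerfile.llama-cpp \
  -t localai-backend-llama-cpp:dev \
  .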
10 .github/workflows/bump_deps.yaml (vendored)
@@ -10,30 +10,36 @@ jobs:
matrix:
include:
- repository: "ggml-org/llama.cpp"
variable: "CPPLLAMA_VERSION"
variable: "LLAMA_VERSION"
branch: "master"
file: "backend/cpp/llama-cpp/Makefile"
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
file: "Makefile"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
file: "Makefile"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master"
file: "Makefile"
- repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION"
branch: "master"
file: "Makefile"
- repository: "mudler/go-piper"
variable: "PIPER_VERSION"
branch: "master"
file: "Makefile"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
{
echo 'message<<EOF'
cat "${{ matrix.variable }}_message.txt"
239 .github/workflows/release.yaml (vendored)
@@ -20,115 +20,140 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
|
||||
build-linux-arm:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||
make install-go-tools
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-4
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: grpc
|
||||
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
|
||||
- name: Build grpc
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
# TODO: temporary disable linux-arm64 build
|
||||
# build-linux-arm:
|
||||
# runs-on: ubuntu-24.04-arm
|
||||
# steps:
|
||||
# - name: Free Disk Space (Ubuntu)
|
||||
# uses: jlumbroso/free-disk-space@main
|
||||
# with:
|
||||
# # this might remove tools that are actually needed,
|
||||
# # if set to "true" but frees about 6 GB
|
||||
# tool-cache: true
|
||||
# # all of these default to true, but feel free to set to
|
||||
# # "false" if necessary for your workflow
|
||||
# android: true
|
||||
# dotnet: true
|
||||
# haskell: true
|
||||
# large-packages: true
|
||||
# docker-images: true
|
||||
# swap-storage: true
|
||||
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5 --output-sync=target
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
GNU_HOST=aarch64-linux-gnu
|
||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||
# - name: Release space from worker
|
||||
# run: |
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# df -h
|
||||
# echo
|
||||
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
# sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
|
||||
# sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
|
||||
# sudo rm -rf /usr/local/lib/android
|
||||
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
# sudo rm -rf /usr/share/dotnet
|
||||
# sudo apt-get remove -y '^mono-.*' || true
|
||||
# sudo apt-get remove -y '^ghc-.*' || true
|
||||
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
# sudo apt-get remove -y 'php.*' || true
|
||||
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
# sudo apt-get remove -y '^google-.*' || true
|
||||
# sudo apt-get remove -y azure-cli || true
|
||||
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
# sudo apt-get remove -y '^gfortran-.*' || true
|
||||
# sudo apt-get remove -y microsoft-edge-stable || true
|
||||
# sudo apt-get remove -y firefox || true
|
||||
# sudo apt-get remove -y powershell || true
|
||||
# sudo apt-get remove -y r-base-core || true
|
||||
# sudo apt-get autoremove -y
|
||||
# sudo apt-get clean
|
||||
# echo
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# sudo rm -rfv build || true
|
||||
# sudo rm -rf /usr/share/dotnet || true
|
||||
# sudo rm -rf /opt/ghc || true
|
||||
# sudo rm -rf "/usr/local/share/boost" || true
|
||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||
# df -h
|
||||
|
||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||
|
||||
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
|
||||
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
|
||||
GRPC_DIR=$PWD/grpc
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
|
||||
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
|
||||
mkdir -p $GRPC_CROSS_BUILD_DIR && \
|
||||
cd $GRPC_CROSS_BUILD_DIR && \
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
|
||||
../.. && \
|
||||
sudo make -j`nproc` install
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
GNU_HOST=aarch64-linux-gnu
|
||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||
|
||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
||||
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
||||
GOOS=linux \
|
||||
GOARCH=arm64 \
|
||||
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-linux-arm64
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
# - name: Force Install GIT latest
|
||||
# run: |
|
||||
# sudo apt-get update \
|
||||
# && sudo apt-get install -y software-properties-common \
|
||||
# && sudo apt-get update \
|
||||
# && sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
# && sudo apt-get update \
|
||||
# && sudo apt-get install -y git
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: '1.21.x'
|
||||
# cache: false
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||
# make install-go-tools
|
||||
# - name: Install CUDA Dependencies
|
||||
# run: |
|
||||
# curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
# sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
# env:
|
||||
# CUDA_VERSION: 12-5
|
||||
# - name: Cache grpc
|
||||
# id: cache-grpc
|
||||
# uses: actions/cache@v4
|
||||
# with:
|
||||
# path: grpc
|
||||
# key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
|
||||
# - name: Build grpc
|
||||
# if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
# run: |
|
||||
# git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
# cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||
# cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
# -DgRPC_BUILD_TESTS=OFF \
|
||||
# ../.. && sudo make --jobs 5 --output-sync=target
|
||||
# - name: Install gRPC
|
||||
# run: |
|
||||
# cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
||||
# # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
|
||||
# - name: Build
|
||||
# id: build
|
||||
# run: |
|
||||
# go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
# go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
# export PATH=$PATH:$GOPATH/bin
|
||||
# export PATH=/usr/local/cuda/bin:$PATH
|
||||
# sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
|
||||
# BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
|
||||
# make -j4 dist
|
||||
# - uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: LocalAI-linux-arm64
|
||||
# path: release/
|
||||
# - name: Release
|
||||
# uses: softprops/action-gh-release@v2
|
||||
# if: startsWith(github.ref, 'refs/tags/')
|
||||
# with:
|
||||
# files: |
|
||||
# release/*
|
||||
# - name: Setup tmate session if tests fail
|
||||
# if: ${{ failure() }}
|
||||
# uses: mxschmitt/action-tmate@v3.22
|
||||
# with:
|
||||
# detached: true
|
||||
# connect-timeout-seconds: 180
|
||||
# limit-access-to-actor: true
|
||||
build-linux:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
57 .github/workflows/test.yml (vendored)
@@ -67,18 +67,21 @@ jobs:
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Proto Dependencies
|
||||
run: |
|
||||
# Install protoc
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||
sudo apt-get install -y libgmock-dev clang
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||
@@ -94,9 +97,6 @@ jobs:
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
|
||||
# The python3-grpc-tools package in 22.04 is too old
|
||||
pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
||||
@@ -107,25 +107,10 @@ jobs:
|
||||
make sources/go-piper && \
|
||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||
|
||||
make backends/llama-cpp
|
||||
env:
|
||||
CUDA_VERSION: 12-4
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: grpc
|
||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
||||
- name: Build grpc
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
|
||||
cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||
- name: Test
|
||||
run: |
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||
@@ -186,14 +171,9 @@ jobs:
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
- name: Build images
|
||||
run: |
|
||||
docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||
- name: Test
|
||||
run: |
|
||||
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||
make run-e2e-aio
|
||||
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp docker-build-aio e2e-aio
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
@@ -225,6 +205,14 @@ jobs:
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
|
||||
go install github.com/GeertJohan/go.rice/rice@latest
|
||||
- name: Build llama-cpp-darwin
|
||||
run: |
|
||||
make protogen-go
|
||||
make build-api
|
||||
bash scripts/build-llama-cpp-darwin.sh
|
||||
ls -la build/darwin.tar
|
||||
mv build/darwin.tar build/llama-cpp.tar
|
||||
./local-ai backends install "ocifile://$PWD/build/llama-cpp.tar"
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
@@ -232,7 +220,8 @@ jobs:
|
||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||
PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.22
|
||||
|
||||
8 .gitignore (vendored)
@@ -5,9 +5,11 @@ __pycache__/
*.o
get-sources
prepare-sources
/backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp
/backend/cpp/llama-cpp/grpc-server
/backend/cpp/llama-cpp/llama.cpp
/backend/cpp/llama-*
!backend/cpp/llama-cpp
/backends

*.log

@@ -56,4 +58,4 @@ docs/static/gallery.html
**/venv

# per-developer customization files for the development container
.devcontainer/customization/*
.devcontainer/customization/*
13 Dockerfile
@@ -25,6 +25,7 @@ ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}

RUN mkdir -p /run/localai
RUN echo "default" > /run/localai/capability

# Vulkan requirements
RUN <<EOT bash
@@ -299,11 +300,7 @@ COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
RUN make backend-assets
RUN make prepare
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make grpcs; \
else \
make grpcs; \
fi
RUN make grpcs

# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
@@ -316,11 +313,7 @@ COPY . .
## Build the binary
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
## Otherwise just run the normal build
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
fi
RUN make build

RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
289 Makefile
@@ -5,9 +5,6 @@ BINARY_NAME=local-ai
|
||||
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
CPPLLAMA_VERSION?=d6fb3f6b49b27ef1c0f4cf5128e041f7e7dc03af
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
|
||||
@@ -16,10 +13,6 @@ WHISPER_CPP_VERSION?=032697b9a850dc2615555e2a93a683cc3dd58559
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||
|
||||
# bark.cpp
|
||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||
BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
||||
@@ -225,12 +218,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
|
||||
ifeq ($(ONNX_OS),linux)
|
||||
@@ -261,23 +248,6 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
cd sources/bark.cpp && \
|
||||
git checkout $(BARKCPP_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
||||
cd sources/bark.cpp && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake $(CMAKE_ARGS) .. && \
|
||||
cmake --build . --config Release
|
||||
|
||||
backend/go/bark-cpp/libbark.a: sources/bark.cpp/build/libbark.a
|
||||
$(MAKE) -C backend/go/bark-cpp libbark.a
|
||||
|
||||
## go-piper
|
||||
sources/go-piper:
|
||||
mkdir -p sources/go-piper
|
||||
@@ -333,7 +303,7 @@ sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
|
||||
cd sources/whisper.cpp/build && cmake --build . --config Release
|
||||
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/whisper.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
@@ -365,10 +335,7 @@ clean: ## Remove build related file
|
||||
rm -rf release/
|
||||
rm -rf backend-assets/*
|
||||
$(MAKE) -C backend/cpp/grpc clean
|
||||
$(MAKE) -C backend/go/bark-cpp clean
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
|
||||
rm -rf backend/cpp/llama-* || true
|
||||
$(MAKE) dropreplace
|
||||
$(MAKE) protogen-clean
|
||||
rmdir pkg/grpc/proto || true
|
||||
@@ -402,9 +369,6 @@ endif
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||
rice append --exec $(BINARY_NAME)
|
||||
|
||||
build-minimal:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||
|
||||
build-api:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||
|
||||
@@ -412,18 +376,6 @@ backend-assets/lib:
|
||||
mkdir -p backend-assets/lib
|
||||
|
||||
dist:
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-avx2
|
||||
ifeq ($(DETECT_LIBS),true)
|
||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||
endif
|
||||
ifeq ($(OS),Darwin)
|
||||
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||
else
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
|
||||
endif
|
||||
GO_TAGS="tts p2p" $(MAKE) build
|
||||
ifeq ($(DETECT_LIBS),true)
|
||||
scripts/prepare-libs.sh backend-assets/grpc/piper
|
||||
@@ -439,19 +391,6 @@ else
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
|
||||
endif
|
||||
|
||||
dist-cross-linux-arm64:
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
|
||||
STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
ifeq ($(BUILD_ID),)
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
|
||||
else
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
|
||||
endif
|
||||
|
||||
osx-signed: build
|
||||
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
||||
|
||||
@@ -472,17 +411,47 @@ prepare-test: grpcs
|
||||
cp -rf backend-assets core/http
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
########################################################
|
||||
## Tests
|
||||
########################################################
|
||||
|
||||
## Test targets
|
||||
test: prepare test-models/testmodel.ggml grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
|
||||
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build-api
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
|
||||
|
||||
########################################################
|
||||
## AIO tests
|
||||
########################################################
|
||||
|
||||
docker-build-aio:
|
||||
docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
|
||||
|
||||
e2e-aio:
|
||||
LOCALAI_BACKEND_DIR=$(abspath ./backends) \
|
||||
LOCALAI_MODELS_DIR=$(abspath ./models) \
|
||||
LOCALAI_IMAGE_TAG=test \
|
||||
LOCALAI_IMAGE=local-ai-aio \
|
||||
$(MAKE) run-e2e-aio
|
||||
|
||||
run-e2e-aio: protogen-go
|
||||
@echo 'Running e2e AIO tests'
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
||||
|
||||
########################################################
|
||||
## E2E tests
|
||||
########################################################
|
||||
|
||||
prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
@@ -493,10 +462,6 @@ run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||
|
||||
run-e2e-aio: protogen-go
|
||||
@echo 'Running e2e AIO tests'
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
||||
|
||||
test-e2e:
|
||||
@echo 'Running e2e tests'
|
||||
BUILD_TYPE=$(BUILD_TYPE) \
|
||||
@@ -507,16 +472,20 @@ teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
########################################################
|
||||
## Integration and unit tests
|
||||
########################################################
|
||||
|
||||
test-llama-gguf: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-tts: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-stablediffusion: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-stores: backend-assets/grpc/local-store
|
||||
@@ -528,6 +497,10 @@ test-container:
|
||||
docker build --target requirements -t local-ai-test-container .
|
||||
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
|
||||
|
||||
########################################################
|
||||
## Help
|
||||
########################################################
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
@echo ''
|
||||
@@ -540,6 +513,10 @@ help: ## Show this help.
|
||||
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
||||
}' $(MAKEFILE_LIST)
|
||||
|
||||
########################################################
|
||||
## Backends
|
||||
########################################################
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: protogen-go protogen-python
|
||||
|
||||
@@ -679,7 +656,7 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
||||
mkdir -p backend-assets/espeak-ng-data
|
||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||
|
||||
backend-assets/grpc: protogen-go replace
|
||||
backend-assets/grpc:
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||
@@ -688,128 +665,21 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/huggingface
|
||||
endif
|
||||
|
||||
backend/cpp/llama/llama.cpp:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||
|
||||
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||
build-llama-cpp-grpc-server:
|
||||
# Conditionally build grpc for the llama backend to use if needed
|
||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
$(MAKE) -C backend/cpp/grpc build
|
||||
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
||||
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
||||
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
|
||||
$(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
endif
|
||||
|
||||
# This target is for manually building a variant with-auto detected flags
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
||||
$(MAKE) -C backend/cpp/llama-cpp purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||
|
||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
||||
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
||||
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
||||
|
||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||
|
||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
backend-assets/grpc/bark-cpp: backend/go/bark-cpp/libbark.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/bark-cpp
|
||||
endif
|
||||
|
||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||
backend-assets/grpc/piper: protogen-go replace sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||
backend-assets/grpc/silero-vad: protogen-go replace backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/silero-vad
|
||||
endif
|
||||
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
|
||||
backend-assets/grpc/whisper: protogen-go replace sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
|
||||
CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||
@@ -817,13 +687,13 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/whisper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/local-store: backend-assets/grpc
|
||||
backend-assets/grpc/local-store: backend-assets/grpc protogen-go replace
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/local-store
|
||||
endif
|
||||
|
||||
grpcs: prepare $(GRPC_BACKENDS)
|
||||
grpcs: prepare protogen-go $(GRPC_BACKENDS)
|
||||
|
||||
DOCKER_IMAGE?=local-ai
|
||||
DOCKER_AIO_IMAGE?=local-ai-aio
|
||||
@@ -879,6 +749,59 @@ docker-image-intel-xpu:
|
||||
--build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" \
|
||||
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
|
||||
|
||||
########################################################
|
||||
## Backends
|
||||
########################################################
|
||||
|
||||
backend-images:
|
||||
mkdir -p backend-images
|
||||
|
||||
docker-build-llama-cpp:
|
||||
docker build -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
|
||||
|
||||
docker-build-bark-cpp:
|
||||
docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
|
||||
|
||||
docker-save-llama-cpp: backend-images
|
||||
docker save local-ai-backend:llama-cpp -o backend-images/llama-cpp.tar
|
||||
|
||||
|
||||
docker-build-rerankers:
|
||||
docker build -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
|
||||
|
||||
docker-build-vllm:
|
||||
docker build -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
|
||||
|
||||
docker-build-transformers:
|
||||
docker build -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
|
||||
|
||||
docker-build-diffusers:
|
||||
docker build -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
|
||||
|
||||
docker-build-kokoro:
|
||||
docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
|
||||
|
||||
docker-build-faster-whisper:
|
||||
docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
|
||||
|
||||
docker-build-coqui:
|
||||
docker build -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
|
||||
|
||||
docker-build-bark:
|
||||
docker build -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
|
||||
|
||||
docker-build-chatterbox:
|
||||
docker build -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
|
||||
|
||||
docker-build-exllama2:
|
||||
docker build -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
|
||||
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
|
||||
|
||||
########################################################
|
||||
### END Backends
|
||||
########################################################
|
||||
|
||||
.PHONY: swagger
|
||||
swagger:
|
||||
swag init -g core/http/app.go --output swagger
|
||||
|
||||
@@ -17,9 +17,9 @@ ARG GO_VERSION=1.22.6
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ccache \
|
||||
git ccache \
|
||||
ca-certificates \
|
||||
make \
|
||||
make cmake \
|
||||
curl unzip \
|
||||
libssl-dev && \
|
||||
apt-get clean && \
|
||||
@@ -123,9 +123,10 @@ EOT
|
||||
|
||||
COPY . /LocalAI
|
||||
|
||||
RUN cd /LocalAI && make backend-assets/grpc/${BACKEND}
|
||||
RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build
|
||||
|
||||
FROM scratch
|
||||
ARG BACKEND=rerankers
|
||||
|
||||
COPY --from=builder /LocalAI/backend-assets/grpc/${BACKEND} ./
|
||||
COPY --from=builder /LocalAI/backend/go/${BACKEND}/${BACKEND} ./
|
||||
COPY --from=builder /LocalAI/backend/go/${BACKEND}/run.sh ./
|
||||
207
backend/Dockerfile.llama-cpp
Normal file
207
backend/Dockerfile.llama-cpp
Normal file
@@ -0,0 +1,207 @@
|
||||
ARG BASE_IMAGE=ubuntu:22.04
|
||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||
|
||||
|
||||
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||
|
||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||
ARG GRPC_VERSION=v1.65.0
|
||||
ARG CMAKE_FROM_SOURCE=false
|
||||
ARG CMAKE_VERSION=3.26.4
|
||||
|
||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
build-essential curl libssl-dev \
|
||||
git && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install CMake (the version in 22.04 is too old)
|
||||
RUN <<EOT bash
|
||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||
else
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||
# and running make install in the target container
|
||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
mkdir -p /build/grpc/cmake/build && \
|
||||
cd /build/grpc/cmake/build && \
|
||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||
make && \
|
||||
make install && \
|
||||
rm -rf /build
|
||||
|
||||
FROM ${BASE_IMAGE} AS builder
|
||||
ARG BACKEND=rerankers
|
||||
ARG BUILD_TYPE
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ARG CUDA_MAJOR_VERSION
|
||||
ARG CUDA_MINOR_VERSION
|
||||
ARG SKIP_DRIVERS=false
|
||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
ARG GO_VERSION=1.22.6
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ccache git \
|
||||
ca-certificates \
|
||||
make \
|
||||
curl unzip \
|
||||
libssl-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Cuda
|
||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||
|
||||
# HipBLAS requirements
|
||||
ENV PATH=/opt/rocm/bin:${PATH}
|
||||
|
||||
# Vulkan requirements
|
||||
RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
vulkan-sdk && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# CuBLAS requirements
|
||||
RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# If we are building with clblas support, we need the libraries for the builds
|
||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libclblast-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
hipblas-dev \
|
||||
rocblas-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||
ldconfig \
|
||||
; fi
|
||||
|
||||
RUN echo "TARGETARCH: $TARGETARCH"
|
||||
|
||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||
# here so that we can generate the grpc code for the stablediffusion build
|
||||
RUN <<EOT bash
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
fi
|
||||
EOT
|
||||
|
||||
# Install CMake (the version in 22.04 is too old)
|
||||
RUN <<EOT bash
|
||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||
else
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
|
||||
COPY . /LocalAI
|
||||
|
||||
## Otherwise just run the normal build
|
||||
RUN <<EOT bash
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
|
||||
make llama-cpp-grpc && make llama-cpp-rpc-server; \
|
||||
else \
|
||||
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
|
||||
make llama-cpp-avx2 && \
|
||||
make llama-cpp-avx512 && \
|
||||
make llama-cpp-fallback && \
|
||||
make llama-cpp-grpc && \
|
||||
make llama-cpp-rpc-server; \
|
||||
fi
|
||||
EOT
|
||||
|
||||
|
||||
# Copy libraries using a script to handle architecture differences
|
||||
RUN make -C /LocalAI/backend/cpp/llama-cpp package
|
||||
|
||||
|
||||
FROM scratch
|
||||
|
||||
|
||||
# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
|
||||
COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
|
||||
165
backend/cpp/llama-cpp/Makefile
Normal file
165
backend/cpp/llama-cpp/Makefile
Normal file
@@ -0,0 +1,165 @@
|
||||
|
||||
LLAMA_VERSION?=e75ba4c0434eb759eb7ff74e034ebe729053e575
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||
TARGET?=--target grpc-server
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
||||
# AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||
# CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifeq ($(BUILD_TYPE),)
|
||||
BUILD_TYPE=metal
|
||||
endif
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||
endif
|
||||
TARGET+=--target ggml-metal
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DCMAKE_CXX_FLAGS="-fsycl" \
|
||||
-DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||
endif
|
||||
|
||||
INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
|
||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||
build-llama-cpp-grpc-server:
|
||||
# Conditionally build grpc for the llama backend to use if needed
|
||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
$(MAKE) -C ../../grpc build
|
||||
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
||||
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
||||
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
||||
LLAMA_VERSION=$(LLAMA_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
||||
else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
||||
endif
|
||||
|
||||
llama-cpp-avx2: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
|
||||
|
||||
llama-cpp-avx512: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
|
||||
|
||||
llama-cpp-avx: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
|
||||
|
||||
llama-cpp-fallback: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||
|
||||
llama-cpp-rpc-server: llama-cpp-grpc
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
||||
|
||||
llama.cpp:
|
||||
mkdir -p llama.cpp
|
||||
cd llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(LLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
llama.cpp/tools/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/tools/grpc-server
|
||||
bash prepare.sh
|
||||
|
||||
rebuild:
|
||||
bash prepare.sh
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
package:
|
||||
bash package.sh
|
||||
|
||||
purge:
|
||||
rm -rf llama.cpp/build
|
||||
rm -rf llama.cpp/tools/grpc-server
|
||||
rm -rf grpc-server
|
||||
|
||||
clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||
else
|
||||
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||
endif
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
42
backend/cpp/llama-cpp/package.sh
Executable file
42
backend/cpp/llama-cpp/package.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Script to copy the appropriate libraries based on architecture
|
||||
# This script is used in the final stage of the Dockerfile
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||
cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
||||
cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
||||
cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
||||
cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
||||
cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
||||
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
# ARM64 architecture
|
||||
echo "Detected ARM64 architecture, copying ARM64 libraries..."
|
||||
cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
|
||||
else
|
||||
echo "Error: Could not detect architecture"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Packaging completed successfully"
|
||||
ls -liah $CURDIR/package/
|
||||
ls -liah $CURDIR/package/lib/
|
||||
61
backend/cpp/llama-cpp/run.sh
Executable file
61
backend/cpp/llama-cpp/run.sh
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
# Get the absolute current dir where the script is located
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
cd /
|
||||
|
||||
echo "CPU info:"
|
||||
grep -e "model\sname" /proc/cpuinfo | head -1
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=llama-cpp-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
||||
BINARY=llama-cpp-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
||||
BINARY=llama-cpp-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
||||
BINARY=llama-cpp-avx512
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
if [ -e $CURDIR/llama-cpp-grpc ]; then
|
||||
BINARY=llama-cpp-grpc
|
||||
fi
|
||||
fi
|
||||
|
||||
# Extend ld library path with the dir where this script is located/lib
|
||||
if [ "$(uname)" == "Darwin" ]; then
|
||||
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
|
||||
else
|
||||
LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
echo "Using binary: $BINARY"
|
||||
$CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
|
||||
fi
|
||||
|
||||
echo "Using binary: $BINARY"
|
||||
exec $CURDIR/$BINARY "$@"
|
||||
|
||||
# In case we fail execing, just run fallback
|
||||
exec $CURDIR/llama-cpp-fallback "$@"
|
||||
@@ -1,87 +0,0 @@
|
||||
|
||||
LLAMA_VERSION?=
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||
TARGET?=--target grpc-server
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
# But if it's OSX without metal, disable it here
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
TARGET+=--target ggml-metal
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DCMAKE_CXX_FLAGS="-fsycl" \
|
||||
-DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||
endif
|
||||
|
||||
llama.cpp:
|
||||
mkdir -p llama.cpp
|
||||
cd llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(LLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
llama.cpp/tools/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/tools/grpc-server
|
||||
bash prepare.sh
|
||||
|
||||
rebuild:
|
||||
bash prepare.sh
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
purge:
|
||||
rm -rf llama.cpp/build
|
||||
rm -rf llama.cpp/tools/grpc-server
|
||||
rm -rf grpc-server
|
||||
|
||||
clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||
else
|
||||
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||
endif
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
@@ -3,23 +3,46 @@ LIBRARY_PATH := $(abspath ./)
|
||||
|
||||
AR?=ar
|
||||
|
||||
CMAKE_ARGS?=-DGGML_NATIVE=OFF
|
||||
BUILD_TYPE?=
|
||||
GOCMD=go
|
||||
# keep standard at C11 and C++11
|
||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/bark.cpp/examples -I$(INCLUDE_PATH)/sources/bark.cpp/encodec.cpp/ggml/include -I$(INCLUDE_PATH)/sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
||||
|
||||
# bark.cpp
|
||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||
BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
|
||||
|
||||
# warnings
|
||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
cd sources/bark.cpp && \
|
||||
git checkout $(BARKCPP_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
||||
cd sources/bark.cpp && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake $(CMAKE_ARGS) .. && \
|
||||
cmake --build . --config Release
|
||||
|
||||
gobark.o:
|
||||
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
|
||||
|
||||
libbark.a: gobark.o
|
||||
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
|
||||
libbark.a: sources/bark.cpp/build/libbark.a gobark.o
|
||||
cp $(INCLUDE_PATH)/sources/bark.cpp/build/libbark.a ./
|
||||
$(AR) rcs libbark.a gobark.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
|
||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
|
||||
|
||||
bark-cpp: libbark.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH="$(CURDIR)" LIBRARY_PATH=$(CURDIR) \
|
||||
$(GOCMD) build -v -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o bark-cpp ./
|
||||
|
||||
build: bark-cpp
|
||||
|
||||
clean:
|
||||
rm -f gobark.o libbark.a
|
||||
@@ -1,7 +1,7 @@
|
||||
package main
|
||||
|
||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
|
||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
|
||||
// #cgo CXXFLAGS: -I${SRCDIR}/sources/bark.cpp/ -I${SRCDIR}/sources/bark.cpp/encodec.cpp -I${SRCDIR}/sources/bark.cpp/encodec.cpp/ggml/include -I${SRCDIR}/sources/bark.cpp/examples -I${SRCDIR}/sources/bark.cpp/spm-headers
|
||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/sources/bark.cpp/build/examples -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ggml/src/ -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon -lggml -lgomp
|
||||
// #include <gobark.h>
|
||||
// #include <stdlib.h>
|
||||
import "C"
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
exec ./bark-cpp
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
exec $CURDIR/bark-cpp "$@"
|
||||
@@ -1,5 +1,30 @@
|
||||
---
|
||||
## vLLM
|
||||
## metas
|
||||
- &llamacpp
|
||||
name: "llama-cpp"
|
||||
alias: "llama-cpp"
|
||||
license: mit
|
||||
icon: https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png
|
||||
description: |
|
||||
LLM inference in C/C++
|
||||
urls:
|
||||
- https://github.com/ggerganov/llama.cpp
|
||||
tags:
|
||||
- text-to-text
|
||||
- LLM
|
||||
- CPU
|
||||
- GPU
|
||||
- Metal
|
||||
- CUDA
|
||||
- HIP
|
||||
capabilities:
|
||||
default: "cpu-llama-cpp"
|
||||
nvidia: "cuda12-llama-cpp"
|
||||
intel: "intel-sycl-f16-llama-cpp"
|
||||
amd: "rocm-llama-cpp"
|
||||
metal: "metal-llama-cpp"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp"
|
||||
darwin-x86: "darwin-x86-llama-cpp"
|
||||
- &vllm
|
||||
name: "vllm"
|
||||
license: apache-2.0
|
||||
@@ -32,6 +57,229 @@
|
||||
nvidia: "cuda12-vllm"
|
||||
amd: "rocm-vllm"
|
||||
intel: "intel-sycl-f16-vllm"
|
||||
- &rerankers
|
||||
name: "rerankers"
|
||||
alias: "rerankers"
|
||||
capabilities:
|
||||
nvidia: "cuda12-rerankers"
|
||||
intel: "intel-sycl-f16-rerankers"
|
||||
amd: "rocm-rerankers"
|
||||
- &transformers
|
||||
name: "transformers"
|
||||
icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
|
||||
alias: "transformers"
|
||||
license: apache-2.0
|
||||
description: |
|
||||
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer vision, audio, video, and multimodal model, for both inference and training.
|
||||
It centralizes the model definition so that this definition is agreed upon across the ecosystem. transformers is the pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...), and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from transformers.
|
||||
urls:
|
||||
- https://github.com/huggingface/transformers
|
||||
tags:
|
||||
- text-to-text
|
||||
- multimodal
|
||||
capabilities:
|
||||
nvidia: "cuda12-transformers"
|
||||
intel: "intel-sycl-f16-transformers"
|
||||
amd: "rocm-transformers"
|
||||
- &diffusers
|
||||
icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg
|
||||
description: |
|
||||
🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or training your own diffusion models, 🤗 Diffusers is a modular toolbox that supports both.
|
||||
urls:
|
||||
- https://github.com/huggingface/diffusers
|
||||
tags:
|
||||
- image-generation
|
||||
- video-generation
|
||||
- diffusion-models
|
||||
license: apache-2.0
|
||||
alias: "diffusers"
|
||||
capabilities:
|
||||
nvidia: "cuda12-diffusers"
|
||||
intel: "intel-sycl-f32-diffusers"
|
||||
amd: "rocm-diffusers"
|
||||
- &exllama2
|
||||
name: "exllama2"
|
||||
urls:
|
||||
- https://github.com/turboderp-org/exllamav2
|
||||
tags:
|
||||
- text-to-text
|
||||
- LLM
|
||||
- EXL2
|
||||
license: MIT
|
||||
description: |
|
||||
ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
|
||||
alias: "exllama2"
|
||||
capabilities:
|
||||
nvidia: "cuda12-exllama2"
|
||||
intel: "intel-sycl-f32-exllama2"
|
||||
amd: "rocm-exllama2"
|
||||
- &faster-whisper
|
||||
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
|
||||
description: |
|
||||
faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models.
|
||||
This implementation is up to 4 times faster than openai/whisper for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
|
||||
urls:
|
||||
- https://github.com/SYSTRAN/faster-whisper
|
||||
tags:
|
||||
- speech-to-text
|
||||
- Whisper
|
||||
license: MIT
|
||||
name: "faster-whisper"
|
||||
capabilities:
|
||||
nvidia: "cuda12-faster-whisper"
|
||||
intel: "intel-sycl-f32-faster-whisper"
|
||||
amd: "rocm-faster-whisper"
|
||||
- &kokoro
|
||||
icon: https://avatars.githubusercontent.com/u/166769057?v=4
|
||||
description: |
|
||||
Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, Kokoro can be deployed anywhere from production environments to personal projects.
|
||||
urls:
|
||||
- https://huggingface.co/hexgrad/Kokoro-82M
|
||||
- https://github.com/hexgrad/kokoro
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
- LLM
|
||||
license: apache-2.0
|
||||
alias: "kokoro"
|
||||
name: "kokoro"
|
||||
capabilities:
|
||||
nvidia: "cuda12-kokoro"
|
||||
intel: "intel-sycl-f32-kokoro"
|
||||
amd: "rocm-kokoro"
|
||||
- &coqui
|
||||
urls:
|
||||
- https://github.com/idiap/coqui-ai-TTS
|
||||
description: |
|
||||
🐸 Coqui TTS is a library for advanced Text-to-Speech generation.
|
||||
|
||||
🚀 Pretrained models in +1100 languages.
|
||||
|
||||
🛠️ Tools for training new models and fine-tuning existing models in any language.
|
||||
|
||||
📚 Utilities for dataset analysis and curation.
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
license: mpl-2.0
|
||||
name: "coqui"
|
||||
alias: "coqui"
|
||||
capabilities:
|
||||
nvidia: "cuda12-coqui"
|
||||
intel: "intel-sycl-f32-coqui"
|
||||
amd: "rocm-coqui"
|
||||
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
|
||||
- &bark
|
||||
urls:
|
||||
- https://github.com/suno-ai/bark
|
||||
description: |
|
||||
Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints, which are ready for inference and available for commercial use.
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
license: MIT
|
||||
name: "bark"
|
||||
alias: "bark"
|
||||
capabilities:
|
||||
cuda: "cuda12-bark"
|
||||
intel: "intel-sycl-f32-bark"
|
||||
rocm: "rocm-bark"
|
||||
icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
|
||||
- &barkcpp
|
||||
urls:
|
||||
- https://github.com/PABannier/bark.cpp
|
||||
description: |
|
||||
With bark.cpp, our goal is to bring real-time realistic multilingual text-to-speech generation to the community.
|
||||
|
||||
Plain C/C++ implementation without dependencies
|
||||
AVX, AVX2 and AVX512 for x86 architectures
|
||||
CPU and GPU compatible backends
|
||||
Mixed F16 / F32 precision
|
||||
4-bit, 5-bit and 8-bit integer quantization
|
||||
Metal and CUDA backends
|
||||
|
||||
Models supported
|
||||
|
||||
Bark Small
|
||||
Bark Large
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
license: MIT
|
||||
icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png
|
||||
name: "bark-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp"
|
||||
alias: "bark-cpp"
|
||||
- &chatterbox
|
||||
urls:
|
||||
- https://github.com/resemble-ai/chatterbox
|
||||
description: |
|
||||
Resemble AI's first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.
|
||||
Whether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out.
|
||||
tags:
|
||||
- text-to-speech
|
||||
- TTS
|
||||
license: MIT
|
||||
icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
name: "chatterbox"
capabilities:
nvidia: "cuda12-chatterbox"
## llama-cpp
- !!merge <<: *llamacpp
name: "darwin-x86-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-darwin-x86-llama-cpp"
- !!merge <<: *llamacpp
name: "darwin-x86-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-darwin-x86-llama-cpp"
- !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp"
- !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp"
- !!merge <<: *llamacpp
name: "cpu-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp"
- !!merge <<: *llamacpp
name: "cpu-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp"
- !!merge <<: *llamacpp
name: "cuda11-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp"
- !!merge <<: *llamacpp
name: "cuda12-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp"
- !!merge <<: *llamacpp
name: "rocm-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp"
- !!merge <<: *llamacpp
name: "intel-sycl-f32-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp"
- !!merge <<: *llamacpp
name: "intel-sycl-f16-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp"
- !!merge <<: *llamacpp
name: "metal-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp"
- !!merge <<: *llamacpp
name: "metal-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp"
- !!merge <<: *llamacpp
name: "cuda11-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp"
- !!merge <<: *llamacpp
name: "cuda12-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp"
- !!merge <<: *llamacpp
name: "rocm-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-llama-cpp"
- !!merge <<: *llamacpp
name: "intel-sycl-f32-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp"
- !!merge <<: *llamacpp
name: "intel-sycl-f16-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
# vllm
- !!merge <<: *vllm
name: "vllm-development"
capabilities:
@@ -69,13 +317,6 @@
name: "intel-sycl-f16-vllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
## Rerankers
- &rerankers
name: "rerankers"
alias: "rerankers"
capabilities:
nvidia: "cuda12-rerankers"
intel: "intel-sycl-f16-rerankers"
amd: "rocm-rerankers"
- !!merge <<: *rerankers
name: "rerankers-development"
capabilities:
@@ -113,23 +354,6 @@
name: "intel-sycl-f16-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
## Transformers
- &transformers
name: "transformers"
icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
alias: "transformers"
license: apache-2.0
description: |
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer vision, audio, video, and multimodal models, for both inference and training.
It centralizes the model definition so that this definition is agreed upon across the ecosystem. transformers is the pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...), and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from transformers.
urls:
- https://github.com/huggingface/transformers
tags:
- text-to-text
- multimodal
capabilities:
nvidia: "cuda12-transformers"
intel: "intel-sycl-f16-transformers"
amd: "rocm-transformers"
- !!merge <<: *transformers
name: "transformers-development"
capabilities:
@@ -167,22 +391,6 @@
name: "intel-sycl-f16-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers"
## Diffusers
- &diffusers
icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg
description: |
🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or training your own diffusion models, 🤗 Diffusers is a modular toolbox that supports both.
urls:
- https://github.com/huggingface/diffusers
tags:
- image-generation
- video-generation
- diffusion-models
license: apache-2.0
alias: "diffusers"
capabilities:
nvidia: "cuda12-diffusers"
intel: "intel-sycl-f32-diffusers"
amd: "rocm-diffusers"
- !!merge <<: *diffusers
name: "diffusers-development"
capabilities:
@@ -214,22 +422,6 @@
name: "intel-sycl-f32-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
## exllama2
- &exllama2
name: "exllama2"
urls:
- https://github.com/turboderp-org/exllamav2
tags:
- text-to-text
- LLM
- EXL2
license: MIT
description: |
ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
alias: "exllama2"
capabilities:
nvidia: "cuda12-exllama2"
intel: "intel-sycl-f32-exllama2"
amd: "rocm-exllama2"
- !!merge <<: *exllama2
name: "exllama2-development"
capabilities:
@@ -249,24 +441,6 @@
name: "cuda12-exllama2-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
## kokoro
- &kokoro
icon: https://avatars.githubusercontent.com/u/166769057?v=4
description: |
Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, Kokoro can be deployed anywhere from production environments to personal projects.
urls:
- https://huggingface.co/hexgrad/Kokoro-82M
- https://github.com/hexgrad/kokoro
tags:
- text-to-speech
- TTS
- LLM
license: apache-2.0
alias: "kokoro"
name: "kokoro"
capabilities:
nvidia: "cuda12-kokoro"
intel: "intel-sycl-f32-kokoro"
amd: "rocm-kokoro"
- !!merge <<: *kokoro
name: "kokoro-development"
capabilities:
@@ -304,22 +478,6 @@
name: "rocm-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
## faster-whisper
- &faster-whisper
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
description: |
faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models.
This implementation is up to 4 times faster than openai/whisper for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
urls:
- https://github.com/SYSTRAN/faster-whisper
tags:
- speech-to-text
- Whisper
license: MIT
name: "faster-whisper"
capabilities:
nvidia: "cuda12-faster-whisper"
intel: "intel-sycl-f32-faster-whisper"
amd: "rocm-faster-whisper"
- !!merge <<: *faster-whisper
name: "faster-whisper-development"
capabilities:
@@ -348,28 +506,7 @@
name: "sycl-f16-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper"
## coqui
- &coqui
urls:
- https://github.com/idiap/coqui-ai-TTS
description: |
🐸 Coqui TTS is a library for advanced Text-to-Speech generation.

🚀 Pretrained models in +1100 languages.

🛠️ Tools for training new models and fine-tuning existing models in any language.

📚 Utilities for dataset analysis and curation.
tags:
- text-to-speech
- TTS
license: mpl-2.0
name: "coqui"
alias: "coqui"
capabilities:
nvidia: "cuda12-coqui"
intel: "intel-sycl-f32-coqui"
amd: "rocm-coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- !!merge <<: *coqui
name: "coqui-development"
capabilities:
@@ -407,22 +544,6 @@
name: "rocm-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
## bark
- &bark
urls:
- https://github.com/suno-ai/bark
description: |
Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints, which are ready for inference and available for commercial use.
tags:
- text-to-speech
- TTS
license: MIT
name: "bark"
alias: "bark"
capabilities:
cuda: "cuda12-bark"
intel: "intel-sycl-f32-bark"
rocm: "rocm-bark"
icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
- !!merge <<: *bark
name: "bark-development"
capabilities:
@@ -459,50 +580,11 @@
- !!merge <<: *bark
name: "cuda12-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
- &barkcpp
urls:
- https://github.com/PABannier/bark.cpp
description: |
With bark.cpp, our goal is to bring real-time realistic multilingual text-to-speech generation to the community.

Plain C/C++ implementation without dependencies
AVX, AVX2 and AVX512 for x86 architectures
CPU and GPU compatible backends
Mixed F16 / F32 precision
4-bit, 5-bit and 8-bit integer quantization
Metal and CUDA backends

Models supported

Bark Small
Bark Large
tags:
- text-to-speech
- TTS
license: MIT
icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png
name: "bark-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp"
alias: "bark-cpp"
- !!merge <<: *barkcpp
name: "bark-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-bark-cpp"
alias: "bark-cpp"
## chatterbox
- &chatterbox
urls:
- https://github.com/resemble-ai/chatterbox
description: |
Resemble AI's first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.
Whether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out.
tags:
- text-to-speech
- TTS
license: MIT
icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
name: "chatterbox"
capabilities:
nvidia: "cuda12-chatterbox"
- !!merge <<: *chatterbox
name: "chatterbox-development"
capabilities:
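The entries above that carry only a `capabilities` map and no `uri` (such as `rerankers`, `transformers`, `kokoro`, or `chatterbox`) are meta backends: at install time LocalAI resolves them to the concrete entry whose key matches the detected system capability. A minimal sketch of that resolution, using a hypothetical `resolveMetaBackend` helper rather than the exact gallery code (the real implementation, `FindBestBackendFromMeta`, appears in the gallery diff further below):

```go
package main

import "fmt"

// resolveMetaBackend is a hypothetical helper that mirrors, in simplified
// form, how a meta entry's capabilities map is turned into a concrete
// backend name: the detected capability key selects the real entry.
func resolveMetaBackend(capabilities map[string]string, detected string) (string, bool) {
	if name, ok := capabilities[detected]; ok {
		return name, true
	}
	// fall back to the catch-all "default" entry, if the map defines one
	name, ok := capabilities["default"]
	return name, ok
}

func main() {
	// capabilities map as written in the "rerankers" meta entry above
	caps := map[string]string{
		"nvidia": "cuda12-rerankers",
		"intel":  "intel-sycl-f16-rerankers",
		"amd":    "rocm-rerankers",
	}
	name, _ := resolveMetaBackend(caps, "nvidia")
	fmt.Println(name) // cuda12-rerankers
}
```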
@@ -3,10 +3,12 @@ package worker
import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"

cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/library"
"github.com/rs/zerolog/log"
@@ -16,6 +18,34 @@ type LLamaCPP struct {
WorkerFlags `embed:""`
}

func findLLamaCPPBackend(backendSystemPath string) (string, error) {
backends, err := gallery.ListSystemBackends(backendSystemPath)
if err != nil {
log.Warn().Msgf("Failed listing system backends: %s", err)
return "", err
}
log.Debug().Msgf("System backends: %v", backends)

backendPath := ""
for b, path := range backends {
if b == "llama-cpp" {
backendPath = filepath.Dir(path)
break
}
}

if backendPath == "" {
return "", fmt.Errorf("llama-cpp backend not found")
}

grpcProcess := filepath.Join(
backendPath,
"grpc-server",
)

return grpcProcess, nil
}
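For illustration, a self-contained sketch of the lookup findLLamaCPPBackend performs; the paths are hypothetical, and the assumption is that ListSystemBackends maps installed backend names to their run files, with the grpc-server binary sitting next to the llama-cpp entry:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// hypothetical result of gallery.ListSystemBackends("/backends")
	backends := map[string]string{
		"llama-cpp": "/backends/llama-cpp/run.sh",
	}
	if runFile, ok := backends["llama-cpp"]; ok {
		// the worker execs the grpc-server living in the same directory
		fmt.Println(filepath.Join(filepath.Dir(runFile), "grpc-server"))
		// Output: /backends/llama-cpp/grpc-server
	}
}
```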
func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
// Extract files from the embedded FS
err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
@@ -28,14 +58,14 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
}

grpcProcess := assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
)
grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
if err != nil {
return err
}

args := strings.Split(r.ExtraLLamaCPPArgs, " ")
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)

args = append([]string{grpcProcess}, args...)
return syscall.Exec(
grpcProcess,
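The Run method above ends by replacing the worker process with the resolved binary. A minimal sketch of that exec step, with a hypothetical path and arguments (the real code additionally rewrites the args and binary path through library.LoadLDSO first):

```go
package main

import (
	"os"
	"strings"
	"syscall"
)

func main() {
	grpcProcess := "/backends/llama-cpp/grpc-server" // hypothetical resolved path
	args := strings.Split("--host 127.0.0.1 --port 50052", " ")
	// argv[0] must be the program itself before handing off via exec(2)
	args = append([]string{grpcProcess}, args...)
	if err := syscall.Exec(grpcProcess, args, os.Environ()); err != nil {
		panic(err)
	}
}
```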
@@ -71,11 +71,12 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)

grpcProcess := assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
)
grpcProcess, err := findLLamaCPPBackend(r.BackendAssetsPath)
if err != nil {
log.Error().Err(err).Msg("Failed to find llama-cpp-rpc-server")
return
}

var extraArgs []string

if r.ExtraLLamaCPPArgs != "" {

@@ -1,6 +1,9 @@
package gallery

import "github.com/mudler/LocalAI/core/config"
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
)

// BackendMetadata represents the metadata stored in a JSON file for each installed backend
type BackendMetadata struct {
@@ -23,6 +26,19 @@ type GalleryBackend struct {
CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"`
}

func (backend *GalleryBackend) FindBestBackendFromMeta(systemState *system.SystemState, backends GalleryElements[*GalleryBackend]) *GalleryBackend {
if systemState == nil {
return nil
}

realBackend := backend.CapabilitiesMap[systemState.Capability(backend.CapabilitiesMap)]
if realBackend == "" {
return nil
}

return backends.FindByName(realBackend)
}

type GalleryBackends []*GalleryBackend

func (m *GalleryBackend) SetGallery(gallery config.Gallery) {

@@ -57,19 +57,6 @@ func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
return nil
}

func findBestBackendFromMeta(backend *GalleryBackend, systemState *system.SystemState, backends GalleryElements[*GalleryBackend]) *GalleryBackend {
if systemState == nil {
return nil
}

realBackend := backend.CapabilitiesMap[systemState.Capability()]
if realBackend == "" {
return nil
}

return backends.FindByName(realBackend)
}

// Installs a model from the gallery
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, basePath string, downloadStatus func(string, string, string, float64), force bool) error {
if !force {
@@ -103,7 +90,7 @@ func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.S
log.Debug().Interface("systemState", systemState).Str("name", name).Msg("Backend is a meta backend")

// Then, let's try to find the best backend based on the capabilities map
bestBackend := findBestBackendFromMeta(backend, systemState, backends)
bestBackend := backend.FindBestBackendFromMeta(systemState, backends)
if bestBackend == nil {
return fmt.Errorf("no backend found with capabilities %q", backend.CapabilitiesMap)
}
@@ -283,6 +270,7 @@ func RegisterBackends(basePath string, modelLoader *model.ModelLoader) error {
}

for name, runFile := range backends {
log.Debug().Str("name", name).Str("runFile", runFile).Msg("Registering backend")
modelLoader.SetExternalBackend(name, runFile)
}
@@ -4,6 +4,7 @@ import (
"encoding/json"
"os"
"path/filepath"
"runtime"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
@@ -98,6 +99,7 @@ var _ = Describe("Gallery Backends", func() {
})

It("should find best backend from meta based on system capabilities", func() {

metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
@@ -106,6 +108,7 @@ var _ = Describe("Gallery Backends", func() {
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
"metal": "metal-backend",
},
}

@@ -123,25 +126,43 @@ var _ = Describe("Gallery Backends", func() {
URI: testImage,
}

backends := GalleryElements[*GalleryBackend]{nvidiaBackend, amdBackend}
metalBackend := &GalleryBackend{
Metadata: Metadata{
Name: "metal-backend",
},
URI: testImage,
}

// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
bestBackend := findBestBackendFromMeta(metaBackend, nvidiaSystemState, backends)
Expect(bestBackend).To(Equal(nvidiaBackend))
backends := GalleryElements[*GalleryBackend]{nvidiaBackend, amdBackend, metalBackend}

// Test with AMD system state
amdSystemState := &system.SystemState{GPUVendor: "amd"}
bestBackend = findBestBackendFromMeta(metaBackend, amdSystemState, backends)
Expect(bestBackend).To(Equal(amdBackend))
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
metal := &system.SystemState{}
bestBackend := metaBackend.FindBestBackendFromMeta(metal, backends)
Expect(bestBackend).To(Equal(metalBackend))

// Test with unsupported GPU vendor
unsupportedSystemState := &system.SystemState{GPUVendor: "unsupported"}
bestBackend = findBestBackendFromMeta(metaBackend, unsupportedSystemState, backends)
Expect(bestBackend).To(BeNil())
} else {
// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
bestBackend := metaBackend.FindBestBackendFromMeta(nvidiaSystemState, backends)
Expect(bestBackend).To(Equal(nvidiaBackend))

// Test with AMD system state
amdSystemState := &system.SystemState{GPUVendor: "amd"}
bestBackend = metaBackend.FindBestBackendFromMeta(amdSystemState, backends)
Expect(bestBackend).To(Equal(amdBackend))

// Test with unsupported GPU vendor
unsupportedSystemState := &system.SystemState{GPUVendor: "unsupported"}
bestBackend = metaBackend.FindBestBackendFromMeta(unsupportedSystemState, backends)
Expect(bestBackend).To(BeNil())
}
})

It("should handle meta backend deletion correctly", func() {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
Skip("Skipping test on darwin/arm64")
}

metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
@@ -207,6 +228,9 @@ var _ = Describe("Gallery Backends", func() {
})

It("should handle meta backend deletion correctly with aliases", func() {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
Skip("Skipping test on darwin/arm64")
}
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
@@ -276,6 +300,9 @@ var _ = Describe("Gallery Backends", func() {
})

It("should handle meta backend deletion correctly with aliases pointing to the same backend", func() {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
Skip("Skipping test on darwin/arm64")
}
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
@@ -401,6 +428,9 @@ var _ = Describe("Gallery Backends", func() {
})

It("should create alias file when specified", func() {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
Skip("Skipping test on darwin/arm64")
}
backend := GalleryBackend{
Metadata: Metadata{
Name: "test-backend",
@@ -295,6 +295,8 @@ var _ = Describe("API test", func() {
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())

backendPath := os.Getenv("BACKENDS_PATH")

modelDir = filepath.Join(tmpdir, "models")
err = os.Mkdir(modelDir, 0750)
Expect(err).ToNot(HaveOccurred())
@@ -337,6 +339,7 @@ var _ = Describe("API test", func() {
config.WithContext(c),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
config.WithBackendsPath(backendPath),
config.WithApiKeys([]string{apiKey}),
config.WithBackendAssets(backendAssets),
config.WithBackendAssetsOutput(backendAssetsDir))...)
@@ -517,6 +520,9 @@ var _ = Describe("API test", func() {
BeforeEach(func() {
var err error
tmpdir, err = os.MkdirTemp("", "")

backendPath := os.Getenv("BACKENDS_PATH")

Expect(err).ToNot(HaveOccurred())
modelDir = filepath.Join(tmpdir, "models")
backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
@@ -540,6 +546,7 @@ var _ = Describe("API test", func() {
append(commonOpts,
config.WithContext(c),
config.WithGeneratedContentDir(tmpdir),
config.WithBackendsPath(backendPath),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
config.WithBackendAssets(backendAssets),
@@ -737,6 +744,7 @@ var _ = Describe("API test", func() {
Context("API query", func() {
BeforeEach(func() {
modelPath := os.Getenv("MODELS_PATH")
backendPath := os.Getenv("BACKENDS_PATH")
c, cancel = context.WithCancel(context.Background())

var err error
@@ -745,6 +753,7 @@ var _ = Describe("API test", func() {
append(commonOpts,
config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithBackendsPath(backendPath),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
@@ -956,6 +965,7 @@ var _ = Describe("API test", func() {
Context("Config file", func() {
BeforeEach(func() {
modelPath := os.Getenv("MODELS_PATH")
backendPath := os.Getenv("BACKENDS_PATH")
c, cancel = context.WithCancel(context.Background())

var err error
@@ -963,6 +973,7 @@ var _ = Describe("API test", func() {
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithBackendsPath(backendPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
@@ -2,6 +2,7 @@ package system

import (
"os"
"runtime"
"strings"

"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -12,7 +13,26 @@ type SystemState struct {
GPUVendor string
}

func (s *SystemState) Capability() string {
const (
defaultCapability = "default"
nvidiaL4T = "nvidia-l4t"
darwinX86 = "darwin-x86"
metal = "metal"
)

func (s *SystemState) Capability(capMap map[string]string) string {
reportedCapability := s.getSystemCapabilities()

// Check if the reported capability is in the map
if _, exists := capMap[reportedCapability]; exists {
return reportedCapability
}

// Otherwise, return the default capability (catch-all)
return defaultCapability
}

func (s *SystemState) getSystemCapabilities() string {
if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY") != "" {
return os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")
}
@@ -32,6 +52,27 @@ func (s *SystemState) Capability() string {
}
}

// If we are on mac and arm64, we will return metal
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
return metal
}

// If we are on mac and x86, we will return darwin-x86
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
return darwinX86
}

// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
if s.GPUVendor == "nvidia" {
return nvidiaL4T
}
}

if s.GPUVendor == "" {
return defaultCapability
}

return s.GPUVendor
}
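Put together, the capability detection above reduces to a small decision table. A sketch under the assumption that only GOOS/GOARCH and the detected GPU vendor matter (the real code also honors LOCALAI_FORCE_META_BACKEND_CAPABILITY and the checks elided by the diff hunk):

```go
package main

import "fmt"

// capabilityFor is a simplified re-statement of getSystemCapabilities:
// Apple Silicon maps to "metal", Intel Macs to "darwin-x86", ARM Linux with
// an NVIDIA GPU to "nvidia-l4t", otherwise the GPU vendor or "default".
func capabilityFor(goos, goarch, gpuVendor string) string {
	switch {
	case goos == "darwin" && goarch == "arm64":
		return "metal"
	case goos == "darwin" && goarch == "amd64":
		return "darwin-x86"
	case goos == "linux" && goarch == "arm64" && gpuVendor == "nvidia":
		return "nvidia-l4t"
	case gpuVendor == "":
		return "default"
	default:
		return gpuVendor
	}
}

func main() {
	fmt.Println(capabilityFor("linux", "amd64", "nvidia")) // nvidia
	fmt.Println(capabilityFor("darwin", "arm64", ""))      // metal
	fmt.Println(capabilityFor("linux", "amd64", ""))       // default
}
```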
go.sum
@@ -177,6 +177,8 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad h1:dQ93Vd6i25o+zH9vvnZ8mu7jtJQ6jT3D+zE3V8Q49n0=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
@@ -505,6 +507,8 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mudler/edgevpn v0.30.1 h1:4yyhNFJX62NpRp50sxiyZE5E/sdAqEZX+aE5Mv7QS60=
github.com/mudler/edgevpn v0.30.1/go.mod h1:IAJkkJ0oH3rwsSGOGTFT4UBYFqYuD/QyaKzTLB3P/eU=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w=
@@ -10,17 +10,19 @@ import (
"strings"
"time"

"github.com/klauspost/cpuid/v2"
grpc "github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"

"github.com/elliotchance/orderedmap/v2"
)

const (
LLamaCPP = "llama-cpp"
)

var Aliases map[string]string = map[string]string{
"go-llama": LLamaCPP,
"llama": LLamaCPP,
@@ -40,22 +42,7 @@ var TypeAlias map[string]string = map[string]string{
"transformers-musicgen": "MusicgenForConditionalGeneration",
}

var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"

const (
LLamaCPP = "llama-cpp"

LLamaCPPAVX2 = "llama-cpp-avx2"
LLamaCPPAVX512 = "llama-cpp-avx512"
LLamaCPPAVX = "llama-cpp-avx"
LLamaCPPFallback = "llama-cpp-fallback"
LLamaCPPCUDA = "llama-cpp-cuda"
LLamaCPPHipblas = "llama-cpp-hipblas"
LLamaCPPSycl16 = "llama-cpp-sycl_16"
LLamaCPPSycl32 = "llama-cpp-sycl_32"

LLamaCPPGRPC = "llama-cpp-grpc"

WhisperBackend = "whisper"
StableDiffusionGGMLBackend = "stablediffusion-ggml"
PiperBackend = "piper"
@@ -65,18 +52,6 @@ const (
LocalStoreBackend = "local-store"
)

var llamaCPPVariants = []string{
LLamaCPPAVX2,
LLamaCPPAVX512,
LLamaCPPAVX,
LLamaCPPFallback,
LLamaCPPCUDA,
LLamaCPPHipblas,
LLamaCPPSycl16,
LLamaCPPSycl32,
LLamaCPPGRPC,
}

func backendPath(assetDir, backend string) string {
return filepath.Join(assetDir, "backend-assets", "grpc", backend)
}
@@ -105,32 +80,9 @@ ENTRY:
continue
}

// Skip the llama.cpp variants if we are autoDetecting
// But we always load the fallback variant if it exists
if strings.Contains(e.Name(), LLamaCPP) && !strings.Contains(e.Name(), LLamaCPPFallback) && AutoDetect {
continue
}

backends[e.Name()] = []string{}
}

// if we are autoDetecting, we want to show the llama.cpp variants as a single backend
if AutoDetect {
// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
// when starting the service
foundVariants := map[string]bool{}
if _, ok := backends[LLamaCPP]; !ok {
for _, e := range entry {
for _, v := range llamaCPPVariants {
if strings.Contains(e.Name(), v) && !foundVariants[v] {
backends[LLamaCPP] = append(backends[LLamaCPP], v)
foundVariants[v] = true
}
}
}
}
}

return backends, nil
}

@@ -140,12 +92,7 @@ func orderBackends(backends map[string][]string) ([]string, error) {
// for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last.

// sets a priority list - first has more priority
priorityList := []string{
// First llama.cpp(variants)
// We keep the fallback to prevent that if the llama.cpp variants
// that depends on shared libs if breaks have still a safety net.
LLamaCPP, LLamaCPPFallback,
}
priorityList := []string{}

toTheEnd := []string{
// last has to be huggingface
@@ -178,108 +125,9 @@ func orderBackends(backends map[string][]string) ([]string, error) {
return orderedBackends.Keys(), nil
}
// selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
// Note: this is now relevant only for llama.cpp
func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {

// Select backend now just for llama.cpp
if backend != LLamaCPP {
return ""
}

// Note: This environment variable is read by the LocalAI's llama.cpp grpc-server
if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
log.Info().Msgf("[%s] attempting to load with GRPC variant", LLamaCPPGRPC)
return backendPath(assetDir, LLamaCPPGRPC)
}

// Check for GPU-binaries that are shipped with single binary releases
gpuBinaries := map[string]string{
"nvidia": LLamaCPPCUDA,
"amd": LLamaCPPHipblas,
"intel": LLamaCPPSycl16,
}

if !f16 {
gpuBinaries["intel"] = LLamaCPPSycl32
}

for vendor, binary := range gpuBinaries {
if xsysinfo.HasGPU(vendor) {
p := backendPath(assetDir, binary)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
return p
}
}
}

// No GPU found or no specific binaries found, try to load the CPU variant(s)

// Select a binary based on availability/capability
selectedProcess := ""

// Check if we have a native build (llama-cpp) and use that
if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
selectedProcess = backendPath(assetDir, LLamaCPPFallback)
}

// Check if we have a native build (llama-cpp) and use that instead
// As a reminder, we do ultimately attempt again with the fallback variant
// If things fail with what we select here
if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
log.Debug().Msgf("[%s] attempting to load with native variant", backend)
selectedProcess = backendPath(assetDir, LLamaCPP)
}

// IF we find any optimized binary, we use that
if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
p := backendPath(assetDir, LLamaCPPAVX512)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
selectedProcess = p
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX2) {
p := backendPath(assetDir, LLamaCPPAVX2)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
selectedProcess = p
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
p := backendPath(assetDir, LLamaCPPAVX)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
selectedProcess = p
}
}

// Safety measure: check if the binary exists otherwise return empty string
if _, err := os.Stat(selectedProcess); err == nil {
return selectedProcess
}

return ""
}

func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
// We failed somehow starting the binary. For instance, could be that we are missing
// some libraries if running in binary-only mode.
// In this case, we attempt to load the model with the fallback variant.

// If not llama-cpp backend, return the error immediately
if backend != LLamaCPP {
return nil, err
}

log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
}

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string, string) (*Model, error) {
return func(modelID, modelName, modelFile string) (*Model, error) {

log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelID, modelFile, backend, *o)
@@ -335,13 +183,6 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
return nil, fmt.Errorf("referring to a backend not in asset dir: %s", err.Error())
}

if autodetect {
// autoDetect GRPC process to start based on system capabilities
if selectedProcess := selectGRPCProcessByHostCapabilities(backend, o.assetDir, o.gRPCOptions.F16Memory); selectedProcess != "" {
grpcProcess = selectedProcess
}
}

// Check if the file exists
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
return nil, fmt.Errorf("backend not found: %s", grpcProcess)
@@ -455,12 +296,9 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
backendToConsume = backend
}

model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, o))
if err != nil {
model, err = attemptLoadingOnFailure(backend, ml, o, err)
if err != nil {
return nil, err
}
return nil, err
}

return model.GRPC(o.parallelRequests, ml.wd), nil
@@ -526,7 +364,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
}

// append externalBackends supplied by the user via the CLI
for _, b := range ml.GetAllExternalBackends(o) {
for b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}
scripts/build-llama-cpp-darwin.sh (new file)
@@ -0,0 +1,52 @@
#!/bin/bash

set -ex

IMAGE_NAME="${IMAGE_NAME:-localai/llama-cpp-darwin}"

pushd backend/cpp/llama-cpp

# make llama-cpp-avx && \
# make llama-cpp-avx2 && \
# make llama-cpp-avx512 && \
make llama-cpp-fallback && \
make llama-cpp-grpc && \
make llama-cpp-rpc-server

popd

mkdir -p build/darwin

# cp -rf backend/cpp/llama-cpp/llama-cpp-avx build/darwin/
# cp -rf backend/cpp/llama-cpp/llama-cpp-avx2 build/darwin/
# cp -rf backend/cpp/llama-cpp/llama-cpp-avx512 build/darwin/
cp -rf backend/cpp/llama-cpp/llama-cpp-fallback build/darwin/
cp -rf backend/cpp/llama-cpp/llama-cpp-grpc build/darwin/
cp -rf backend/cpp/llama-cpp/llama-cpp-rpc-server build/darwin/

for file in build/darwin/*; do
LIBS="$(otool -L $file | awk 'NR > 1 { system("echo " $1) } ' | xargs echo)"

for lib in $LIBS; do
mkdir -p build/darwin/lib
# only libraries ending in dylib
if [[ "$lib" == *.dylib ]]; then
if [ -e "$lib" ]; then
cp -rvf "$lib" build/darwin/lib
fi
fi
done
done

cp -rf backend/cpp/llama-cpp/run.sh build/darwin/

PLATFORMARCH="${PLATFORMARCH:-darwin/arm64}"

./local-ai util create-oci-image \
build/darwin/. \
--output build/darwin.tar \
--image-name $IMAGE_NAME \
--platform $PLATFORMARCH

rm -rf build/darwin
@@ -6,6 +6,7 @@ import (
"os"
"runtime"
"testing"
"time"

"github.com/docker/go-connections/nat"
. "github.com/onsi/ginkgo/v2"
@@ -21,6 +22,7 @@ var client *openai.Client
var containerImage = os.Getenv("LOCALAI_IMAGE")
var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
var backendDir = os.Getenv("LOCALAI_BACKEND_DIR")
var apiEndpoint = os.Getenv("LOCALAI_API_ENDPOINT")
var apiKey = os.Getenv("LOCALAI_API_KEY")

@@ -82,6 +84,12 @@ func startDockerImage() {
Expect(err).To(Not(HaveOccurred()))
md := cwd + "/models"

bd := cwd + "/backends"

if backendDir != "" {
bd = backendDir
}

if modelsDir != "" {
md = modelsDir
}
@@ -99,20 +107,28 @@ func startDockerImage() {
},
Env: map[string]string{
"MODELS_PATH": "/models",
"BACKENDS_PATH": "/backends",
"DEBUG": "true",
"THREADS": fmt.Sprint(proc),
"LOCALAI_SINGLE_ACTIVE_BACKEND": "true",
},
Files: []testcontainers.ContainerFile{
Mounts: testcontainers.ContainerMounts{
{
HostFilePath: md,
ContainerFilePath: "/models",
FileMode: 0o755,
Source: testcontainers.DockerBindMountSource{
HostPath: md,
},
Target: "/models",
},
{
Source: testcontainers.DockerBindMountSource{
HostPath: bd,
},
Target: "/backends",
},
},
WaitingFor: wait.ForAll(
wait.ForListeningPort(nat.Port(defaultApiPort)),
// wait.ForHTTP("/v1/models").WithPort(nat.Port(apiPort)).WithStartupTimeout(50*time.Minute),
wait.ForListeningPort(nat.Port(defaultApiPort)).WithStartupTimeout(10*time.Minute),
wait.ForHTTP("/v1/models").WithPort(nat.Port(defaultApiPort)).WithStartupTimeout(10*time.Minute),
),
}