feat: split whisper from main binary (#5863)

* feat: split whisper from main binary

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Cleanup makefile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backend builds (missing only darwin)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper backend to test runs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make sure we have runtime libs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Less grpc on the main Dockerfile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix hipblas build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add whisper to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Re-enable CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt auto-bumper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-07-20 22:52:45 +02:00
committed by GitHub
parent fab41c29dd
commit b1fc5acd4a
12 changed files with 405 additions and 239 deletions
+97
View File
@@ -745,6 +745,103 @@ jobs:
backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# whisper
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-whisper'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-whisper'
base-image: "rocm/dev-ubuntu-22.04:6.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
+1 -1
View File
@@ -16,7 +16,7 @@ jobs:
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
file: "Makefile"
file: "backend/go/whisper/Makefile"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
+2 -2
View File
@@ -103,7 +103,7 @@ jobs:
make -C backend/python/transformers
make backends/llama-cpp backends/piper backends/stablediffusion-ggml
make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@@ -168,7 +168,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22