feat: split whisper from main binary (#5863)

* feat: split whisper from main binary Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Cleanup makefile Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backend builds (missing only darwin) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add whisper backend to test runs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Make sure we have runtime libs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Less grpc on the main Dockerfile Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix hipblas build Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add whisper to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Re-enable CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt auto-bumper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-03 16:49:45 -05:00 · 2025-07-20 22:52:45 +02:00
parent fab41c29dd
commit b1fc5acd4a
12 changed files with 405 additions and 239 deletions
@@ -745,6 +745,103 @@ jobs:
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.go"
            context: "./"
+          # whisper
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-cpu-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-12-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "11"
+            cuda-minor-version: "7"
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f32-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-intel-sycl-f16-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'vulkan'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'true'
+            tag-suffix: '-gpu-vulkan-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'true'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-arm64-whisper'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            runs-on: 'ubuntu-24.04-arm'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-hipblas-whisper'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            runs-on: 'ubuntu-latest'
+            skip-drivers: 'false'
+            backend: "whisper"
+            dockerfile: "./backend/Dockerfile.go"
+            context: "./"
  llama-cpp-darwin:
    runs-on: macOS-14
    strategy:
@@ -16,7 +16,7 @@ jobs:
          - repository: "ggml-org/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
-            file: "Makefile"
+            file: "backend/go/whisper/Makefile"
          - repository: "PABannier/bark.cpp"
            variable: "BARKCPP_VERSION"
            branch: "main"
@@ -103,7 +103,7 @@ jobs:

          make -C backend/python/transformers

-          make backends/llama-cpp backends/piper backends/stablediffusion-ggml
+          make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
        env:
          CUDA_VERSION: 12-4
      - name: Test
@@ -168,7 +168,7 @@ jobs:
          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Test
        run: |
-            PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+            PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.22