diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 5a0a6b32e..cd35837af 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -2,6 +2,7 @@ name: 'build backend container images' on: + pull_request: push: branches: - master @@ -13,1161 +14,1174 @@ concurrency: cancel-in-progress: true jobs: - backend-jobs: - uses: ./.github/workflows/backend_build.yml - with: - tag-latest: ${{ matrix.tag-latest }} - tag-suffix: ${{ matrix.tag-suffix }} - build-type: ${{ matrix.build-type }} - cuda-major-version: ${{ matrix.cuda-major-version }} - cuda-minor-version: ${{ matrix.cuda-minor-version }} - platforms: ${{ matrix.platforms }} - runs-on: ${{ matrix.runs-on }} - base-image: ${{ matrix.base-image }} - backend: ${{ matrix.backend }} - dockerfile: ${{ matrix.dockerfile }} - skip-drivers: ${{ matrix.skip-drivers }} - context: ${{ matrix.context }} - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - strategy: - fail-fast: false - #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} - matrix: - include: - # CUDA 11 builds - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'l4t' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-l4t-diffusers' - runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - skip-drivers: 'true' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-cpu-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'true' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # CUDA 11 additional backends - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-kokoro' - runs-on: 'ubuntu-latest' - 
base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # CUDA 12 builds - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-vllm' - runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "vllm" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # CUDA 12 additional backends - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # hipblas builds - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-rerankers' - runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-vllm' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "vllm" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-transformers' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-diffusers' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # ROCm additional backends - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-kokoro' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - 
context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-coqui' - runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-rocm-hipblas-bark' - runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # sycl builds - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-rerankers' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'sycl_f32' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'sycl_f16' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-vllm' - runs-on: 'arc-runner-set' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "vllm" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-transformers' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-diffusers' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # SYCL additional backends - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-kokoro' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - 
backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-coqui' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-bark' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # piper - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-piper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "piper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # bark-cpp - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-bark-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark-cpp" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-nvidia-l4t-arm64-llama-cpp' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - - build-type: 'vulkan' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-vulkan-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - # Stablediffusion-ggml - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-cpu-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - 
cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'sycl_f32' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'sycl_f16' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'vulkan' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-vulkan-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # whisper - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'sycl_f32' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f32-whisper' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'sycl_f16' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-sycl-f16-whisper' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - 
build-type: 'vulkan' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-vulkan-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-nvidia-l4t-arm64-whisper' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-hipblas-whisper' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - runs-on: 'ubuntu-latest' - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - #silero-vad - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-silero-vad' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "silero-vad" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # local-store - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-local-store' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "local-store" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # huggingface - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-huggingface' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "huggingface" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - # rfdetr - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-cpu-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: 
'-nvidia-l4t-arm64-rfdetr' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'ubuntu-24.04-arm' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # exllama2 - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-cpu-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'intel' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-intel-exllama2' - runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - - build-type: 'hipblas' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64' - skip-drivers: 'true' - tag-latest: 'auto' - tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - runs-on: 'ubuntu-latest' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - # runs out of space on the runner - # - build-type: 'hipblas' - # cuda-major-version: "" - # cuda-minor-version: "" - # platforms: 'linux/amd64' - # tag-latest: 'auto' - # tag-suffix: '-gpu-hipblas-rfdetr' - # base-image: "rocm/dev-ubuntu-22.04:6.4.3" - # runs-on: 'ubuntu-latest' - # skip-drivers: 'false' - # backend: "rfdetr" - # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" - # kitten-tts - - build-type: '' - cuda-major-version: "" - cuda-minor-version: "" - platforms: 'linux/amd64,linux/arm64' - tag-latest: 'auto' - tag-suffix: '-kitten-tts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kitten-tts" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - diffusers-darwin: + # backend-jobs: + # uses: ./.github/workflows/backend_build.yml + # with: + # tag-latest: ${{ matrix.tag-latest }} + # tag-suffix: ${{ matrix.tag-suffix }} + # build-type: ${{ matrix.build-type }} + # cuda-major-version: ${{ matrix.cuda-major-version }} + # cuda-minor-version: ${{ matrix.cuda-minor-version }} + # platforms: ${{ matrix.platforms }} + # runs-on: ${{ matrix.runs-on }} + # base-image: ${{ matrix.base-image }} + # backend: ${{ matrix.backend }} + # dockerfile: ${{ matrix.dockerfile }} + # skip-drivers: ${{ matrix.skip-drivers }} + # context: ${{ matrix.context }} + # secrets: + # dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + # strategy: + # fail-fast: false + # #max-parallel: ${{ 
github.event_name != 'pull_request' && 6 || 4 }} + # matrix: + # include: + # # CUDA 11 builds + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-transformers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-diffusers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'l4t' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-l4t-diffusers' + # runs-on: 'ubuntu-24.04-arm' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # skip-drivers: 'true' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-diffusers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'true' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # CUDA 11 additional backends + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-kokoro' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # 
tag-suffix: '-gpu-nvidia-cuda-11-bark' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "chatterbox" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # CUDA 12 builds + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-vllm' + # runs-on: 'arc-runner-set' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "vllm" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-transformers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-diffusers' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # CUDA 12 additional backends + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-kokoro' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: 
"./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-bark' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "chatterbox" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # hipblas builds + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-vllm' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "vllm" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-transformers' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-diffusers' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # ROCm additional backends + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-kokoro' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-faster-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 
'auto' + # tag-suffix: '-gpu-rocm-hipblas-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-rocm-hipblas-bark' + # runs-on: 'arc-runner-set' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # sycl builds + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-rerankers' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "rerankers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'sycl_f32' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'sycl_f16' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-vllm' + # runs-on: 'arc-runner-set' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "vllm" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-transformers' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "transformers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-diffusers' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "diffusers" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # SYCL additional backends + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-kokoro' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "kokoro" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-faster-whisper' + # runs-on: 'ubuntu-latest' + 
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "faster-whisper" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-coqui' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "coqui" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-bark' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "bark" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # piper + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-piper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "piper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # bark-cpp + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-bark-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "bark-cpp" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-nvidia-l4t-arm64-llama-cpp' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'ubuntu-24.04-arm' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # - build-type: 'vulkan' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-vulkan-llama-cpp' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "llama-cpp" + # dockerfile: "./backend/Dockerfile.llama-cpp" + # context: "./" + # # Stablediffusion-ggml + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: 
"7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'sycl_f32' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'sycl_f16' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'vulkan' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-vulkan-stablediffusion-ggml' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'ubuntu-24.04-arm' + # backend: "stablediffusion-ggml" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # whisper + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'sycl_f32' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f32-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'sycl_f16' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-sycl-f16-whisper' + # runs-on: 'ubuntu-latest' 
+ # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'vulkan' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-vulkan-whisper' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-nvidia-l4t-arm64-whisper' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'ubuntu-24.04-arm' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-hipblas-whisper' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # runs-on: 'ubuntu-latest' + # skip-drivers: 'false' + # backend: "whisper" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # #silero-vad + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-silero-vad' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "silero-vad" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # local-store + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-local-store' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "local-store" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # huggingface + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-huggingface' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "huggingface" + # dockerfile: "./backend/Dockerfile.golang" + # context: "./" + # # rfdetr + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-rfdetr' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rfdetr" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rfdetr" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "rfdetr" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # 
tag-latest: 'auto' + # tag-suffix: '-gpu-intel-rfdetr' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "rfdetr" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-nvidia-l4t-arm64-rfdetr' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'ubuntu-24.04-arm' + # backend: "rfdetr" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # exllama2 + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-cpu-exllama2' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "exllama2" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "exllama2" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'cublas' + # cuda-major-version: "11" + # cuda-minor-version: "7" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-nvidia-cuda-11-exllama2' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "exllama2" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'intel' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # tag-latest: 'auto' + # tag-suffix: '-gpu-intel-exllama2' + # runs-on: 'ubuntu-latest' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # skip-drivers: 'false' + # backend: "exllama2" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # - build-type: 'hipblas' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64' + # skip-drivers: 'true' + # tag-latest: 'auto' + # tag-suffix: '-gpu-hipblas-exllama2' + # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # runs-on: 'ubuntu-latest' + # backend: "exllama2" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # # runs out of space on the runner + # # - build-type: 'hipblas' + # # cuda-major-version: "" + # # cuda-minor-version: "" + # # platforms: 'linux/amd64' + # # tag-latest: 'auto' + # # tag-suffix: '-gpu-hipblas-rfdetr' + # # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # # runs-on: 'ubuntu-latest' + # # skip-drivers: 'false' + # # backend: "rfdetr" + # # dockerfile: "./backend/Dockerfile.python" + # # context: "./backend" + # # kitten-tts + # - build-type: '' + # cuda-major-version: "" + # cuda-minor-version: "" + # platforms: 'linux/amd64,linux/arm64' + # tag-latest: 'auto' + # tag-suffix: '-kitten-tts' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # skip-drivers: 'false' + # backend: "kitten-tts" + # dockerfile: "./backend/Dockerfile.python" + # context: "./backend" + # diffusers-darwin: + # uses: ./.github/workflows/backend_build_darwin.yml + # with: + # backend: "diffusers" + # build-type: "mps" + # go-version: "1.24.x" + # tag-suffix: "-metal-darwin-arm64-diffusers" + # use-pip: true + # runs-on: "macOS-14" + # secrets: + # 
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + # mlx-darwin: + # uses: ./.github/workflows/backend_build_darwin.yml + # with: + # backend: "mlx" + # build-type: "mps" + # go-version: "1.24.x" + # tag-suffix: "-metal-darwin-arm64-mlx" + # runs-on: "macOS-14" + # secrets: + # dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + # mlx-vlm-darwin: + # uses: ./.github/workflows/backend_build_darwin.yml + # with: + # backend: "mlx-vlm" + # build-type: "mps" + # go-version: "1.24.x" + # tag-suffix: "-metal-darwin-arm64-mlx-vlm" + # runs-on: "macOS-14" + # secrets: + # dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + mlx-audio-darwin: uses: ./.github/workflows/backend_build_darwin.yml with: - backend: "diffusers" + backend: "mlx-audio" build-type: "mps" go-version: "1.24.x" - tag-suffix: "-metal-darwin-arm64-diffusers" - use-pip: true + tag-suffix: "-metal-darwin-arm64-mlx-audio" runs-on: "macOS-14" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - mlx-darwin: - uses: ./.github/workflows/backend_build_darwin.yml - with: - backend: "mlx" - build-type: "mps" - go-version: "1.24.x" - tag-suffix: "-metal-darwin-arm64-mlx" - runs-on: "macOS-14" - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - mlx-vlm-darwin: - uses: ./.github/workflows/backend_build_darwin.yml - with: - backend: "mlx-vlm" - build-type: "mps" - go-version: "1.24.x" - tag-suffix: "-metal-darwin-arm64-mlx-vlm" - runs-on: "macOS-14" - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - llama-cpp-darwin: - runs-on: macOS-14 - strategy: - matrix: - go-version: ['1.21.x'] - steps: - - name: Clone - uses: actions/checkout@v5 - with: - submodules: true - - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go-version }} - cache: false - # You can test your matrix by printing the current Go version - - name: Display Go version - run: go version - - name: Dependencies - run: | - brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - - name: Build llama-cpp-darwin - run: | - make protogen-go - make backends/llama-cpp-darwin - - name: Upload llama-cpp.tar - uses: actions/upload-artifact@v4 - with: - name: llama-cpp-tar - path: backend-images/llama-cpp.tar - llama-cpp-darwin-publish: - needs: llama-cpp-darwin - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - steps: - - name: Download llama-cpp.tar - uses: actions/download-artifact@v5 - with: - name: llama-cpp-tar - path: . 
- - name: Install crane - run: | - curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz - sudo mv crane /usr/local/bin/ - - name: Log in to DockerHub - run: | - echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin - - name: Log in to quay.io - run: | - echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: | - localai/localai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - - name: Docker meta - id: quaymeta - uses: docker/metadata-action@v5 - with: - images: | - quay.io/go-skynet/local-ai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-metal-darwin-arm64-llama-cpp,onlatest=true - - name: Push Docker image (DockerHub) - run: | - for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done - - name: Push Docker image (Quay) - run: | - for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done - llama-cpp-darwin-x86: - runs-on: macos-13 - strategy: - matrix: - go-version: ['1.21.x'] - steps: - - name: Clone - uses: actions/checkout@v5 - with: - submodules: true - - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go-version }} - cache: false - # You can test your matrix by printing the current Go version - - name: Display Go version - run: go version - - name: Dependencies - run: | - brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - - name: Build llama-cpp-darwin - run: | - make protogen-go - make build - export PLATFORMARCH=darwin/amd64 - make backends/llama-cpp-darwin - - name: Upload llama-cpp.tar - uses: actions/upload-artifact@v4 - with: - name: llama-cpp-tar-x86 - path: backend-images/llama-cpp.tar - llama-cpp-darwin-x86-publish: - if: github.event_name != 'pull_request' - needs: llama-cpp-darwin-x86 - runs-on: ubuntu-latest - steps: - - name: Download llama-cpp.tar - uses: actions/download-artifact@v5 - with: - name: llama-cpp-tar-x86 - path: . 
- - name: Install crane - run: | - curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz - sudo mv crane /usr/local/bin/ - - name: Log in to DockerHub - run: | - echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin - - name: Log in to quay.io - run: | - echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: | - localai/localai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-darwin-x86-llama-cpp,onlatest=true - - name: Docker meta - id: quaymeta - uses: docker/metadata-action@v5 - with: - images: | - quay.io/go-skynet/local-ai-backends - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - type=sha - flavor: | - latest=auto - suffix=-darwin-x86-llama-cpp,onlatest=true - - name: Push Docker image (DockerHub) - run: | - for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done - - name: Push Docker image (Quay) - run: | - for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do - crane push llama-cpp.tar $tag - done + # llama-cpp-darwin: + # runs-on: macOS-14 + # strategy: + # matrix: + # go-version: ['1.21.x'] + # steps: + # - name: Clone + # uses: actions/checkout@v5 + # with: + # submodules: true + # - name: Setup Go ${{ matrix.go-version }} + # uses: actions/setup-go@v5 + # with: + # go-version: ${{ matrix.go-version }} + # cache: false + # # You can test your matrix by printing the current Go version + # - name: Display Go version + # run: go version + # - name: Dependencies + # run: | + # brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm + # - name: Build llama-cpp-darwin + # run: | + # make protogen-go + # make backends/llama-cpp-darwin + # - name: Upload llama-cpp.tar + # uses: actions/upload-artifact@v4 + # with: + # name: llama-cpp-tar + # path: backend-images/llama-cpp.tar + # llama-cpp-darwin-publish: + # needs: llama-cpp-darwin + # if: github.event_name != 'pull_request' + # runs-on: ubuntu-latest + # steps: + # - name: Download llama-cpp.tar + # uses: actions/download-artifact@v5 + # with: + # name: llama-cpp-tar + # path: . 
+ # - name: Install crane + # run: | + # curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz + # sudo mv crane /usr/local/bin/ + # - name: Log in to DockerHub + # run: | + # echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin + # - name: Log in to quay.io + # run: | + # echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin + # - name: Docker meta + # id: meta + # uses: docker/metadata-action@v5 + # with: + # images: | + # localai/localai-backends + # tags: | + # type=ref,event=branch + # type=semver,pattern={{raw}} + # type=sha + # flavor: | + # latest=auto + # suffix=-metal-darwin-arm64-llama-cpp,onlatest=true + # - name: Docker meta + # id: quaymeta + # uses: docker/metadata-action@v5 + # with: + # images: | + # quay.io/go-skynet/local-ai-backends + # tags: | + # type=ref,event=branch + # type=semver,pattern={{raw}} + # type=sha + # flavor: | + # latest=auto + # suffix=-metal-darwin-arm64-llama-cpp,onlatest=true + # - name: Push Docker image (DockerHub) + # run: | + # for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do + # crane push llama-cpp.tar $tag + # done + # - name: Push Docker image (Quay) + # run: | + # for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do + # crane push llama-cpp.tar $tag + # done + # llama-cpp-darwin-x86: + # runs-on: macos-13 + # strategy: + # matrix: + # go-version: ['1.21.x'] + # steps: + # - name: Clone + # uses: actions/checkout@v5 + # with: + # submodules: true + # - name: Setup Go ${{ matrix.go-version }} + # uses: actions/setup-go@v5 + # with: + # go-version: ${{ matrix.go-version }} + # cache: false + # # You can test your matrix by printing the current Go version + # - name: Display Go version + # run: go version + # - name: Dependencies + # run: | + # brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm + # - name: Build llama-cpp-darwin + # run: | + # make protogen-go + # make build + # export PLATFORMARCH=darwin/amd64 + # make backends/llama-cpp-darwin + # - name: Upload llama-cpp.tar + # uses: actions/upload-artifact@v4 + # with: + # name: llama-cpp-tar-x86 + # path: backend-images/llama-cpp.tar + # llama-cpp-darwin-x86-publish: + # if: github.event_name != 'pull_request' + # needs: llama-cpp-darwin-x86 + # runs-on: ubuntu-latest + # steps: + # - name: Download llama-cpp.tar + # uses: actions/download-artifact@v5 + # with: + # name: llama-cpp-tar-x86 + # path: . 
+ # - name: Install crane + # run: | + # curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz + # sudo mv crane /usr/local/bin/ + # - name: Log in to DockerHub + # run: | + # echo "${{ secrets.DOCKERHUB_PASSWORD }}" | crane auth login docker.io -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin + # - name: Log in to quay.io + # run: | + # echo "${{ secrets.LOCALAI_REGISTRY_PASSWORD }}" | crane auth login quay.io -u "${{ secrets.LOCALAI_REGISTRY_USERNAME }}" --password-stdin + # - name: Docker meta + # id: meta + # uses: docker/metadata-action@v5 + # with: + # images: | + # localai/localai-backends + # tags: | + # type=ref,event=branch + # type=semver,pattern={{raw}} + # type=sha + # flavor: | + # latest=auto + # suffix=-darwin-x86-llama-cpp,onlatest=true + # - name: Docker meta + # id: quaymeta + # uses: docker/metadata-action@v5 + # with: + # images: | + # quay.io/go-skynet/local-ai-backends + # tags: | + # type=ref,event=branch + # type=semver,pattern={{raw}} + # type=sha + # flavor: | + # latest=auto + # suffix=-darwin-x86-llama-cpp,onlatest=true + # - name: Push Docker image (DockerHub) + # run: | + # for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do + # crane push llama-cpp.tar $tag + # done + # - name: Push Docker image (Quay) + # run: | + # for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do + # crane push llama-cpp.tar $tag + # done diff --git a/Makefile b/Makefile index a36bde58b..8657acdfa 100644 --- a/Makefile +++ b/Makefile @@ -388,6 +388,10 @@ backends/mlx-vlm: BACKEND=mlx-vlm $(MAKE) build-darwin-python-backend ./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-vlm.tar)" +backends/mlx-audio: + BACKEND=mlx-audio $(MAKE) build-darwin-python-backend + ./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-audio.tar)" + backend-images: mkdir -p backend-images diff --git a/backend/index.yaml b/backend/index.yaml index 960cf3aec..cc32ac88e 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -159,6 +159,23 @@ - vision-language - LLM - MLX +- &mlx-audio + name: "mlx-audio" + uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-audio" + icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4 + urls: + - https://github.com/Blaizzy/mlx-audio + mirrors: + - localai/localai-backends:latest-metal-darwin-arm64-mlx-audio + license: MIT + description: | + Run Audio Models with MLX + tags: + - audio-to-text + - audio-generation + - text-to-audio + - LLM + - MLX - &rerankers name: "rerankers" alias: "rerankers" @@ -415,6 +432,11 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-vlm" mirrors: - localai/localai-backends:master-metal-darwin-arm64-mlx-vlm +- !!merge <<: *mlx-audio + name: "mlx-audio-development" + uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-audio" + mirrors: + - localai/localai-backends:master-metal-darwin-arm64-mlx-audio - !!merge <<: *kitten-tts name: "kitten-tts-development" uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts" diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index f7536b8df..3c6c3909b 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -384,6 +384,11 @@ function installRequirements() { requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt") fi + # This is needed to build wheels that e.g. 
depends on Python.h + if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then + export C_INCLUDE_PATH="${C_INCLUDE_PATH:-}:$(_portable_dir)/include/python${PYTHON_VERSION}" + fi + for reqFile in ${requirementFiles[@]}; do if [ -f "${reqFile}" ]; then echo "starting requirements install for ${reqFile}" diff --git a/backend/python/mlx-audio/Makefile b/backend/python/mlx-audio/Makefile new file mode 100644 index 000000000..bb7aabe3a --- /dev/null +++ b/backend/python/mlx-audio/Makefile @@ -0,0 +1,23 @@ +.PHONY: mlx-audio +mlx-audio: + bash install.sh + +.PHONY: run +run: mlx-audio + @echo "Running mlx-audio..." + bash run.sh + @echo "mlx run." + +.PHONY: test +test: mlx-audio + @echo "Testing mlx-audio..." + bash test.sh + @echo "mlx tested." + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +.PHONY: clean +clean: protogen-clean + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/mlx-audio/backend.py b/backend/python/mlx-audio/backend.py new file mode 100644 index 000000000..a098b8872 --- /dev/null +++ b/backend/python/mlx-audio/backend.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +import asyncio +from concurrent import futures +import argparse +import signal +import sys +import os +import shutil +import glob +from typing import List +import time +import tempfile + +import backend_pb2 +import backend_pb2_grpc + +import grpc +from mlx_audio.tts.utils import load_model +import soundfile as sf +import numpy as np +import uuid + +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + +# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 +MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) + +# Implement the BackendServicer class with the service methods +class BackendServicer(backend_pb2_grpc.BackendServicer): + """ + A gRPC servicer that implements the Backend service defined in backend.proto. + This backend provides TTS (Text-to-Speech) functionality using MLX-Audio. + """ + + def _is_float(self, s): + """Check if a string can be converted to float.""" + try: + float(s) + return True + except ValueError: + return False + + def _is_int(self, s): + """Check if a string can be converted to int.""" + try: + int(s) + return True + except ValueError: + return False + + def Health(self, request, context): + """ + Returns a health check message. + + Args: + request: The health check request. + context: The gRPC context. + + Returns: + backend_pb2.Reply: The health check reply. + """ + return backend_pb2.Reply(message=bytes("OK", 'utf-8')) + + async def LoadModel(self, request, context): + """ + Loads a TTS model using MLX-Audio. + + Args: + request: The load model request. + context: The gRPC context. + + Returns: + backend_pb2.Result: The load model result. 
+ """ + try: + print(f"Loading MLX-Audio TTS model: {request.Model}", file=sys.stderr) + print(f"Request: {request}", file=sys.stderr) + + # Parse options like in the kokoro backend + options = request.Options + self.options = {} + + # The options are a list of strings in this form optname:optvalue + # We store all the options in a dict for later use + for opt in options: + if ":" not in opt: + continue + key, value = opt.split(":", 1) # Split only on first colon to handle values with colons + + # Convert numeric values to appropriate types + if self._is_float(value): + value = float(value) + elif self._is_int(value): + value = int(value) + elif value.lower() in ["true", "false"]: + value = value.lower() == "true" + + self.options[key] = value + + print(f"Options: {self.options}", file=sys.stderr) + + # Load the model using MLX-Audio's load_model function + try: + self.tts_model = load_model(request.Model) + self.model_path = request.Model + print(f"TTS model loaded successfully from {request.Model}", file=sys.stderr) + except Exception as model_err: + print(f"Error loading TTS model: {model_err}", file=sys.stderr) + return backend_pb2.Result(success=False, message=f"Failed to load model: {model_err}") + + except Exception as err: + print(f"Error loading MLX-Audio TTS model {err=}, {type(err)=}", file=sys.stderr) + return backend_pb2.Result(success=False, message=f"Error loading MLX-Audio TTS model: {err}") + + print("MLX-Audio TTS model loaded successfully", file=sys.stderr) + return backend_pb2.Result(message="MLX-Audio TTS model loaded successfully", success=True) + + def TTS(self, request, context): + """ + Generates TTS audio from text using MLX-Audio. + + Args: + request: A TTSRequest object containing text, model, destination, voice, and language. + context: A grpc.ServicerContext object that provides information about the RPC. + + Returns: + A Result object indicating success or failure. + """ + try: + # Check if model is loaded + if not hasattr(self, 'tts_model') or self.tts_model is None: + return backend_pb2.Result(success=False, message="TTS model not loaded. 
Please call LoadModel first.") + + print(f"Generating TTS with MLX-Audio - text: {request.text[:50]}..., voice: {request.voice}, language: {request.language}", file=sys.stderr) + + # Handle speed parameter based on model type + speed_value = self._handle_speed_parameter(request, self.model_path) + + # Map language names to codes if needed + lang_code = self._map_language_code(request.language, request.voice) + + # Prepare generation parameters + gen_params = { + "text": request.text, + "speed": speed_value, + "verbose": False, + } + + # Add model-specific parameters + if request.voice and request.voice.strip(): + gen_params["voice"] = request.voice + + # Check if model supports language codes (primarily Kokoro) + if "kokoro" in self.model_path.lower(): + gen_params["lang_code"] = lang_code + + # Add pitch and gender for Spark models + if "spark" in self.model_path.lower(): + gen_params["pitch"] = 1.0 # Default to moderate + gen_params["gender"] = "female" # Default to female + + print(f"Generation parameters: {gen_params}", file=sys.stderr) + + # Generate audio using the loaded model + try: + results = self.tts_model.generate(**gen_params) + except Exception as gen_err: + print(f"Error during TTS generation: {gen_err}", file=sys.stderr) + return backend_pb2.Result(success=False, message=f"TTS generation failed: {gen_err}") + + # Process the generated audio segments + audio_arrays = [] + for segment in results: + audio_arrays.append(segment.audio) + + # If no segments, return error + if not audio_arrays: + print("No audio segments generated", file=sys.stderr) + return backend_pb2.Result(success=False, message="No audio generated") + + # Concatenate all segments + cat_audio = np.concatenate(audio_arrays, axis=0) + + # Generate output filename and path + if request.dst: + output_path = request.dst + else: + unique_id = str(uuid.uuid4()) + filename = f"tts_{unique_id}.wav" + output_path = filename + + # Write the audio as a WAV + try: + sf.write(output_path, cat_audio, 24000) + print(f"Successfully wrote audio file to {output_path}", file=sys.stderr) + + # Verify the file exists and has content + if not os.path.exists(output_path): + print(f"File was not created at {output_path}", file=sys.stderr) + return backend_pb2.Result(success=False, message="Failed to create audio file") + + file_size = os.path.getsize(output_path) + if file_size == 0: + print("File was created but is empty", file=sys.stderr) + return backend_pb2.Result(success=False, message="Generated audio file is empty") + + print(f"Audio file size: {file_size} bytes", file=sys.stderr) + + except Exception as write_err: + print(f"Error writing audio file: {write_err}", file=sys.stderr) + return backend_pb2.Result(success=False, message=f"Failed to save audio: {write_err}") + + return backend_pb2.Result(success=True, message=f"TTS audio generated successfully: {output_path}") + + except Exception as e: + print(f"Error in MLX-Audio TTS: {e}", file=sys.stderr) + return backend_pb2.Result(success=False, message=f"TTS generation failed: {str(e)}") + + async def Predict(self, request, context): + """ + Generates TTS audio based on the given prompt using MLX-Audio TTS. + This is a fallback method for compatibility with the Predict endpoint. + + Args: + request: The predict request. + context: The gRPC context. + + Returns: + backend_pb2.Reply: The predict result. 
+ """ + try: + # Check if model is loaded + if not hasattr(self, 'tts_model') or self.tts_model is None: + context.set_code(grpc.StatusCode.FAILED_PRECONDITION) + context.set_details("TTS model not loaded. Please call LoadModel first.") + return backend_pb2.Reply(message=bytes("", encoding='utf-8')) + + # For TTS, we expect the prompt to contain the text to synthesize + if not request.Prompt: + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details("Prompt is required for TTS generation") + return backend_pb2.Reply(message=bytes("", encoding='utf-8')) + + # Handle speed parameter based on model type + speed_value = self._handle_speed_parameter(request, self.model_path) + + # Map language names to codes if needed + lang_code = self._map_language_code(None, None) # Use defaults for Predict + + # Prepare generation parameters + gen_params = { + "text": request.Prompt, + "speed": speed_value, + "verbose": False, + } + + # Add model-specific parameters + if hasattr(self, 'options') and 'voice' in self.options: + gen_params["voice"] = self.options['voice'] + + # Check if model supports language codes (primarily Kokoro) + if "kokoro" in self.model_path.lower(): + gen_params["lang_code"] = lang_code + + print(f"Generating TTS with MLX-Audio - text: {request.Prompt[:50]}..., params: {gen_params}", file=sys.stderr) + + # Generate audio using the loaded model + try: + results = self.tts_model.generate(**gen_params) + except Exception as gen_err: + print(f"Error during TTS generation: {gen_err}", file=sys.stderr) + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"TTS generation failed: {gen_err}") + return backend_pb2.Reply(message=bytes("", encoding='utf-8')) + + # Process the generated audio segments + audio_arrays = [] + for segment in results: + audio_arrays.append(segment.audio) + + # If no segments, return error + if not audio_arrays: + print("No audio segments generated", file=sys.stderr) + return backend_pb2.Reply(message=bytes("No audio generated", encoding='utf-8')) + + # Concatenate all segments + cat_audio = np.concatenate(audio_arrays, axis=0) + duration = len(cat_audio) / 24000 # Assuming 24kHz sample rate + + # Return success message with audio information + response = f"TTS audio generated successfully. Duration: {duration:.2f}s, Sample rate: 24000Hz" + return backend_pb2.Reply(message=bytes(response, encoding='utf-8')) + + except Exception as e: + print(f"Error in MLX-Audio TTS Predict: {e}", file=sys.stderr) + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"TTS generation failed: {str(e)}") + return backend_pb2.Reply(message=bytes("", encoding='utf-8')) + + def _handle_speed_parameter(self, request, model_path): + """ + Handle speed parameter based on model type. + + Args: + request: The TTSRequest object. + model_path: The model path to determine model type. + + Returns: + float: The processed speed value. 
+ """ + # Get speed from options if available + speed = 1.0 + if hasattr(self, 'options') and 'speed' in self.options: + speed = self.options['speed'] + + # Handle speed parameter based on model type + if "spark" in model_path.lower(): + # Spark actually expects float values that map to speed descriptions + speed_map = { + "very_low": 0.0, + "low": 0.5, + "moderate": 1.0, + "high": 1.5, + "very_high": 2.0, + } + if isinstance(speed, str) and speed in speed_map: + speed_value = speed_map[speed] + else: + # Try to use as float, default to 1.0 (moderate) if invalid + try: + speed_value = float(speed) + if speed_value not in [0.0, 0.5, 1.0, 1.5, 2.0]: + speed_value = 1.0 # Default to moderate + except: + speed_value = 1.0 # Default to moderate + else: + # Other models use float speed values + try: + speed_value = float(speed) + if speed_value < 0.5 or speed_value > 2.0: + speed_value = 1.0 # Default to 1.0 if out of range + except ValueError: + speed_value = 1.0 # Default to 1.0 if invalid + + return speed_value + + def _map_language_code(self, language, voice): + """ + Map language names to codes if needed. + + Args: + language: The language parameter from the request. + voice: The voice parameter from the request. + + Returns: + str: The language code. + """ + if not language: + # Default to voice[0] if not found + return voice[0] if voice else "a" + + # Map language names to codes if needed + language_map = { + "american_english": "a", + "british_english": "b", + "spanish": "e", + "french": "f", + "hindi": "h", + "italian": "i", + "portuguese": "p", + "japanese": "j", + "mandarin_chinese": "z", + # Also accept direct language codes + "a": "a", "b": "b", "e": "e", "f": "f", "h": "h", "i": "i", "p": "p", "j": "j", "z": "z", + } + + return language_map.get(language.lower(), language) + + def _build_generation_params(self, request, default_speed=1.0): + """ + Build generation parameters from request attributes and options for MLX-Audio TTS. + + Args: + request: The gRPC request. + default_speed: Default speed if not specified. 
+ + Returns: + dict: Generation parameters for MLX-Audio + """ + # Initialize generation parameters for MLX-Audio TTS + generation_params = { + 'speed': default_speed, + 'voice': 'af_heart', # Default voice + 'lang_code': 'a', # Default language code + } + + # Extract parameters from request attributes + if hasattr(request, 'Temperature') and request.Temperature > 0: + # Temperature could be mapped to speed variation + generation_params['speed'] = 1.0 + (request.Temperature - 0.5) * 0.5 + + # Override with options if available + if hasattr(self, 'options'): + # Speed from options + if 'speed' in self.options: + generation_params['speed'] = self.options['speed'] + + # Voice from options + if 'voice' in self.options: + generation_params['voice'] = self.options['voice'] + + # Language code from options + if 'lang_code' in self.options: + generation_params['lang_code'] = self.options['lang_code'] + + # Model-specific parameters + param_option_mapping = { + 'temp': 'speed', + 'temperature': 'speed', + 'top_p': 'speed', # Map top_p to speed variation + } + + for option_key, param_key in param_option_mapping.items(): + if option_key in self.options: + if param_key == 'speed': + # Ensure speed is within reasonable bounds + speed_val = float(self.options[option_key]) + if 0.5 <= speed_val <= 2.0: + generation_params[param_key] = speed_val + + return generation_params + +async def serve(address): + # Start asyncio gRPC server + server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS), + options=[ + ('grpc.max_message_length', 50 * 1024 * 1024), # 50MB + ('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB + ('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB + ]) + # Add the servicer to the server + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + # Bind the server to the address + server.add_insecure_port(address) + + # Gracefully shutdown the server on SIGTERM or SIGINT + loop = asyncio.get_event_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler( + sig, lambda: asyncio.ensure_future(server.stop(5)) + ) + + # Start the server + await server.start() + print("MLX-Audio TTS Server started. Listening on: " + address, file=sys.stderr) + # Wait for the server to be terminated + await server.wait_for_termination() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the MLX-Audio TTS gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." 
+ ) + args = parser.parse_args() + + asyncio.run(serve(args.addr)) diff --git a/backend/python/mlx-audio/install.sh b/backend/python/mlx-audio/install.sh new file mode 100755 index 000000000..b8ee48552 --- /dev/null +++ b/backend/python/mlx-audio/install.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +USE_PIP=true + +backend_dir=$(dirname $0) + +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +installRequirements diff --git a/backend/python/mlx-audio/requirements-mps.txt b/backend/python/mlx-audio/requirements-mps.txt new file mode 100644 index 000000000..31df2a190 --- /dev/null +++ b/backend/python/mlx-audio/requirements-mps.txt @@ -0,0 +1 @@ +git+https://github.com/Blaizzy/mlx-audio \ No newline at end of file diff --git a/backend/python/mlx-audio/requirements.txt b/backend/python/mlx-audio/requirements.txt new file mode 100644 index 000000000..5f47f0cfd --- /dev/null +++ b/backend/python/mlx-audio/requirements.txt @@ -0,0 +1,7 @@ +grpcio==1.71.0 +protobuf +certifi +setuptools +mlx-audio +soundfile +numpy \ No newline at end of file diff --git a/backend/python/mlx-audio/run.sh b/backend/python/mlx-audio/run.sh new file mode 100755 index 000000000..fc88f97da --- /dev/null +++ b/backend/python/mlx-audio/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +backend_dir=$(dirname $0) + +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +startBackend $@ \ No newline at end of file diff --git a/backend/python/mlx-audio/test.py b/backend/python/mlx-audio/test.py new file mode 100644 index 000000000..792cb0648 --- /dev/null +++ b/backend/python/mlx-audio/test.py @@ -0,0 +1,142 @@ +import unittest +import subprocess +import time +import backend_pb2 +import backend_pb2_grpc + +import grpc + +import unittest +import subprocess +import time +import grpc +import backend_pb2_grpc +import backend_pb2 + +class TestBackendServicer(unittest.TestCase): + """ + TestBackendServicer is the class that tests the gRPC service. + + This class contains methods to test the startup and shutdown of the gRPC service. 
+ """ + def setUp(self): + self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) + time.sleep(10) + + def tearDown(self) -> None: + self.service.terminate() + self.service.wait() + + def test_server_startup(self): + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.Health(backend_pb2.HealthMessage()) + self.assertEqual(response.message, b'OK') + except Exception as err: + print(err) + self.fail("Server failed to start") + finally: + self.tearDown() + def test_load_model(self): + """ + This method tests if the TTS model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit")) + self.assertTrue(response.success) + self.assertEqual(response.message, "MLX-Audio TTS model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts_generation(self): + """ + This method tests if TTS audio is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit")) + self.assertTrue(response.success) + + # Test TTS generation + tts_req = backend_pb2.TTSRequest( + text="Hello, this is a test of the MLX-Audio TTS system.", + model="mlx-community/Kokoro-82M-4bit", + voice="af_heart", + language="a" + ) + tts_resp = stub.TTS(tts_req) + self.assertTrue(tts_resp.success) + self.assertIn("TTS audio generated successfully", tts_resp.message) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + self.tearDown() + + def test_tts_with_options(self): + """ + This method tests if TTS works with various options and parameters + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions( + Model="mlx-community/Kokoro-82M-4bit", + Options=["voice:af_soft", "speed:1.2", "lang_code:b"] + )) + self.assertTrue(response.success) + + # Test TTS generation with different voice and language + tts_req = backend_pb2.TTSRequest( + text="Hello, this is a test with British English accent.", + model="mlx-community/Kokoro-82M-4bit", + voice="af_soft", + language="b" + ) + tts_resp = stub.TTS(tts_req) + self.assertTrue(tts_resp.success) + self.assertIn("TTS audio generated successfully", tts_resp.message) + except Exception as err: + print(err) + self.fail("TTS with options service failed") + finally: + self.tearDown() + + + def test_tts_multilingual(self): + """ + This method tests if TTS works with different languages + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit")) + self.assertTrue(response.success) + + # Test Spanish TTS + tts_req = backend_pb2.TTSRequest( + text="Hola, esto es una prueba del sistema TTS MLX-Audio.", + model="mlx-community/Kokoro-82M-4bit", + voice="af_heart", + language="e" + ) + tts_resp = stub.TTS(tts_req) + self.assertTrue(tts_resp.success) + self.assertIn("TTS audio generated successfully", 
tts_resp.message) + except Exception as err: + print(err) + self.fail("Multilingual TTS service failed") + finally: + self.tearDown() \ No newline at end of file diff --git a/backend/python/mlx-audio/test.sh b/backend/python/mlx-audio/test.sh new file mode 100755 index 000000000..f31ae54e4 --- /dev/null +++ b/backend/python/mlx-audio/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +backend_dir=$(dirname $0) + +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +runUnittests
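
Editor's note — a minimal client sketch for the new backend. The patch above adds an mlx-audio gRPC servicer (backend.py) plus tests (test.py) that drive it over localhost. The snippet below mirrors what test.py does, assuming the generated backend_pb2 / backend_pb2_grpc stubs are importable and a server has been started with `python backend.py --addr localhost:50051`; the model name and option keys ("voice", "speed", "lang_code") are the ones exercised in test.py and parsed by LoadModel, not a complete list.

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    # Connect to a backend started manually with:
    #   python backend.py --addr localhost:50051
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)

        # Health returns b'OK' once the servicer is up
        print(stub.Health(backend_pb2.HealthMessage()).message)

        # Load a Kokoro TTS model; Options are "key:value" strings
        # split on the first colon by LoadModel
        load_resp = stub.LoadModel(backend_pb2.ModelOptions(
            Model="mlx-community/Kokoro-82M-4bit",
            Options=["voice:af_heart", "speed:1.0", "lang_code:a"],
        ))
        assert load_resp.success, load_resp.message

        # Synthesize speech; dst is the output path, written as a
        # 24 kHz WAV by sf.write() in backend.py
        tts_resp = stub.TTS(backend_pb2.TTSRequest(
            text="Hello from the mlx-audio backend.",
            model="mlx-community/Kokoro-82M-4bit",
            voice="af_heart",
            language="a",
            dst="out.wav",
        ))
        print(tts_resp.success, tts_resp.message)

In normal use LocalAI manages this gRPC lifecycle itself once the backend is installed (the new `backends/mlx-audio` Makefile target builds the OCI tarball and installs it via `./local-ai backends install "ocifile://..."`); the sketch is only for exercising the servicer directly, as test.py does.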