diff --git a/.env b/.env index 53d796bc1..852d3dac6 100644 --- a/.env +++ b/.env @@ -32,15 +32,6 @@ # Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set) # LOCALAI_FORCE_BACKEND_SHUTDOWN=true -## Specify a build type. Available: cublas, openblas, clblas. -## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit. -## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM. -## clBLAS: This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel. -# BUILD_TYPE=openblas - -## Uncomment and set to true to enable rebuilding from source -# REBUILD=true - ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index e8d456b0d..aa6505fcd 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -394,11 +394,12 @@ jobs: cuda-major-version: "13" cuda-minor-version: "0" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' + ubuntu-version: '2404' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" @@ -433,8 +434,9 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-diffusers' runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - skip-drivers: 'true' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + ubuntu-version: '2404' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" @@ -502,10 +504,11 @@ jobs: cuda-major-version: "13" cuda-minor-version: "0" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-stablediffusion-ggml' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" + ubuntu-version: '2404' runs-on: 'ubuntu-24.04-arm' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -526,10 +529,11 @@ jobs: cuda-major-version: "13" cuda-minor-version: "0" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisper' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" + ubuntu-version: '2404' runs-on: 'ubuntu-24.04-arm' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml index 1243d60ad..e458dc3cb 100644 --- a/.github/workflows/backend_build.yml +++ b/.github/workflows/backend_build.yml @@ -1,5 +1,5 @@ --- -name: 'build python backend container images (reusable)' +name: 'build backend container images (reusable)' on: workflow_call: @@ -53,6 +53,11 @@ on: description: 'Skip drivers' default: 'false' type: string + ubuntu-version: + description: 'Ubuntu version' + required: false + default: '2204' + type: string secrets: dockerUsername: required: false @@ -208,6 +213,7 @@ jobs: CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} + UBUNTU_VERSION=${{ inputs.ubuntu-version }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha @@ -228,6 +234,7 @@ jobs: CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} BASE_IMAGE=${{ inputs.base-image }} BACKEND=${{ inputs.backend }} + UBUNTU_VERSION=${{ inputs.ubuntu-version }} context: ${{ inputs.context }} file: ${{ inputs.dockerfile }} cache-from: type=gha diff --git a/.github/workflows/backend_pr.yml b/.github/workflows/backend_pr.yml index f997a1b9e..e00802def 100644 --- a/.github/workflows/backend_pr.yml +++ b/.github/workflows/backend_pr.yml @@ -52,6 +52,7 @@ jobs: dockerfile: ${{ matrix.dockerfile }} skip-drivers: ${{ matrix.skip-drivers }} context: ${{ matrix.context }} + ubuntu-version: ${{ matrix.ubuntu-version }} secrets: quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index ff4bb560f..bc7c29bee 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -81,7 +81,8 @@ jobs: platforms: 'linux/arm64' tag-latest: 'false' tag-suffix: '-nvidia-l4t-arm64-cuda-13' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'false' \ No newline at end of file + skip-drivers: 'false' + ubuntu-version: '2404' \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 0c58ba4ac..cd5047e4b 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -169,7 +169,8 @@ jobs: platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-cuda-13' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' + ubuntu-version: '2404' \ No newline at end of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index b8f1f0d84..31a1f2310 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -56,6 +56,11 @@ on: required: false default: '' type: string + ubuntu-version: + description: 'Ubuntu version' + required: false + default: '2204' + type: string secrets: dockerUsername: required: true @@ -238,6 +243,7 @@ jobs: GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} + UBUNTU_VERSION=${{ inputs.ubuntu-version }} context: . file: ./Dockerfile cache-from: type=gha @@ -265,6 +271,7 @@ jobs: GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} + UBUNTU_VERSION=${{ inputs.ubuntu-version }} context: . file: ./Dockerfile cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index 05a0c2779..094fb927d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg libopenblas-base libopenblas-dev && \ + ffmpeg && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -23,6 +23,7 @@ ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} +ARG UBUNTU_VERSION=2204 RUN mkdir -p /run/localai RUN echo "default" > /run/localai/capability @@ -51,23 +52,13 @@ RUN < /run/localai/capability + fi +EOT + +# https://github.com/NVIDIA/Isaac-GR00T/issues/343 +RUN <