From 6644af10c6c00edeea1844bb5d934fa72469942a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 26 Jun 2025 18:41:38 +0200
Subject: [PATCH] feat: :warning: reduce images size and stop bundling sources (#5721)

feat: reduce images size and stop bundling sources

Do not copy the sources anymore, and reduce the packages installed in the base
images by not using builder images. If a rebuild is needed, build the container
image from scratch by following the docs.

We will gradually migrate all backends to the gallery to keep the core small.

This PR is a breaking change: it also sets the base folders to /models and
/backends instead of /build/models and /build/backends.

Signed-off-by: Ettore Di Giacinto
Signed-off-by: Ettore Di Giacinto
---
 Dockerfile                                    | 203 ++++++++----------
 aio/entrypoint.sh                             |   2 +-
 .../content/docs/features/GPU-acceleration.md |   6 +-
 .../docs/getting-started/container-images.md  |  10 +-
 docs/content/docs/reference/nvidia-l4t.md     |   2 +-
 docs/static/install.sh                        |   8 +-
 entrypoint.sh                                 |  49 ++---
 7 files changed, 123 insertions(+), 157 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3bfc4a388..a31ce0a8c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,82 +2,17 @@ ARG BASE_IMAGE=ubuntu:22.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
 
-# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements
 
-USER root
-
-ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
-ARG CMAKE_FROM_SOURCE=false
-ARG TARGETARCH
-ARG TARGETVARIANT
-
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        build-essential \
-        ccache \
-        ca-certificates espeak-ng \
-        curl libssl-dev \
-        git \
-        git-lfs \
-        unzip upx-ucl python3 python-is-python3 && \
+        ca-certificates curl wget espeak-ng libgomp1 \
+        python3 python-is-python3 ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <

diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
 echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
-exec /build/entrypoint.sh "$@"
+exec /entrypoint.sh "$@"

diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index 4fd3c039a..550c013c1 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
 To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
 
 ```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
 ```
 
 ### Notes
@@ -296,7 +296,7 @@ To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}
 To run LocalAI with Docker and Vulkan, you can use the following command as an example:
 
 ```bash
-docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models localai/localai:latest-vulkan-ffmpeg-core
 ```
 
 ### Notes
@@ -308,7 +308,7 @@ These flags are the same as the sections above, depending on the hardware, for [
 If you have mixed hardware, you can pass flags for multiple GPUs, for example:
 
 ```bash
-docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models \
 --gpus=all \ # nvidia passthrough
 --device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
 localai/localai:latest-vulkan-ffmpeg-core

diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 4edbc6191..67b0c44ee 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -92,7 +92,7 @@ services:
       - DEBUG=true
       # ...
     volumes:
-      - ./models:/build/models:cached
+      - ./models:/models:cached
   # decomment the following piece if running with Nvidia GPUs
   #    deploy:
   #      resources:
@@ -105,21 +105,21 @@ services:
 
 {{% alert icon="💡" %}}
 
-**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
+**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
 
 You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).
 
-If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`:
+If you want to use a named model or a local directory, you can mount it as a volume to `/models`:
 
 ```bash
-docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu
+docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
 ```
 
 or associate a volume:
 
 ```bash
 docker volume create localai-models
-docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
+docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
 ```
 
 {{% /alert %}}

diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/docs/reference/nvidia-l4t.md
index ce0fd5e95..b019aa70c 100644
--- a/docs/content/docs/reference/nvidia-l4t.md
+++ b/docs/content/docs/reference/nvidia-l4t.md
@@ -35,7 +35,7 @@ docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
 
 ```bash
-docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
+docker run -e DEBUG=true -p 8080:8080 -v /data/models:/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 ```
 
 Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.

diff --git a/docs/static/install.sh b/docs/static/install.sh
index d0c21b63a..357444387 100755
--- a/docs/static/install.sh
+++ b/docs/static/install.sh
@@ -663,7 +663,7 @@ install_docker() {
        IMAGE_TAG=${LOCALAI_VERSION}-vulkan
 
        info "Starting LocalAI Docker container..."
-       $SUDO docker run -v local-ai-data:/build/models \
+       $SUDO docker run -v local-ai-data:/models \
            --device /dev/dri \
            --restart=always \
            -e API_KEY=$API_KEY \
@@ -690,7 +690,7 @@ install_docker() {
        fi
 
        info "Starting LocalAI Docker container..."
-       $SUDO docker run -v local-ai-data:/build/models \
+       $SUDO docker run -v local-ai-data:/models \
            --gpus all \
            --restart=always \
            -e API_KEY=$API_KEY \
@@ -705,7 +705,7 @@ install_docker() {
        fi
 
        info "Starting LocalAI Docker container..."
-       $SUDO docker run -v local-ai-data:/build/models \
+       $SUDO docker run -v local-ai-data:/models \
            --device /dev/dri \
            --device /dev/kfd \
            --group-add=video \
@@ -723,7 +723,7 @@ install_docker() {
        fi
 
        info "Starting LocalAI Docker container..."
-       $SUDO docker run -v local-ai-data:/build/models \
+       $SUDO docker run -v local-ai-data:/models \
            --device /dev/dri \
            --restart=always \
            -e API_KEY=$API_KEY \

diff --git a/entrypoint.sh b/entrypoint.sh
index 389c846d5..fdaa92eba 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -e
 
-cd /build
+cd /
 
 # If we have set EXTRA_BACKENDS, then we need to prepare the backends
 if [ -n "$EXTRA_BACKENDS" ]; then
@@ -13,38 +13,23 @@ if [ -n "$EXTRA_BACKENDS" ]; then
     done
 fi
 
-if [ "$REBUILD" != "false" ]; then
-    rm -rf ./local-ai
-    make build -j${BUILD_PARALLELISM:-1}
+echo "CPU info:"
+grep -e "model\sname" /proc/cpuinfo | head -1
+grep -e "flags" /proc/cpuinfo | head -1
+if grep -q -e "\savx\s" /proc/cpuinfo ; then
+    echo "CPU: AVX found OK"
 else
-    echo "@@@@@"
-    echo "Skipping rebuild"
-    echo "@@@@@"
-    echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
-    echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
-    echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
-    echo "see the documentation at: https://localai.io/basics/build/index.html"
-    echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
-    echo "@@@@@"
-    echo "CPU info:"
-    grep -e "model\sname" /proc/cpuinfo | head -1
-    grep -e "flags" /proc/cpuinfo | head -1
-    if grep -q -e "\savx\s" /proc/cpuinfo ; then
-        echo "CPU: AVX found OK"
-    else
-        echo "CPU: no AVX found"
-    fi
-    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
-        echo "CPU: AVX2 found OK"
-    else
-        echo "CPU: no AVX2 found"
-    fi
-    if grep -q -e "\savx512" /proc/cpuinfo ; then
-        echo "CPU: AVX512 found OK"
-    else
-        echo "CPU: no AVX512 found"
-    fi
-    echo "@@@@@"
+    echo "CPU: no AVX found"
+fi
+if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+    echo "CPU: AVX2 found OK"
+else
+    echo "CPU: no AVX2 found"
+fi
+if grep -q -e "\savx512" /proc/cpuinfo ; then
+    echo "CPU: AVX512 found OK"
+else
+    echo "CPU: no AVX512 found"
 fi
 
 exec ./local-ai "$@"
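
The path change is the breaking part of this patch: deployments that mount model directories to the old `/build/models` location will no longer find their models. The following is a hypothetical migration sketch, not part of the patch; the image tag and host paths are placeholders, and `MODELS_PATH` is the variable documented in the container-images.md hunk above.

```bash
# Hypothetical migration sketch; image tag and host paths are placeholders.

# Before this change: models were mounted to /build/models inside the container.
docker run -p 8080:8080 -v $PWD/models:/build/models localai/localai:latest-aio-cpu

# After this change: mount to the new default /models path instead...
docker run -p 8080:8080 -v $PWD/models:/models localai/localai:latest-aio-cpu

# ...or keep the old container path by pointing MODELS_PATH back at it.
docker run -p 8080:8080 -e MODELS_PATH=/build/models \
  -v $PWD/models:/build/models localai/localai:latest-aio-cpu
```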
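The new entrypoint.sh also drops the in-container `REBUILD=true` path, so a binary tuned to a specific CPU (for example one without AVX, which the entrypoint still reports) now means rebuilding the image itself, as the commit message suggests. A minimal sketch, assuming a local checkout; the clone URL, tag name, and build invocation are assumptions rather than the project's documented flow.

```bash
# Minimal sketch: rebuild the image from source instead of setting REBUILD=true
# inside a running container. Clone URL and image tag are assumptions; see
# https://localai.io/basics/build/index.html for the supported build flow.
git clone https://github.com/go-skynet/LocalAI.git
cd LocalAI
docker build -t local-ai:custom .
docker run -p 8080:8080 -v $PWD/models:/models local-ai:custom
```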