diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index efb5eebb2..c6577be8b 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -51,12 +51,12 @@ jobs:
           grpc-base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-        - build-type: 'sycl_f16'
+        - build-type: 'sycl'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           grpc-base-image: "ubuntu:22.04"
-          tag-suffix: 'sycl-f16'
+          tag-suffix: 'sycl'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'vulkan'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index f9a90c78b..d0c175e38 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -109,24 +109,15 @@ jobs:
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-vulkan"
-        - build-type: 'sycl_f16'
+        - build-type: 'sycl'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           grpc-base-image: "ubuntu:22.04"
-          tag-suffix: '-gpu-intel-f16'
+          tag-suffix: '-gpu-intel'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-          aio: "-aio-gpu-intel-f16"
-        - build-type: 'sycl_f32'
-          platforms: 'linux/amd64'
-          tag-latest: 'auto'
-          base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
-          tag-suffix: '-gpu-intel-f32'
-          runs-on: 'ubuntu-latest'
-          makeflags: "--jobs=3 --output-sync=target"
-          aio: "-aio-gpu-intel-f32"
+          aio: "-aio-gpu-intel"
 
   gh-runner:
     uses: ./.github/workflows/image_build.yml
diff --git a/Dockerfile b/Dockerfile
index fa885dd34..6bb6571f5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -100,6 +100,11 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         ldconfig \
     ; fi
 
+RUN expr "${BUILD_TYPE}" : sycl && \
+    echo "intel" > /run/localai/capability || \
+    echo "Not Intel"
+
+
 # Cuda
 ENV PATH=/usr/local/cuda/bin:${PATH}
diff --git a/Makefile b/Makefile
index 1f2730a63..175094385 100644
--- a/Makefile
+++ b/Makefile
@@ -5,8 +5,6 @@ BINARY_NAME=local-ai
 
 GORELEASER?=
 
-ONEAPI_VERSION?=2025.2
-
 export BUILD_TYPE?=
 GO_TAGS?=
@@ -340,19 +338,11 @@ docker-aio-all:
 
 docker-image-intel:
	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu24.04 \
+		--build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
-
-docker-image-intel-xpu:
-	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu22.04 \
-		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
-		--build-arg GO_TAGS="$(GO_TAGS)" \
-		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
+		--build-arg BUILD_TYPE=sycl -t $(DOCKER_IMAGE) .
 
 ########################################################
 ## Backends
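The capability probe added to the Dockerfile above relies on `expr`'s anchored string matching rather than an exact equality test, which is why a single `sycl` pattern suffices once the `sycl_f16`/`sycl_f32` split goes away. A minimal standalone sketch of how that check evaluates, with purely illustrative loop values:

```sh
#!/bin/sh
# `expr STRING : REGEX` anchors REGEX at the start of STRING, prints the number
# of characters matched, and exits 0 only when the match is non-empty, so any
# BUILD_TYPE beginning with "sycl" takes the first branch.
for BUILD_TYPE in sycl sycl_f16 cublas ""; do
    if expr "${BUILD_TYPE}" : sycl > /dev/null; then
        echo "${BUILD_TYPE}: would write the intel capability marker"
    else
        echo "${BUILD_TYPE:-<empty>}: Not Intel"
    fi
done
```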
diff --git a/README.md b/README.md
index 3521dffdd..1d9097c58 100644
--- a/README.md
+++ b/README.md
@@ -140,11 +140,7 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 
 ### Intel GPU Images (oneAPI):
 
 ```bash
-# Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
-
-# Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel
 ```
 
 ### Vulkan GPU Images:
@@ -166,7 +162,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
 
 # Intel GPU version
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
 
 # AMD GPU version
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang
index 99906008e..dbfee61e2 100644
--- a/backend/Dockerfile.golang
+++ b/backend/Dockerfile.golang
@@ -96,17 +96,6 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
         ldconfig \
     ; fi
 
-# Intel oneAPI requirements
-RUN <
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
-To use SYCL, use the images with `gpu-intel` in the tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16`, ...
+To use SYCL, use the images with `gpu-intel` in the tag, for example `{{< version >}}-gpu-intel`, ...
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
 
@@ -276,7 +276,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
 
 To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
 
 ```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel phi-2
 ```
 
 ### Notes
 
@@ -284,7 +284,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -
 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
 
 ```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel
 ```
 
 Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
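As a concrete illustration of the `mmap` note above, a model configuration with memory mapping disabled could look like the following sketch; the file name and model file are placeholders rather than part of this change:

```sh
# Hypothetical model config for the gpu-intel images; only `mmap: false`
# matters here, the model name and file are placeholders.
mkdir -p models
cat > models/phi-2.yaml <<'EOF'
name: phi-2
mmap: false
parameters:
  model: phi-2.Q4_K_M.gguf
EOF
```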
diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md
index 04b2a5dfe..feef50826 100644
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -197,4 +197,4 @@ docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_
 
 Note:
 
 - BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`.
-- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it)
\ No newline at end of file
+- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl
diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 300df6c1c..c284a4545 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -131,8 +131,7 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo
 | Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
 | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
 | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
-| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
-| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |
+| Latest images for Intel GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel` | `localai/localai:latest-aio-gpu-intel` |
 
 ### Available environment variables
 
@@ -179,23 +178,13 @@ Standard container images do not have pre-installed models.
 
 {{% /tab %}}
 
-{{% tab tabName="Intel GPU (sycl f16)" %}}
+{{% tab tabName="Intel GPU" %}}
 
 | Description | Quay | Docker Hub |
 | --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16` |
-
-{{% /tab %}}
-
-{{% tab tabName="Intel GPU (sycl f32)" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32` |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel` | `localai/localai:master-gpu-intel` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel` | `localai/localai:latest-gpu-intel` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel` | `localai/localai:{{< version >}}-gpu-intel` |
 
 {{% /tab %}}
 
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 6d51583a3..5640adefc 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -59,11 +59,7 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 
 #### Intel GPU Images (oneAPI):
 
 ```bash
-# Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
-
-# Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel
 ```
 
 #### Vulkan GPU Images:
@@ -85,7 +81,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
 
 # Intel GPU version
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
 
 # AMD GPU version
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
diff --git a/docs/static/install.sh b/docs/static/install.sh
index 6624c86d4..503079305 100755
--- a/docs/static/install.sh
+++ b/docs/static/install.sh
@@ -715,11 +715,10 @@ install_docker() {
             $envs \
             -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
     elif [ "$HAS_INTEL" ]; then
-        # Default to FP32 for better compatibility
-        IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
+        IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel
         # AIO
         if [ "$USE_AIO" = true ]; then
-            IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
+            IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel
         fi
 
         info "Starting LocalAI Docker container..."
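For reference, the Intel branch of `install_docker()` now resolves the image tag as sketched below; the variable values are assumptions for the example, not taken from the installer:

```sh
#!/bin/sh
# Dry run of the simplified tag selection, with illustrative inputs.
LOCALAI_VERSION=latest   # assumed value
USE_AIO=true             # assumed value

IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel
if [ "$USE_AIO" = true ]; then
    IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel
fi
echo "$IMAGE_TAG"        # prints: latest-aio-gpu-intel
```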