fix: Diffusers and XPU fixes (#5737)

* fix(README): Add device flags for Intel/XPU

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(diffusers/xpu): Set device to XPU and ignore CUDA request when on Intel

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Richard Palethorpe committed 2025-07-01 11:36:17 +01:00 (committed by GitHub)
parent 9f957d547d
commit b37cef3718
3 changed files with 16 additions and 11 deletions

README.md

@@ -141,10 +141,10 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 ```bash
 # Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
 # Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
 ```
 ### Vulkan GPU Images:
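
A note on the Intel `--device` flags added above: the DRI node names (`card1`, `renderD128`) are host-specific, so it is worth checking which nodes actually exist before copying the command. A quick way to list them (plain Python, nothing LocalAI-specific assumed):

```python
# List the host's DRI nodes so the right --device flags can be passed to
# docker run; names like card0/card1 and renderD128/renderD129 vary per machine.
import os

print(os.listdir("/dev/dri"))  # e.g. ['card1', 'renderD128']
```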

backend/python/diffusers/backend.py

@@ -38,9 +38,7 @@ DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
 FRAMES = os.environ.get("FRAMES", "64")
 if XPU:
-    import intel_extension_for_pytorch as ipex
-    print(ipex.xpu.get_device_name(0))
+    print(torch.xpu.get_device_name(0))
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
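
The probe now goes through PyTorch's native `torch.xpu` namespace rather than `intel_extension_for_pytorch`, since recent PyTorch builds expose XPU devices directly. A minimal standalone check along the same lines (the `hasattr` guard is an addition for builds without XPU support):

```python
# Standalone XPU probe using the native torch.xpu API (assumes a PyTorch
# build with XPU support, e.g. 2.4+; the hasattr guard keeps it safe on
# builds without the xpu namespace).
import torch

if hasattr(torch, "xpu") and torch.xpu.is_available():
    print(torch.xpu.get_device_name(0))
    print(torch.xpu.device_count())
```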
@@ -336,6 +334,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
             device = "cpu" if not request.CUDA else "cuda"
+            if XPU:
+                device = "xpu"
             self.device = device
             if request.LoraAdapter:
                 # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
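
The hunk encodes a simple precedence: an XPU host overrides a CUDA request, and CPU remains the fallback. A sketch of that rule in isolation (`pick_device` is illustrative, not a function in the backend):

```python
# Illustrative sketch of the device precedence introduced above:
# XPU beats a CUDA request, CPU is the default.
def pick_device(cuda_requested: bool, xpu: bool) -> str:
    device = "cpu" if not cuda_requested else "cuda"
    if xpu:
        device = "xpu"  # on Intel hardware the CUDA request is ignored
    return device

assert pick_device(cuda_requested=True, xpu=True) == "xpu"
assert pick_device(cuda_requested=True, xpu=False) == "cuda"
assert pick_device(cuda_requested=False, xpu=False) == "cpu"
```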
@@ -359,12 +359,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
-            if request.CUDA:
-                self.pipe.to('cuda')
+            if device != "cpu":
+                self.pipe.to(device)
                 if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
+                    self.controlnet.to(device)
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
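
Folding the CUDA and XPU branches into one `device != "cpu"` check also fixes the ControlNet placement: the net now follows the pipeline to whichever device was chosen instead of being hard-wired to `'cuda'`. A hedged sketch of the pattern with diffusers (the model id is an example, not what LocalAI loads):

```python
# Moving a diffusers pipeline with a single device string; the same .to()
# call covers "cuda", "xpu", and "cpu" (model id is illustrative only).
import torch
from diffusers import StableDiffusionPipeline

device = "xpu" if hasattr(torch, "xpu") and torch.xpu.is_available() else "cpu"
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to(device)
```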

backend/python/diffusers/run.sh

@@ -6,4 +6,10 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-startBackend $@
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
+startBackend $@
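
With `/opt/intel` present (as in the oneAPI container images), `run.sh` now exports `XPU=1` before starting the backend. Presumably the Python side consumes it the same way it reads the other flags in the first hunk; a sketch of that pattern (an assumption, not a verbatim line from the backend):

```python
# Assumed consumption of the XPU flag exported by run.sh, mirroring the
# DISABLE_CPU_OFFLOAD pattern visible in backend.py's first hunk.
import os

XPU = os.environ.get("XPU", "0") == "1"
```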