From b37cef371880a8fdbb7d6a41926bf242b0b51eb2 Mon Sep 17 00:00:00 2001
From: Richard Palethorpe
Date: Tue, 1 Jul 2025 11:36:17 +0100
Subject: [PATCH] fix: Diffusers and XPU fixes (#5737)

* fix(README): Add device flags for Intel/XPU

Signed-off-by: Richard Palethorpe

* fix(diffusers/xpu): Set device to XPU and ignore CUDA request when on Intel

Signed-off-by: Richard Palethorpe

---------

Signed-off-by: Richard Palethorpe
---
 README.md                           |  4 ++--
 backend/python/diffusers/backend.py | 15 +++++++--------
 backend/python/diffusers/run.sh     |  8 +++++++-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index ab322d255..d3497df56 100644
--- a/README.md
+++ b/README.md
@@ -141,10 +141,10 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 
 ```bash
 # Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
 
 # Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
 ```
 
 ### Vulkan GPU Images:
diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py
index 2d8db5338..1a5f1785a 100755
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -38,9 +38,7 @@ DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
 FRAMES = os.environ.get("FRAMES", "64")
 
 if XPU:
-    import intel_extension_for_pytorch as ipex
-
-    print(ipex.xpu.get_device_name(0))
+    print(torch.xpu.get_device_name(0))
 
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
@@ -336,6 +334,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
 
         device = "cpu" if not request.CUDA else "cuda"
+        if XPU:
+            device = "xpu"
         self.device = device
         if request.LoraAdapter:
             # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
@@ -359,12 +359,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
                 self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
 
-            if request.CUDA:
-                self.pipe.to('cuda')
+            if device != "cpu":
+                self.pipe.to(device)
                 if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
+                    self.controlnet.to(device)
+
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index 82b7b09ec..ee730f21f 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -6,4 +6,10 @@ else
    source $backend_dir/../common/libbackend.sh
 fi
 
-startBackend $@
\ No newline at end of file
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
+startBackend $@
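
For reference, the device-selection behaviour this patch introduces in backend.py can be summarised with the minimal sketch below. It is illustrative only: the `pick_device` helper and the hard-coded `request_cuda=True` call are not part of the patch (the real code lives inside the gRPC `LoadModel` handler and reads the CUDA flag from the request), and the `XPU` environment variable is the one exported by run.sh when /opt/intel exists.

```python
# Minimal sketch of the device selection added in backend.py.
# Assumption: a PyTorch build with XPU support is available in the
# Intel oneAPI container image.
import os

import torch

# run.sh exports XPU=1 when /opt/intel is present.
XPU = os.environ.get("XPU", "0") == "1"


def pick_device(request_cuda: bool) -> str:
    """Choose the torch device string for the diffusers pipeline."""
    device = "cpu" if not request_cuda else "cuda"
    if XPU:
        # On Intel GPUs the CUDA request is ignored and XPU is used instead.
        device = "xpu"
    return device


if XPU:
    print(torch.xpu.get_device_name(0))

device = pick_device(request_cuda=True)
# The pipeline (and optional controlnet) is only moved off the CPU when a
# GPU device was selected, mirroring the `if device != "cpu"` branch in the
# patch.
```

The patch also drops the explicit `intel_extension_for_pytorch` import and calls `torch.xpu.get_device_name(0)` directly, relying on the `torch.xpu` namespace that recent PyTorch builds expose natively.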