Mirror of https://github.com/mudler/LocalAI.git (synced 2025-12-30 22:20:20 -06:00)
fix: Diffusers and XPU fixes (#5737)
* fix(README): Add device flags for Intel/XPU

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(diffusers/xpu): Set device to XPU and ignore CUDA request when on Intel

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Committed by GitHub · parent 9f957d547d · commit b37cef3718
````diff
@@ -141,10 +141,10 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 ```bash
 # Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
 
 # Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
 ```
 
 ### Vulkan GPU Images:
 
````
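The added `--device=/dev/dri/card1 --device=/dev/dri/renderD128` flags pass the Intel GPU's DRM device nodes through to the container; without them the XPU backend only ever sees the CPU. As a quick sanity check, something along these lines can be run inside the container (a minimal sketch, not part of the commit; it assumes a PyTorch build with XPU support, and the exact `/dev/dri/*` node names vary per system):

```python
# Minimal sketch: confirm the Intel GPU is visible inside the container.
# Assumes a PyTorch build with XPU support, as shipped in the Intel images.
import torch

if hasattr(torch, "xpu") and torch.xpu.is_available():
    # Device 0 should correspond to the render node passed via --device
    print("XPU visible:", torch.xpu.get_device_name(0))
else:
    print("No XPU visible - check the --device=/dev/dri/* flags on docker run")
```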
```diff
@@ -38,9 +38,7 @@ DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
 FRAMES = os.environ.get("FRAMES", "64")
 
 if XPU:
-    import intel_extension_for_pytorch as ipex
-
-    print(ipex.xpu.get_device_name(0))
+    print(torch.xpu.get_device_name(0))
 
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
```
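Recent PyTorch builds with XPU support expose the device through `torch.xpu` directly, so the explicit `intel_extension_for_pytorch` import is no longer needed just to query the device name. If an environment still relies on the extension to register the XPU backend, a guarded variant along these lines would keep both paths working (a hedged sketch, not part of the commit; `xpu_device_name` is a hypothetical helper):

```python
# Hedged sketch: query the XPU device name via torch.xpu, importing IPEX only
# as a fallback for builds where torch.xpu is not usable on its own.
import torch

def xpu_device_name(index: int = 0) -> str:
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.xpu.get_device_name(index)
    # Fallback: importing the extension registers the XPU backend with torch
    import intel_extension_for_pytorch as ipex  # noqa: F401
    return torch.xpu.get_device_name(index)
```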
```diff
@@ -336,6 +334,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
 
         device = "cpu" if not request.CUDA else "cuda"
+        if XPU:
+            device = "xpu"
         self.device = device
         if request.LoraAdapter:
             # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
```
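The device selection reads as a small precedence rule: CPU unless the request asks for CUDA, and XPU overrides both when the backend runs on Intel, which is what "ignore CUDA request when on Intel" means in the commit message. Pulled out as a standalone helper it looks roughly like this (a sketch; `cuda_requested` and `xpu` stand in for the `request.CUDA` field and `XPU` flag from the hunk):

```python
# Sketch of the device precedence introduced by the commit:
# XPU (Intel) wins, then an explicit CUDA request, otherwise CPU.
def select_device(cuda_requested: bool, xpu: bool) -> str:
    device = "cuda" if cuda_requested else "cpu"
    if xpu:
        # On Intel hardware a CUDA request is ignored and XPU is used instead.
        device = "xpu"
    return device

assert select_device(cuda_requested=True, xpu=True) == "xpu"
assert select_device(cuda_requested=True, xpu=False) == "cuda"
assert select_device(cuda_requested=False, xpu=False) == "cpu"
```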
```diff
@@ -359,12 +359,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
 
-            if request.CUDA:
-                self.pipe.to('cuda')
+            if device != "cpu":
+                self.pipe.to(device)
                 if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
+                    self.controlnet.to(device)
 
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
```
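With the device resolved once, the old CUDA-only and XPU-only branches collapse into a single `.to(device)` move that also covers the optional ControlNet. A minimal stand-in illustrates the pattern (a sketch, not the backend code; plain `torch.nn` modules are used instead of a real diffusers pipeline so it runs without model weights):

```python
# Minimal stand-in for the move-to-device step; the Linear modules below are
# assumptions replacing self.pipe and self.controlnet for brevity.
import torch

if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = "xpu"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

pipe = torch.nn.Linear(8, 8)        # stand-in for the diffusers pipeline
controlnet = torch.nn.Linear(8, 8)  # stand-in for the optional ControlNet

if device != "cpu":
    pipe = pipe.to(device)
    if controlnet is not None:
        controlnet = controlnet.to(device)
```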
```diff
@@ -6,4 +6,10 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-startBackend $@
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
+startBackend $@
```
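The `/opt/intel` check makes the launcher export `XPU=1` automatically inside the Intel oneAPI images instead of requiring the user to set it. On the Python side the flag is presumably read from the environment in the same style as the `DISABLE_CPU_OFFLOAD` line visible in the hunk header above (a hedged sketch; the exact startup code is not shown in this diff):

```python
# Hedged sketch of how the exported flag reaches the backend: a truthy XPU
# environment variable switches on the XPU code paths seen in the hunks above.
import os

XPU = os.environ.get("XPU", "0") == "1"

if XPU:
    print("XPU mode enabled via environment")
```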