From e38610e5215508ea7399f3dd6307bd43fc9a585e Mon Sep 17 00:00:00 2001 From: fakezeta Date: Tue, 30 Apr 2024 10:13:04 +0200 Subject: [PATCH] feat: OpenVINO acceleration for embeddings in transformer backend (#2190) OpenVINO acceleration for embeddings New argument type: OVModelForFeatureExtraction --- .../python/transformers/transformers_server.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index a27c24dab..93b2ce257 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, device=device_map) self.OV = True + elif request.Type == "OVModelForFeatureExtraction": + from optimum.intel.openvino import OVModelForFeatureExtraction + from openvino.runtime import Core + + if "GPU" in Core().available_devices: + device_map="GPU" + else: + device_map="CPU" + self.model = OVModelForFeatureExtraction.from_pretrained(model_name, + compile=True, + trust_remote_code=request.TrustRemoteCode, + ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, + export=True, + device=device_map) + self.OV = True else: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,