From 52da6a160d9930900dc14144c21df414614dcdfc Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Tue, 4 Jun 2024 04:27:17 +0800 Subject: [PATCH] community[patch]: Update OpenVINO embedding and reranker to support static input shape (#22171) It can help to deploy embedding models on NPU devices --- .../document_compressors/openvino_rerank.py | 16 +++++++++++++--- .../langchain_community/embeddings/openvino.py | 17 ++++++++++++++--- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/document_compressors/openvino_rerank.py b/libs/community/langchain_community/document_compressors/openvino_rerank.py index b5222a2dcd2..24fb9267301 100644 --- a/libs/community/langchain_community/document_compressors/openvino_rerank.py +++ b/libs/community/langchain_community/document_compressors/openvino_rerank.py @@ -114,9 +114,19 @@ class OpenVINOReranker(BaseDocumentCompressor): passages = request.passages query_passage_pairs = [[query, passage["text"]] for passage in passages] - input_tensors = self.tokenizer( - query_passage_pairs, padding=True, truncation=True, return_tensors="pt" - ) + length = self.ov_model.request.inputs[0].get_partial_shape()[1] + if length.is_dynamic: + input_tensors = self.tokenizer( + query_passage_pairs, padding=True, truncation=True, return_tensors="pt" + ) + else: + input_tensors = self.tokenizer( + query_passage_pairs, + padding="max_length", + max_length=length.get_length(), + truncation=True, + return_tensors="pt", + ) outputs = self.ov_model(**input_tensors, return_dict=True) if outputs[0].shape[1] > 1: diff --git a/libs/community/langchain_community/embeddings/openvino.py b/libs/community/langchain_community/embeddings/openvino.py index 7e4d52fd592..dd4e939a89d 100644 --- a/libs/community/langchain_community/embeddings/openvino.py +++ b/libs/community/langchain_community/embeddings/openvino.py @@ -210,9 +210,20 @@ class OpenVINOEmbeddings(BaseModel, Embeddings): 0, len(sentences), batch_size, desc="Batches", 
disable=not show_progress_bar ): sentences_batch = sentences_sorted[start_index : start_index + batch_size] - features = self.tokenizer( - sentences_batch, padding=True, truncation=True, return_tensors="pt" - ) + + length = self.ov_model.request.inputs[0].get_partial_shape()[1] + if length.is_dynamic: + features = self.tokenizer( + sentences_batch, padding=True, truncation=True, return_tensors="pt" + ) + else: + features = self.tokenizer( + sentences_batch, + padding="max_length", + max_length=length.get_length(), + truncation=True, + return_tensors="pt", + ) out_features = self.ov_model(**features) if mean_pooling: