community[patch]: Update OpenVINO embedding and reranker to support static input shape (#22171)

This makes it possible to deploy embedding and reranker models on an NPU device, which requires a static input shape.
Ethan Yang 2024-06-04 04:27:17 +08:00 committed by GitHub
parent c599732e1a
commit 52da6a160d
2 changed files with 27 additions and 6 deletions
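
For context (not part of this diff): NPU plugins generally require a model compiled with static shapes, which is why the wrappers below now inspect the compiled model's input shape. A minimal sketch of reshaping an exported embedding IR to a fixed [1, 512] shape before it is wrapped by the LangChain classes; the file path, batch size, and sequence length are illustrative assumptions.

    # Illustrative only: reshape an exported OpenVINO IR to a static shape so it
    # can be compiled for NPU. Path, batch size, and sequence length are assumptions.
    import openvino as ov

    core = ov.Core()
    model = core.read_model("bge-small-en-v1.5-ov/openvino_model.xml")  # hypothetical path

    # Fix every input (input_ids, attention_mask, ...) to [1, 512].
    static_shape = ov.PartialShape([1, 512])
    model.reshape({inp.get_any_name(): static_shape for inp in model.inputs})

    compiled = core.compile_model(model, "NPU")
    # The patched wrappers read this static length back via
    # request.inputs[0].get_partial_shape()[1] and pad the tokenizer output to match.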

libs/community/langchain_community/document_compressors/openvino_rerank.py

@@ -114,9 +114,19 @@ class OpenVINOReranker(BaseDocumentCompressor):
        passages = request.passages

        query_passage_pairs = [[query, passage["text"]] for passage in passages]
        length = self.ov_model.request.inputs[0].get_partial_shape()[1]
        if length.is_dynamic:
            input_tensors = self.tokenizer(
                query_passage_pairs, padding=True, truncation=True, return_tensors="pt"
            )
        else:
            input_tensors = self.tokenizer(
                query_passage_pairs,
                padding="max_length",
                max_length=length.get_length(),
                truncation=True,
                return_tensors="pt",
            )

        outputs = self.ov_model(**input_tensors, return_dict=True)
        if outputs[0].shape[1] > 1:
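
For illustration only (not from this commit): the else-branch above pads every batch to the fixed sequence length reported by the compiled model, instead of padding to the longest item in the batch. A standalone sketch of the two tokenizer padding modes; the model name and the length 512 are assumptions.

    # Illustrative only: compare dynamic padding with the fixed-width padding a
    # static-shape (e.g. NPU-compiled) model needs. Model name and 512 are assumptions.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-base")
    pairs = [["what is openvino?", "OpenVINO is an open-source inference toolkit."]]

    # Dynamic-shape model: pad only to the longest sequence in the batch.
    dynamic = tokenizer(pairs, padding=True, truncation=True, return_tensors="pt")

    # Static-shape model: always pad/truncate to the length baked into the model.
    static = tokenizer(
        pairs, padding="max_length", max_length=512, truncation=True, return_tensors="pt"
    )
    print(dynamic["input_ids"].shape, static["input_ids"].shape)  # e.g. [1, 14] vs [1, 512]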

libs/community/langchain_community/embeddings/openvino.py

@@ -210,9 +210,20 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
            0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
        ):
            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
            length = self.ov_model.request.inputs[0].get_partial_shape()[1]
            if length.is_dynamic:
                features = self.tokenizer(
                    sentences_batch, padding=True, truncation=True, return_tensors="pt"
                )
            else:
                features = self.tokenizer(
                    sentences_batch,
                    padding="max_length",
                    max_length=length.get_length(),
                    truncation=True,
                    return_tensors="pt",
                )

            out_features = self.ov_model(**features)
            if mean_pooling:
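
A hedged usage sketch of the updated embeddings wrapper on NPU. It assumes a local directory containing an OpenVINO IR that was already reshaped to a static input shape (see the sketch above) and relies on the existing model_kwargs / encode_kwargs parameters of OpenVINOEmbeddings; the path and device string are assumptions, not part of this commit.

    # Illustrative only: the local directory is assumed to contain an OpenVINO IR
    # that was already reshaped to a static input shape.
    from langchain_community.embeddings import OpenVINOEmbeddings

    embedding = OpenVINOEmbeddings(
        model_name_or_path="./bge-small-en-v1.5-static",  # hypothetical local export
        model_kwargs={"device": "NPU"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vectors = embedding.embed_documents(["OpenVINO embeddings on an Intel NPU."])
    print(len(vectors), len(vectors[0]))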