community[patch]: Update OpenVINO embedding and reranker to support static input shape (#22171)

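When the model's input sequence dimension is static, the tokenizer now pads every input to that fixed length (`padding="max_length"`) instead of padding dynamically. This helps deploy embedding and reranker models on NPU devices.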
2025-09-09 23:12:38 +00:00 · 2024-06-04 04:27:17 +08:00
parent c599732e1a
commit 52da6a160d
2 changed files with 27 additions and 6 deletions
--- a/libs/community/langchain_community/document_compressors/openvino_rerank.py
+++ b/libs/community/langchain_community/document_compressors/openvino_rerank.py
@@ -114,9 +114,19 @@ class OpenVINOReranker(BaseDocumentCompressor):
        passages = request.passages
        query_passage_pairs = [[query, passage["text"]] for passage in passages]
        length = self.ov_model.request.inputs[0].get_partial_shape()[1]
        if length.is_dynamic:
            input_tensors = self.tokenizer(
                query_passage_pairs, padding=True, truncation=True, return_tensors="pt"
            )
        else:
            input_tensors = self.tokenizer(
                query_passage_pairs,
                padding="max_length",
                max_length=length.get_length(),
                truncation=True,
                return_tensors="pt",
            )
        outputs = self.ov_model(**input_tensors, return_dict=True)
        if outputs[0].shape[1] > 1:
--- a/libs/community/langchain_community/embeddings/openvino.py
+++ b/libs/community/langchain_community/embeddings/openvino.py
@@ -210,9 +210,20 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
            0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
        ):
            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
            length = self.ov_model.request.inputs[0].get_partial_shape()[1]
            if length.is_dynamic:
                features = self.tokenizer(
                    sentences_batch, padding=True, truncation=True, return_tensors="pt"
                )
            else:
                features = self.tokenizer(
                    sentences_batch,
                    padding="max_length",
                    max_length=length.get_length(),
                    truncation=True,
                    return_tensors="pt",
                )
            out_features = self.ov_model(**features)
            if mean_pooling: