community[patch]: Update OpenVINO embedding and reranker to support static input shape (#22171)

This makes it possible to deploy embedding and reranker models on an NPU device, which requires a static input shape.
Ethan Yang 2024-06-04 04:27:17 +08:00 committed by GitHub
parent c599732e1a
commit 52da6a160d
2 changed files with 27 additions and 6 deletions
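
For context (not part of this diff): NPU plugins generally require a model compiled with static shapes, which is why the wrappers below now inspect the compiled model's input shape. A minimal sketch of reshaping an exported embedding IR to a fixed [1, 512] shape before it is wrapped by the LangChain classes; the file path, batch size, and sequence length are illustrative assumptions.

    # Illustrative only: reshape an exported OpenVINO IR to a static shape so it
    # can be compiled for NPU. Path, batch size, and sequence length are assumptions.
    import openvino as ov

    core = ov.Core()
    model = core.read_model("bge-small-en-v1.5-ov/openvino_model.xml")  # hypothetical path

    # Fix every input (input_ids, attention_mask, ...) to [1, 512].
    static_shape = ov.PartialShape([1, 512])
    model.reshape({inp.get_any_name(): static_shape for inp in model.inputs})

    compiled = core.compile_model(model, "NPU")
    # The patched wrappers read this static length back via
    # request.inputs[0].get_partial_shape()[1] and pad the tokenizer output to match.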

libs/community/langchain_community/document_compressors/openvino_rerank.py

@@ -114,9 +114,19 @@ class OpenVINOReranker(BaseDocumentCompressor):
        passages = request.passages

        query_passage_pairs = [[query, passage["text"]] for passage in passages]
        length = self.ov_model.request.inputs[0].get_partial_shape()[1]
        if length.is_dynamic:
            input_tensors = self.tokenizer(
                query_passage_pairs, padding=True, truncation=True, return_tensors="pt"
            )
        else:
            input_tensors = self.tokenizer(
                query_passage_pairs,
                padding="max_length",
                max_length=length.get_length(),
                truncation=True,
                return_tensors="pt",
            )

        outputs = self.ov_model(**input_tensors, return_dict=True)
        if outputs[0].shape[1] > 1:
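
For illustration only (not from this commit): the else-branch above pads every batch to the fixed sequence length reported by the compiled model, instead of padding to the longest item in the batch. A standalone sketch of the two tokenizer padding modes; the model name and the length 512 are assumptions.

    # Illustrative only: compare dynamic padding with the fixed-width padding a
    # static-shape (e.g. NPU-compiled) model needs. Model name and 512 are assumptions.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-base")
    pairs = [["what is openvino?", "OpenVINO is an open-source inference toolkit."]]

    # Dynamic-shape model: pad only to the longest sequence in the batch.
    dynamic = tokenizer(pairs, padding=True, truncation=True, return_tensors="pt")

    # Static-shape model: always pad/truncate to the length baked into the model.
    static = tokenizer(
        pairs, padding="max_length", max_length=512, truncation=True, return_tensors="pt"
    )
    print(dynamic["input_ids"].shape, static["input_ids"].shape)  # e.g. [1, 14] vs [1, 512]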

libs/community/langchain_community/embeddings/openvino.py

@@ -210,9 +210,20 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
            0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
        ):
            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
            length = self.ov_model.request.inputs[0].get_partial_shape()[1]
            if length.is_dynamic:
                features = self.tokenizer(
                    sentences_batch, padding=True, truncation=True, return_tensors="pt"
                )
            else:
                features = self.tokenizer(
                    sentences_batch,
                    padding="max_length",
                    max_length=length.get_length(),
                    truncation=True,
                    return_tensors="pt",
                )

            out_features = self.ov_model(**features)
            if mean_pooling:
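
A hedged usage sketch of the updated embeddings wrapper on NPU. It assumes a local directory containing an OpenVINO IR that was already reshaped to a static input shape (see the sketch above) and relies on the existing model_kwargs / encode_kwargs parameters of OpenVINOEmbeddings; the path and device string are assumptions, not part of this commit.

    # Illustrative only: the local directory is assumed to contain an OpenVINO IR
    # that was already reshaped to a static input shape.
    from langchain_community.embeddings import OpenVINOEmbeddings

    embedding = OpenVINOEmbeddings(
        model_name_or_path="./bge-small-en-v1.5-static",  # hypothetical local export
        model_kwargs={"device": "NPU"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vectors = embedding.embed_documents(["OpenVINO embeddings on an Intel NPU."])
    print(len(vectors), len(vectors[0]))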