mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-21 03:51:42 +00:00
community[patch]: Update OpenVINO embedding and reranker to support static input shape (#22171)
This helps deploy embedding models on NPU devices.
This commit is contained in:
parent
c599732e1a
commit
52da6a160d
@ -114,9 +114,19 @@ class OpenVINOReranker(BaseDocumentCompressor):
|
|||||||
passages = request.passages
|
passages = request.passages
|
||||||
|
|
||||||
query_passage_pairs = [[query, passage["text"]] for passage in passages]
|
query_passage_pairs = [[query, passage["text"]] for passage in passages]
|
||||||
|
length = self.ov_model.request.inputs[0].get_partial_shape()[1]
|
||||||
|
if length.is_dynamic:
|
||||||
input_tensors = self.tokenizer(
|
input_tensors = self.tokenizer(
|
||||||
query_passage_pairs, padding=True, truncation=True, return_tensors="pt"
|
query_passage_pairs, padding=True, truncation=True, return_tensors="pt"
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
input_tensors = self.tokenizer(
|
||||||
|
query_passage_pairs,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=length.get_length(),
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
|
|
||||||
outputs = self.ov_model(**input_tensors, return_dict=True)
|
outputs = self.ov_model(**input_tensors, return_dict=True)
|
||||||
if outputs[0].shape[1] > 1:
|
if outputs[0].shape[1] > 1:
|
||||||
|
@ -210,9 +210,20 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
|
|||||||
0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
|
0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
|
||||||
):
|
):
|
||||||
sentences_batch = sentences_sorted[start_index : start_index + batch_size]
|
sentences_batch = sentences_sorted[start_index : start_index + batch_size]
|
||||||
|
|
||||||
|
length = self.ov_model.request.inputs[0].get_partial_shape()[1]
|
||||||
|
if length.is_dynamic:
|
||||||
features = self.tokenizer(
|
features = self.tokenizer(
|
||||||
sentences_batch, padding=True, truncation=True, return_tensors="pt"
|
sentences_batch, padding=True, truncation=True, return_tensors="pt"
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
features = self.tokenizer(
|
||||||
|
sentences_batch,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=length.get_length(),
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
|
|
||||||
out_features = self.ov_model(**features)
|
out_features = self.ov_model(**features)
|
||||||
if mean_pooling:
|
if mean_pooling:
|
||||||
|
Loading…
Reference in New Issue
Block a user