diff --git a/libs/community/langchain_community/embeddings/ascend.py b/libs/community/langchain_community/embeddings/ascend.py
index c8cb059177b..940b84bbfc5 100644
--- a/libs/community/langchain_community/embeddings/ascend.py
+++ b/libs/community/langchain_community/embeddings/ascend.py
@@ -30,6 +30,7 @@ class AscendEmbeddings(Embeddings, BaseModel):
     document_instruction: str = ""
     use_fp16: bool = True
     pooling_method: Optional[str] = "cls"
+    batch_size: int = 32
 
     model: Any
     tokenizer: Any
@@ -119,7 +120,18 @@ class AscendEmbeddings(Embeddings, BaseModel):
             )
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        return self.encode([self.document_instruction + text for text in texts])
+        try:
+            import numpy as np
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import numpy, please install with `pip install -U numpy`."
+            ) from e
+        embedding_list = []
+        for i in range(0, len(texts), self.batch_size):
+            texts_ = texts[i : i + self.batch_size]
+            emb = self.encode([self.document_instruction + text for text in texts_])
+            embedding_list.append(emb)
+        return np.concatenate(embedding_list)
 
     def embed_query(self, text: str) -> List[float]:
         return self.encode([self.query_instruction + text])[0]
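
For reviewers, a minimal usage sketch of the batched `embed_documents` path added above. It assumes an Ascend NPU environment with the model dependencies installed; `model_path` and the sample texts are placeholders rather than values from this PR, and `batch_size=32` simply mirrors the new default.

```python
from langchain_community.embeddings import AscendEmbeddings

# Placeholder model path; an embedding model prepared for Ascend is assumed here.
embeddings = AscendEmbeddings(
    model_path="/path/to/embedding-model",
    document_instruction="",
    batch_size=32,  # new field from this diff: documents are encoded 32 at a time
)

docs = [f"sample document {i}" for i in range(100)]

# embed_documents now loops over `docs` in slices of `batch_size`,
# encodes each slice, and concatenates the per-batch arrays with numpy.
vectors = embeddings.embed_documents(docs)  # 100 texts -> 4 batches (32, 32, 32, 4)
print(len(vectors), len(vectors[0]))
```

Batching bounds peak device memory by `batch_size` rather than by the length of the full document list, which is what the per-slice `self.encode` calls in the second hunk provide.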