langchain[minor]: Make EmbeddingsFilter async (#22737)

Add a native async implementation for EmbeddingsFilter.
This commit is contained in:
Philippe PRADOS
2024-06-12 18:27:26 +02:00
committed by GitHub
parent b45bf78d2e
commit 23c22fcbc9
5 changed files with 127 additions and 0 deletions

View File

@@ -75,6 +75,20 @@ def _get_embeddings_from_stateful_docs(
return embedded_documents
async def _aget_embeddings_from_stateful_docs(
    embeddings: Embeddings, documents: Sequence[_DocumentWithState]
) -> List[List[float]]:
    """Asynchronously return one embedding per document, caching on the docs.

    Each document's ``state`` dict is used as a cache under the
    ``"embedded_doc"`` key. When every document already carries a cached
    embedding, those values are returned and ``embeddings`` is not called.
    Otherwise the whole batch is embedded with a single
    ``embeddings.aembed_documents`` call and each result is written back
    into the matching document's ``state``.

    Args:
        embeddings: Object providing the ``aembed_documents`` coroutine.
        documents: Documents exposing ``page_content`` and a mutable ``state``.

    Returns:
        The embeddings, in the same order as ``documents``.
    """
    # Checking only the first document is not enough: a batch in which a
    # later document lacks the cached value would raise KeyError when the
    # cache is read. Require the cache on *every* document before using it.
    if documents and all("embedded_doc" in doc.state for doc in documents):
        return [doc.state["embedded_doc"] for doc in documents]

    # At least one document is uncached (or the batch is empty): embed the
    # full batch in one call so that results stay aligned with `documents`.
    embedded_documents = await embeddings.aembed_documents(
        [doc.page_content for doc in documents]
    )
    for doc, embedding in zip(documents, embedded_documents):
        doc.state["embedded_doc"] = embedding
    return embedded_documents
def _filter_cluster_embeddings(
embedded_documents: List[List[float]],
num_clusters: int,