mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-25 04:49:17 +00:00
langchain_chroma: added document.id support (#27995)
Description:
* Added internal `Document.id` support to the Chroma VectorStore.

Dependencies:
* https://github.com/langchain-ai/langchain/pull/27968 should be merged first, and this PR should be rebased on top of those changes.

Tests:
* Modified/added tests for `Document.id` support. All tests are passing.

Note: I am not a member of the Chroma team.

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -44,10 +44,14 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
||||
return [
|
||||
# TODO: Chroma can do batch querying,
|
||||
# we shouldn't hard code to the 1st result
|
||||
(Document(page_content=result[0], metadata=result[1] or {}), result[2])
|
||||
(
|
||||
Document(page_content=result[0], metadata=result[1] or {}, id=result[2]),
|
||||
result[3],
|
||||
)
|
||||
for result in zip(
|
||||
results["documents"][0],
|
||||
results["metadatas"][0],
|
||||
results["ids"][0],
|
||||
results["distances"][0],
|
||||
)
|
||||
]
|
||||
@@ -1185,6 +1189,8 @@ class Chroma(VectorStore):
|
||||
"""
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
if ids is None:
|
||||
ids = [doc.id if doc.id else "" for doc in documents]
|
||||
return cls.from_texts(
|
||||
texts=texts,
|
||||
embedding=embedding,
|
||||
|
Reference in New Issue
Block a user