chroma[patch]: fix DuplicateIDError when documents are added without IDs (#28538)

Fix bug introduced in
https://github.com/langchain-ai/langchain/pull/27995

When documents are passed without IDs, every missing ID was defaulted to `""`; with more than one such document the chroma SDK raises
```
DuplicateIDError: Expected IDs to be unique
```

This was caught by the [docs
tests](https://github.com/langchain-ai/langchain/actions/runs/12180395579/job/33974633950);
this change also adds a regression test to langchain-chroma.
This commit is contained in:
ccurme 2024-12-05 10:37:19 -05:00 committed by GitHub
parent ecff9a01e4
commit 8f9b3b7498
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 1 deletions

View File

@ -1228,7 +1228,7 @@ class Chroma(VectorStore):
texts = [doc.page_content for doc in documents] texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents] metadatas = [doc.metadata for doc in documents]
if ids is None: if ids is None:
ids = [doc.id if doc.id else "" for doc in documents] ids = [doc.id if doc.id else str(uuid.uuid4()) for doc in documents]
return cls.from_texts( return cls.from_texts(
texts=texts, texts=texts,
embedding=embedding, embedding=embedding,

View File

@ -51,6 +51,22 @@ def test_chroma() -> None:
assert output[0].id is not None assert output[0].id is not None
def test_from_documents() -> None:
    """Test init using .from_documents with documents that carry no IDs.

    Regression test: several ID-less documents must be accepted without an
    ID collision, and the document returned by a search must come back with
    a real (non-empty) ID.
    """
    documents = [
        Document(page_content="foo"),
        Document(page_content="bar"),
        Document(page_content="baz"),
    ]
    docsearch = Chroma.from_documents(documents=documents, embedding=FakeEmbeddings())
    try:
        output = docsearch.similarity_search("foo", k=1)
    finally:
        # Drop the collection even when the search raises, so it does not linger.
        docsearch.delete_collection()
    assert len(output) == 1
    assert output[0].page_content == "foo"
    # Truthiness rejects both None and an empty-string placeholder ID, which
    # an `is not None` check would let through.
    assert output[0].id
def test_chroma_with_ids() -> None: def test_chroma_with_ids() -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]