mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 11:47:49 +00:00
chroma[patch]: fix bug (#28538)
Fix bug introduced in https://github.com/langchain-ai/langchain/pull/27995. If all document IDs are `""`, the chroma SDK will raise `DuplicateIDError: Expected IDs to be unique`. Caught by [docs tests](https://github.com/langchain-ai/langchain/actions/runs/12180395579/job/33974633950), but added a test to langchain-chroma as well.
This commit is contained in:
parent
ecff9a01e4
commit
8f9b3b7498
@ -1228,7 +1228,7 @@ class Chroma(VectorStore):
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
if ids is None:
|
||||
ids = [doc.id if doc.id else "" for doc in documents]
|
||||
ids = [doc.id if doc.id else str(uuid.uuid4()) for doc in documents]
|
||||
return cls.from_texts(
|
||||
texts=texts,
|
||||
embedding=embedding,
|
||||
|
@ -51,6 +51,22 @@ def test_chroma() -> None:
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_from_documents() -> None:
    """Build a store with ``Chroma.from_documents`` and run one search.

    The input documents are created without an explicit ``id``; the single
    hit returned for the query "foo" must still carry a non-``None`` id.
    """
    contents = ("foo", "bar", "baz")
    documents = [Document(page_content=content) for content in contents]

    docsearch = Chroma.from_documents(documents=documents, embedding=FakeEmbeddings())
    output = docsearch.similarity_search("foo", k=1)

    # The collection is removed before any assertion is evaluated.
    docsearch.delete_collection()

    assert len(output) == 1
    top_hit = output[0]
    assert top_hit.page_content == "foo"
    assert top_hit.id is not None
|
||||
|
||||
|
||||
def test_chroma_with_ids() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
|
Loading…
Reference in New Issue
Block a user