mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-14 07:07:34 +00:00
fix chroma updated upsert interface (#7643)
new chroma release seems to not support empty dicts for metadata. related to #7633
This commit is contained in:
parent
a673a51efa
commit
c17a80f11c
@ -164,9 +164,36 @@ class Chroma(VectorStore):
|
||||
embeddings = None
|
||||
if self._embedding_function is not None:
|
||||
embeddings = self._embedding_function.embed_documents(list(texts))
|
||||
self._collection.upsert(
|
||||
metadatas=metadatas, embeddings=embeddings, documents=texts, ids=ids
|
||||
)
|
||||
|
||||
if metadatas:
|
||||
texts = list(texts)
|
||||
empty = []
|
||||
non_empty = []
|
||||
for i, m in enumerate(metadatas):
|
||||
if m:
|
||||
non_empty.append(i)
|
||||
else:
|
||||
empty.append(i)
|
||||
if non_empty:
|
||||
metadatas = [metadatas[i] for i in non_empty]
|
||||
texts_with_metadatas = [texts[i] for i in non_empty]
|
||||
embeddings_with_metadatas = (
|
||||
[embeddings[i] for i in non_empty] if embeddings else None
|
||||
)
|
||||
ids_with_metadata = [ids[i] for i in non_empty]
|
||||
self._collection.upsert(
|
||||
metadatas=metadatas,
|
||||
embeddings=embeddings_with_metadatas,
|
||||
documents=texts_with_metadatas,
|
||||
ids=ids_with_metadata,
|
||||
)
|
||||
|
||||
texts = [texts[j] for j in empty]
|
||||
embeddings = [embeddings[j] for j in empty] if embeddings else None
|
||||
ids = [ids[j] for j in empty]
|
||||
|
||||
if texts:
|
||||
self._collection.upsert(embeddings=embeddings, documents=texts, ids=ids)
|
||||
return ids
|
||||
|
||||
def similarity_search(
|
||||
|
@ -281,3 +281,21 @@ def test_init_from_client_settings() -> None:
|
||||
|
||||
client_settings = chromadb.config.Settings()
|
||||
Chroma(client_settings=client_settings)
|
||||
|
||||
|
||||
def test_chroma_add_documents_no_metadata() -> None:
|
||||
db = Chroma(embedding_function=FakeEmbeddings())
|
||||
db.add_documents([Document(page_content="foo")])
|
||||
|
||||
|
||||
def test_chroma_add_documents_mixed_metadata() -> None:
|
||||
db = Chroma(embedding_function=FakeEmbeddings())
|
||||
docs = [
|
||||
Document(page_content="foo"),
|
||||
Document(page_content="bar", metadata={"baz": 1}),
|
||||
]
|
||||
db.add_documents(docs)
|
||||
search = db.similarity_search("foo bar")
|
||||
assert sorted(search, key=lambda d: d.page_content) == sorted(
|
||||
docs, key=lambda d: d.page_content
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user