mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-29 21:30:18 +00:00
Chroma persistence (#1028)
This PR adds persistence to the Chroma vector store. Users can supply a `persist_directory` to any of the `Chroma` creation methods. If supplied, the store is automatically persisted to that directory. If a user creates a new `Chroma` instance with the same persistence directory, the persisted store is loaded automatically. If they use `from_texts` or `from_documents` with an existing persistence directory, the documents are added to the existing store. Behavior may be unexpected if the user passes a different embedding function from the one used to create the collection - we will make this easier to handle in future updates. For now, we log a warning.
This commit is contained in:
@@ -26,3 +26,36 @@ def test_chroma_with_metadatas() -> None:
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||
|
||||
|
||||
def test_chroma_with_persistence() -> None:
    """Test end to end construction and search, with persistence.

    Builds a collection with a ``persist_directory``, searches it, persists
    it, then opens a second ``Chroma`` instance on the same directory and
    collection name and checks that the same search result comes back from
    the reopened store.
    """
    chroma_persist_dir = "./tests/persist_dir"
    collection_name = "test_collection"
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name=collection_name,
        texts=texts,
        embedding=FakeEmbeddings(),
        persist_directory=chroma_persist_dir,
    )

    try:
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

        docsearch.persist()

        # Get a new VectorStore from the persisted directory
        docsearch = Chroma(
            collection_name=collection_name,
            embedding_function=FakeEmbeddings(),
            persist_directory=chroma_persist_dir,
        )
        output = docsearch.similarity_search("foo", k=1)
        # Without this check the test would pass even if nothing was
        # actually reloaded from the persisted directory.
        assert output == [Document(page_content="foo")]
    finally:
        # Clean up, even when an assertion above fails, so a failed run
        # does not leave the collection behind in the shared directory.
        docsearch.delete_collection()

    # Persist doesn't need to be called again
    # Data will be automatically persisted on object deletion
    # Or on program exit
|
||||
|
||||
Reference in New Issue
Block a user