Chroma in LangChain (#1010)

Chroma is a simple-to-use, open-source, zero-config, zero-setup
vectorstore.

Simply `pip install chromadb`, and you're good to go. 

Out of the box, Chroma is suitable for most LangChain workloads, but it is
also highly flexible. I tested up to 1M embeddings on my M1 Mac without
issues and with reasonably fast query times.

Look out for future releases as we integrate more Chroma features with
LangChain!
This commit is contained in:
Anton Troynikov
2023-02-12 17:43:48 -08:00
committed by GitHub
parent 05d8969c79
commit 78abd277ff
3 changed files with 206 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
"""Test Chroma functionality."""
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
def test_chroma() -> None:
    """Build a Chroma store from plain texts and query it.

    Indexes three short strings using ``FakeEmbeddings`` and asserts that a
    ``k=1`` similarity search for "foo" yields exactly one ``Document`` whose
    page content is "foo".
    """
    corpus = ["foo", "bar", "baz"]
    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=corpus,
        embedding=FakeEmbeddings(),
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
def test_chroma_with_metadatas() -> None:
    """Build a Chroma store from texts plus metadata and query it.

    Each text is paired with a ``{"page": "<index>"}`` metadata dict. The
    test asserts that a ``k=1`` similarity search for "foo" yields a single
    ``Document`` carrying both the matching page content and the metadata
    that was supplied for it.
    """
    corpus = ["foo", "bar", "baz"]
    # One metadata dict per text, keyed by the text's position as a string.
    page_metadatas = [{"page": str(index)} for index, _ in enumerate(corpus)]
    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=corpus,
        embedding=FakeEmbeddings(),
        metadatas=page_metadatas,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo", metadata={"page": "0"})]
    assert hits == expected