Compare commits

...

1 Commits

Author SHA1 Message Date
Dev 2049
ec6e11c8cd clean 2023-05-19 14:25:58 -07:00
4 changed files with 32 additions and 61 deletions

View File

@@ -156,7 +156,7 @@
],
"source": [
"embedding = OpenAIEmbeddings()\n",
"vectordb = Chroma.from_documents(documents=documents, embedding=embedding)\n",
"vectordb = Chroma.from_documents(documents, embedding)\n",
"retriever = vectordb.as_retriever()\n",
"qa_chain = RetrievalQA.from_chain_type(\n",
" llm=OpenAI(), chain_type=\"stuff\", retriever=retriever, return_source_documents=True\n",
@@ -326,7 +326,7 @@
"\n",
"document_content_description = \"Contents of this chunk\"\n",
"llm = OpenAI(temperature=0)\n",
"vectordb = Chroma.from_documents(documents=documents, embedding=embedding)\n",
"vectordb = Chroma.from_documents(documents, embedding)\n",
"retriever = SelfQueryRetriever.from_llm(\n",
" llm, vectordb, document_content_description, metadata_field_info, verbose=True\n",
")\n",
@@ -398,7 +398,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.10"
"version": "3.11.3"
}
},
"nbformat": 4,

View File

@@ -228,7 +228,7 @@
"persist_directory = 'db'\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"vectordb = Chroma.from_documents(documents=docs, embedding=embedding, persist_directory=persist_directory)"
"vectordb = Chroma.from_documents(docs, embedding, persist_directory=persist_directory)"
]
},
{
@@ -361,7 +361,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.11.3"
}
},
"nbformat": 4,

View File

@@ -90,6 +90,7 @@ class Chroma(VectorStore):
self._embedding_function = embedding_function
self._persist_directory = persist_directory
self._collection = self._client.get_or_create_collection(
name=collection_name,
embedding_function=self._embedding_function.embed_documents
@@ -354,7 +355,7 @@ class Chroma(VectorStore):
def from_texts(
cls: Type[Chroma],
texts: List[str],
embedding: Optional[Embeddings] = None,
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
@@ -389,43 +390,3 @@ class Chroma(VectorStore):
)
chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids)
return chroma_collection
@classmethod
def from_documents(
cls: Type[Chroma],
documents: List[Document],
embedding: Optional[Embeddings] = None,
ids: Optional[List[str]] = None,
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
persist_directory: Optional[str] = None,
client_settings: Optional[chromadb.config.Settings] = None,
client: Optional[chromadb.Client] = None, # Add this line
**kwargs: Any,
) -> Chroma:
"""Create a Chroma vectorstore from a list of documents.
If a persist_directory is specified, the collection will be persisted there.
Otherwise, the data will be ephemeral in-memory.
Args:
collection_name (str): Name of the collection to create.
persist_directory (Optional[str]): Directory to persist the collection.
ids (Optional[List[str]]): List of document IDs. Defaults to None.
documents (List[Document]): List of documents to add to the vectorstore.
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
client_settings (Optional[chromadb.config.Settings]): Chroma client settings
Returns:
Chroma: Chroma vectorstore.
"""
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
return cls.from_texts(
texts=texts,
embedding=embedding,
metadatas=metadatas,
ids=ids,
collection_name=collection_name,
persist_directory=persist_directory,
client_settings=client_settings,
client=client,
)

View File

@@ -10,7 +10,9 @@ def test_chroma() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
texts,
FakeEmbeddings(),
collection_name="test_collection",
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
@@ -21,7 +23,9 @@ async def test_chroma_async() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
texts,
FakeEmbeddings(),
collection_name="test_collection",
)
output = await docsearch.asimilarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
@@ -32,9 +36,9 @@ def test_chroma_with_metadatas() -> None:
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Chroma.from_texts(
texts,
FakeEmbeddings(),
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search("foo", k=1)
@@ -46,9 +50,9 @@ def test_chroma_with_metadatas_with_scores() -> None:
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Chroma.from_texts(
texts,
FakeEmbeddings(),
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search_with_score("foo", k=1)
@@ -60,9 +64,9 @@ def test_chroma_search_filter() -> None:
texts = ["far", "bar", "baz"]
metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
docsearch = Chroma.from_texts(
texts,
FakeEmbeddings(),
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search("far", k=1, filter={"first_letter": "f"})
@@ -76,9 +80,9 @@ def test_chroma_search_filter_with_scores() -> None:
texts = ["far", "bar", "baz"]
metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
docsearch = Chroma.from_texts(
texts,
FakeEmbeddings(),
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search_with_score(
@@ -101,9 +105,9 @@ def test_chroma_with_persistence() -> None:
collection_name = "test_collection"
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
texts,
FakeEmbeddings(),
collection_name=collection_name,
texts=texts,
embedding=FakeEmbeddings(),
persist_directory=chroma_persist_dir,
)
@@ -132,7 +136,9 @@ def test_chroma_mmr() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
texts,
FakeEmbeddings(),
collection_name="test_collection",
)
output = docsearch.max_marginal_relevance_search("foo", k=1)
assert output == [Document(page_content="foo")]
@@ -143,7 +149,9 @@ def test_chroma_mmr_by_vector() -> None:
texts = ["foo", "bar", "baz"]
embeddings = FakeEmbeddings()
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=embeddings
texts,
embeddings,
collection_name="test_collection",
)
embedded_query = embeddings.embed_query("foo")
output = docsearch.max_marginal_relevance_search_by_vector(embedded_query, k=1)
@@ -154,7 +162,9 @@ def test_chroma_with_include_parameter() -> None:
"""Test end to end construction and include parameter."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
texts,
FakeEmbeddings(),
collection_name="test_collection",
)
output = docsearch.get(include=["embeddings"])
assert output["embeddings"] is not None