chroma docs (#1012)

This commit is contained in:
Harrison Chase
2023-02-12 23:02:01 -08:00
committed by GitHub
parent 0c553d2064
commit 7fb33fca47
18 changed files with 354 additions and 179 deletions

View File

@@ -338,7 +338,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.1"
},
"vscode": {
"interpreter": {

View File

@@ -160,7 +160,7 @@
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"with open('../../state_of_the_union.txt') as f:\n",
" state_of_the_union = f.read()\n",
@@ -173,9 +173,18 @@
"execution_count": 12,
"id": "bfcfc039",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
]
}
],
"source": [
"docsearch = FAISS.from_texts(texts, embeddings)\n",
"docsearch = Chroma.from_texts(texts, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = docsearch.similarity_search(query)"
@@ -201,7 +210,7 @@
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence. \n"
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
@@ -220,7 +229,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "llm-env",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -234,7 +243,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0 (default, Nov 15 2020, 06:25:35) \n[Clang 10.0.0 ]"
"version": "3.9.1"
},
"vscode": {
"interpreter": {

View File

@@ -27,7 +27,7 @@
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS, Qdrant"
"from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS, Qdrant, Chroma"
]
},
{
@@ -51,16 +51,25 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 9,
"id": "015f4ff5",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
]
}
],
"source": [
"docsearch = FAISS.from_texts(texts, embeddings)\n",
"docsearch = Chroma.from_texts(texts, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = docsearch.similarity_search(query)"
@@ -68,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 10,
"id": "67baf32e",
"metadata": {
"pycharm": {
@@ -109,17 +118,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"id": "70758e4f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['64108bd0-4d91-485c-9743-1e18debdd59e']"
"['a05e3d0c-ab40-11ed-a853-e65801318981']"
]
},
"execution_count": 5,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -130,7 +139,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 12,
"id": "4edeb88f",
"metadata": {},
"outputs": [],
@@ -141,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 13,
"id": "1cba64a2",
"metadata": {},
"outputs": [
@@ -151,7 +160,7 @@
"Document(page_content='Ankush went to Princeton', lookup_str='', metadata={}, lookup_index=0)"
]
},
"execution_count": 7,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -171,7 +180,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 14,
"id": "df4a459c",
"metadata": {},
"outputs": [],
@@ -181,12 +190,21 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 15,
"id": "4b480245",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
]
}
],
"source": [
"docsearch = FAISS.from_documents(documents, embeddings)\n",
"docsearch = Chroma.from_documents(documents, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = docsearch.similarity_search(query)"
@@ -194,7 +212,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 16,
"id": "86aa4cda",
"metadata": {},
"outputs": [
@@ -212,7 +230,7 @@
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence. \n"
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
@@ -225,10 +243,28 @@
"id": "2445a5e6",
"metadata": {},
"source": [
"## FAISS-specific\n",
"## FAISS\n",
"There are some FAISS-specific methods. One of them is `similarity_search_with_score`, which returns not only the documents but also the similarity score between the query and each of them."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "479e22ce",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exiting: Cleaning up .chroma directory\n"
]
}
],
"source": [
"docsearch = FAISS.from_texts(texts, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 4,