mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 04:38:26 +00:00
add docs for chroma persistance (#1202)
This commit is contained in:
parent
5bdb8dd6fe
commit
047231840d
@ -12,7 +12,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 1,
|
||||||
"id": "aac9563e",
|
"id": "aac9563e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -25,7 +25,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 2,
|
||||||
"id": "a3c3999a",
|
"id": "a3c3999a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -41,7 +41,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 3,
|
||||||
"id": "5eabdb75",
|
"id": "5eabdb75",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -63,7 +63,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 4,
|
||||||
"id": "4b172de8",
|
"id": "4b172de8",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -89,10 +89,115 @@
|
|||||||
"print(docs[0].page_content)"
|
"print(docs[0].page_content)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "8061454b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Persistence\n",
|
||||||
|
"\n",
|
||||||
|
"The steps below cover how to persist a ChromaDB instance."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2b76db26",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Initialize PersistedChromaDB\n",
|
||||||
|
"Create embeddings for each chunk and insert into the Chroma vector database. The persist_directory argument tells ChromaDB where to store the database when it's persisted.\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "cdb86e0d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Running Chroma using direct local API.\n",
|
||||||
|
"No existing DB found in db, skipping load\n",
|
||||||
|
"No existing DB found in db, skipping load\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Embed and store the texts\n",
|
||||||
|
"# Supplying a persist_directory will store the embeddings on disk\n",
|
||||||
|
"persist_directory = 'db'\n",
|
||||||
|
"\n",
|
||||||
|
"embedding = OpenAIEmbeddings()\n",
|
||||||
|
"vectordb = Chroma.from_documents(documents=docs, embedding=embedding, persist_directory=persist_directory)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "f568a322",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Persist the Database\n",
|
||||||
|
"In a notebook, we should call persist() to ensure the embeddings are written to disk. This isn't necessary in a script - the database will be automatically persisted when the client object is destroyed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "74b08cb4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Persisting DB to disk, putting it in the save folder db\n",
|
||||||
|
"PersistentDuckDB del, about to run persist\n",
|
||||||
|
"Persisting DB to disk, putting it in the save folder db\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"vectordb.persist()\n",
|
||||||
|
"vectordb = None"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "cc9ed900",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Load the Database from disk, and create the chain\n",
|
||||||
|
"Be sure to pass the same persist_directory and embedding_function as you did when you instantiated the database. Initialize the chain we will use for question answering."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "31fecfe9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Running Chroma using direct local API.\n",
|
||||||
|
"loaded in 4 embeddings\n",
|
||||||
|
"loaded in 1 collections\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Now we can load the persisted database from disk, and use it as normal. \n",
|
||||||
|
"vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "a359ed74",
|
"id": "4dde7a0d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
|
Loading…
Reference in New Issue
Block a user