Docs: Using SupabaseVectorStore with existing documents (#10907)

## Description
Adds additional docs on how to use `SupabaseVectorStore` with existing
data in your DB (vs inserting new documents each time).
This commit is contained in:
Greg Richardson 2023-09-22 09:18:56 -06:00 committed by GitHub
parent 9d4b710a48
commit 4eee789dd3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -92,7 +92,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 15,
"id": "19846a7b-99bc-47a7-8e1c-f13c2497f1ae", "id": "19846a7b-99bc-47a7-8e1c-f13c2497f1ae",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -105,7 +105,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 16,
"id": "c71c3901-d44b-4d09-92c5-3018628c28fa", "id": "c71c3901-d44b-4d09-92c5-3018628c28fa",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -115,7 +115,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 17,
"id": "8b91ecfa-f61b-489a-a337-dff1f12f6ab2", "id": "8b91ecfa-f61b-489a-a337-dff1f12f6ab2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -138,51 +138,66 @@
"load_dotenv()" "load_dotenv()"
] ]
}, },
{
"cell_type": "markdown",
"id": "924d4df5",
"metadata": {},
"source": [
"First we'll create a Supabase client and instantiate an OpenAI embeddings class."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 19,
"id": "5ce44f7c", "id": "5ce44f7c",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"from supabase.client import Client, create_client\n", "from supabase.client import Client, create_client\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import SupabaseVectorStore\n",
"\n", "\n",
"supabase_url = os.environ.get(\"SUPABASE_URL\")\n", "supabase_url = os.environ.get(\"SUPABASE_URL\")\n",
"supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n", "supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n",
"supabase: Client = create_client(supabase_url, supabase_key)" "supabase: Client = create_client(supabase_url, supabase_key)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "markdown",
"id": "0c707d4c",
"metadata": {},
"source": [
"Next we'll load and parse some data for our vector store (skip if you already have documents with embeddings stored in your DB)."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 20,
"id": "aac9563e", "id": "aac9563e",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n", "\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import SupabaseVectorStore\n",
"from langchain.document_loaders import TextLoader"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a3c3999a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)"
"\n", ]
"embeddings = OpenAIEmbeddings()" },
{
"cell_type": "markdown",
"id": "5abb9b93",
"metadata": {},
"source": [
"Insert the above documents into the database. Embeddings will automatically be generated for each document."
] ]
}, },
{ {
@ -192,13 +207,39 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We're using the default `documents` table here. You can modify this by passing in a `table_name` argument to the `from_documents` method.\n", "\n",
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase)" "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
]
},
{
"cell_type": "markdown",
"id": "e169345d",
"metadata": {},
"source": [
"Alternatively, if you already have documents with embeddings in your database, simply instantiate a new `SupabaseVectorStore` directly:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 10,
"id": "397e3e7d",
"metadata": {},
"outputs": [],
"source": [
"vector_store = SupabaseVectorStore(embedding=embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
]
},
{
"cell_type": "markdown",
"id": "e28ce092",
"metadata": {},
"source": [
"Finally, test it out by performing a similarity search:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eabdb75", "id": "5eabdb75",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -209,7 +250,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"id": "4b172de8", "id": "4b172de8",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -431,7 +472,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.12" "version": "3.11.5"
} }
}, },
"nbformat": 4, "nbformat": 4,