mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 09:48:04 +00:00
[docs]: standardize vectorstores (#24797)
This commit is contained in:
parent
ac649800df
commit
511242280b
@ -17,20 +17,7 @@
|
||||
"source": [
|
||||
"# __ModuleName__VectorStore\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with the __ModuleName__ vector store.\n",
|
||||
"\n",
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d97b55c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install package\n",
|
||||
"!pip install -U __package_name__"
|
||||
"This notebook covers how to get started with the __ModuleName__ vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -38,14 +25,80 @@
|
||||
"id": "36fdc060",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Environment Setup\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Make sure to set the following environment variables:\n",
|
||||
"- TODO: Update with relevant info.\n",
|
||||
"- TODO: Update minimum version to be correct.\n",
|
||||
"\n",
|
||||
"- TODO: fill out relevant environment variables or secrets\n",
|
||||
"- Op\n",
|
||||
"To access __ModuleName__ vector stores you'll need to create a/an __ModuleName__ account, get an API key, and install the `__package_name__` integration package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "64e28aa6",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"%pip install -qU \"__package_name__>=MINIMUM_VERSION\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9695dee7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"## Usage"
|
||||
"- TODO: Update with relevant info.\n",
|
||||
"\n",
|
||||
"Head to (TODO: link) to sign up to __ModuleName__ and generate an API key. Once you've done this set the __MODULE_NAME___API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "894c30e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"__MODULE_NAME___API_KEY\"):\n",
|
||||
" import getpass\n",
|
||||
" os.environ[\"__MODULE_NAME___API_KEY\"] = getpass.getpass(\"Enter your __ModuleName__ API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93df377e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"- TODO: Fill out with relevant init params"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -59,7 +112,224 @@
|
||||
"source": [
|
||||
"from __module_name__.vectorstores import __ModuleName__VectorStore\n",
|
||||
"\n",
|
||||
"# TODO: switch for preferred way to init and use your vector store\n"
|
||||
"vector_store = __ModuleName__VectorStore()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac6071d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17f5efc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"foo\",\n",
|
||||
" metadata={\"source\": \"https://example.com\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"bar\",\n",
|
||||
" metadata={\"source\": \"https://example.com\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"baz\",\n",
|
||||
" metadata={\"source\": \"https://example.com\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [document_1, document_2, document_3]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents,ids=[\"1\",\"2\",\"3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c738c3e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Update items in vector store\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0aa8b71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"updated_document = Document(\n",
|
||||
" page_content=\"qux\",\n",
|
||||
" metadata={\"source\": \"https://another-example.com\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store.update_documents(document_id=\"1\",document=updated_document)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dcf1b905",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ef61e188",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[\"3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3620501",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa0a16fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(query=\"thud\",k=1,filter={\"source\":\"https://example.com\"})\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ed9d733",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5efd2eaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(query=\"thud\",k=1,filter={\"source\":\"https://example.com\"})\n",
|
||||
"for doc, score in results:\n",
|
||||
" print(f\"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c235cdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3460093",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever()\n",
|
||||
"retriever.invoke(\"thud\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "901c75dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using retriever in a simple RAG chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "619b5ef6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langchain import hub\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"\n",
|
||||
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"rag_chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"rag_chain.invoke(\"thud\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "069f1b5f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## TODO: Any functionality specific to this vector store\n",
|
||||
"\n",
|
||||
"E.g. creating a persistent database to save to your disk, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8a27244f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all __ModuleName__VectorStore features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/__module_name__.vectorstores.__ModuleName__VectorStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -26,15 +26,109 @@ VST = TypeVar("VST", bound=VectorStore)
|
||||
|
||||
|
||||
class __ModuleName__VectorStore(VectorStore):
|
||||
"""__ModuleName__ vector store.
|
||||
# TODO: Replace all TODOs in docstring.
|
||||
"""__ModuleName__ vector store integration.
|
||||
|
||||
Example:
|
||||
# TODO: Replace with relevant packages, env vars.
|
||||
Setup:
|
||||
Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -U __package_name__
|
||||
export __MODULE_NAME___API_KEY="your-api-key"
|
||||
|
||||
# TODO: Populate with relevant params.
|
||||
Key init args — indexing params:
|
||||
collection_name: str
|
||||
Name of the collection.
|
||||
embedding_function: Embeddings
|
||||
Embedding function to use.
|
||||
|
||||
# TODO: Populate with relevant params.
|
||||
Key init args — client params:
|
||||
client: Optional[Client]
|
||||
Client to use.
|
||||
connection_args: Optional[dict]
|
||||
Connection arguments.
|
||||
|
||||
# TODO: Replace with relevant init params.
|
||||
Instantiate:
|
||||
.. code-block:: python
|
||||
|
||||
from __module_name__.vectorstores import __ModuleName__VectorStore
|
||||
|
||||
vectorstore = __ModuleName__VectorStore()
|
||||
"""
|
||||
vector_store = __ModuleName__VectorStore(
|
||||
collection_name="foo",
|
||||
embedding_function=OpenAIEmbeddings(),
|
||||
connection_args={"uri": "./foo.db"},
|
||||
# other params...
|
||||
)
|
||||
|
||||
# TODO: Populate with relevant variables.
|
||||
Add Documents:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
document = Document(page_content="foo", metadata={"baz": "bar"})
|
||||
vector_store.add_documents([document],ids=["1"])
|
||||
|
||||
# TODO: Populate with relevant variables.
|
||||
Update Documents:
|
||||
.. code-block:: python
|
||||
|
||||
updated_document = Document(
|
||||
page_content="qux",
|
||||
metadata={"bar": "baz"}
|
||||
)
|
||||
|
||||
vector_store.update_documents(document_id="1",document=updated_document)
|
||||
|
||||
Delete Documents:
|
||||
.. code-block:: python
|
||||
|
||||
vector_store.delete(ids=["1"])
|
||||
|
||||
# TODO: Fill out with relevant variables and example output.
|
||||
Search:
|
||||
.. code-block:: python
|
||||
|
||||
results = vector_store.similarity_search(query="thud",k=1)
|
||||
for doc in results:
|
||||
print(f"* {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# TODO: Example output
|
||||
|
||||
# TODO: Fill out with relevant variables and example output.
|
||||
Search with score:
|
||||
.. code-block:: python
|
||||
|
||||
results = vector_store.similarity_search_with_score(query="thud",k=1)
|
||||
for doc, score in results:
|
||||
print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# TODO: Example output
|
||||
|
||||
# TODO: Fill out with relevant variables and example output.
|
||||
Use as Retriever:
|
||||
.. code-block:: python
|
||||
|
||||
retriever = vector_store.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={"k": 1, "fetch_k": 10, "lambda_mult": 0.5},
|
||||
)
|
||||
retriever.invoke("thud")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# TODO: Example output
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
|
@ -155,8 +155,8 @@ def create_doc(
|
||||
str,
|
||||
typer.Option(
|
||||
help=(
|
||||
"The type of component. Currently only 'ChatModel', 'DocumentLoader' "
|
||||
"supported."
|
||||
"The type of component. Currently only 'ChatModel', "
|
||||
"'DocumentLoader', 'VectorStore' supported."
|
||||
),
|
||||
),
|
||||
] = "ChatModel",
|
||||
@ -209,6 +209,10 @@ def create_doc(
|
||||
Path(__file__).parents[1]
|
||||
/ "integration_template/docs/document_loaders.ipynb"
|
||||
)
|
||||
elif component_type == "VectorStore":
|
||||
docs_template = (
|
||||
Path(__file__).parents[1] / "integration_template/docs/vectorstores.ipynb"
|
||||
)
|
||||
shutil.copy(docs_template, destination_path)
|
||||
|
||||
# replacements in file
|
||||
|
Loading…
Reference in New Issue
Block a user