mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 22:44:36 +00:00
[docs]: vector store integration pages (#24858)
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -45,25 +45,142 @@ DEFAULT_INSERT_BATCH_SIZE = 100_000
|
||||
|
||||
|
||||
class MongoDBAtlasVectorSearch(VectorStore):
|
||||
"""`MongoDB Atlas Vector Search` vector store.
|
||||
"""MongoDB Atlas vector store integration.
|
||||
|
||||
To use, you should have both:
|
||||
- the ``pymongo`` python package installed
|
||||
- a connection string associated with a MongoDB Atlas Cluster having deployed an
|
||||
Atlas Search index
|
||||
Setup:
|
||||
Install ``langchain-mongodb`` and ``pymongo`` and set up a MongoDB Atlas cluster (read through [this guide](https://www.mongodb.com/docs/manual/reference/connection-string/) to do so).
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -qU langchain-mongodb pymongo
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_mongodb import MongoDBAtlasVectorSearch
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from pymongo import MongoClient
|
||||
import getpass
|
||||
|
||||
mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
|
||||
collection = mongo_client["<db_name>"]["<collection_name>"]
|
||||
embeddings = OpenAIEmbeddings()
|
||||
vectorstore = MongoDBAtlasVectorSearch(collection, embeddings)
|
||||
"""
|
||||
MONGODB_ATLAS_CLUSTER_URI = getpass.getpass("MongoDB Atlas Cluster URI:")
|
||||
|
||||
Key init args — indexing params:
|
||||
embedding: Embeddings
|
||||
Embedding function to use.
|
||||
|
||||
Key init args — client params:
|
||||
collection: Collection
|
||||
MongoDB collection to use.
|
||||
index_name: str
|
||||
Name of the Atlas Search index.
|
||||
|
||||
Instantiate:
|
||||
.. code-block:: python
|
||||
|
||||
from pymongo import MongoClient
|
||||
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
|
||||
from pymongo import MongoClient
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
# initialize MongoDB python client
|
||||
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
|
||||
|
||||
DB_NAME = "langchain_test_db"
|
||||
COLLECTION_NAME = "langchain_test_vectorstores"
|
||||
ATLAS_VECTOR_SEARCH_INDEX_NAME = "langchain-test-index-vectorstores"
|
||||
|
||||
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]
|
||||
|
||||
vector_store = MongoDBAtlasVectorSearch(
|
||||
collection=MONGODB_COLLECTION,
|
||||
embedding=OpenAIEmbeddings(),
|
||||
index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
|
||||
relevance_score_fn="cosine",
|
||||
)
|
||||
|
||||
Add Documents:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
document_1 = Document(page_content="foo", metadata={"baz": "bar"})
|
||||
document_2 = Document(page_content="thud", metadata={"bar": "baz"})
|
||||
document_3 = Document(page_content="i will be deleted :(")
|
||||
|
||||
documents = [document_1, document_2, document_3]
|
||||
ids = ["1", "2", "3"]
|
||||
vector_store.add_documents(documents=documents, ids=ids)
|
||||
|
||||
Delete Documents:
|
||||
.. code-block:: python
|
||||
|
||||
vector_store.delete(ids=["3"])
|
||||
|
||||
Search:
|
||||
.. code-block:: python
|
||||
|
||||
results = vector_store.similarity_search(query="thud",k=1)
|
||||
for doc in results:
|
||||
print(f"* {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
* thud [{'_id': '2', 'bar': 'baz'}]
|
||||
|
||||
|
||||
Search with filter:
|
||||
.. code-block:: python
|
||||
|
||||
results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
|
||||
for doc in results:
|
||||
print(f"* {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
* thud [{'_id': '2', 'bar': 'baz'}]
|
||||
|
||||
Search with score:
|
||||
.. code-block:: python
|
||||
|
||||
results = vector_store.similarity_search_with_score(query="qux",k=1)
|
||||
for doc, score in results:
|
||||
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
* [SIM=0.916096] foo [{'_id': '1', 'baz': 'bar'}]
|
||||
|
||||
Async:
|
||||
.. code-block:: python
|
||||
|
||||
# add documents
|
||||
# await vector_store.aadd_documents(documents=documents, ids=ids)
|
||||
|
||||
# delete documents
|
||||
# await vector_store.adelete(ids=["3"])
|
||||
|
||||
# search
|
||||
# results = await vector_store.asimilarity_search(query="thud",k=1)
|
||||
|
||||
# search with score
|
||||
results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
|
||||
for doc,score in results:
|
||||
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
* [SIM=0.916096] foo [{'_id': '1', 'baz': 'bar'}]
|
||||
|
||||
Use as Retriever:
|
||||
.. code-block:: python
|
||||
|
||||
retriever = vector_store.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
|
||||
)
|
||||
retriever.invoke("thud")
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
[Document(metadata={'_id': '2', 'embedding': [-0.01850726455450058, -0.0014740974875167012, -0.009762819856405258, ...], 'bar': 'baz'}, page_content='thud')]
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
Reference in New Issue
Block a user