mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 00:11:17 +00:00
LangChain-MongoDB: [Experimental] Driver-side index creation helper (#19359)
## Description Created a helper method to create vector search indexes client-side via pymongo. **Recent Update** -- Removed the error-suppressing/overwriting layer in favor of letting the original exception provide information. ## To-dos - [x] Make `_wait_until`s for the integration tests' delete-index functionality. - [x] Add documentation for its use. Highlight that it's experimental. - [x] Post integration test results in a screenshot. - [x] Get review from MongoDB internal team (@shaneharvey, @blink1073 , @NoahStapp , @caseyclements) - [x] **Add tests and docs**: If you're adding a new integration, please include 1. Added new integration tests. Not eligible for unit testing since the operation is Atlas Cloud specific. 2. An example notebook showing its use. It lives in the `docs/docs/integrations` directory.  - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/
This commit is contained in:
105
libs/partners/mongodb/langchain_mongodb/index.py
Normal file
105
libs/partners/mongodb/langchain_mongodb/index.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pymongo.collection import Collection
|
||||
from pymongo.operations import SearchIndexModel
|
||||
|
||||
# Name the logger after the module's import path rather than its filesystem
# path, so it sits in the package's logger hierarchy and can be configured by
# dotted name like the other modules in this package.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _vector_search_index_definition(
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[Dict[str, str]]],
) -> Dict[str, Any]:
    """Assemble the ``definition`` document for a vector search index.

    Args:
        dimensions (int): Stored as ``numDimensions`` of the vector field.
        path (str): Stored as ``path`` of the vector field.
        similarity (str): Stored as ``similarity`` of the vector field.
        filters (Optional[List[Dict[str, str]]]): Extra field entries placed
            after the vector field. ``None`` or an empty list adds nothing.

    Returns:
        Dict[str, Any]: A dict with a single ``"fields"`` key whose list
        starts with the vector field, followed by any ``filters`` entries
        in their original order.
    """
    vector_field: Dict[str, Any] = {
        "numDimensions": dimensions,
        "path": path,
        "similarity": similarity,
        "type": "vector",
    }
    fields: List[Dict[str, Any]] = [vector_field]
    if filters:
        fields.extend(filters)
    return {"fields": fields}
|
||||
|
||||
|
||||
def create_vector_search_index(
    collection: Collection,
    index_name: str,
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[Dict[str, str]]] = None,
) -> None:
    """Experimental utility function to create a vector search index.

    Builds a ``vectorSearch`` index model from the arguments and submits it
    through ``collection.create_search_index``. No exception handling is done
    here, so any error raised by the driver propagates to the caller.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        dimensions (int): Number of dimensions in embedding
        path (str): field with vector embedding
        similarity (str): The similarity score used for the index
        filters (Optional[List[Dict[str, str]]]): additional filters for
            index definition. ``None`` (the default) or an empty list adds
            no filter fields.
    """
    logger.info("Creating Search Index %s on %s", index_name, collection.name)
    definition = _vector_search_index_definition(
        dimensions=dimensions,
        path=path,
        similarity=similarity,
        filters=filters,
    )
    result = collection.create_search_index(
        SearchIndexModel(
            definition=definition,
            name=index_name,
            type="vectorSearch",
        )
    )
    # Record whatever the driver returned for the create call.
    logger.info(result)
|
||||
|
||||
|
||||
def drop_vector_search_index(collection: Collection, index_name: str) -> None:
    """Remove a previously created vector search index from a collection.

    Delegates to ``collection.drop_search_index`` and logs before and after
    the call. Errors raised by the driver are not caught here.

    Args:
        collection (Collection): MongoDB Collection with index to be dropped
        index_name (str): Name of the MongoDB index
    """
    collection_name = collection.name
    logger.info(
        "Dropping Search Index %s from Collection: %s",
        index_name,
        collection_name,
    )
    collection.drop_search_index(index_name)
    # Only reached when the drop call returned without raising.
    logger.info(
        "Vector Search index %s.%s dropped",
        collection_name,
        index_name,
    )
|
||||
|
||||
|
||||
def update_vector_search_index(
    collection: Collection,
    index_name: str,
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[Dict[str, str]]] = None,
) -> None:
    """Leverages the updateSearchIndex call to replace an index definition.

    Rebuilds the full index definition from the arguments and submits it
    through ``collection.update_search_index``. No exception handling is done
    here, so any error raised by the driver propagates to the caller.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        dimensions (int): Number of dimensions in embedding.
        path (str): field with vector embedding.
        similarity (str): The similarity score used for the index.
        filters (Optional[List[Dict[str, str]]]): additional filters for
            index definition. ``None`` (the default) or an empty list adds
            no filter fields.
    """
    logger.info(
        "Updating Search Index %s from Collection: %s", index_name, collection.name
    )
    definition = _vector_search_index_definition(
        dimensions=dimensions,
        path=path,
        similarity=similarity,
        filters=filters,
    )
    collection.update_search_index(name=index_name, definition=definition)
    # Only reached when the update call returned without raising.
    logger.info("Update succeeded")
|
@@ -18,6 +18,11 @@ logger = logging.getLogger(__name__)
|
||||
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
|
||||
|
||||
|
||||
class FailCode:
    """Named integer codes for index-related failures.

    NOTE(review): the values look like MongoDB server error codes; confirm
    against the server's error-code list before relying on them.
    """

    # Code for "the requested index does not exist".
    INDEX_NOT_FOUND = 27
    # Code for "an index with this name already exists".
    INDEX_ALREADY_EXISTS = 68
|
||||
|
||||
|
||||
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
||||
"""Row-wise cosine similarity between two equal-width matrices."""
|
||||
if len(X) == 0 or len(Y) == 0:
|
||||
|
@@ -24,7 +24,12 @@ from langchain_core.vectorstores import VectorStore
|
||||
from pymongo import MongoClient
|
||||
from pymongo.collection import Collection
|
||||
from pymongo.driver_info import DriverInfo
|
||||
from pymongo.errors import CollectionInvalid
|
||||
|
||||
from langchain_mongodb.index import (
|
||||
create_vector_search_index,
|
||||
update_vector_search_index,
|
||||
)
|
||||
from langchain_mongodb.utils import maximal_marginal_relevance
|
||||
|
||||
MongoDBDocumentType = TypeVar("MongoDBDocumentType", bound=Dict[str, Any])
|
||||
@@ -489,3 +494,42 @@ class MongoDBAtlasVectorSearch(VectorStore):
|
||||
lambda_mult=lambda_mult,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def create_vector_search_index(
    self,
    dimensions: int,
    filters: Optional[List[Dict[str, str]]] = None,
    update: bool = False,
) -> None:
    """Create or update the MongoDB Atlas vectorSearch index for this store.

    **Note**: This method may fail as it requires a MongoDB Atlas with
    these pre-requisites:
        - M10 cluster or higher
        - https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#prerequisites

    The index is built on this store's collection, using its index name,
    its embedding key as the vector path, and its relevance score function
    as the similarity.

    Args:
        dimensions (int): Number of dimensions in embedding
        filters (Optional[List[Dict[str, str]]], optional): additional filters
            for index definition.
            Defaults to None.
        update (bool, optional): Updates existing vectorSearch index.
            Defaults to False.
    """
    target = self._collection

    # Best-effort creation of the collection before indexing it.
    try:
        target.database.create_collection(target.name)
    except CollectionInvalid:
        # Deliberately ignored; presumably this means the collection already
        # exists (confirm against pymongo's create_collection docs).
        pass

    # These bare names resolve to the module-level helpers imported from
    # langchain_mongodb.index, not to this method of the same name.
    if update:
        index_operation = update_vector_search_index
    else:
        index_operation = create_vector_search_index

    index_operation(
        collection=target,
        index_name=self._index_name,
        dimensions=dimensions,
        path=self._embedding_key,
        similarity=self._relevance_score_fn,
        filters=filters or [],
    )
|
||||
|
Reference in New Issue
Block a user