mongodb: Add Hybrid and Full-Text Search Retrievers, release 0.2.0 (#25057)

## Description

This pull-request extends the existing vector search strategies of
MongoDBAtlasVectorSearch to include Hybrid (Reciprocal Rank Fusion) and
Full-text via new Retrievers.

There is a small breaking change in the form of the `prefilter` kwarg to
search. For this, and because we have now added a great deal of
features, including programmatic Index creation/deletion since 0.1.0, we
plan to bump the version to 0.2.0.

### Checklist
* Unit tests have been extended
* formatting has been applied
* One mypy error remains which will either go away in CI or be
simplified.

---------

Signed-off-by: Casey Clements <casey.clements@mongodb.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Casey Clements 2024-08-07 16:10:29 -04:00 committed by GitHub
parent f337408b0f
commit 6e9a8b188f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 1749 additions and 508 deletions

View File

@ -18,7 +18,7 @@ MONGODB_ATLAS_CLUSTER_URI = os.environ.get("MONGODB_ATLAS_CLUSTER_URI")
DB_NAME = "langchain_db" DB_NAME = "langchain_db"
COLLECTION_NAME = "test" COLLECTION_NAME = "test"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "index_name" ATLAS_VECTOR_SEARCH_INDEX_NAME = "index_name"
MONGODB_COLLECTION = client[DB_NAME][COLLECITON_NAME] MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]
# Create the vector search via `from_connection_string` # Create the vector search via `from_connection_string`
vector_search = MongoDBAtlasVectorSearch.from_connection_string( vector_search = MongoDBAtlasVectorSearch.from_connection_string(

View File

@ -1,8 +1,16 @@
"""
Integrate your operational database and vector search in a single, unified,
fully managed platform with full vector database capabilities on MongoDB Atlas.
Store your operational data, metadata, and vector embeddings in our VectorStore,
MongoDBAtlasVectorSearch.
Insert into a Chain via a Vector, FullText, or Hybrid Retriever.
"""
from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from langchain_mongodb.vectorstores import ( from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
MongoDBAtlasVectorSearch,
)
__all__ = [ __all__ = [
"MongoDBAtlasVectorSearch", "MongoDBAtlasVectorSearch",

View File

@ -1,10 +1,4 @@
""" """LangChain MongoDB Caches."""
LangChain MongoDB Caches
Functions "_loads_generations" and "_dumps_generations"
are duplicated in this utility from modules:
- "libs/community/langchain_community/cache.py"
"""
import json import json
import logging import logging
@ -27,100 +21,6 @@ from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
logger = logging.getLogger(__file__) logger = logging.getLogger(__file__)
def _generate_mongo_client(connection_string: str) -> MongoClient:
return MongoClient(
connection_string,
driver=DriverInfo(name="Langchain", version=version("langchain-mongodb")),
)
def _dumps_generations(generations: RETURN_VAL_TYPE) -> str:
"""
Serialization for generic RETURN_VAL_TYPE, i.e. sequence of `Generation`
Args:
generations (RETURN_VAL_TYPE): A list of language model generations.
Returns:
str: a single string representing a list of generations.
This function (+ its counterpart `_loads_generations`) rely on
the dumps/loads pair with Reviver, so are able to deal
with all subclasses of Generation.
Each item in the list can be `dumps`ed to a string,
then we make the whole list of strings into a json-dumped.
"""
return json.dumps([dumps(_item) for _item in generations])
def _loads_generations(generations_str: str) -> Union[RETURN_VAL_TYPE, None]:
"""
Deserialization of a string into a generic RETURN_VAL_TYPE
(i.e. a sequence of `Generation`).
See `_dumps_generations`, the inverse of this function.
Args:
generations_str (str): A string representing a list of generations.
Compatible with the legacy cache-blob format
Does not raise exceptions for malformed entries, just logs a warning
and returns none: the caller should be prepared for such a cache miss.
Returns:
RETURN_VAL_TYPE: A list of generations.
"""
try:
generations = [loads(_item_str) for _item_str in json.loads(generations_str)]
return generations
except (json.JSONDecodeError, TypeError):
# deferring the (soft) handling to after the legacy-format attempt
pass
try:
gen_dicts = json.loads(generations_str)
# not relying on `_load_generations_from_json` (which could disappear):
generations = [Generation(**generation_dict) for generation_dict in gen_dicts]
logger.warning(
f"Legacy 'Generation' cached blob encountered: '{generations_str}'"
)
return generations
except (json.JSONDecodeError, TypeError):
logger.warning(
f"Malformed/unparsable cached blob encountered: '{generations_str}'"
)
return None
def _wait_until(
predicate: Callable, success_description: Any, timeout: float = 10.0
) -> None:
"""Wait up to 10 seconds (by default) for predicate to be true.
E.g.:
wait_until(lambda: client.primary == ('a', 1),
'connect to the primary')
If the lambda-expression isn't true after 10 seconds, we raise
AssertionError("Didn't ever connect to the primary").
Returns the predicate's first true value.
"""
start = time.time()
interval = min(float(timeout) / 100, 0.1)
while True:
retval = predicate()
if retval:
return retval
if time.time() - start > timeout:
raise TimeoutError("Didn't ever %s" % success_description)
time.sleep(interval)
class MongoDBCache(BaseCache): class MongoDBCache(BaseCache):
"""MongoDB Atlas cache """MongoDB Atlas cache
@ -216,7 +116,7 @@ class MongoDBAtlasSemanticCache(BaseCache, MongoDBAtlasVectorSearch):
collection_name: str = "default", collection_name: str = "default",
database_name: str = "default", database_name: str = "default",
index_name: str = "default", index_name: str = "default",
wait_until_ready: bool = False, wait_until_ready: Optional[float] = None,
score_threshold: Optional[float] = None, score_threshold: Optional[float] = None,
**kwargs: Dict[str, Any], **kwargs: Dict[str, Any],
): ):
@ -233,8 +133,8 @@ class MongoDBAtlasSemanticCache(BaseCache, MongoDBAtlasVectorSearch):
Defaults to "default". Defaults to "default".
index_name: Name of the Atlas Search index. index_name: Name of the Atlas Search index.
defaults to 'default' defaults to 'default'
wait_until_ready (bool): Block until MongoDB Atlas finishes indexing wait_until_ready (float): Wait this time for Atlas to finish indexing
the stored text. Hard timeout of 10 seconds. Defaults to False. the stored text. Defaults to None.
""" """
client = _generate_mongo_client(connection_string) client = _generate_mongo_client(connection_string)
self.collection = client[database_name][collection_name] self.collection = client[database_name][collection_name]
@ -272,7 +172,7 @@ class MongoDBAtlasSemanticCache(BaseCache, MongoDBAtlasVectorSearch):
prompt: str, prompt: str,
llm_string: str, llm_string: str,
return_val: RETURN_VAL_TYPE, return_val: RETURN_VAL_TYPE,
wait_until_ready: Optional[bool] = None, wait_until_ready: Optional[float] = None,
) -> None: ) -> None:
"""Update cache based on prompt and llm_string.""" """Update cache based on prompt and llm_string."""
self.add_texts( self.add_texts(
@ -290,7 +190,7 @@ class MongoDBAtlasSemanticCache(BaseCache, MongoDBAtlasVectorSearch):
return self.lookup(prompt, llm_string) == return_val return self.lookup(prompt, llm_string) == return_val
if wait: if wait:
_wait_until(is_indexed, return_val) _wait_until(is_indexed, return_val, timeout=wait)
def clear(self, **kwargs: Any) -> None: def clear(self, **kwargs: Any) -> None:
"""Clear cache that can take additional keyword arguments. """Clear cache that can take additional keyword arguments.
@ -302,3 +202,107 @@ class MongoDBAtlasSemanticCache(BaseCache, MongoDBAtlasVectorSearch):
self.clear(llm_string="fake-model") self.clear(llm_string="fake-model")
""" """
self.collection.delete_many({**kwargs}) self.collection.delete_many({**kwargs})
def _generate_mongo_client(connection_string: str) -> MongoClient:
    """Create a MongoClient for ``connection_string`` tagged with driver metadata.

    The client is constructed with a ``DriverInfo`` whose name is "Langchain"
    and whose version is the installed ``langchain-mongodb`` package version.
    """
    driver_info = DriverInfo(name="Langchain", version=version("langchain-mongodb"))
    return MongoClient(connection_string, driver=driver_info)
def _dumps_generations(generations: RETURN_VAL_TYPE) -> str:
    """Serialize a generic RETURN_VAL_TYPE, i.e. a sequence of `Generation`.

    Args:
        generations (RETURN_VAL_TYPE): A list of language model generations.

    Returns:
        str: a single string representing a list of generations.

    This function and its counterpart `_loads_generations` are duplicated
    in this utility from module "libs/community/langchain_community/cache.py".

    Both rely on the dumps/loads pair with Reviver, so they are able to deal
    with all subclasses of Generation.

    Each item in the list is `dumps`ed to a string; the resulting list of
    strings is then JSON-encoded into a single string.
    """
    serialized_items = []
    for generation in generations:
        serialized_items.append(dumps(generation))
    return json.dumps(serialized_items)
def _loads_generations(generations_str: str) -> Union[RETURN_VAL_TYPE, None]:
    """Deserialize a string into a generic RETURN_VAL_TYPE.

    The result is a sequence of `Generation`. This is the inverse of
    `_dumps_generations`.

    Args:
        generations_str (str): A string representing a list of generations.

    Returns:
        RETURN_VAL_TYPE: A list of generations, or None when the blob cannot
        be parsed in either the current or the legacy format.

    The current format is tried first: a JSON list whose items are each
    passed to `loads`. If that fails, the legacy cache-blob format is tried:
    a JSON list of dicts, each expanded into `Generation(**dict)`.

    Malformed entries do not raise: a warning is logged and None is returned,
    so the caller should be prepared to treat this as a cache miss.
    """
    # Attempt 1: current format (list of individually `dumps`ed items).
    try:
        return [loads(item_str) for item_str in json.loads(generations_str)]
    except (json.JSONDecodeError, TypeError):
        # deferring the (soft) handling to after the legacy-format attempt
        pass

    # Attempt 2: legacy format (list of plain Generation keyword dicts).
    try:
        legacy_dicts = json.loads(generations_str)
        # not relying on `_load_generations_from_json` (which could disappear):
        legacy_generations = [Generation(**fields) for fields in legacy_dicts]
        logger.warning(
            f"Legacy 'Generation' cached blob encountered: '{generations_str}'"
        )
        return legacy_generations
    except (json.JSONDecodeError, TypeError):
        logger.warning(
            f"Malformed/unparsable cached blob encountered: '{generations_str}'"
        )
        return None
def _wait_until(
predicate: Callable, success_description: Any, timeout: float = 10.0
) -> None:
"""Wait up to 10 seconds (by default) for predicate to be true.
E.g.:
wait_until(lambda: client.primary == ('a', 1),
'connect to the primary')
If the lambda-expression isn't true after 10 seconds, we raise
AssertionError("Didn't ever connect to the primary").
Returns the predicate's first true value.
"""
start = time.time()
interval = min(float(timeout) / 100, 0.1)
while True:
retval = predicate()
if retval:
return retval
if time.time() - start > timeout:
raise TimeoutError("Didn't ever %s" % success_description)
time.sleep(interval)

View File

@ -1,3 +1,5 @@
"""Search Index Commands"""
import logging import logging
from time import monotonic, sleep from time import monotonic, sleep
from typing import Any, Callable, Dict, List, Optional from typing import Any, Callable, Dict, List, Optional
@ -8,8 +10,6 @@ from pymongo.operations import SearchIndexModel
logger = logging.getLogger(__file__) logger = logging.getLogger(__file__)
_DELAY = 0.5 # Interval between checks for index operations
def _search_index_error_message() -> str: def _search_index_error_message() -> str:
return ( return (
@ -25,19 +25,24 @@ def _vector_search_index_definition(
dimensions: int, dimensions: int,
path: str, path: str,
similarity: str, similarity: str,
filters: Optional[List[Dict[str, str]]], filters: Optional[List[str]] = None,
**kwargs: Any,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
return { # https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/
"fields": [ fields = [
{ {
"numDimensions": dimensions, "numDimensions": dimensions,
"path": path, "path": path,
"similarity": similarity, "similarity": similarity,
"type": "vector", "type": "vector",
}, },
*(filters or []),
] ]
} if filters:
for field in filters:
fields.append({"type": "filter", "path": field})
definition = {"fields": fields}
definition.update(kwargs)
return definition
def create_vector_search_index( def create_vector_search_index(
@ -46,9 +51,10 @@ def create_vector_search_index(
dimensions: int, dimensions: int,
path: str, path: str,
similarity: str, similarity: str,
filters: Optional[List[Dict[str, str]]] = None, filters: Optional[List[str]] = None,
*, *,
wait_until_complete: Optional[float] = None, wait_until_complete: Optional[float] = None,
**kwargs: Any,
) -> None: ) -> None:
"""Experimental Utility function to create a vector search index """Experimental Utility function to create a vector search index
@ -58,9 +64,10 @@ def create_vector_search_index(
dimensions (int): Number of dimensions in embedding dimensions (int): Number of dimensions in embedding
path (str): field with vector embedding path (str): field with vector embedding
similarity (str): The similarity score used for the index similarity (str): The similarity score used for the index
filters (List[Dict[str, str]]): additional filters for index definition. filters (List[str]): Fields/paths to index to allow filtering in $vectorSearch
wait_until_complete (Optional[float]): If provided, number of seconds to wait wait_until_complete (Optional[float]): If provided, number of seconds to wait
until search index is ready. until search index is ready.
kwargs: Keyword arguments supplying any additional options to SearchIndexModel.
""" """
logger.info("Creating Search Index %s on %s", index_name, collection.name) logger.info("Creating Search Index %s on %s", index_name, collection.name)
@ -72,6 +79,7 @@ def create_vector_search_index(
path=path, path=path,
similarity=similarity, similarity=similarity,
filters=filters, filters=filters,
**kwargs,
), ),
name=index_name, name=index_name,
type="vectorSearch", type="vectorSearch",
@ -83,7 +91,7 @@ def create_vector_search_index(
if wait_until_complete: if wait_until_complete:
_wait_for_predicate( _wait_for_predicate(
predicate=lambda: _is_index_ready(collection, index_name), predicate=lambda: _is_index_ready(collection, index_name),
err=f"Index {index_name} creation did not finish in {wait_until_complete}!", err=f"{index_name=} did not complete in {wait_until_complete}!",
timeout=wait_until_complete, timeout=wait_until_complete,
) )
logger.info(result) logger.info(result)
@ -127,9 +135,10 @@ def update_vector_search_index(
dimensions: int, dimensions: int,
path: str, path: str,
similarity: str, similarity: str,
filters: List[Dict[str, str]], filters: Optional[List[str]] = None,
*, *,
wait_until_complete: Optional[float] = None, wait_until_complete: Optional[float] = None,
**kwargs: Any,
) -> None: ) -> None:
"""Update a search index. """Update a search index.
@ -138,12 +147,13 @@ def update_vector_search_index(
Args: Args:
collection (Collection): MongoDB Collection collection (Collection): MongoDB Collection
index_name (str): Name of Index index_name (str): Name of Index
dimensions (int): Number of dimensions in embedding. dimensions (int): Number of dimensions in embedding
path (str): field with vector embedding. path (str): field with vector embedding
similarity (str): The similarity score used for the index. similarity (str): The similarity score used for the index.
filters (List[Dict[str, str]]): additional filters for index definition. filters (List[str]): Fields/paths to index to allow filtering in $vectorSearch
wait_until_complete (Optional[float]): If provided, number of seconds to wait wait_until_complete (Optional[float]): If provided, number of seconds to wait
until search index is ready. until search index is ready.
kwargs: Keyword arguments supplying any additional options to SearchIndexModel.
""" """
logger.info( logger.info(
@ -157,6 +167,7 @@ def update_vector_search_index(
path=path, path=path,
similarity=similarity, similarity=similarity,
filters=filters, filters=filters,
**kwargs,
), ),
) )
except OperationFailure as e: except OperationFailure as e:
@ -201,7 +212,7 @@ def _wait_for_predicate(
Args: Args:
predicate (Callable[, bool]): A function that returns a boolean value predicate (Callable[, bool]): A function that returns a boolean value
err (str): Error message to raise if nothing occurs err (str): Error message to raise if nothing occurs
timeout (float, optional): wait time for predicate. Defaults to TIMEOUT. timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT.
interval (float, optional): Interval to check predicate. Defaults to DELAY. interval (float, optional): Interval to check predicate. Defaults to DELAY.
Raises: Raises:
@ -212,3 +223,48 @@ def _wait_for_predicate(
if monotonic() - start > timeout: if monotonic() - start > timeout:
raise TimeoutError(err) raise TimeoutError(err)
sleep(interval) sleep(interval)
def create_fulltext_search_index(
    collection: Collection,
    index_name: str,
    field: str,
    *,
    wait_until_complete: Optional[float] = None,
    **kwargs: Any,
) -> None:
    """Experimental utility function to create an Atlas Search index.

    The index uses a static mapping (``dynamic`` is False) in which only
    ``field`` is indexed, as type "string".

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        field (str): Field to index
        wait_until_complete (Optional[float]): If provided, number of seconds to wait
            until search index is ready
        kwargs: Keyword arguments supplying any additional options to SearchIndexModel.

    Raises:
        OperationFailure: Re-raised with the module's search-index error
            message when ``create_search_index`` fails.
    """
    logger.info("Creating Search Index %s on %s", index_name, collection.name)

    field_mappings = {field: [{"type": "string"}]}
    index_model = SearchIndexModel(
        definition={"mappings": {"dynamic": False, "fields": field_mappings}},
        name=index_name,
        type="search",
        **kwargs,
    )

    try:
        result = collection.create_search_index(index_model)
    except OperationFailure as e:
        raise OperationFailure(_search_index_error_message()) from e

    if wait_until_complete:

        def _index_ready():
            return _is_index_ready(collection, index_name)

        _wait_for_predicate(
            predicate=_index_ready,
            err=f"{index_name=} did not complete in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
    logger.info(result)

View File

@ -0,0 +1,160 @@
"""Aggregation pipeline components used in Atlas Full-Text, Vector, and Hybrid Search
See the following for more:
- `Full-Text Search <https://www.mongodb.com/docs/atlas/atlas-search/aggregation-stages/search/#mongodb-pipeline-pipe.-search>`_
- `MongoDB Operators <https://www.mongodb.com/docs/atlas/atlas-search/operators-and-collectors/#std-label-operators-ref>`_
- `Vector Search <https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/>`_
- `Filter Example <https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#atlas-vector-search-pre-filter>`_
"""
from typing import Any, Dict, List, Optional
def text_search_stage(
    query: str,
    search_field: str,
    index_name: str,
    limit: Optional[int] = None,
    filter: Optional[Dict[str, Any]] = None,
    include_scores: Optional[bool] = True,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """Full-Text search using Lucene's standard (BM25) analyzer

    Args:
        query: Input text to search for
        search_field: Field in Collection that will be searched
        index_name: Atlas Search Index name
        limit: Maximum number of documents to return. Default of no limit
        filter: Any MQL match expression comparing an indexed field.
            Applied as a ``$match`` stage after ``$search``.
        include_scores: Scores provide measure of relative relevance.
            When truthy, a ``score`` field is set from the ``searchScore`` meta.
        kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        List of aggregation stages: ``$search`` first, then, in this order and
        only when requested, ``$match``, ``$set`` (score) and ``$limit``.
    """
    search = {
        "$search": {
            "index": index_name,
            "text": {"query": query, "path": search_field},
        }
    }
    # Each optional stage is paired with the argument that switches it on;
    # the tuple order fixes the order of the stages in the pipeline.
    optional_stages = (
        (filter, {"$match": filter}),
        (include_scores, {"$set": {"score": {"$meta": "searchScore"}}}),
        (limit, {"$limit": limit}),
    )
    stages: List[Dict[str, Any]] = [search]
    stages.extend(stage for enabled, stage in optional_stages if enabled)
    return stages
def vector_search_stage(
    query_vector: List[float],
    search_field: str,
    index_name: str,
    top_k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    oversampling_factor: int = 10,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Vector Search Stage without Scores.

    Scoring is applied later depending on strategy.
    vector search includes a vectorSearchScore that is typically used.
    hybrid uses Reciprocal Rank Fusion.

    Args:
        query_vector: List of embedding vector
        search_field: Field in Collection containing embedding vectors
        index_name: Name of Atlas Vector Search Index tied to Collection
        top_k: Number of documents to return
        oversampling_factor: ``top_k`` times this value is the number of
            candidates (``numCandidates``)
        filter: MQL match expression comparing an indexed field.
            Some operators are not supported.
            See `vectorSearch filter docs <https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#atlas-vector-search-pre-filter>`_
        kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        Dictionary defining the $vectorSearch
    """
    # The filter key is only emitted for a non-empty filter expression.
    filter_clause = {"filter": filter} if filter else {}
    return {
        "$vectorSearch": {
            "index": index_name,
            "path": search_field,
            "queryVector": query_vector,
            "numCandidates": top_k * oversampling_factor,
            "limit": top_k,
            **filter_clause,
        }
    }
def combine_pipelines(
    pipeline: List[Any], stage: List[Dict[str, Any]], collection_name: str
) -> None:
    """Combines two aggregations into a single result set in-place.

    Args:
        pipeline: Pipeline being built; mutated in place.
        stage: Stages of the sub-pipeline to add.
        collection_name: Collection that the ``$unionWith`` stage runs
            ``stage`` against.

    An empty ``pipeline`` simply receives the stages of ``stage``. A non-empty
    one gets a single ``$unionWith`` stage wrapping ``stage`` appended to it.
    """
    if not pipeline:
        # Nothing to union with yet: the sub-pipeline becomes the pipeline.
        pipeline.extend(stage)
        return
    union_stage = {"$unionWith": {"coll": collection_name, "pipeline": stage}}
    pipeline.append(union_stage)
def reciprocal_rank_stage(
    score_field: str, penalty: float = 0, **kwargs: Any
) -> List[Dict[str, Any]]:
    r"""Stage adds Reciprocal Rank Fusion weighting.

    First, it pushes documents retrieved from previous stage
    into a temporary sub-document. It then unwinds to establish
    the rank to each and applies the penalty.

    Args:
        score_field: A unique string to identify the search being ranked
        penalty: A non-negative float.
        kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        Aggregation stages that add, to every document, ``score_field`` with
        RRF score := \frac{1}{rank + penalty} with rank in [1,2,..,n],
        and ``rank`` holding the array index assigned by ``$unwind``.
    """
    rrf_pipeline: List[Dict[str, Any]] = [
        # Collect every incoming document into a single array, preserving order.
        {"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
        # Unwinding exposes each document's array index as "rank".
        {"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
        {
            "$addFields": {
                # The trailing 1 shifts the array index so that the first
                # document is scored as rank 1, matching the formula above.
                f"docs.{score_field}": {
                    "$divide": [1.0, {"$add": ["$rank", penalty, 1]}]
                },
                "docs.rank": "$rank",
                "_id": "$docs._id",
            }
        },
        # Promote the original document (now carrying score and rank) to root.
        {"$replaceRoot": {"newRoot": "$docs"}},
    ]
    return rrf_pipeline
def final_hybrid_stage(
    scores_fields: List[str], limit: int, **kwargs: Any
) -> List[Dict[str, Any]]:
    """Sum weighted scores, sort, and apply limit.

    Args:
        scores_fields: List of fields given to scores of vector and text searches
        limit: Number of documents to return
        kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        Final aggregation stages: group by ``_id`` merging the grouped
        documents, default each missing score field to 0, add their sum as
        ``score``, sort by ``score`` descending, and apply ``limit``.
    """
    zero_defaults: Dict[str, Any] = {}
    score_refs: List[str] = []
    for field in scores_fields:
        field_ref = f"${field}"
        # A score field is absent when only the other search returned the doc.
        zero_defaults[field] = {"$ifNull": [field_ref, 0]}
        score_refs.append(field_ref)

    merge_by_id = {"$group": {"_id": "$_id", "docs": {"$mergeObjects": "$$ROOT"}}}
    promote_docs = {"$replaceRoot": {"newRoot": "$docs"}}
    total_score = {"$addFields": {"score": {"$add": score_refs}}}
    return [
        merge_by_id,
        promote_docs,
        {"$set": zero_defaults},
        total_score,
        {"$sort": {"score": -1}},
        {"$limit": limit},
    ]

View File

@ -0,0 +1,15 @@
"""Search Retrievers of various types.
Use ``MongoDBAtlasVectorSearch.as_retriever(**)``
to create MongoDB's core Vector Search Retriever.
"""
from langchain_mongodb.retrievers.full_text_search import (
MongoDBAtlasFullTextSearchRetriever,
)
from langchain_mongodb.retrievers.hybrid_search import MongoDBAtlasHybridSearchRetriever
__all__ = [
"MongoDBAtlasHybridSearchRetriever",
"MongoDBAtlasFullTextSearchRetriever",
]

View File

@ -0,0 +1,59 @@
from typing import Any, Dict, List, Optional
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pymongo.collection import Collection
from langchain_mongodb.pipelines import text_search_stage
from langchain_mongodb.utils import make_serializable
class MongoDBAtlasFullTextSearchRetriever(BaseRetriever):
    """Full-Text Search Retriever performs full-text searches
    using Lucene's standard (BM25) analyzer.
    """

    collection: Collection
    """MongoDB Collection on an Atlas cluster"""
    search_index_name: str
    """Atlas Search Index name"""
    search_field: str
    """Collection field that contains the text to be searched. It must be indexed"""
    top_k: Optional[int] = None
    """Number of documents to return. Default is no limit"""
    filter: Optional[Dict[str, Any]] = None
    """(Optional) MQL match expression comparing an indexed field"""
    show_embeddings: bool = False
    """If true, returned Document metadata will include vectors.

    NOTE(review): this flag is not consulted by ``_get_relevant_documents``;
    every field other than ``search_field`` is returned in the metadata.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Retrieve documents that are highest scoring / most similar to query.

        Args:
            query: String to find relevant documents for
            run_manager: The callback handler to use

        Returns:
            List of relevant documents. Each has the value of ``search_field``
            as its page content and the remaining fields as its metadata.
        """
        pipeline = text_search_stage(  # type: ignore
            query=query,
            search_field=self.search_field,
            index_name=self.search_index_name,
            limit=self.top_k,
            filter=self.filter,
        )

        # Execution
        cursor = self.collection.aggregate(pipeline)  # type: ignore[arg-type]

        # Formatting
        docs = []
        for res in cursor:
            # The searched text becomes the page content; the rest is metadata.
            text = res.pop(self.search_field)
            # Casts the remaining values, in place, to a JSON-serializable form.
            make_serializable(res)
            docs.append(Document(page_content=text, metadata=res))
        return docs

View File

@ -0,0 +1,126 @@
from typing import Any, Dict, List, Optional
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pymongo.collection import Collection
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.pipelines import (
combine_pipelines,
final_hybrid_stage,
reciprocal_rank_stage,
text_search_stage,
vector_search_stage,
)
from langchain_mongodb.utils import make_serializable
class MongoDBAtlasHybridSearchRetriever(BaseRetriever):
    """Hybrid Search Retriever combines vector and full-text searches,
    weighting them via the Reciprocal Rank Fusion (RRF) algorithm.

    Increasing the vector_penalty will reduce the importance on the vector search.
    Increasing the fulltext_penalty will correspondingly reduce the fulltext score.
    For more on the algorithm, see
    https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking
    """

    vectorstore: MongoDBAtlasVectorSearch
    """MongoDBAtlas VectorStore"""
    search_index_name: str
    """Atlas Search Index (full-text) name"""
    top_k: int = 4
    """Number of documents to return."""
    oversampling_factor: int = 10
    """This times top_k is the number of candidates chosen at each step"""
    pre_filter: Optional[Dict[str, Any]] = None
    """(Optional) Any MQL match expression comparing an indexed field"""
    post_filter: Optional[List[Dict[str, Any]]] = None
    """(Optional) Pipeline of MongoDB aggregation stages for postprocessing."""
    vector_penalty: float = 60.0
    """Penalty applied to vector search results in RRF: scores=1/(rank + penalty)"""
    fulltext_penalty: float = 60.0
    """Penalty applied to full-text search results in RRF: scores=1/(rank + penalty)"""
    show_embeddings: bool = False
    """If true, returned Document metadata will include vectors."""

    @property
    def collection(self) -> Collection:
        """Collection of the underlying vector store."""
        return self.vectorstore._collection

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Retrieve documents that are highest scoring / most similar to query.

        Note that the same query is used in both searches,
        embedded for vector search, and as-is for full-text search.

        Args:
            query: String to find relevant documents for
            run_manager: The callback handler to use

        Returns:
            List of relevant documents. The fused ``score`` and the per-search
            score fields remain in each document's metadata.
        """

        query_vector = self.vectorstore._embedding.embed_query(query)

        scores_fields = ["vector_score", "fulltext_score"]
        pipeline: List[Any] = []

        # First we build up the aggregation pipeline,
        # then it is passed to the server to execute
        # Vector Search stage
        vector_pipeline = [
            vector_search_stage(
                query_vector=query_vector,
                search_field=self.vectorstore._embedding_key,
                index_name=self.vectorstore._index_name,
                top_k=self.top_k,
                filter=self.pre_filter,
                oversampling_factor=self.oversampling_factor,
            )
        ]
        vector_pipeline += reciprocal_rank_stage("vector_score", self.vector_penalty)

        combine_pipelines(pipeline, vector_pipeline, self.collection.name)

        # Full-Text Search stage
        text_pipeline = text_search_stage(
            query=query,
            search_field=self.vectorstore._text_key,
            index_name=self.search_index_name,
            limit=self.top_k,
            filter=self.pre_filter,
        )

        text_pipeline.extend(
            reciprocal_rank_stage("fulltext_score", self.fulltext_penalty)
        )

        combine_pipelines(pipeline, text_pipeline, self.collection.name)

        # Sum and sort stage
        pipeline.extend(
            final_hybrid_stage(scores_fields=scores_fields, limit=self.top_k)
        )

        # Removal of embeddings unless requested.
        if not self.show_embeddings:
            pipeline.append({"$project": {self.vectorstore._embedding_key: 0}})
        # Post filtering
        if self.post_filter is not None:
            pipeline.extend(self.post_filter)

        # Execution
        cursor = self.collection.aggregate(pipeline)  # type: ignore[arg-type]

        # Formatting
        docs = []
        for res in cursor:
            text = res.pop(self.vectorstore._text_key)
            # "score" is deliberately not popped: it stays in the metadata.
            make_serializable(res)
            docs.append(Document(page_content=text, metadata=res))
        return docs

View File

@ -1,6 +1,13 @@
""" """Various Utility Functions
Tools for the Maximal Marginal Relevance (MMR) reranking.
Duplicated from langchain_community to avoid cross-dependencies. - Tools for handling bson.ObjectId
The help IDs live as ObjectId in MongoDB and str in Langchain and JSON.
- Tools for the Maximal Marginal Relevance (MMR) reranking
These are duplicated from langchain_community to avoid cross-dependencies.
Functions "maximal_marginal_relevance" and "cosine_similarity" Functions "maximal_marginal_relevance" and "cosine_similarity"
are duplicated in this utility respectively from modules: are duplicated in this utility respectively from modules:
@ -21,11 +28,6 @@ logger = logging.getLogger(__name__)
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray] Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
class FailCode:
INDEX_NOT_FOUND = 27
INDEX_ALREADY_EXISTS = 68
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
"""Row-wise cosine similarity between two equal-width matrices.""" """Row-wise cosine similarity between two equal-width matrices."""
if len(X) == 0 or len(Y) == 0: if len(X) == 0 or len(Y) == 0:
@ -65,7 +67,37 @@ def maximal_marginal_relevance(
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
k: int = 4, k: int = 4,
) -> List[int]: ) -> List[int]:
"""Calculate maximal marginal relevance.""" """Compute Maximal Marginal Relevance (MMR).
MMR is a technique used to select documents that are both relevant to the query
and diverse among themselves. This function returns the indices
of the top-k embeddings that maximize the marginal relevance.
Args:
query_embedding (np.ndarray): The embedding vector of the query.
embedding_list (list of np.ndarray): A list containing the embedding vectors
of the candidate documents.
lambda_mult (float, optional): The trade-off parameter between
relevance and diversity. Defaults to 0.5.
k (int, optional): The number of embeddings to select. Defaults to 4.
Returns:
list of int: The indices of the embeddings that maximize the marginal relevance.
Notes:
The Maximal Marginal Relevance (MMR) is computed using the following formula:
MMR = argmax_{D_i ∈ R \ S} [λ * Sim(D_i, Q) - (1 - λ) * max_{D_j ∈ S} Sim(D_i, D_j)]
where:
- R is the set of candidate documents,
- S is the set of selected documents,
- Q is the query embedding,
- Sim(D_i, Q) is the similarity between document D_i and the query,
- Sim(D_i, D_j) is the similarity between documents D_i and D_j,
- λ is the trade-off parameter.
"""
if min(k, len(embedding_list)) <= 0: if min(k, len(embedding_list)) <= 0:
return [] return []
if query_embedding.ndim == 1: if query_embedding.ndim == 1:
@ -137,6 +169,7 @@ def make_serializable(
obj: Dict[str, Any], obj: Dict[str, Any],
) -> None: ) -> None:
"""Recursively cast values in a dict to a form able to json.dump""" """Recursively cast values in a dict to a form able to json.dump"""
from bson import ObjectId from bson import ObjectId
for k, v in obj.items(): for k, v in obj.items():

View File

@ -29,6 +29,7 @@ from langchain_mongodb.index import (
create_vector_search_index, create_vector_search_index,
update_vector_search_index, update_vector_search_index,
) )
from langchain_mongodb.pipelines import vector_search_stage
from langchain_mongodb.utils import ( from langchain_mongodb.utils import (
make_serializable, make_serializable,
maximal_marginal_relevance, maximal_marginal_relevance,
@ -36,7 +37,6 @@ from langchain_mongodb.utils import (
str_to_oid, str_to_oid,
) )
MongoDBDocumentType = TypeVar("MongoDBDocumentType", bound=Dict[str, Any])
VST = TypeVar("VST", bound=VectorStore) VST = TypeVar("VST", bound=VectorStore)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -47,17 +47,38 @@ DEFAULT_INSERT_BATCH_SIZE = 100_000
class MongoDBAtlasVectorSearch(VectorStore): class MongoDBAtlasVectorSearch(VectorStore):
"""MongoDB Atlas vector store integration. """MongoDB Atlas vector store integration.
MongoDBAtlasVectorSearch performs data operations on
text, embeddings and arbitrary data. In addition to CRUD operations,
the VectorStore provides Vector Search
based on similarity of embedding vectors following the
Hierarchical Navigable Small Worlds (HNSW) algorithm.
This supports a number of models to ascertain scores,
"similarity" (default), "MMR", and "similarity_score_threshold".
These are described in the search_type argument to as_retriever,
which provides the Runnable.invoke(query) API, allowing
MongoDBAtlasVectorSearch to be used within a chain.
Setup: Setup:
Install ``langchain-mongodb`` and ``pymongo`` and setup a MongoDB Atlas cluster (read through [this guide](https://www.mongodb.com/docs/manual/reference/connection-string/) to do so). * Set up a MongoDB Atlas cluster. The free tier M0 will allow you to start.
Search Indexes are only available on Atlas, the fully managed cloud service,
not the self-managed MongoDB.
Follow [this guide](https://www.mongodb.com/basics/mongodb-atlas-tutorial)
* Create a Collection and a Vector Search Index. The procedure is described
[here](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#procedure).
* Install ``langchain-mongodb``
.. code-block:: bash .. code-block:: bash
pip install -qU langchain-mongodb pymongo pip install -qU langchain-mongodb pymongo
.. code-block:: python .. code-block:: python
import getpass import getpass
MONGODB_ATLAS_CLUSTER_URI = getpass.getpass("MongoDB Atlas Cluster URI:") MONGODB_ATLAS_CLUSTER_URI = getpass.getpass("MongoDB Atlas Cluster URI:")
Key init args indexing params: Key init args indexing params:
@ -127,7 +148,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
Search with filter: Search with filter:
.. code-block:: python .. code-block:: python
results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"}) results = vector_store.similarity_search(query="thud",k=1,post_filter=[{"bar": "baz"}])
for doc in results: for doc in results:
print(f"* {doc.page_content} [{doc.metadata}]") print(f"* {doc.page_content} [{doc.metadata}]")
@ -184,29 +205,24 @@ class MongoDBAtlasVectorSearch(VectorStore):
def __init__( def __init__(
self, self,
collection: Collection[MongoDBDocumentType], collection: Collection[Dict[str, Any]],
embedding: Embeddings, embedding: Embeddings,
*, index_name: str = "vector_index",
index_name: str = "default",
text_key: str = "text", text_key: str = "text",
embedding_key: str = "embedding", embedding_key: str = "embedding",
relevance_score_fn: str = "cosine", relevance_score_fn: str = "cosine",
**kwargs: Any,
): ):
""" """
Args: Args:
collection: MongoDB collection to add the texts to. collection: MongoDB collection to add the texts to
embedding: Text embedding model to use. embedding: Text embedding model to use
text_key: MongoDB field that will contain the text for each text_key: MongoDB field that will contain the text for each document
document. index_name: Existing Atlas Vector Search Index
defaults to 'text' embedding_key: Field that will contain the embedding for each document
embedding_key: MongoDB field that will contain the embedding for vector_index_name: Name of the Atlas Vector Search index
each document. relevance_score_fn: The similarity score used for the index
defaults to 'embedding' Currently supported: 'euclidean', 'cosine', and 'dotProduct'
index_name: Name of the Atlas Search index.
defaults to 'default'
relevance_score_fn: The similarity score used for the index.
defaults to 'cosine'
Currently supported: 'euclidean', 'cosine', and 'dotProduct'.
""" """
self._collection = collection self._collection = collection
self._embedding = embedding self._embedding = embedding
@ -412,69 +428,32 @@ class MongoDBAtlasVectorSearch(VectorStore):
start = end start = end
return result_ids return result_ids
def _similarity_search_with_score(
    self,
    embedding: List[float],
    k: int = 4,
    pre_filter: Optional[Dict] = None,
    post_filter_pipeline: Optional[List[Dict]] = None,
    include_embedding: bool = False,
    include_ids: bool = False,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Run a ``$vectorSearch`` aggregation and pair every hit with its score.

    Args:
        embedding: Query vector handed to ``$vectorSearch`` as ``queryVector``.
        k: Value used for ``limit``; ``numCandidates`` is set to ``k * 10``.
        pre_filter: When truthy, attached to the stage as its ``filter``.
        post_filter_pipeline: Extra aggregation stages appended after the
            search, score and projection stages. Skipped when ``None``.
        include_embedding: When False, a ``$project`` stage drops the
            embedding field from every result.
        include_ids: Accepted for interface compatibility; not read here.
        kwargs: Accepted for interface compatibility; not read here.

    Returns:
        One ``(Document, score)`` tuple per result. The text field becomes
        ``page_content``; the remaining fields become ``metadata``.
    """
    search_spec = {
        "queryVector": embedding,
        "path": self._embedding_key,
        "numCandidates": k * 10,
        "limit": k,
        "index": self._index_name,
    }
    if pre_filter:
        search_spec["filter"] = pre_filter

    stages = [
        {"$vectorSearch": search_spec},
        # Surface the search score as an ordinary field on each result.
        {"$set": {"score": {"$meta": "vectorSearchScore"}}},
    ]
    if not include_embedding:
        # Keep the embedding vector out of the returned payload.
        stages.append({"$project": {self._embedding_key: 0}})
    if post_filter_pipeline is not None:
        stages.extend(post_filter_pipeline)

    scored_docs = []
    for record in self._collection.aggregate(stages):  # type: ignore[arg-type]
        # Pop text and score first so that only metadata is left in record.
        page_text = record.pop(self._text_key)
        relevance = record.pop("score")
        make_serializable(record)
        scored_docs.append(
            (Document(page_content=page_text, metadata=record), relevance)
        )
    return scored_docs
def similarity_search_with_score( def similarity_search_with_score(
self, self,
query: str, query: str,
k: int = 4, k: int = 4,
pre_filter: Optional[Dict] = None, pre_filter: Optional[Dict[str, Any]] = None,
post_filter_pipeline: Optional[List[Dict]] = None, post_filter_pipeline: Optional[List[Dict]] = None,
oversampling_factor: int = 10,
include_embeddings: bool = False,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]: # noqa: E501
"""Return MongoDB documents most similar to the given query and their scores. """Return MongoDB documents most similar to the given query and their scores.
Uses the vectorSearch operator available in MongoDB Atlas Search. Atlas Vector Search eliminates the need to run a separate
For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ search system alongside your database.
Args: Args:
query: Text to look up documents similar to. query: Input text of semantic query
k: (Optional) number of documents to return. Defaults to 4. k: Number of documents to return. Also known as top_k.
pre_filter: (Optional) dictionary of argument(s) to prefilter document pre_filter: List of MQL match expressions comparing an indexed field
fields on. post_filter_pipeline: (Optional) Arbitrary pipeline of MongoDB
post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages aggregation stages applied after the search is complete.
following the vectorSearch stage. oversampling_factor: This times k is the number of candidates chosen
at each step in the HNSW Vector Search
include_embeddings: If True, the embedding vector of each result
will be included in metadata.
kwargs: Additional arguments are specific to the search_type
Returns: Returns:
List of documents most similar to the query and their scores. List of documents most similar to the query and their scores.
@ -485,6 +464,8 @@ class MongoDBAtlasVectorSearch(VectorStore):
k=k, k=k,
pre_filter=pre_filter, pre_filter=pre_filter,
post_filter_pipeline=post_filter_pipeline, post_filter_pipeline=post_filter_pipeline,
oversampling_factor=oversampling_factor,
include_embeddings=include_embeddings,
**kwargs, **kwargs,
) )
return docs return docs
@ -493,36 +474,46 @@ class MongoDBAtlasVectorSearch(VectorStore):
self, self,
query: str, query: str,
k: int = 4, k: int = 4,
pre_filter: Optional[Dict] = None, pre_filter: Optional[Dict[str, Any]] = None,
post_filter_pipeline: Optional[List[Dict]] = None, post_filter_pipeline: Optional[List[Dict]] = None,
oversampling_factor: int = 10,
include_scores: bool = False,
include_embeddings: bool = False,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: ) -> List[Document]: # noqa: E501
"""Return MongoDB documents most similar to the given query. """Return MongoDB documents most similar to the given query.
Uses the vectorSearch operator available in MongoDB Atlas Search. Atlas Vector Search eliminates the need to run a separate
For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ search system alongside your database.
Args: Args:
query: Text to look up documents similar to. query: Input text of semantic query
k: (Optional) number of documents to return. Defaults to 4. k: (Optional) number of documents to return. Defaults to 4.
pre_filter: (Optional) dictionary of argument(s) to prefilter document pre_filter: List of MQL match expressions comparing an indexed field
fields on.
post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
following the vectorSearch stage. to filter/process results after $vectorSearch.
oversampling_factor: Multiple of k used when generating number of candidates
at each step in the HNSW Vector Search.
include_scores: If True, the query score of each result
will be included in metadata.
include_embeddings: If True, the embedding vector of each result
will be included in metadata.
kwargs: Additional arguments are specific to the search_type
Returns: Returns:
List of documents most similar to the query and their scores. List of documents most similar to the query and their scores.
""" """
additional = kwargs.get("additional")
docs_and_scores = self.similarity_search_with_score( docs_and_scores = self.similarity_search_with_score(
query, query,
k=k, k=k,
pre_filter=pre_filter, pre_filter=pre_filter,
post_filter_pipeline=post_filter_pipeline, post_filter_pipeline=post_filter_pipeline,
oversampling_factor=oversampling_factor,
include_embeddings=include_embeddings,
**kwargs, **kwargs,
) )
if additional and "similarity_score" in additional: if include_scores:
for doc, score in docs_and_scores: for doc, score in docs_and_scores:
doc.metadata["score"] = score doc.metadata["score"] = score
return [doc for doc, _ in docs_and_scores] return [doc for doc, _ in docs_and_scores]
@ -533,7 +524,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
k: int = 4, k: int = 4,
fetch_k: int = 20, fetch_k: int = 20,
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
pre_filter: Optional[Dict] = None, pre_filter: Optional[Dict[str, Any]] = None,
post_filter_pipeline: Optional[List[Dict]] = None, post_filter_pipeline: Optional[List[Dict]] = None,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: ) -> List[Document]:
@ -549,18 +540,15 @@ class MongoDBAtlasVectorSearch(VectorStore):
algorithm. Defaults to 20. algorithm. Defaults to 20.
lambda_mult: Number between 0 and 1 that determines the degree lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
Defaults to 0.5. pre_filter: List of MQL match expressions comparing an indexed field
pre_filter: (Optional) dictionary of argument(s) to prefilter on document
fields.
post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages
following the vectorSearch stage. following the $vectorSearch stage.
Returns: Returns:
List of documents selected by maximal marginal relevance. List of documents selected by maximal marginal relevance.
""" """
query_embedding = self._embedding.embed_query(query)
return self.max_marginal_relevance_search_by_vector( return self.max_marginal_relevance_search_by_vector(
embedding=query_embedding, embedding=self._embedding.embed_query(query),
k=k, k=k,
fetch_k=fetch_k, fetch_k=fetch_k,
lambda_mult=lambda_mult, lambda_mult=lambda_mult,
@ -575,7 +563,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
texts: List[str], texts: List[str],
embedding: Embeddings, embedding: Embeddings,
metadatas: Optional[List[Dict]] = None, metadatas: Optional[List[Dict]] = None,
collection: Optional[Collection[MongoDBDocumentType]] = None, collection: Optional[Collection] = None,
ids: Optional[List[str]] = None, ids: Optional[List[str]] = None,
**kwargs: Any, **kwargs: Any,
) -> MongoDBAtlasVectorSearch: ) -> MongoDBAtlasVectorSearch:
@ -588,6 +576,9 @@ class MongoDBAtlasVectorSearch(VectorStore):
This is intended to be a quick way to get started. This is intended to be a quick way to get started.
See `MongoDBAtlasVectorSearch` for kwargs and further description.
Example: Example:
.. code-block:: python .. code-block:: python
from pymongo import MongoClient from pymongo import MongoClient
@ -649,8 +640,9 @@ class MongoDBAtlasVectorSearch(VectorStore):
k: int = 4, k: int = 4,
fetch_k: int = 20, fetch_k: int = 20,
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
pre_filter: Optional[Dict] = None, pre_filter: Optional[Dict[str, Any]] = None,
post_filter_pipeline: Optional[List[Dict]] = None, post_filter_pipeline: Optional[List[Dict]] = None,
oversampling_factor: int = 10,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: # type: ignore ) -> List[Document]: # type: ignore
"""Return docs selected using the maximal marginal relevance. """Return docs selected using the maximal marginal relevance.
@ -666,10 +658,13 @@ class MongoDBAtlasVectorSearch(VectorStore):
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity.
Defaults to 0.5. Defaults to 0.5.
pre_filter: (Optional) dictionary of argument(s) to prefilter on document pre_filter: (Optional) dictionary of arguments to filter document fields on.
fields.
post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages
following the vectorSearch stage. following the vectorSearch stage.
oversampling_factor: Multiple of k used when generating number
of candidates in HNSW Vector Search.
kwargs: Additional arguments are specific to the search_type
Returns: Returns:
List of Documents selected by maximal marginal relevance. List of Documents selected by maximal marginal relevance.
""" """
@ -678,7 +673,8 @@ class MongoDBAtlasVectorSearch(VectorStore):
k=fetch_k, k=fetch_k,
pre_filter=pre_filter, pre_filter=pre_filter,
post_filter_pipeline=post_filter_pipeline, post_filter_pipeline=post_filter_pipeline,
include_embedding=kwargs.pop("include_embedding", True), include_embeddings=True,
oversampling_factor=oversampling_factor,
**kwargs, **kwargs,
) )
mmr_doc_indexes = maximal_marginal_relevance( mmr_doc_indexes = maximal_marginal_relevance(
@ -696,31 +692,82 @@ class MongoDBAtlasVectorSearch(VectorStore):
k: int = 4, k: int = 4,
fetch_k: int = 20, fetch_k: int = 20,
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
pre_filter: Optional[Dict[str, Any]] = None,
post_filter_pipeline: Optional[List[Dict]] = None,
oversampling_factor: int = 10,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: ) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.""" """Return docs selected using the maximal marginal relevance."""
return await run_in_executor( return await run_in_executor(
None, None,
self.max_marginal_relevance_search_by_vector, self.max_marginal_relevance_search_by_vector, # type: ignore[arg-type]
embedding, embedding,
k=k, k=k,
fetch_k=fetch_k, fetch_k=fetch_k,
lambda_mult=lambda_mult, lambda_mult=lambda_mult,
pre_filter=pre_filter,
post_filter_pipeline=post_filter_pipeline,
oversampling_factor=oversampling_factor,
**kwargs, **kwargs,
) )
def _similarity_search_with_score(
    self,
    query_vector: List[float],
    k: int = 4,
    pre_filter: Optional[Dict[str, Any]] = None,
    post_filter_pipeline: Optional[List[Dict]] = None,
    oversampling_factor: int = 10,
    include_embeddings: bool = False,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Shared search routine behind the public similarity/MMR methods.

    Args:
        query_vector: Embedding of the query, forwarded to
            ``vector_search_stage``.
        k: Forwarded to ``vector_search_stage``.
        pre_filter: Forwarded to ``vector_search_stage``.
        post_filter_pipeline: Extra aggregation stages appended after the
            search, score and projection stages. Skipped when ``None``.
        oversampling_factor: Forwarded to ``vector_search_stage``.
        include_embeddings: When False, a ``$project`` stage drops the
            embedding field from every result.
        kwargs: Forwarded unchanged to ``vector_search_stage``.

    Returns:
        One ``(Document, score)`` tuple per result. The text field becomes
        ``page_content``; the remaining fields become ``metadata``.
    """
    # Atlas Vector Search stage, potentially carrying a filter.
    search_stage = vector_search_stage(
        query_vector,
        self._embedding_key,
        self._index_name,
        k,
        pre_filter,
        oversampling_factor,
        **kwargs,
    )
    stages = [
        search_stage,
        # Surface the search score as an ordinary field on each result.
        {"$set": {"score": {"$meta": "vectorSearchScore"}}},
    ]
    if not include_embeddings:
        # Strip embeddings unless the caller asked for them.
        stages.append({"$project": {self._embedding_key: 0}})
    if post_filter_pipeline is not None:
        stages.extend(post_filter_pipeline)

    scored_docs = []
    for record in self._collection.aggregate(stages):  # type: ignore[arg-type]
        # Pop text and score first so that only metadata is left in record.
        page_text = record.pop(self._text_key)
        relevance = record.pop("score")
        make_serializable(record)
        scored_docs.append(
            (Document(page_content=page_text, metadata=record), relevance)
        )
    return scored_docs
def create_vector_search_index( def create_vector_search_index(
self, self,
dimensions: int, dimensions: int,
filters: Optional[List[Dict[str, str]]] = None, filters: Optional[List[str]] = None,
update: bool = False, update: bool = False,
) -> None: ) -> None:
"""Creates a MongoDB Atlas vectorSearch index for the VectorStore """Creates a MongoDB Atlas vectorSearch index for the VectorStore
Note**: This method may fail as it requires a MongoDB Atlas with Note**: This method may fail as it requires a MongoDB Atlas with these
these pre-requisites: `pre-requisites <https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#prerequisites>`.
- M10 cluster or higher Currently, vector and full-text search index operations need to be
- https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#prerequisites performed manually on the Atlas UI for shared M0 clusters.
Args: Args:
dimensions (int): Number of dimensions in embedding dimensions (int): Number of dimensions in embedding

View File

@ -1,91 +1,103 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
version = "2.3.4"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "aiohappyeyeballs-2.3.4-py3-none-any.whl", hash = "sha256:40a16ceffcf1fc9e142fd488123b2e218abc4188cf12ac20c67200e1579baa42"},
{file = "aiohappyeyeballs-2.3.4.tar.gz", hash = "sha256:7e1ae8399c320a8adec76f6c919ed5ceae6edd4c3672f4d9eae2b27e37c80ff6"},
]
[[package]] [[package]]
name = "aiohttp" name = "aiohttp"
version = "3.9.5" version = "3.10.1"
description = "Async http client/server framework (asyncio)" description = "Async http client/server framework (asyncio)"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, {file = "aiohttp-3.10.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:47b4c2412960e64d97258f40616efddaebcb34ff664c8a972119ed38fac2a62c"},
{file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, {file = "aiohttp-3.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7dbf637f87dd315fa1f36aaed8afa929ee2c607454fb7791e74c88a0d94da59"},
{file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"}, {file = "aiohttp-3.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c8fb76214b5b739ce59e2236a6489d9dc3483649cfd6f563dbf5d8e40dbdd57d"},
{file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"}, {file = "aiohttp-3.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c577cdcf8f92862363b3d598d971c6a84ed8f0bf824d4cc1ce70c2fb02acb4a"},
{file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"}, {file = "aiohttp-3.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:777e23609899cb230ad2642b4bdf1008890f84968be78de29099a8a86f10b261"},
{file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"}, {file = "aiohttp-3.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b07286a1090483799599a2f72f76ac396993da31f6e08efedb59f40876c144fa"},
{file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"}, {file = "aiohttp-3.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9db600a86414a9a653e3c1c7f6a2f6a1894ab8f83d11505247bd1b90ad57157"},
{file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"}, {file = "aiohttp-3.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01c3f1eb280008e51965a8d160a108c333136f4a39d46f516c64d2aa2e6a53f2"},
{file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"}, {file = "aiohttp-3.10.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f5dd109a925fee4c9ac3f6a094900461a2712df41745f5d04782ebcbe6479ccb"},
{file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"}, {file = "aiohttp-3.10.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8c81ff4afffef9b1186639506d70ea90888218f5ddfff03870e74ec80bb59970"},
{file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"}, {file = "aiohttp-3.10.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:2a384dfbe8bfebd203b778a30a712886d147c61943675f4719b56725a8bbe803"},
{file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"}, {file = "aiohttp-3.10.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:b9fb6508893dc31cfcbb8191ef35abd79751db1d6871b3e2caee83959b4d91eb"},
{file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"}, {file = "aiohttp-3.10.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:88596384c3bec644a96ae46287bb646d6a23fa6014afe3799156aef42669c6bd"},
{file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"}, {file = "aiohttp-3.10.1-cp310-cp310-win32.whl", hash = "sha256:68164d43c580c2e8bf8e0eb4960142919d304052ccab92be10250a3a33b53268"},
{file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"}, {file = "aiohttp-3.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:d6bbe2c90c10382ca96df33b56e2060404a4f0f88673e1e84b44c8952517e5f3"},
{file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"}, {file = "aiohttp-3.10.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f6979b4f20d3e557a867da9d9227de4c156fcdcb348a5848e3e6190fd7feb972"},
{file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"}, {file = "aiohttp-3.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03c0c380c83f8a8d4416224aafb88d378376d6f4cadebb56b060688251055cd4"},
{file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"}, {file = "aiohttp-3.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c2b104e81b3c3deba7e6f5bc1a9a0e9161c380530479970766a6655b8b77c7c"},
{file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"}, {file = "aiohttp-3.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b023b68c61ab0cd48bd38416b421464a62c381e32b9dc7b4bdfa2905807452a4"},
{file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"}, {file = "aiohttp-3.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a07c76a82390506ca0eabf57c0540cf5a60c993c442928fe4928472c4c6e5e6"},
{file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"}, {file = "aiohttp-3.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41d8dab8c64ded1edf117d2a64f353efa096c52b853ef461aebd49abae979f16"},
{file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"}, {file = "aiohttp-3.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:615348fab1a9ef7d0960a905e83ad39051ae9cb0d2837da739b5d3a7671e497a"},
{file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"}, {file = "aiohttp-3.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:256ee6044214ee9d66d531bb374f065ee94e60667d6bbeaa25ca111fc3997158"},
{file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"}, {file = "aiohttp-3.10.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d5bb926805022508b7ddeaad957f1fce7a8d77532068d7bdb431056dc630cd"},
{file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"}, {file = "aiohttp-3.10.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:028faf71b338f069077af6315ad54281612705d68889f5d914318cbc2aab0d50"},
{file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"}, {file = "aiohttp-3.10.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5c12310d153b27aa630750be44e79313acc4e864c421eb7d2bc6fa3429c41bf8"},
{file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"}, {file = "aiohttp-3.10.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:de1a91d5faded9054957ed0a9e01b9d632109341942fc123947ced358c5d9009"},
{file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"}, {file = "aiohttp-3.10.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9c186b270979fb1dee3ababe2d12fb243ed7da08b30abc83ebac3a928a4ddb15"},
{file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"}, {file = "aiohttp-3.10.1-cp311-cp311-win32.whl", hash = "sha256:4a9ce70f5e00380377aac0e568abd075266ff992be2e271765f7b35d228a990c"},
{file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"}, {file = "aiohttp-3.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:a77c79bac8d908d839d32c212aef2354d2246eb9deb3e2cb01ffa83fb7a6ea5d"},
{file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"}, {file = "aiohttp-3.10.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:2212296cdb63b092e295c3e4b4b442e7b7eb41e8a30d0f53c16d5962efed395d"},
{file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"}, {file = "aiohttp-3.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4dcb127ca3eb0a61205818a606393cbb60d93b7afb9accd2fd1e9081cc533144"},
{file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"}, {file = "aiohttp-3.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb8b79a65332e1a426ccb6290ce0409e1dc16b4daac1cc5761e059127fa3d134"},
{file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"}, {file = "aiohttp-3.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68cc24f707ed9cb961f6ee04020ca01de2c89b2811f3cf3361dc7c96a14bfbcc"},
{file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"}, {file = "aiohttp-3.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cb54f5725b4b37af12edf6c9e834df59258c82c15a244daa521a065fbb11717"},
{file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"}, {file = "aiohttp-3.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:51d03e948e53b3639ce4d438f3d1d8202898ec6655cadcc09ec99229d4adc2a9"},
{file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"}, {file = "aiohttp-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:786299d719eb5d868f161aeec56d589396b053925b7e0ce36e983d30d0a3e55c"},
{file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"}, {file = "aiohttp-3.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abda4009a30d51d3f06f36bc7411a62b3e647fa6cc935ef667e3e3d3a7dd09b1"},
{file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"}, {file = "aiohttp-3.10.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:67f7639424c313125213954e93a6229d3a1d386855d70c292a12628f600c7150"},
{file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"}, {file = "aiohttp-3.10.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e5a26d7aac4c0d8414a347da162696eea0629fdce939ada6aedf951abb1d745"},
{file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"}, {file = "aiohttp-3.10.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:120548d89f14b76a041088b582454d89389370632ee12bf39d919cc5c561d1ca"},
{file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"}, {file = "aiohttp-3.10.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f5293726943bdcea24715b121d8c4ae12581441d22623b0e6ab12d07ce85f9c4"},
{file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"}, {file = "aiohttp-3.10.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1f8605e573ed6c44ec689d94544b2c4bb1390aaa723a8b5a2cc0a5a485987a68"},
{file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"}, {file = "aiohttp-3.10.1-cp312-cp312-win32.whl", hash = "sha256:e7168782621be4448d90169a60c8b37e9b0926b3b79b6097bc180c0a8a119e73"},
{file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"}, {file = "aiohttp-3.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:8fbf8c0ded367c5c8eaf585f85ca8dd85ff4d5b73fb8fe1e6ac9e1b5e62e11f7"},
{file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"}, {file = "aiohttp-3.10.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:54b7f4a20d7cc6bfa4438abbde069d417bb7a119f870975f78a2b99890226d55"},
{file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"}, {file = "aiohttp-3.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2fa643ca990323db68911b92f3f7a0ca9ae300ae340d0235de87c523601e58d9"},
{file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"}, {file = "aiohttp-3.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8311d0d690487359fe2247ec5d2cac9946e70d50dced8c01ce9e72341c21151"},
{file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"}, {file = "aiohttp-3.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222821c60b8f6a64c5908cb43d69c0ee978a1188f6a8433d4757d39231b42cdb"},
{file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"}, {file = "aiohttp-3.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7b55d9ede66af7feb6de87ff277e0ccf6d51c7db74cc39337fe3a0e31b5872d"},
{file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"}, {file = "aiohttp-3.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a95151a5567b3b00368e99e9c5334a919514f60888a6b6d2054fea5e66e527e"},
{file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"}, {file = "aiohttp-3.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e9e9171d2fe6bfd9d3838a6fe63b1e91b55e0bf726c16edf265536e4eafed19"},
{file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"}, {file = "aiohttp-3.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a57e73f9523e980f6101dc9a83adcd7ac0006ea8bf7937ca3870391c7bb4f8ff"},
{file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"}, {file = "aiohttp-3.10.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0df51a3d70a2bfbb9c921619f68d6d02591f24f10e9c76de6f3388c89ed01de6"},
{file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"}, {file = "aiohttp-3.10.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:b0de63ff0307eac3961b4af74382d30220d4813f36b7aaaf57f063a1243b4214"},
{file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"}, {file = "aiohttp-3.10.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:8db9b749f589b5af8e4993623dbda6716b2b7a5fcb0fa2277bf3ce4b278c7059"},
{file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"}, {file = "aiohttp-3.10.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:6b14c19172eb53b63931d3e62a9749d6519f7c121149493e6eefca055fcdb352"},
{file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"}, {file = "aiohttp-3.10.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cd57ad998e3038aa87c38fe85c99ed728001bf5dde8eca121cadee06ee3f637"},
{file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"}, {file = "aiohttp-3.10.1-cp38-cp38-win32.whl", hash = "sha256:df31641e3f02b77eb3c5fb63c0508bee0fc067cf153da0e002ebbb0db0b6d91a"},
{file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"}, {file = "aiohttp-3.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:93094eba50bc2ad4c40ff4997ead1fdcd41536116f2e7d6cfec9596a8ecb3615"},
{file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"}, {file = "aiohttp-3.10.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:440954ddc6b77257e67170d57b1026aa9545275c33312357472504eef7b4cc0b"},
{file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"}, {file = "aiohttp-3.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f9f8beed277488a52ee2b459b23c4135e54d6a819eaba2e120e57311015b58e9"},
{file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"}, {file = "aiohttp-3.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8a8221a63602008550022aa3a4152ca357e1dde7ab3dd1da7e1925050b56863"},
{file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"}, {file = "aiohttp-3.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a702bd3663b5cbf3916e84bf332400d24cdb18399f0877ca6b313ce6c08bfb43"},
{file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"}, {file = "aiohttp-3.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1988b370536eb14f0ce7f3a4a5b422ab64c4e255b3f5d7752c5f583dc8c967fc"},
{file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"}, {file = "aiohttp-3.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ccf1f0a304352c891d124ac1a9dea59b14b2abed1704aaa7689fc90ef9c5be1"},
{file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"}, {file = "aiohttp-3.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc3ea6ef2a83edad84bbdb5d96e22f587b67c68922cd7b6f9d8f24865e655bcf"},
{file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"}, {file = "aiohttp-3.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89b47c125ab07f0831803b88aeb12b04c564d5f07a1c1a225d4eb4d2f26e8b5e"},
{file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"}, {file = "aiohttp-3.10.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:21778552ef3d44aac3278cc6f6d13a6423504fa5f09f2df34bfe489ed9ded7f5"},
{file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"}, {file = "aiohttp-3.10.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bde0693073fd5e542e46ea100aa6c1a5d36282dbdbad85b1c3365d5421490a92"},
{file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"}, {file = "aiohttp-3.10.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bf66149bb348d8e713f3a8e0b4f5b952094c2948c408e1cfef03b49e86745d60"},
{file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"}, {file = "aiohttp-3.10.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:587237571a85716d6f71f60d103416c9df7d5acb55d96d3d3ced65f39bff9c0c"},
{file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"}, {file = "aiohttp-3.10.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bfe33cba6e127d0b5b417623c9aa621f0a69f304742acdca929a9fdab4593693"},
{file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"}, {file = "aiohttp-3.10.1-cp39-cp39-win32.whl", hash = "sha256:9fbff00646cf8211b330690eb2fd64b23e1ce5b63a342436c1d1d6951d53d8dd"},
{file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"}, {file = "aiohttp-3.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:5951c328f9ac42d7bce7a6ded535879bc9ae13032818d036749631fa27777905"},
{file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"}, {file = "aiohttp-3.10.1.tar.gz", hash = "sha256:8b0d058e4e425d3b45e8ec70d49b402f4d6b21041e674798b1f91ba027c73f28"},
] ]
[package.dependencies] [package.dependencies]
aiohappyeyeballs = ">=2.3.0"
aiosignal = ">=1.1.2" aiosignal = ">=1.1.2"
async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""}
attrs = ">=17.3.0" attrs = ">=17.3.0"
@ -94,7 +106,7 @@ multidict = ">=4.5,<7.0"
yarl = ">=1.0,<2.0" yarl = ">=1.0,<2.0"
[package.extras] [package.extras]
speedups = ["Brotli", "aiodns", "brotlicffi"] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"]
[[package]] [[package]]
name = "aiosignal" name = "aiosignal"
@ -124,6 +136,28 @@ files = [
[package.dependencies] [package.dependencies]
typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}
[[package]]
name = "anyio"
version = "4.4.0"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
{file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"},
{file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
trio = ["trio (>=0.23)"]
[[package]] [[package]]
name = "async-timeout" name = "async-timeout"
version = "4.0.3" version = "4.0.3"
@ -137,22 +171,22 @@ files = [
[[package]] [[package]]
name = "attrs" name = "attrs"
version = "23.2.0" version = "24.1.0"
description = "Classes Without Boilerplate" description = "Classes Without Boilerplate"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-24.1.0-py3-none-any.whl", hash = "sha256:377b47448cb61fea38533f671fba0d0f8a96fd58facd4dc518e3dac9dbea0905"},
{file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, {file = "attrs-24.1.0.tar.gz", hash = "sha256:adbdec84af72d38be7628e353a09b6a6790d15cd71819f6e9d7b0faa8a125745"},
] ]
[package.extras] [package.extras]
cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
dev = ["attrs[tests]", "pre-commit"] cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
tests = ["attrs[tests-no-zope]", "zope-interface"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
[[package]] [[package]]
name = "certifi" name = "certifi"
@ -292,6 +326,17 @@ files = [
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
] ]
[[package]]
name = "distro"
version = "1.9.0"
description = "Distro - an OS platform information API"
optional = false
python-versions = ">=3.6"
files = [
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
]
[[package]] [[package]]
name = "dnspython" name = "dnspython"
version = "2.6.1" version = "2.6.1"
@ -497,6 +542,62 @@ files = [
docs = ["Sphinx", "furo"] docs = ["Sphinx", "furo"]
test = ["objgraph", "psutil"] test = ["objgraph", "psutil"]
[[package]]
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "httpcore"
version = "1.0.5"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
{file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
]
[package.dependencies]
certifi = "*"
h11 = ">=0.13,<0.15"
[package.extras]
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<0.26.0)"]
[[package]]
name = "httpx"
version = "0.27.0"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
{file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
]
[package.dependencies]
anyio = "*"
certifi = "*"
httpcore = "==1.*"
idna = "*"
sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
[[package]] [[package]]
name = "idna" name = "idna"
version = "3.7" version = "3.7"
@ -546,7 +647,7 @@ files = [
[[package]] [[package]]
name = "langchain" name = "langchain"
version = "0.2.9" version = "0.2.12"
description = "Building applications with LLMs through composability" description = "Building applications with LLMs through composability"
optional = false optional = false
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
@ -556,7 +657,7 @@ develop = true
[package.dependencies] [package.dependencies]
aiohttp = "^3.8.3" aiohttp = "^3.8.3"
async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""}
langchain-core = "^0.2.20" langchain-core = "^0.2.27"
langchain-text-splitters = "^0.2.0" langchain-text-splitters = "^0.2.0"
langsmith = "^0.1.17" langsmith = "^0.1.17"
numpy = [ numpy = [
@ -575,7 +676,7 @@ url = "../../langchain"
[[package]] [[package]]
name = "langchain-core" name = "langchain-core"
version = "0.2.21" version = "0.2.28"
description = "Building applications with LLMs through composability" description = "Building applications with LLMs through composability"
optional = false optional = false
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
@ -592,14 +693,33 @@ pydantic = [
] ]
PyYAML = ">=5.3" PyYAML = ">=5.3"
tenacity = "^8.1.0,!=8.4.0" tenacity = "^8.1.0,!=8.4.0"
typing-extensions = ">=4.7"
[package.source] [package.source]
type = "directory" type = "directory"
url = "../../core" url = "../../core"
[[package]]
name = "langchain-openai"
version = "0.1.20"
description = "An integration package connecting OpenAI and LangChain"
optional = false
python-versions = ">=3.8.1,<4.0"
files = []
develop = true
[package.dependencies]
langchain-core = "^0.2.26"
openai = "^1.32.0"
tiktoken = ">=0.7,<1"
[package.source]
type = "directory"
url = "../openai"
[[package]] [[package]]
name = "langchain-text-splitters" name = "langchain-text-splitters"
version = "0.2.2" version = "0.2.3"
description = "LangChain text splitting utilities" description = "LangChain text splitting utilities"
optional = false optional = false
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
@ -615,13 +735,13 @@ url = "../../text-splitters"
[[package]] [[package]]
name = "langsmith" name = "langsmith"
version = "0.1.90" version = "0.1.96"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false optional = false
python-versions = "<4.0,>=3.8.1" python-versions = "<4.0,>=3.8.1"
files = [ files = [
{file = "langsmith-0.1.90-py3-none-any.whl", hash = "sha256:40b43d908ae0ff061a7ef6f4e7eabf2624db54359210b6bc784ddfa32484e524"}, {file = "langsmith-0.1.96-py3-none-any.whl", hash = "sha256:1e8285c3f84cffebc761ff5624647de20686dbbf659f5d1135918261f85bad13"},
{file = "langsmith-0.1.90.tar.gz", hash = "sha256:f7032acf3736a3db52698126b60da0d909aba20b20ad8a70b678e64bf878c17d"}, {file = "langsmith-0.1.96.tar.gz", hash = "sha256:01b7fa7d538b6409ee74bff458cc3dcdc1799fc70d329f79eb26ba54c32991ae"},
] ]
[package.dependencies] [package.dependencies]
@ -733,44 +853,44 @@ files = [
[[package]] [[package]]
name = "mypy" name = "mypy"
version = "1.10.1" version = "1.11.1"
description = "Optional static typing for Python" description = "Optional static typing for Python"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "mypy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02"}, {file = "mypy-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c"},
{file = "mypy-1.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7"}, {file = "mypy-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411"},
{file = "mypy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a"}, {file = "mypy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03"},
{file = "mypy-1.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9"}, {file = "mypy-1.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4"},
{file = "mypy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d"}, {file = "mypy-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58"},
{file = "mypy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a"}, {file = "mypy-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5"},
{file = "mypy-1.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84"}, {file = "mypy-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca"},
{file = "mypy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f"}, {file = "mypy-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de"},
{file = "mypy-1.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b"}, {file = "mypy-1.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809"},
{file = "mypy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e"}, {file = "mypy-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72"},
{file = "mypy-1.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7"}, {file = "mypy-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8"},
{file = "mypy-1.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3"}, {file = "mypy-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a"},
{file = "mypy-1.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e"}, {file = "mypy-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417"},
{file = "mypy-1.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04"}, {file = "mypy-1.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e"},
{file = "mypy-1.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31"}, {file = "mypy-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525"},
{file = "mypy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c"}, {file = "mypy-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2"},
{file = "mypy-1.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade"}, {file = "mypy-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b"},
{file = "mypy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37"}, {file = "mypy-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0"},
{file = "mypy-1.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7"}, {file = "mypy-1.11.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd"},
{file = "mypy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d"}, {file = "mypy-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb"},
{file = "mypy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3"}, {file = "mypy-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe"},
{file = "mypy-1.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf"}, {file = "mypy-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c"},
{file = "mypy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531"}, {file = "mypy-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69"},
{file = "mypy-1.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3"}, {file = "mypy-1.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74"},
{file = "mypy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f"}, {file = "mypy-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b"},
{file = "mypy-1.10.1-py3-none-any.whl", hash = "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a"}, {file = "mypy-1.11.1-py3-none-any.whl", hash = "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54"},
{file = "mypy-1.10.1.tar.gz", hash = "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0"}, {file = "mypy-1.11.1.tar.gz", hash = "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08"},
] ]
[package.dependencies] [package.dependencies]
mypy-extensions = ">=1.0.0" mypy-extensions = ">=1.0.0"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typing-extensions = ">=4.1.0" typing-extensions = ">=4.6.0"
[package.extras] [package.extras]
dmypy = ["psutil (>=4.0)"] dmypy = ["psutil (>=4.0)"]
@ -871,6 +991,29 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
] ]
[[package]]
name = "openai"
version = "1.38.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
files = [
{file = "openai-1.38.0-py3-none-any.whl", hash = "sha256:a19ef052f1676320f52183ae6f9775da6d888fbe3aec57886117163c095d9f7c"},
{file = "openai-1.38.0.tar.gz", hash = "sha256:30fb324bf452ecb1194ca7dbc64566a4d7aa054c6a5da857937ede7d517a220b"},
]
[package.dependencies]
anyio = ">=3.5.0,<5"
distro = ">=1.7.0,<2"
httpx = ">=0.23.0,<1"
pydantic = ">=1.9.0,<3"
sniffio = "*"
tqdm = ">4"
typing-extensions = ">=4.7,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]] [[package]]
name = "orjson" name = "orjson"
version = "3.10.6" version = "3.10.6"
@ -1298,6 +1441,94 @@ files = [
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
] ]
[[package]]
name = "regex"
version = "2024.7.24"
description = "Alternative regular expression module, to replace re."
optional = false
python-versions = ">=3.8"
files = [
{file = "regex-2024.7.24-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b0d3f567fafa0633aee87f08b9276c7062da9616931382993c03808bb68ce"},
{file = "regex-2024.7.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3426de3b91d1bc73249042742f45c2148803c111d1175b283270177fdf669024"},
{file = "regex-2024.7.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f273674b445bcb6e4409bf8d1be67bc4b58e8b46fd0d560055d515b8830063cd"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23acc72f0f4e1a9e6e9843d6328177ae3074b4182167e34119ec7233dfeccf53"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65fd3d2e228cae024c411c5ccdffae4c315271eee4a8b839291f84f796b34eca"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c414cbda77dbf13c3bc88b073a1a9f375c7b0cb5e115e15d4b73ec3a2fbc6f59"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7a89eef64b5455835f5ed30254ec19bf41f7541cd94f266ab7cbd463f00c41"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19c65b00d42804e3fbea9708f0937d157e53429a39b7c61253ff15670ff62cb5"},
{file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7a5486ca56c8869070a966321d5ab416ff0f83f30e0e2da1ab48815c8d165d46"},
{file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6f51f9556785e5a203713f5efd9c085b4a45aecd2a42573e2b5041881b588d1f"},
{file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a4997716674d36a82eab3e86f8fa77080a5d8d96a389a61ea1d0e3a94a582cf7"},
{file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c0abb5e4e8ce71a61d9446040c1e86d4e6d23f9097275c5bd49ed978755ff0fe"},
{file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:18300a1d78cf1290fa583cd8b7cde26ecb73e9f5916690cf9d42de569c89b1ce"},
{file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:416c0e4f56308f34cdb18c3f59849479dde5b19febdcd6e6fa4d04b6c31c9faa"},
{file = "regex-2024.7.24-cp310-cp310-win32.whl", hash = "sha256:fb168b5924bef397b5ba13aabd8cf5df7d3d93f10218d7b925e360d436863f66"},
{file = "regex-2024.7.24-cp310-cp310-win_amd64.whl", hash = "sha256:6b9fc7e9cc983e75e2518496ba1afc524227c163e43d706688a6bb9eca41617e"},
{file = "regex-2024.7.24-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:382281306e3adaaa7b8b9ebbb3ffb43358a7bbf585fa93821300a418bb975281"},
{file = "regex-2024.7.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4fdd1384619f406ad9037fe6b6eaa3de2749e2e12084abc80169e8e075377d3b"},
{file = "regex-2024.7.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3d974d24edb231446f708c455fd08f94c41c1ff4f04bcf06e5f36df5ef50b95a"},
{file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2ec4419a3fe6cf8a4795752596dfe0adb4aea40d3683a132bae9c30b81e8d73"},
{file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb563dd3aea54c797adf513eeec819c4213d7dbfc311874eb4fd28d10f2ff0f2"},
{file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45104baae8b9f67569f0f1dca5e1f1ed77a54ae1cd8b0b07aba89272710db61e"},
{file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:994448ee01864501912abf2bad9203bffc34158e80fe8bfb5b031f4f8e16da51"},
{file = "regex-2024.7.24-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fac296f99283ac232d8125be932c5cd7644084a30748fda013028c815ba3364"},
{file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7e37e809b9303ec3a179085415cb5f418ecf65ec98cdfe34f6a078b46ef823ee"},
{file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:01b689e887f612610c869421241e075c02f2e3d1ae93a037cb14f88ab6a8934c"},
{file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f6442f0f0ff81775eaa5b05af8a0ffa1dda36e9cf6ec1e0d3d245e8564b684ce"},
{file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:871e3ab2838fbcb4e0865a6e01233975df3a15e6fce93b6f99d75cacbd9862d1"},
{file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c918b7a1e26b4ab40409820ddccc5d49871a82329640f5005f73572d5eaa9b5e"},
{file = "regex-2024.7.24-cp311-cp311-win32.whl", hash = "sha256:2dfbb8baf8ba2c2b9aa2807f44ed272f0913eeeba002478c4577b8d29cde215c"},
{file = "regex-2024.7.24-cp311-cp311-win_amd64.whl", hash = "sha256:538d30cd96ed7d1416d3956f94d54e426a8daf7c14527f6e0d6d425fcb4cca52"},
{file = "regex-2024.7.24-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fe4ebef608553aff8deb845c7f4f1d0740ff76fa672c011cc0bacb2a00fbde86"},
{file = "regex-2024.7.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:74007a5b25b7a678459f06559504f1eec2f0f17bca218c9d56f6a0a12bfffdad"},
{file = "regex-2024.7.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7df9ea48641da022c2a3c9c641650cd09f0cd15e8908bf931ad538f5ca7919c9"},
{file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a1141a1dcc32904c47f6846b040275c6e5de0bf73f17d7a409035d55b76f289"},
{file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80c811cfcb5c331237d9bad3bea2c391114588cf4131707e84d9493064d267f9"},
{file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7214477bf9bd195894cf24005b1e7b496f46833337b5dedb7b2a6e33f66d962c"},
{file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d55588cba7553f0b6ec33130bc3e114b355570b45785cebdc9daed8c637dd440"},
{file = "regex-2024.7.24-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558a57cfc32adcf19d3f791f62b5ff564922942e389e3cfdb538a23d65a6b610"},
{file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a512eed9dfd4117110b1881ba9a59b31433caed0c4101b361f768e7bcbaf93c5"},
{file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:86b17ba823ea76256b1885652e3a141a99a5c4422f4a869189db328321b73799"},
{file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5eefee9bfe23f6df09ffb6dfb23809f4d74a78acef004aa904dc7c88b9944b05"},
{file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:731fcd76bbdbf225e2eb85b7c38da9633ad3073822f5ab32379381e8c3c12e94"},
{file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eaef80eac3b4cfbdd6de53c6e108b4c534c21ae055d1dbea2de6b3b8ff3def38"},
{file = "regex-2024.7.24-cp312-cp312-win32.whl", hash = "sha256:185e029368d6f89f36e526764cf12bf8d6f0e3a2a7737da625a76f594bdfcbfc"},
{file = "regex-2024.7.24-cp312-cp312-win_amd64.whl", hash = "sha256:2f1baff13cc2521bea83ab2528e7a80cbe0ebb2c6f0bfad15be7da3aed443908"},
{file = "regex-2024.7.24-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:66b4c0731a5c81921e938dcf1a88e978264e26e6ac4ec96a4d21ae0354581ae0"},
{file = "regex-2024.7.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:88ecc3afd7e776967fa16c80f974cb79399ee8dc6c96423321d6f7d4b881c92b"},
{file = "regex-2024.7.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64bd50cf16bcc54b274e20235bf8edbb64184a30e1e53873ff8d444e7ac656b2"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb462f0e346fcf41a901a126b50f8781e9a474d3927930f3490f38a6e73b6950"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a82465ebbc9b1c5c50738536fdfa7cab639a261a99b469c9d4c7dcbb2b3f1e57"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68a8f8c046c6466ac61a36b65bb2395c74451df2ffb8458492ef49900efed293"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac8e84fff5d27420f3c1e879ce9929108e873667ec87e0c8eeb413a5311adfe"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba2537ef2163db9e6ccdbeb6f6424282ae4dea43177402152c67ef869cf3978b"},
{file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:43affe33137fcd679bdae93fb25924979517e011f9dea99163f80b82eadc7e53"},
{file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c9bb87fdf2ab2370f21e4d5636e5317775e5d51ff32ebff2cf389f71b9b13750"},
{file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:945352286a541406f99b2655c973852da7911b3f4264e010218bbc1cc73168f2"},
{file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:8bc593dcce679206b60a538c302d03c29b18e3d862609317cb560e18b66d10cf"},
{file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3f3b6ca8eae6d6c75a6cff525c8530c60e909a71a15e1b731723233331de4169"},
{file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c51edc3541e11fbe83f0c4d9412ef6c79f664a3745fab261457e84465ec9d5a8"},
{file = "regex-2024.7.24-cp38-cp38-win32.whl", hash = "sha256:d0a07763776188b4db4c9c7fb1b8c494049f84659bb387b71c73bbc07f189e96"},
{file = "regex-2024.7.24-cp38-cp38-win_amd64.whl", hash = "sha256:8fd5afd101dcf86a270d254364e0e8dddedebe6bd1ab9d5f732f274fa00499a5"},
{file = "regex-2024.7.24-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0ffe3f9d430cd37d8fa5632ff6fb36d5b24818c5c986893063b4e5bdb84cdf24"},
{file = "regex-2024.7.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:25419b70ba00a16abc90ee5fce061228206173231f004437730b67ac77323f0d"},
{file = "regex-2024.7.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33e2614a7ce627f0cdf2ad104797d1f68342d967de3695678c0cb84f530709f8"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d33a0021893ede5969876052796165bab6006559ab845fd7b515a30abdd990dc"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04ce29e2c5fedf296b1a1b0acc1724ba93a36fb14031f3abfb7abda2806c1535"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b16582783f44fbca6fcf46f61347340c787d7530d88b4d590a397a47583f31dd"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:836d3cc225b3e8a943d0b02633fb2f28a66e281290302a79df0e1eaa984ff7c1"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:438d9f0f4bc64e8dea78274caa5af971ceff0f8771e1a2333620969936ba10be"},
{file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:973335b1624859cb0e52f96062a28aa18f3a5fc77a96e4a3d6d76e29811a0e6e"},
{file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c5e69fd3eb0b409432b537fe3c6f44ac089c458ab6b78dcec14478422879ec5f"},
{file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fbf8c2f00904eaf63ff37718eb13acf8e178cb940520e47b2f05027f5bb34ce3"},
{file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2757ace61bc4061b69af19e4689fa4416e1a04840f33b441034202b5cd02d4"},
{file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:44fc61b99035fd9b3b9453f1713234e5a7c92a04f3577252b45feefe1b327759"},
{file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:84c312cdf839e8b579f504afcd7b65f35d60b6285d892b19adea16355e8343c9"},
{file = "regex-2024.7.24-cp39-cp39-win32.whl", hash = "sha256:ca5b2028c2f7af4e13fb9fc29b28d0ce767c38c7facdf64f6c2cd040413055f1"},
{file = "regex-2024.7.24-cp39-cp39-win_amd64.whl", hash = "sha256:7c479f5ae937ec9985ecaf42e2e10631551d909f203e31308c12d703922742f9"},
{file = "regex-2024.7.24.tar.gz", hash = "sha256:9cfd009eed1a46b27c14039ad5bbc5e71b6367c5b2e6d5f5da0ea91600817506"},
]
[[package]] [[package]]
name = "requests" name = "requests"
version = "2.32.3" version = "2.32.3"
@ -1321,29 +1552,29 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]] [[package]]
name = "ruff" name = "ruff"
version = "0.5.3" version = "0.5.6"
description = "An extremely fast Python linter and code formatter, written in Rust." description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "ruff-0.5.3-py3-none-linux_armv6l.whl", hash = "sha256:b12424d9db7347fa63c5ed9af010003338c63c629fb9c9c6adb2aa4f5699729b"}, {file = "ruff-0.5.6-py3-none-linux_armv6l.whl", hash = "sha256:a0ef5930799a05522985b9cec8290b185952f3fcd86c1772c3bdbd732667fdcd"},
{file = "ruff-0.5.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8d72c5684bbd4ed304a9a955ee2e67f57b35f6193222ade910cca8a805490e3"}, {file = "ruff-0.5.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b652dc14f6ef5d1552821e006f747802cc32d98d5509349e168f6bf0ee9f8f42"},
{file = "ruff-0.5.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d2fc2cdb85ccac1e816cc9d5d8cedefd93661bd957756d902543af32a6b04a71"}, {file = "ruff-0.5.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:80521b88d26a45e871f31e4b88938fd87db7011bb961d8afd2664982dfc3641a"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf4bc751240b2fab5d19254571bcacb315c7b0b00bf3c912d52226a82bbec073"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9bc8f328a9f1309ae80e4d392836e7dbc77303b38ed4a7112699e63d3b066ab"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc697ec874fdd7c7ba0a85ec76ab38f8595224868d67f097c5ffc21136e72fcd"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d394940f61f7720ad371ddedf14722ee1d6250fd8d020f5ea5a86e7be217daf"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e791d34d3557a3819b3704bc1f087293c821083fa206812842fa363f6018a192"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111a99cdb02f69ddb2571e2756e017a1496c2c3a2aeefe7b988ddab38b416d36"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:76bb5a87fd397520b91a83eae8a2f7985236d42dd9459f09eef58e7f5c1d8316"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e395daba77a79f6dc0d07311f94cc0560375ca20c06f354c7c99af3bf4560c5d"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8cfc7a26422c78e94f1ec78ec02501bbad2df5834907e75afe474cc6b83a8c1"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c476acb43c3c51e3c614a2e878ee1589655fa02dab19fe2db0423a06d6a5b1b6"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96066c4328a49fce2dd40e80f7117987369feec30ab771516cf95f1cc2db923c"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2ff8003f5252fd68425fd53d27c1f08b201d7ed714bb31a55c9ac1d4c13e2eb"},
{file = "ruff-0.5.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03bfe9ab5bdc0b08470c3b261643ad54ea86edc32b64d1e080892d7953add3ad"}, {file = "ruff-0.5.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c94e084ba3eaa80c2172918c2ca2eb2230c3f15925f4ed8b6297260c6ef179ad"},
{file = "ruff-0.5.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:7704582a026fa02cca83efd76671a98ee6eb412c4230209efe5e2a006c06db62"}, {file = "ruff-0.5.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1f77c1c3aa0669fb230b06fb24ffa3e879391a3ba3f15e3d633a752da5a3e670"},
{file = "ruff-0.5.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:08058d077e21b856d32ebf483443390e29dc44d927608dc8f092ff6776519da9"}, {file = "ruff-0.5.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f908148c93c02873210a52cad75a6eda856b2cbb72250370ce3afef6fb99b1ed"},
{file = "ruff-0.5.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77d49484429ed7c7e6e2e75a753f153b7b58f875bdb4158ad85af166a1ec1822"}, {file = "ruff-0.5.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:563a7ae61ad284187d3071d9041c08019975693ff655438d8d4be26e492760bd"},
{file = "ruff-0.5.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:642cbff6cbfa38d2566d8db086508d6f472edb136cbfcc4ea65997745368c29e"}, {file = "ruff-0.5.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:94fe60869bfbf0521e04fd62b74cbca21cbc5beb67cbb75ab33fe8c174f54414"},
{file = "ruff-0.5.3-py3-none-win32.whl", hash = "sha256:eafc45dd8bdc37a00b28e68cc038daf3ca8c233d73fea276dcd09defb1352841"}, {file = "ruff-0.5.6-py3-none-win32.whl", hash = "sha256:e6a584c1de6f8591c2570e171cc7ce482bb983d49c70ddf014393cd39e9dfaed"},
{file = "ruff-0.5.3-py3-none-win_amd64.whl", hash = "sha256:cbaec2ddf4f78e5e9ecf5456ea0f496991358a1d883862ed0b9e947e2b6aea93"}, {file = "ruff-0.5.6-py3-none-win_amd64.whl", hash = "sha256:d7fe7dccb1a89dc66785d7aa0ac283b2269712d8ed19c63af908fdccca5ccc1a"},
{file = "ruff-0.5.3-py3-none-win_arm64.whl", hash = "sha256:05fbd2cb404775d6cd7f2ff49504e2d20e13ef95fa203bd1ab22413af70d420b"}, {file = "ruff-0.5.6-py3-none-win_arm64.whl", hash = "sha256:57c6c0dd997b31b536bff49b9eee5ed3194d60605a4427f735eeb1f9c1b8d264"},
{file = "ruff-0.5.3.tar.gz", hash = "sha256:2a3eb4f1841771fa5b67a56be9c2d16fd3cc88e378bd86aaeaec2f7e6bcdd0a2"}, {file = "ruff-0.5.6.tar.gz", hash = "sha256:07c9e3c2a8e1fe377dd460371c3462671a728c981c3205a5217291422209f642"},
] ]
[[package]] [[package]]
@ -1357,6 +1588,17 @@ files = [
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
] ]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]] [[package]]
name = "sqlalchemy" name = "sqlalchemy"
version = "2.0.31" version = "2.0.31"
@ -1473,6 +1715,58 @@ files = [
doc = ["reno", "sphinx"] doc = ["reno", "sphinx"]
test = ["pytest", "tornado (>=4.5)", "typeguard"] test = ["pytest", "tornado (>=4.5)", "typeguard"]
[[package]]
name = "tiktoken"
version = "0.7.0"
description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
optional = false
python-versions = ">=3.8"
files = [
{file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"},
{file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"},
{file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"},
{file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"},
{file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"},
{file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"},
{file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"},
{file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"},
{file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"},
{file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"},
{file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"},
{file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"},
{file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"},
{file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"},
{file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"},
{file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"},
{file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"},
{file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"},
{file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"},
{file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"},
{file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"},
{file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"},
{file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"},
{file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"},
{file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"},
{file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"},
{file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"},
{file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"},
{file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"},
{file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"},
{file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"},
{file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"},
{file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"},
{file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"},
{file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"},
{file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"},
]
[package.dependencies]
regex = ">=2022.1.18"
requests = ">=2.26.0"
[package.extras]
blobfile = ["blobfile (>=2)"]
[[package]] [[package]]
name = "tomli" name = "tomli"
version = "2.0.1" version = "2.0.1"
@ -1484,6 +1778,26 @@ files = [
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
] ]
[[package]]
name = "tqdm"
version = "4.66.5"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
files = [
{file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"},
{file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"},
]
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[package.extras]
dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
[[package]] [[package]]
name = "typing-extensions" name = "typing-extensions"
version = "4.12.2" version = "4.12.2"
@ -1662,4 +1976,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
content-hash = "e456e34ed87e1dbec96547f5a124cb408b4c557e5e14c61e9cb5ebf3a7e07ef5" content-hash = "0118e4cc42983fecdb592157b956a4036d666ff4fc06de95bc964195f97f39bf"

View File

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "langchain-mongodb" name = "langchain-mongodb"
version = "0.1.7" version = "0.1.8"
description = "An integration package connecting MongoDB and LangChain" description = "An integration package connecting MongoDB and LangChain"
authors = [] authors = []
readme = "README.md" readme = "README.md"
@ -67,7 +67,9 @@ pytest-asyncio = "^0.21.1"
[tool.poetry.group.codespell.dependencies] [tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0" codespell = "^2.2.0"
[tool.poetry.group.test_integration.dependencies] [tool.poetry.group.test_integration.dependencies.langchain-openai]
path = "../openai"
develop = true
[tool.poetry.group.lint.dependencies] [tool.poetry.group.lint.dependencies]
ruff = "^0.5" ruff = "^0.5"

View File

@ -10,7 +10,8 @@ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.outputs import ChatGeneration, Generation, LLMResult from langchain_core.outputs import ChatGeneration, Generation, LLMResult
from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache
from tests.utils import ConsistentFakeEmbeddings, FakeChatModel, FakeLLM
from ..utils import ConsistentFakeEmbeddings, FakeChatModel, FakeLLM
CONN_STRING = os.environ.get("MONGODB_ATLAS_URI") CONN_STRING = os.environ.get("MONGODB_ATLAS_URI")
INDEX_NAME = "langchain-test-index-semantic-cache" INDEX_NAME = "langchain-test-index-semantic-cache"
@ -31,7 +32,7 @@ def llm_cache(cls: Any) -> BaseCache:
database_name=DATABASE, database_name=DATABASE,
index_name=INDEX_NAME, index_name=INDEX_NAME,
score_threshold=0.5, score_threshold=0.5,
wait_until_ready=True, wait_until_ready=15.0,
) )
) )
assert get_llm_cache() assert get_llm_cache()

View File

@ -0,0 +1,144 @@
"Demonstrates MongoDBAtlasVectorSearch.as_retriever() invoked in a chain" ""
from __future__ import annotations
import os
from time import sleep
import pytest # type: ignore[import-not-found]
from langchain_core.documents import Document
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from pymongo import MongoClient
from pymongo.collection import Collection
from langchain_mongodb import index
from ..utils import PatchedMongoDBAtlasVectorSearch
# Atlas connection string read from the environment (None when the variable is unset).
CONNECTION_STRING = os.environ.get("MONGODB_ATLAS_URI")
# Database and collection that the `collection` fixture creates or reuses.
DB_NAME = "langchain_test_db"
COLLECTION_NAME = "langchain_test_chain_example"
# Name of the vector search index the fixture creates when it is missing.
INDEX_NAME = "vector_index_with_filter"
# Passed as `dimensions` when the index is created; presumably the output size
# of the embedding model used in the test -- TODO confirm.
DIMENSIONS = 1536
# Passed as `wait_until_complete` at index creation and used as the sleep
# duration (seconds) after texts are inserted.
TIMEOUT = 60.0
# NOTE(review): not referenced anywhere in the visible part of this file.
INTERVAL = 0.5
@pytest.fixture
def collection() -> Collection:
    """Return an emptied test collection that has a vector search index.

    Connects to the cluster at ``CONNECTION_STRING``, creates
    ``DB_NAME.COLLECTION_NAME`` when it does not exist yet, deletes every
    document in it, and creates the ``INDEX_NAME`` vector search index when
    no search index with that name is present.

    Only a vector search index is created here; no full-text search index
    is set up by this fixture.

    Returns:
        The pymongo ``Collection`` the test should use.
    """
    client: MongoClient = MongoClient(CONNECTION_STRING)
    database = client[DB_NAME]
    if COLLECTION_NAME not in database.list_collection_names():
        clxn = database.create_collection(COLLECTION_NAME)
    else:
        clxn = database[COLLECTION_NAME]

    # Start each test from an empty collection.
    clxn.delete_many({})

    # Create the index only when no existing search index carries this name.
    # A generator lets `any` stop at the first match instead of building a list.
    if not any(ix["name"] == INDEX_NAME for ix in clxn.list_search_indexes()):
        index.create_vector_search_index(
            collection=clxn,
            index_name=INDEX_NAME,
            dimensions=DIMENSIONS,
            path="embedding",
            similarity="cosine",
            filters=None,
            # Presumably blocks for up to TIMEOUT seconds until the index is
            # ready -- confirm against langchain_mongodb.index.
            wait_until_complete=TIMEOUT,
        )

    return clxn
@pytest.mark.skipif(
    "OPENAI_API_KEY" not in os.environ, reason="Requires OpenAI for chat responses."
)
def test_chain(
    collection: Collection,
) -> None:
    """Walk through using MongoDBAtlasVectorSearch as a retriever inside a chain.

    Follows example in the docs: https://python.langchain.com/v0.2/docs/how_to/hybrid/

    Requires OPENAI_API_KEY for the embedding and chat models.
    Requires INDEX_NAME to have been set up on MONGODB_ATLAS_URI.
    """
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(
        openai_api_key=os.environ["OPENAI_API_KEY"],  # type: ignore # noqa
        model="text-embedding-3-small",
    )
    store = PatchedMongoDBAtlasVectorSearch(
        collection=collection,
        embedding=embeddings,
        index_name=INDEX_NAME,
        text_key="page_content",
    )

    texts = [
        "In 2023, I visited Paris",
        "In 2022, I visited New York",
        "In 2021, I visited New Orleans",
        "In 2019, I visited San Francisco",
        "In 2020, I visited Vancouver",
    ]
    store.add_texts(texts)

    # Fixed pause to give the index time to build (needed in CI).
    sleep(TIMEOUT)

    query = "In the United States, what city did I visit last?"
    expected_count = len(texts)

    # Step 1: plain vector search straight on the store. Asking for as many
    # hits as there are texts should return every document.
    direct_hits = list(store.similarity_search(query=query, k=expected_count))
    assert len(direct_hits) == expected_count
    assert isinstance(direct_hits[0], Document)

    # Step 2: a chat model cannot consume the store's output directly. To have
    # it answer from our own data, LangChain pipes results through a Chain:
    # https://python.langchain.com/v0.1/docs/modules/chains/
    # Turning the store into a Retriever makes it a Runnable, i.e. it exposes
    # the same search through an invoke() method that a chain can call.
    retriever = store.as_retriever(search_kwargs={"k": expected_count})
    retrieved = retriever.invoke(query)
    assert len(retrieved) == len(texts)
    assert retrieved[0].page_content == direct_hits[0].page_content

    # Step 3: build the chain. The retriever fills in the context, a prompt
    # template frames the question, ChatOpenAI answers it, and the output
    # parser reduces the reply to a plain string.
    template = """Answer the question based only on the following context.
Answer in as few words as possible.
{context}
Question: {question}
"""
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}  # type: ignore
        | ChatPromptTemplate.from_template(template)
        | ChatOpenAI()
        | StrOutputParser()
    )

    answer = chain.invoke("What city did I visit last?")
    assert "Paris" in answer

View File

@ -1,6 +1,7 @@
"""Search index commands are only supported on Atlas Clusters >=M10""" """Search index commands are only supported on Atlas Clusters >=M10"""
import os import os
from typing import Generator, List, Optional
import pytest import pytest
from pymongo import MongoClient from pymongo import MongoClient
@ -8,23 +9,28 @@ from pymongo.collection import Collection
from langchain_mongodb import index from langchain_mongodb import index
TIMEOUT = 120
DIMENSIONS = 10
@pytest.fixture @pytest.fixture
def collection() -> Collection: def collection() -> Generator:
"""Depending on uri, this could point to any type of cluster.""" """Depending on uri, this could point to any type of cluster."""
uri = os.environ.get("MONGODB_ATLAS_URI") uri = os.environ.get("MONGODB_ATLAS_URI")
client: MongoClient = MongoClient(uri) client: MongoClient = MongoClient(uri)
clxn = client["db"].create_collection("collection") clxn = client["db"]["collection"]
return clxn clxn.insert_one({"foo": "bar"})
yield clxn
clxn.drop()
def test_search_index_commands(collection: Collection) -> None: def test_search_index_commands(collection: Collection) -> None:
index_name = "vector_index" index_name = "vector_index"
dimensions = 1536 dimensions = DIMENSIONS
path = "embedding" path = "embedding"
similarity = "cosine" similarity = "cosine"
filters: list = [] filters: Optional[List[str]] = None
wait_until_complete = 120 wait_until_complete = TIMEOUT
for index_info in collection.list_search_indexes(): for index_info in collection.list_search_indexes():
index.drop_vector_search_index( index.drop_vector_search_index(
@ -52,10 +58,10 @@ def test_search_index_commands(collection: Collection) -> None:
index.update_vector_search_index( index.update_vector_search_index(
collection, collection,
index_name, index_name,
1536, DIMENSIONS,
"embedding", "embedding",
new_similarity, new_similarity,
[], filters=[],
wait_until_complete=wait_until_complete, wait_until_complete=wait_until_complete,
) )

View File

@ -0,0 +1,176 @@
import os
from time import sleep
from typing import List
import pytest
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from pymongo import MongoClient
from pymongo.collection import Collection
from langchain_mongodb import index
from langchain_mongodb.retrievers import (
MongoDBAtlasFullTextSearchRetriever,
MongoDBAtlasHybridSearchRetriever,
)
from ..utils import ConsistentFakeEmbeddings, PatchedMongoDBAtlasVectorSearch
CONNECTION_STRING = os.environ.get("MONGODB_ATLAS_URI")
DB_NAME = "langchain_test_db"
COLLECTION_NAME = "test_retrievers"
VECTOR_INDEX_NAME = "vector_index"
EMBEDDING_FIELD = "embedding"
PAGE_CONTENT_FIELD = "text"
SEARCH_INDEX_NAME = "text_index"
DIMENSIONS = 1536
TIMEOUT = 60.0
INTERVAL = 0.5
@pytest.fixture
def example_documents() -> List[Document]:
    """Provide the small corpus shared by the retriever tests.

    Returns:
        Four Documents without metadata: three sentences about visiting a
        city in a given year, and one unrelated sentence about sandwiches.
    """
    sentences = (
        "In 2023, I visited Paris",
        "In 2022, I visited New York",
        "In 2021, I visited New Orleans",
        "Sandwiches are beautiful. Sandwiches are fine.",
    )
    return [Document(page_content=sentence) for sentence in sentences]
@pytest.fixture
def embedding_openai() -> Embeddings:
    """Provide an embedding model for the retriever tests.

    Returns:
        ``OpenAIEmbeddings`` using ``text-embedding-3-small`` when it can be
        constructed; otherwise ``ConsistentFakeEmbeddings(DIMENSIONS)``.
    """
    try:
        # Imported inside the ``try`` so that a missing ``langchain_openai``
        # package triggers the same fallback as a missing OPENAI_API_KEY.
        from langchain_openai import OpenAIEmbeddings

        return OpenAIEmbeddings(
            openai_api_key=os.environ["OPENAI_API_KEY"],  # type: ignore # noqa
            model="text-embedding-3-small",
        )
    except Exception:
        # Deliberately broad, best-effort fallback: any failure to build the
        # real model (import, missing key, validation) yields fake embeddings.
        return ConsistentFakeEmbeddings(DIMENSIONS)
@pytest.fixture
def collection() -> Collection:
    """Return an emptied test collection with a vector and a full-text index.

    The collection ``COLLECTION_NAME`` in ``DB_NAME`` is created when absent;
    when it already exists all of its documents are deleted. Each of the two
    search indexes is created only if no index of that name is listed.
    """
    database = MongoClient(CONNECTION_STRING)[DB_NAME]

    if COLLECTION_NAME in database.list_collection_names():
        clxn = database[COLLECTION_NAME]
        clxn.delete_many({})
    else:
        clxn = database.create_collection(COLLECTION_NAME)

    def has_search_index(name: str) -> bool:
        # Queries the server on every call, so the second check sees the
        # state after the first index has been created.
        return name in {ix["name"] for ix in clxn.list_search_indexes()}

    if not has_search_index(VECTOR_INDEX_NAME):
        index.create_vector_search_index(
            collection=clxn,
            index_name=VECTOR_INDEX_NAME,
            dimensions=DIMENSIONS,
            path="embedding",
            similarity="cosine",
            wait_until_complete=TIMEOUT,
        )

    if not has_search_index(SEARCH_INDEX_NAME):
        index.create_fulltext_search_index(
            collection=clxn,
            index_name=SEARCH_INDEX_NAME,
            field=PAGE_CONTENT_FIELD,
            wait_until_complete=TIMEOUT,
        )

    return clxn
def test_hybrid_retriever(
    embedding_openai: Embeddings,
    collection: Collection,
    example_documents: List[Document],
) -> None:
    """Exercise MongoDBAtlasHybridSearchRetriever on the example corpus.

    Documents are added through the vector store; the retriever is then
    queried twice and the top-ranked document is checked for each query.
    """
    store = PatchedMongoDBAtlasVectorSearch(
        collection=collection,
        embedding=embedding_openai,
        index_name=VECTOR_INDEX_NAME,
        text_key=PAGE_CONTENT_FIELD,
    )
    store.add_documents(example_documents)
    sleep(TIMEOUT)  # Wait for documents to be sync'd

    hybrid = MongoDBAtlasHybridSearchRetriever(
        vectorstore=store,
        search_index_name=SEARCH_INDEX_NAME,
        top_k=3,
    )

    # First query: result count must honour top_k, and Paris ranks first.
    latest_city_hits = hybrid.invoke("What was the latest city that I visited?")
    assert len(latest_city_hits) == 3
    assert "Paris" in latest_city_hits[0].page_content

    # Second query: the lower-cased city name should still rank its document first.
    new_orleans_hits = hybrid.invoke("When was the last time I visited new orleans?")
    assert "New Orleans" in new_orleans_hits[0].page_content
def test_fulltext_retriever(
    collection: Collection,
    example_documents: List[Document],
) -> None:
    """Check the result of a full-text search.

    No VectorStore is involved: the documents are written straight to the
    collection through MongoDB's Collection API, storing only the page
    content under ``PAGE_CONTENT_FIELD``.
    """
    raw_docs = [{PAGE_CONTENT_FIELD: doc.page_content} for doc in example_documents]
    collection.insert_many(raw_docs)
    sleep(TIMEOUT)  # Wait for documents to be sync'd

    fulltext = MongoDBAtlasFullTextSearchRetriever(
        collection=collection,
        search_index_name=SEARCH_INDEX_NAME,
        search_field=PAGE_CONTENT_FIELD,
    )

    top_hit = fulltext.invoke("When was the last time I visited new orleans?")[0]
    assert "New Orleans" in top_hit.page_content
    # The retriever is expected to attach a score to each hit's metadata.
    assert "score" in top_hit.metadata
def test_vector_retriever(
    embedding_openai: Embeddings,
    collection: Collection,
    example_documents: List[Document],
) -> None:
    """Exercise the retriever obtained from ``vectorstore.as_retriever()``.

    Uses the retriever's default search settings and checks both the number
    of results and the top-ranked document.
    """
    store = PatchedMongoDBAtlasVectorSearch(
        collection=collection,
        embedding=embedding_openai,
        index_name=VECTOR_INDEX_NAME,
        text_key=PAGE_CONTENT_FIELD,
    )
    store.add_documents(example_documents)
    sleep(TIMEOUT)  # Wait for documents to be sync'd

    vector_retriever = store.as_retriever()

    # With default settings all four example documents are expected back.
    latest_city_hits = vector_retriever.invoke(
        "What was the latest city that I visited?"
    )
    assert len(latest_city_hits) == 4
    assert "Paris" in latest_city_hits[0].page_content

    new_orleans_hits = vector_retriever.invoke(
        "When was the last time I visited new orleans?"
    )
    assert "New Orleans" in new_orleans_hits[0].page_content

View File

@ -4,7 +4,7 @@ from __future__ import annotations
import os import os
from time import monotonic, sleep from time import monotonic, sleep
from typing import Any, Dict, Generator, Iterable, List, Optional, Union from typing import Any, Dict, List
import pytest # type: ignore[import-not-found] import pytest # type: ignore[import-not-found]
from bson import ObjectId from bson import ObjectId
@ -14,11 +14,10 @@ from pymongo import MongoClient
from pymongo.collection import Collection from pymongo.collection import Collection
from pymongo.errors import OperationFailure from pymongo.errors import OperationFailure
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.index import drop_vector_search_index from langchain_mongodb.index import drop_vector_search_index
from langchain_mongodb.utils import oid_to_str from langchain_mongodb.utils import oid_to_str
from ..utils import ConsistentFakeEmbeddings from ..utils import ConsistentFakeEmbeddings, PatchedMongoDBAtlasVectorSearch
INDEX_NAME = "langchain-test-index-vectorstores" INDEX_NAME = "langchain-test-index-vectorstores"
INDEX_CREATION_NAME = "langchain-test-index-vectorstores-create-test" INDEX_CREATION_NAME = "langchain-test-index-vectorstores-create-test"
@ -32,41 +31,14 @@ TIMEOUT = 120.0
INTERVAL = 0.5 INTERVAL = 0.5
class PatchedMongoDBAtlasVectorSearch(MongoDBAtlasVectorSearch): @pytest.fixture
def bulk_embed_and_insert_texts( def example_documents() -> List[Document]:
self, return [
texts: Union[List[str], Iterable[str]], Document(page_content="Dogs are tough.", metadata={"a": 1}),
metadatas: Union[List[dict], Generator[dict, Any, Any]], Document(page_content="Cats have fluff.", metadata={"b": 1}),
ids: Optional[List[str]] = None, Document(page_content="What is a sandwich?", metadata={"c": 1}),
) -> List: Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
"""Patched insert_texts that waits for data to be indexed before returning""" ]
ids_inserted = super().bulk_embed_and_insert_texts(texts, metadatas, ids)
start = monotonic()
while len(ids_inserted) != len(self.similarity_search("sandwich")) and (
monotonic() - start <= TIMEOUT
):
sleep(INTERVAL)
return ids_inserted
def create_vector_search_index(
self,
dimensions: int,
filters: Optional[List[Dict[str, str]]] = None,
update: bool = False,
) -> None:
result = super().create_vector_search_index(
dimensions=dimensions, filters=filters, update=update
)
start = monotonic()
while monotonic() - start <= TIMEOUT:
if indexes := list(
self._collection.list_search_indexes(name=self._index_name)
):
if indexes[0].get("status") == "READY":
return result
sleep(INTERVAL)
raise TimeoutError(f"{self._index_name} never reached 'status: READY'")
def _await_index_deletion(coll: Collection, index_name: str) -> None: def _await_index_deletion(coll: Collection, index_name: str) -> None:
@ -137,40 +109,46 @@ class TestMongoDBAtlasVectorSearch:
@pytest.fixture @pytest.fixture
def embeddings(self) -> Embeddings: def embeddings(self) -> Embeddings:
try:
from langchain_openai import OpenAIEmbeddings
return OpenAIEmbeddings(
openai_api_key=os.environ["OPENAI_API_KEY"], # type: ignore # noqa
model="text-embedding-3-small",
)
except Exception:
return ConsistentFakeEmbeddings(DIMENSIONS) return ConsistentFakeEmbeddings(DIMENSIONS)
def test_from_documents(self, embeddings: Embeddings, collection: Any) -> None: def test_from_documents(
self,
embeddings: Embeddings,
collection: Any,
example_documents: List[Document],
) -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
documents = [
Document(page_content="Dogs are tough.", metadata={"a": 1}),
Document(page_content="Cats have fluff.", metadata={"b": 1}),
Document(page_content="What is a sandwich?", metadata={"c": 1}),
Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents( vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents(
documents, example_documents,
embeddings, embedding=embeddings,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
output = vectorstore.similarity_search("Sandwich", k=1) output = vectorstore.similarity_search("Sandwich", k=1)
assert len(output) == 1 assert len(output) == 1
# Check for the presence of the metadata key # Check for the presence of the metadata key
assert any([key.page_content == output[0].page_content for key in documents]) assert any(
[key.page_content == output[0].page_content for key in example_documents]
)
def test_from_documents_no_embedding_return( def test_from_documents_no_embedding_return(
self, embeddings: Embeddings, collection: Any self,
embeddings: Embeddings,
collection: Any,
example_documents: List[Document],
) -> None: ) -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
documents = [
Document(page_content="Dogs are tough.", metadata={"a": 1}),
Document(page_content="Cats have fluff.", metadata={"b": 1}),
Document(page_content="What is a sandwich?", metadata={"c": 1}),
Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents( vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents(
documents, example_documents,
embeddings, embedding=embeddings,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
@ -179,37 +157,38 @@ class TestMongoDBAtlasVectorSearch:
# Check for presence of embedding in each document # Check for presence of embedding in each document
assert all(["embedding" not in key.metadata for key in output]) assert all(["embedding" not in key.metadata for key in output])
# Check for the presence of the metadata key # Check for the presence of the metadata key
assert any([key.page_content == output[0].page_content for key in documents]) assert any(
[key.page_content == output[0].page_content for key in example_documents]
)
def test_from_documents_embedding_return( def test_from_documents_embedding_return(
self, embeddings: Embeddings, collection: Any self,
embeddings: Embeddings,
collection: Any,
example_documents: List[Document],
) -> None: ) -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
documents = [
Document(page_content="Dogs are tough.", metadata={"a": 1}),
Document(page_content="Cats have fluff.", metadata={"b": 1}),
Document(page_content="What is a sandwich?", metadata={"c": 1}),
Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents( vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents(
documents, example_documents,
embeddings, embedding=embeddings,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
output = vectorstore.similarity_search("Sandwich", k=1, include_embedding=True) output = vectorstore.similarity_search("Sandwich", k=1, include_embeddings=True)
assert len(output) == 1 assert len(output) == 1
# Check for presence of embedding in each document # Check for presence of embedding in each document
assert all([key.metadata.get("embedding") for key in output]) assert all([key.metadata.get("embedding") for key in output])
# Check for the presence of the metadata key # Check for the presence of the metadata key
assert any([key.page_content == output[0].page_content for key in documents]) assert any(
[key.page_content == output[0].page_content for key in example_documents]
)
def test_from_texts( def test_from_texts(
self, embeddings: Embeddings, collection: Collection, texts: List[str] self, embeddings: Embeddings, collection: Collection, texts: List[str]
) -> None: ) -> None:
vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts( vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts(
texts, texts,
embeddings, embedding=embeddings,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
@ -226,7 +205,7 @@ class TestMongoDBAtlasVectorSearch:
metakeys = ["a", "b", "c", "d", "e"] metakeys = ["a", "b", "c", "d", "e"]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts( vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts(
texts, texts,
embeddings, embedding=embeddings,
metadatas=metadatas, metadatas=metadatas,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
@ -242,21 +221,26 @@ class TestMongoDBAtlasVectorSearch:
metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts( vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts(
texts, texts,
embeddings, embedding=embeddings,
metadatas=metadatas, metadatas=metadatas,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
output = vectorstore.similarity_search( does_not_match_filter = vectorstore.similarity_search(
"Sandwich", k=1, pre_filter={"c": {"$lte": 0}} "Sandwich", k=1, pre_filter={"c": {"$lte": 0}}
) )
assert output == [] assert does_not_match_filter == []
matches_filter = vectorstore.similarity_search(
"Sandwich", k=3, pre_filter={"c": {"$gt": 0}}
)
assert len(matches_filter) == 1
def test_mmr(self, embeddings: Embeddings, collection: Any) -> None: def test_mmr(self, embeddings: Embeddings, collection: Any) -> None:
texts = ["foo", "foo", "fou", "foy"] texts = ["foo", "foo", "fou", "foy"]
vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts( vectorstore = PatchedMongoDBAtlasVectorSearch.from_texts(
texts, texts,
embeddings, embedding=embeddings,
collection=collection, collection=collection,
index_name=INDEX_NAME, index_name=INDEX_NAME,
) )
@ -266,10 +250,57 @@ class TestMongoDBAtlasVectorSearch:
assert output[0].page_content == "foo" assert output[0].page_content == "foo"
assert output[1].page_content != "foo" assert output[1].page_content != "foo"
def test_retriever(
    self,
    embeddings: Embeddings,
    collection: Any,
    example_documents: List[Document],
) -> None:
    """Demonstrate usage and parity of VectorStore similarity_search
    with Retriever.invoke.

    Both are called with the same query and default arguments; the page
    contents must match in number and in order.
    """
    vectorstore = PatchedMongoDBAtlasVectorSearch.from_documents(
        example_documents,
        embedding=embeddings,
        collection=collection,
        index_name=INDEX_NAME,
    )
    query = "sandwich"
    retriever_default_kwargs = vectorstore.as_retriever()
    result_retriever = retriever_default_kwargs.invoke(query)
    result_vectorstore = vectorstore.similarity_search(query)
    # Guard against a vacuous pass: an empty retriever result would otherwise
    # make the comparison below trivially true.
    assert result_retriever, "retriever returned no documents"
    # Comparing whole lists also catches a length mismatch, which an
    # index-based comparison over the retriever's results would miss.
    assert [doc.page_content for doc in result_retriever] == [
        doc.page_content for doc in result_vectorstore
    ]
def test_include_embeddings(
    self,
    embeddings: Embeddings,
    collection: Any,
    example_documents: List[Document],
) -> None:
    """Check that ``include_embeddings`` controls the embedding in metadata.

    With ``include_embeddings=True`` the embedding key must be present in
    the result's metadata; without the kwarg it must be absent.
    """
    store = PatchedMongoDBAtlasVectorSearch.from_documents(
        documents=example_documents,
        embedding=embeddings,
        collection=collection,
        index_name=INDEX_NAME,
    )

    hits_with_vector = store.similarity_search(
        "Sandwich", include_embeddings=True, k=1
    )
    assert store._embedding_key in hits_with_vector[0].metadata

    hits_default = store.similarity_search("Sandwich", k=1)
    assert store._embedding_key not in hits_default[0].metadata
def test_delete( def test_delete(
self, embeddings: Embeddings, collection: Any, texts: List[str] self, embeddings: Embeddings, collection: Any, texts: List[str]
) -> None: ) -> None:
vectorstore = MongoDBAtlasVectorSearch( # PatchedMongoDBAtlasVectorSearch( vectorstore = PatchedMongoDBAtlasVectorSearch(
collection=collection, collection=collection,
embedding=embeddings, embedding=embeddings,
index_name=INDEX_NAME, index_name=INDEX_NAME,
@ -296,7 +327,10 @@ class TestMongoDBAtlasVectorSearch:
collection: Collection, collection: Collection,
texts: List[str], texts: List[str],
) -> None: ) -> None:
"""Tests API of add_texts, focussing on id treatment""" """Tests API of add_texts, focussing on id treatment
Warning: This is slow because of the number of cases
"""
metadatas: List[Dict[str, Any]] = [ metadatas: List[Dict[str, Any]] = [
{"a": 1}, {"a": 1},
{"b": 1}, {"b": 1},
@ -380,7 +414,6 @@ class TestMongoDBAtlasVectorSearch:
self, self,
embeddings: Embeddings, embeddings: Embeddings,
collection: Collection, collection: Collection,
index_name: str = INDEX_NAME,
) -> None: ) -> None:
"""Tests add_documents.""" """Tests add_documents."""
vectorstore = PatchedMongoDBAtlasVectorSearch( vectorstore = PatchedMongoDBAtlasVectorSearch(
@ -428,13 +461,13 @@ class TestMongoDBAtlasVectorSearch:
self, embeddings: Embeddings, index_collection: Any self, embeddings: Embeddings, index_collection: Any
) -> None: ) -> None:
vectorstore = PatchedMongoDBAtlasVectorSearch( vectorstore = PatchedMongoDBAtlasVectorSearch(
index_collection, embeddings, index_name=INDEX_CREATION_NAME index_collection, embedding=embeddings, index_name=INDEX_CREATION_NAME
) )
vectorstore.create_vector_search_index(dimensions=1536) vectorstore.create_vector_search_index(dimensions=1536)
def test_index_update(self, embeddings: Embeddings, index_collection: Any) -> None: def test_index_update(self, embeddings: Embeddings, index_collection: Any) -> None:
vectorstore = PatchedMongoDBAtlasVectorSearch( vectorstore = PatchedMongoDBAtlasVectorSearch(
index_collection, embeddings, index_name=INDEX_CREATION_NAME index_collection, embedding=embeddings, index_name=INDEX_CREATION_NAME
) )
vectorstore.create_vector_search_index(dimensions=1536) vectorstore.create_vector_search_index(dimensions=1536)
vectorstore.create_vector_search_index(dimensions=1536, update=True) vectorstore.create_vector_search_index(dimensions=1536, update=True)

View File

@ -12,7 +12,8 @@ from pymongo.collection import Collection
from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache from langchain_mongodb.cache import MongoDBAtlasSemanticCache, MongoDBCache
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from tests.utils import ConsistentFakeEmbeddings, FakeChatModel, FakeLLM, MockCollection
from ..utils import ConsistentFakeEmbeddings, FakeChatModel, FakeLLM, MockCollection
CONN_STRING = "MockString" CONN_STRING = "MockString"
COLLECTION = "default" COLLECTION = "default"
@ -74,7 +75,7 @@ def llm_cache(cls: Any) -> BaseCache:
connection_string=CONN_STRING, connection_string=CONN_STRING,
collection_name=COLLECTION, collection_name=COLLECTION,
database_name=DATABASE, database_name=DATABASE,
wait_until_ready=True, wait_until_ready=15.0,
) )
) )
assert get_llm_cache() assert get_llm_cache()
@ -207,7 +208,7 @@ def test_mongodb_atlas_cache_matrix(
for prompt_i, llm_generations_i in zip(prompts, llm_generations): for prompt_i, llm_generations_i in zip(prompts, llm_generations):
_execute_test(prompt_i, llm_string, llm_generations_i) _execute_test(prompt_i, llm_string, llm_generations_i)
get_llm_cache()._collection._simluate_cache_aggregation_query = True # type: ignore get_llm_cache()._collection._simulate_cache_aggregation_query = True # type: ignore
assert llm.generate(prompts) == LLMResult( assert llm.generate(prompts) == LLMResult(
generations=llm_generations, llm_output={} generations=llm_generations, llm_output={}
) )

View File

@ -4,7 +4,8 @@ from langchain.memory import ConversationBufferMemory # type: ignore[import-not
from langchain_core.messages import message_to_dict from langchain_core.messages import message_to_dict
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from tests.utils import MockCollection
from ..utils import MockCollection
class PatchedMongoDBChatMessageHistory(MongoDBChatMessageHistory): class PatchedMongoDBChatMessageHistory(MongoDBChatMessageHistory):

View File

@ -10,6 +10,9 @@ from pymongo.errors import OperationFailure, ServerSelectionTimeoutError
from langchain_mongodb import index from langchain_mongodb import index
DIMENSION = 10
TIMEOUT = 10
@pytest.fixture @pytest.fixture
def collection() -> Collection: def collection() -> Collection:
@ -25,19 +28,33 @@ def collection() -> Collection:
def test_create_vector_search_index(collection: Collection) -> None: def test_create_vector_search_index(collection: Collection) -> None:
with pytest.raises((OperationFailure, ServerSelectionTimeoutError)): with pytest.raises((OperationFailure, ServerSelectionTimeoutError)):
index.create_vector_search_index( index.create_vector_search_index(
collection, "index_name", 1536, "embedding", "cosine", [] collection,
"index_name",
DIMENSION,
"embedding",
"cosine",
[],
wait_until_complete=TIMEOUT,
) )
def test_drop_vector_search_index(collection: Collection) -> None: def test_drop_vector_search_index(collection: Collection) -> None:
with pytest.raises((OperationFailure, ServerSelectionTimeoutError)): with pytest.raises((OperationFailure, ServerSelectionTimeoutError)):
index.drop_vector_search_index(collection, "index_name") index.drop_vector_search_index(
collection, "index_name", wait_until_complete=TIMEOUT
)
def test_update_vector_search_index(collection: Collection) -> None: def test_update_vector_search_index(collection: Collection) -> None:
with pytest.raises((OperationFailure, ServerSelectionTimeoutError)): with pytest.raises((OperationFailure, ServerSelectionTimeoutError)):
index.update_vector_search_index( index.update_vector_search_index(
collection, "index_name", 1536, "embedding", "cosine", [] collection,
"index_name",
DIMENSION,
"embedding",
"cosine",
[],
wait_until_complete=TIMEOUT,
) )

View File

@ -2,14 +2,13 @@ from json import dumps, loads
from typing import Any, Optional from typing import Any, Optional
import pytest # type: ignore[import-not-found] import pytest # type: ignore[import-not-found]
from bson import ObjectId, json_util
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings from langchain_core.embeddings import Embeddings
from pymongo.collection import Collection from pymongo.collection import Collection
from langchain_mongodb import MongoDBAtlasVectorSearch from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.utils import str_to_oid
from tests.utils import ConsistentFakeEmbeddings, MockCollection from ..utils import ConsistentFakeEmbeddings, MockCollection
INDEX_NAME = "langchain-test-index" INDEX_NAME = "langchain-test-index"
NAMESPACE = "langchain_test_db.langchain_test_collection" NAMESPACE = "langchain_test_db.langchain_test_collection"
@ -81,8 +80,7 @@ class TestMongoDBAtlasVectorSearch:
# Validate the ObjectId provided is json serializable # Validate the ObjectId provided is json serializable
assert loads(dumps(output[0].page_content)) == output[0].page_content assert loads(dumps(output[0].page_content)) == output[0].page_content
assert loads(dumps(output[0].metadata)) == output[0].metadata assert loads(dumps(output[0].metadata)) == output[0].metadata
json_metadata = dumps(output[0].metadata) # normal json.dumps assert isinstance(output[0].metadata["_id"], str)
isinstance(str_to_oid(json_util.loads(json_metadata)["_id"]), ObjectId)
def test_from_documents( def test_from_documents(
self, embedding_openai: Embeddings, collection: MockCollection self, embedding_openai: Embeddings, collection: MockCollection
@ -98,7 +96,7 @@ class TestMongoDBAtlasVectorSearch:
documents, documents,
embedding_openai, embedding_openai,
collection=collection, collection=collection,
index_name=INDEX_NAME, vector_index_name=INDEX_NAME,
) )
self._validate_search( self._validate_search(
vectorstore, collection, metadata=documents[2].metadata["c"] vectorstore, collection, metadata=documents[2].metadata["c"]
@ -117,7 +115,7 @@ class TestMongoDBAtlasVectorSearch:
texts, texts,
embedding_openai, embedding_openai,
collection=collection, collection=collection,
index_name=INDEX_NAME, vector_index_name=INDEX_NAME,
) )
self._validate_search(vectorstore, collection, metadata=None) self._validate_search(vectorstore, collection, metadata=None)
@ -136,7 +134,7 @@ class TestMongoDBAtlasVectorSearch:
embedding_openai, embedding_openai,
metadatas=metadatas, metadatas=metadatas,
collection=collection, collection=collection,
index_name=INDEX_NAME, vector_index_name=INDEX_NAME,
) )
self._validate_search(vectorstore, collection, metadata=metadatas[2]["c"]) self._validate_search(vectorstore, collection, metadata=metadatas[2]["c"])
@ -155,7 +153,7 @@ class TestMongoDBAtlasVectorSearch:
embedding_openai, embedding_openai,
metadatas=metadatas, metadatas=metadatas,
collection=collection, collection=collection,
index_name=INDEX_NAME, vector_index_name=INDEX_NAME,
) )
collection._aggregate_result = list( collection._aggregate_result = list(
filter( filter(
@ -175,9 +173,9 @@ class TestMongoDBAtlasVectorSearch:
texts = ["foo", "foo", "fou", "foy"] texts = ["foo", "foo", "fou", "foy"]
vectorstore = MongoDBAtlasVectorSearch.from_texts( vectorstore = MongoDBAtlasVectorSearch.from_texts(
texts, texts,
embedding_openai, embedding=embedding_openai,
collection=collection, collection=collection,
index_name=INDEX_NAME, vector_index_name=INDEX_NAME,
) )
query = "foo" query = "foo"
self._validate_search( self._validate_search(

View File

@ -1,7 +1,8 @@
from __future__ import annotations from __future__ import annotations
from copy import deepcopy from copy import deepcopy
from typing import Any, Dict, List, Mapping, Optional, cast from time import monotonic, sleep
from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Union, cast
from bson import ObjectId from bson import ObjectId
from langchain_core.callbacks.manager import ( from langchain_core.callbacks.manager import (
@ -20,8 +21,47 @@ from langchain_core.pydantic_v1 import validator
from pymongo.collection import Collection from pymongo.collection import Collection
from pymongo.results import DeleteResult, InsertManyResult from pymongo.results import DeleteResult, InsertManyResult
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.cache import MongoDBAtlasSemanticCache from langchain_mongodb.cache import MongoDBAtlasSemanticCache
TIMEOUT = 120
INTERVAL = 0.5
class PatchedMongoDBAtlasVectorSearch(MongoDBAtlasVectorSearch):
    """Test helper that blocks until writes and index builds become visible.

    Both overrides poll every ``INTERVAL`` seconds for at most ``TIMEOUT``
    seconds. Neither raises on timeout: they return as if the wait had
    succeeded, so a slow cluster surfaces as a failing assertion in the
    calling test rather than as an exception here.
    """

    def bulk_embed_and_insert_texts(
        self,
        texts: Union[List[str], Iterable[str]],
        metadatas: Union[List[dict], Generator[dict, Any, Any]],
        ids: Optional[List[str]] = None,
    ) -> List:
        """Patched insert_texts that waits for data to be indexed before returning

        Args:
            texts: Texts to embed and insert.
            metadatas: One metadata dict per text.
            ids: Optional ids forwarded to the parent implementation.

        Returns:
            The ids returned by the parent ``bulk_embed_and_insert_texts``.
        """
        ids_inserted = super().bulk_embed_and_insert_texts(texts, metadatas, ids)
        # Ask the search for as many hits as the collection holds. A search at
        # the default ``k`` caps the result count, so an insert larger than
        # that cap could never match and would always wait the full TIMEOUT.
        n_docs = self._collection.count_documents({})
        if n_docs == 0:
            # Nothing to wait for; also avoids issuing a search with k=0.
            return ids_inserted
        start = monotonic()
        while monotonic() - start <= TIMEOUT:
            if len(self.similarity_search("sandwich", k=n_docs)) == n_docs:
                break
            sleep(INTERVAL)
        return ids_inserted

    def create_vector_search_index(
        self,
        dimensions: int,
        filters: Optional[List[str]] = None,
        update: bool = False,
    ) -> None:
        """Create (or update) the vector index and wait until it is READY.

        Args:
            dimensions: Forwarded to the parent implementation.
            filters: Forwarded to the parent implementation.
            update: Forwarded to the parent implementation.

        Returns:
            The parent's result once the index reports ``status == "READY"``.
            If that does not happen within ``TIMEOUT`` seconds the method
            falls through and returns ``None`` without raising.
        """
        result = super().create_vector_search_index(
            dimensions=dimensions, filters=filters, update=update
        )
        start = monotonic()
        while monotonic() - start <= TIMEOUT:
            if indexes := list(
                self._collection.list_search_indexes(name=self._index_name)
            ):
                if indexes[0].get("status") == "READY":
                    return result
            sleep(INTERVAL)
class ConsistentFakeEmbeddings(Embeddings): class ConsistentFakeEmbeddings(Embeddings):
"""Fake embeddings functionality for testing.""" """Fake embeddings functionality for testing."""
@ -147,13 +187,13 @@ class MockCollection(Collection):
_aggregate_result: List[Any] _aggregate_result: List[Any]
_insert_result: Optional[InsertManyResult] _insert_result: Optional[InsertManyResult]
_data: List[Any] _data: List[Any]
_simluate_cache_aggregation_query: bool _simulate_cache_aggregation_query: bool
def __init__(self) -> None: def __init__(self) -> None:
self._data = [] self._data = []
self._aggregate_result = [] self._aggregate_result = []
self._insert_result = None self._insert_result = None
self._simluate_cache_aggregation_query = False self._simulate_cache_aggregation_query = False
def delete_many(self, *args, **kwargs) -> DeleteResult: # type: ignore def delete_many(self, *args, **kwargs) -> DeleteResult: # type: ignore
old_len = len(self._data) old_len = len(self._data)
@ -201,7 +241,7 @@ class MockCollection(Collection):
elif upsert: elif upsert:
self._data.append({**find_query, **set_options}) self._data.append({**find_query, **set_options})
def _execute_cache_aggreation_query(self, *args, **kwargs) -> List[Dict[str, Any]]: # type: ignore def _execute_cache_aggregation_query(self, *args, **kwargs) -> List[Dict[str, Any]]: # type: ignore
"""Helper function only to be used for MongoDBAtlasSemanticCache Testing """Helper function only to be used for MongoDBAtlasSemanticCache Testing
Returns: Returns:
@ -223,12 +263,12 @@ class MockCollection(Collection):
return acc return acc
def aggregate(self, *args, **kwargs) -> List[Any]: # type: ignore def aggregate(self, *args, **kwargs) -> List[Any]: # type: ignore
if self._simluate_cache_aggregation_query: if self._simulate_cache_aggregation_query:
return deepcopy(self._execute_cache_aggreation_query(*args, **kwargs)) return deepcopy(self._execute_cache_aggregation_query(*args, **kwargs))
return deepcopy(self._aggregate_result) return deepcopy(self._aggregate_result)
def count_documents(self, *args, **kwargs) -> int: # type: ignore def count_documents(self, *args, **kwargs) -> int: # type: ignore
return len(self._data) return len(self._data)
def __repr__(self) -> str: def __repr__(self) -> str:
return "FakeCollection" return "MockCollection"