diff --git a/docs/docs/integrations/vectorstores/zep.ipynb b/docs/docs/integrations/vectorstores/zep.ipynb index cc969cb3ad4..2ad8b1473d1 100644 --- a/docs/docs/integrations/vectorstores/zep.ipynb +++ b/docs/docs/integrations/vectorstores/zep.ipynb @@ -8,6 +8,13 @@ }, "source": [ "# Zep\n", + "## VectorStore Example for [Zep](https://docs.getzep.com/) - Fast, scalable building blocks for LLM Apps\n", + "\n", + "### More on Zep:\n", + "\n", + "Zep is an open source platform for productionizing LLM apps. Go from a prototype\n", + "built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n", + "rewriting code.\n", "\n", "## Fast, Scalable Building Blocks for LLM Apps\n", "Zep is an open source platform for productionizing LLM apps. Go from a prototype\n", @@ -104,6 +111,7 @@ " config=config,\n", " api_url=ZEP_API_URL,\n", " api_key=ZEP_API_KEY,\n", + " embedding=None, # we'll have Zep embed our documents using its low-latency embedder\n", ")" ] }, @@ -123,9 +131,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Embedding status: 0/402 documents embedded\n", - "Embedding status: 0/402 documents embedded\n", - "Embedding status: 402/402 documents embedded\n" + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 0/401 documents embedded\n", + "Embedding status: 401/401 documents embedded\n" ] } ], @@ -179,33 +191,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tables necessary to determine the places of the planets are not less\r\n", - "necessary than those for the sun, moon, and stars. Some notion of the\r\n", - "number and complexity of these tables may be formed, when we state that\r\n", - "the positions of the two principal planets, (and these are the most\r\n", - "necessary for the navigator,) Jupiter and Saturn, require each not less\r\n", - "than one hundred and sixteen tables. Yet it is not only necessary to\r\n", - "predict the position of these bodies, but it is likewise expedient to -> 0.8998482592744614 \n", + "the positions of the two principal planets, (and these the most\n", + "necessary for the navigator,) Jupiter and Saturn, require each not less\n", + "than one hundred and sixteen tables. Yet it is not only necessary to\n", + "predict the position of these bodies, but it is likewise expedient to\n", + "tabulate the motions of the four satellites of Jupiter, to predict the\n", + "exact times at which they enter his shadow, and at which their shadows\n", + "cross his disc, as well as the times at which they are interposed -> 0.9003241539387915 \n", "====\n", "\n", - "tabulate the motions of the four satellites of Jupiter, to predict the\r\n", - "exact times at which they enter his shadow, and at which their shadows\r\n", - "cross his disc, as well as the times at which they are interposed\r\n", - "between him and the Earth, and he between them and the Earth.\r\n", - "\r\n", - "Among the extensive classes of tables here enumerated, there are several\r\n", - "which are in their nature permanent and unalterable, and would never\r\n", - "require to be recomputed, if they could once be computed with perfect -> 0.8976143854195493 \n", + "furnish more than a small fraction of that aid to navigation (in the\n", + "large sense of that term), which, with greater facility, expedition, and\n", + "economy in the calculation and printing of tables, it might be made to\n", + "supply.\n", + "\n", + "Tables necessary to determine the places of the planets are not less\n", + "necessary than those for the sun, moon, and stars. Some notion of the\n", + "number and complexity of these tables may be formed, when we state that -> 0.8911165633479508 \n", "====\n", "\n", - "the scheme of notation thus applied, immediately suggested the\r\n", - "advantages which must attend it as an instrument for expressing the\r\n", - "structure, operation, and circulation of the animal system; and we\r\n", - "entertain no doubt of its adequacy for that purpose. Not only the\r\n", - "mechanical connexion of the solid members of the bodies of men and\r\n", - "animals, but likewise the structure and operation of the softer parts,\r\n", - "including the muscles, integuments, membranes, &c. the nature, motion, -> 0.889982614061763 \n", - "====\n" + "the scheme of notation thus applied, immediately suggested the\n", + "advantages which must attend it as an instrument for expressing the\n", + "structure, operation, and circulation of the animal system; and we\n", + "entertain no doubt of its adequacy for that purpose. Not only the\n", + "mechanical connexion of the solid members of the bodies of men and\n", + "animals, but likewise the structure and operation of the softer parts,\n", + "including the muscles, integuments, membranes, &c. the nature, motion, -> 0.8899750214770481 \n", + "====\n", + "\n" ] } ], @@ -226,7 +239,9 @@ "collapsed": false }, "source": [ - "## Search over Collection Re-ranked by MMR" + "## Search over Collection Re-ranked by MMR\n", + "\n", + "Zep offers native, hardware-accelerated MMR re-ranking of search results." ] }, { @@ -245,33 +260,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tables necessary to determine the places of the planets are not less\r\n", - "necessary than those for the sun, moon, and stars. Some notion of the\r\n", - "number and complexity of these tables may be formed, when we state that\r\n", - "the positions of the two principal planets, (and these the most\r\n", - "necessary for the navigator,) Jupiter and Saturn, require each not less\r\n", - "than one hundred and sixteen tables. Yet it is not only necessary to\r\n", - "predict the position of these bodies, but it is likewise expedient to \n", + "the positions of the two principal planets, (and these the most\n", + "necessary for the navigator,) Jupiter and Saturn, require each not less\n", + "than one hundred and sixteen tables. Yet it is not only necessary to\n", + "predict the position of these bodies, but it is likewise expedient to\n", + "tabulate the motions of the four satellites of Jupiter, to predict the\n", + "exact times at which they enter his shadow, and at which their shadows\n", + "cross his disc, as well as the times at which they are interposed \n", "====\n", "\n", - "the scheme of notation thus applied, immediately suggested the\r\n", - "advantages which must attend it as an instrument for expressing the\r\n", - "structure, operation, and circulation of the animal system; and we\r\n", - "entertain no doubt of its adequacy for that purpose. Not only the\r\n", - "mechanical connexion of the solid members of the bodies of men and\r\n", - "animals, but likewise the structure and operation of the softer parts,\r\n", + "the scheme of notation thus applied, immediately suggested the\n", + "advantages which must attend it as an instrument for expressing the\n", + "structure, operation, and circulation of the animal system; and we\n", + "entertain no doubt of its adequacy for that purpose. Not only the\n", + "mechanical connexion of the solid members of the bodies of men and\n", + "animals, but likewise the structure and operation of the softer parts,\n", "including the muscles, integuments, membranes, &c. the nature, motion, \n", "====\n", "\n", - "tabulate the motions of the four satellites of Jupiter, to predict the\r\n", - "exact times at which they enter his shadow, and at which their shadows\r\n", - "cross his disc, as well as the times at which they are interposed\r\n", - "between him and the Earth, and he between them and the Earth.\r\n", - "\r\n", - "Among the extensive classes of tables here enumerated, there are several\r\n", - "which are in their nature permanent and unalterable, and would never\r\n", - "require to be recomputed, if they could once be computed with perfect \n", - "====\n" + "resistance, economizing time, harmonizing the mechanism, and giving to\n", + "the whole mechanical action the utmost practical perfection.\n", + "\n", + "The system of mechanical contrivances by which the results, here\n", + "attempted to be described, are attained, form only one order of\n", + "expedients adopted in this machinery;--although such is the perfection\n", + "of their action, that in any ordinary case they would be regarded as\n", + "having attained the ends in view with an almost superfluous degree of \n", + "====\n", + "\n" ] } ], @@ -311,17 +327,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Embedding status: 402/1692 documents embedded\n", - "Embedding status: 402/1692 documents embedded\n", - "Embedding status: 552/1692 documents embedded\n", - "Embedding status: 702/1692 documents embedded\n", - "Embedding status: 1002/1692 documents embedded\n", - "Embedding status: 1002/1692 documents embedded\n", - "Embedding status: 1152/1692 documents embedded\n", - "Embedding status: 1302/1692 documents embedded\n", - "Embedding status: 1452/1692 documents embedded\n", - "Embedding status: 1602/1692 documents embedded\n", - "Embedding status: 1692/1692 documents embedded\n" + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 901/1691 documents embedded\n", + "Embedding status: 1401/1691 documents embedded\n", + "Embedding status: 1401/1691 documents embedded\n", + "Embedding status: 1401/1691 documents embedded\n", + "Embedding status: 1401/1691 documents embedded\n", + "Embedding status: 1691/1691 documents embedded\n" ] } ], @@ -366,33 +388,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "by that body to Mr Babbage:--'In no department of science, or of the\r\n", - "arts, does this discovery promise to be so eminently useful as in that\r\n", - "of astronomy, and its kindred sciences, with the various arts dependent\r\n", - "on them. In none are computations more operose than those which\r\n", - "astronomy in particular requires;--in none are preparatory facilities\r\n", - "more needful;--in none is error more detrimental. The practical\r\n", - "astronomer is interrupted in his pursuit, and diverted from his task of -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", + "or remotely, for this purpose. But in addition to these, a great number\n", + "of tables, exclusively astronomical, are likewise indispensable. The\n", + "predictions of the astronomer, with respect to the positions and motions\n", + "of the bodies of the firmament, are the means, and the only means, which\n", + "enable the mariner to prosecute his art. By these he is enabled to\n", + "discover the distance of his ship from the Line, and the extent of his -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", "====\n", "\n", - "possess all knowledge which is likely to be useful to him in his work,\r\n", - "and this I have endeavored in my case to do. If I remember rightly, you\r\n", - "on one occasion, in the early days of our friendship, defined my limits\r\n", - "in a very precise fashion.”\r\n", - "\r\n", - "“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n", - "astronomy, and politics were marked at zero, I remember. Botany\r\n", + "possess all knowledge which is likely to be useful to him in his work,\n", + "and this I have endeavored in my case to do. If I remember rightly, you\n", + "on one occasion, in the early days of our friendship, defined my limits\n", + "in a very precise fashion.”\n", + "\n", + "“Yes,” I answered, laughing. “It was a singular document. Philosophy,\n", + "astronomy, and politics were marked at zero, I remember. Botany\n", "variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", "====\n", "\n", - "in all its relations; but above all, with Astronomy and Navigation. So\r\n", - "important have they been considered, that in many instances large sums\r\n", - "have been appropriated by the most enlightened nations in the production\r\n", - "of them; and yet so numerous and insurmountable have been the\r\n", - "difficulties attending the attainment of this end, that after all, even\r\n", - "navigators, putting aside every other department of art and science,\r\n", - "have, until very recently, been scantily and imperfectly supplied with -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", - "====\n" + "of astronomy, and its kindred sciences, with the various arts dependent\n", + "on them. In none are computations more operose than those which\n", + "astronomy in particular requires;--in none are preparatory facilities\n", + "more needful;--in none is error more detrimental. The practical\n", + "astronomer is interrupted in his pursuit, and diverted from his task of\n", + "observation by the irksome labours of computation, or his diligence in\n", + "observing becomes ineffectual for want of yet greater industry of -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", + "====\n", + "\n" ] } ], @@ -430,38 +452,39 @@ "name": "stdout", "output_type": "stream", "text": [ - "possess all knowledge which is likely to be useful to him in his work,\r\n", - "and this I have endeavored in my case to do. If I remember rightly, you\r\n", - "on one occasion, in the early days of our friendship, defined my limits\r\n", - "in a very precise fashion.”\r\n", - "\r\n", - "“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n", - "astronomy, and politics were marked at zero, I remember. Botany\r\n", + "possess all knowledge which is likely to be useful to him in his work,\n", + "and this I have endeavored in my case to do. If I remember rightly, you\n", + "on one occasion, in the early days of our friendship, defined my limits\n", + "in a very precise fashion.”\n", + "\n", + "“Yes,” I answered, laughing. “It was a singular document. Philosophy,\n", + "astronomy, and politics were marked at zero, I remember. Botany\n", "variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", "====\n", "\n", - "the light shining upon his strong-set aquiline features. So he sat as I\r\n", - "dropped off to sleep, and so he sat when a sudden ejaculation caused me\r\n", - "to wake up, and I found the summer sun shining into the apartment. The\r\n", - "pipe was still between his lips, the smoke still curled upward, and the\r\n", - "room was full of a dense tobacco haze, but nothing remained of the heap\r\n", - "of shag which I had seen upon the previous night.\r\n", - "\r\n", - "“Awake, Watson?” he asked.\r\n", - "\r\n", - "“Yes.”\r\n", - "\r\n", + "the light shining upon his strong-set aquiline features. So he sat as I\n", + "dropped off to sleep, and so he sat when a sudden ejaculation caused me\n", + "to wake up, and I found the summer sun shining into the apartment. The\n", + "pipe was still between his lips, the smoke still curled upward, and the\n", + "room was full of a dense tobacco haze, but nothing remained of the heap\n", + "of shag which I had seen upon the previous night.\n", + "\n", + "“Awake, Watson?” he asked.\n", + "\n", + "“Yes.”\n", + "\n", "“Game for a morning drive?” -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", "====\n", "\n", - "“I glanced at the books upon the table, and in spite of my ignorance\r\n", - "of German I could see that two of them were treatises on science, the\r\n", - "others being volumes of poetry. Then I walked across to the window,\r\n", - "hoping that I might catch some glimpse of the country-side, but an oak\r\n", - "shutter, heavily barred, was folded across it. It was a wonderfully\r\n", - "silent house. There was an old clock ticking loudly somewhere in the\r\n", + "“I glanced at the books upon the table, and in spite of my ignorance\n", + "of German I could see that two of them were treatises on science, the\n", + "others being volumes of poetry. Then I walked across to the window,\n", + "hoping that I might catch some glimpse of the country-side, but an oak\n", + "shutter, heavily barred, was folded across it. It was a wonderfully\n", + "silent house. There was an old clock ticking loudly somewhere in the\n", "passage, but otherwise everything was deadly still. A vague feeling of -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", - "====\n" + "====\n", + "\n" ] } ], @@ -479,6 +502,14 @@ "for d in docs:\n", " print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96132aa6", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -490,13 +521,13 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", + "pygments_lexer": "ipython3", "version": "3.11.6" } }, diff --git a/libs/langchain/langchain/vectorstores/zep.py b/libs/langchain/langchain/vectorstores/zep.py index a55ad08ed15..7872b1f9254 100644 --- a/libs/langchain/langchain/vectorstores/zep.py +++ b/libs/langchain/langchain/vectorstores/zep.py @@ -5,12 +5,9 @@ import warnings from dataclasses import asdict, dataclass from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple -import numpy as np - from langchain.docstore.document import Document from langchain.schema.embeddings import Embeddings from langchain.schema.vectorstore import VectorStore -from langchain.vectorstores.utils import maximal_marginal_relevance if TYPE_CHECKING: from zep_python.document import Document as ZepDocument @@ -112,8 +109,7 @@ class ZepVectorStore(VectorStore): collection = self._client.document.get_collection(self.collection_name) except NotFoundError: logger.info( - f"Collection {self.collection_name} not found. " - "Creating new collection." + f"Collection {self.collection_name} not found. Creating new collection." ) collection = self._create_collection() @@ -452,23 +448,6 @@ class ZepVectorStore(VectorStore): for doc in results ] - def _max_marginal_relevance_selection( - self, - query_vector: List[float], - results: List["ZepDocument"], - *, - lambda_mult: float = 0.5, - k: int = 4, - ) -> List[Document]: - mmr_selected = maximal_marginal_relevance( - np.array([query_vector], dtype=np.float32), - [d.embedding for d in results], - lambda_mult=lambda_mult, - k=k, - ) - selected = [results[i] for i in mmr_selected] - return [Document(page_content=d.content, metadata=d.metadata) for d in selected] - def max_marginal_relevance_search( self, query: str, @@ -487,6 +466,8 @@ class ZepVectorStore(VectorStore): query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. + Zep determines this automatically and this parameter is + ignored. lambda_mult: Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. @@ -504,16 +485,24 @@ class ZepVectorStore(VectorStore): if not self._collection.is_auto_embedded and self._embedding: query_vector = self._embedding.embed_query(query) results = self._collection.search( - embedding=query_vector, limit=k, metadata=metadata, **kwargs + embedding=query_vector, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) else: results, query_vector = self._collection.search_return_query_vector( - query, limit=k, metadata=metadata, **kwargs + query, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) - return self._max_marginal_relevance_selection( - query_vector, results, k=k, lambda_mult=lambda_mult - ) + return [Document(page_content=d.content, metadata=d.metadata) for d in results] async def amax_marginal_relevance_search( self, @@ -534,16 +523,24 @@ class ZepVectorStore(VectorStore): if not self._collection.is_auto_embedded and self._embedding: query_vector = self._embedding.embed_query(query) results = await self._collection.asearch( - embedding=query_vector, limit=k, metadata=metadata, **kwargs + embedding=query_vector, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) else: results, query_vector = await self._collection.asearch_return_query_vector( - query, limit=k, metadata=metadata, **kwargs + query, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) - return self._max_marginal_relevance_selection( - query_vector, results, k=k, lambda_mult=lambda_mult - ) + return [Document(page_content=d.content, metadata=d.metadata) for d in results] def max_marginal_relevance_search_by_vector( self, @@ -563,6 +560,8 @@ class ZepVectorStore(VectorStore): embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. + Zep determines this automatically and this parameter is + ignored. lambda_mult: Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. @@ -577,12 +576,15 @@ class ZepVectorStore(VectorStore): ) results = self._collection.search( - embedding=embedding, limit=k, metadata=metadata, **kwargs + embedding=embedding, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) - return self._max_marginal_relevance_selection( - embedding, results, k=k, lambda_mult=lambda_mult - ) + return [Document(page_content=d.content, metadata=d.metadata) for d in results] async def amax_marginal_relevance_search_by_vector( self, @@ -600,12 +602,15 @@ class ZepVectorStore(VectorStore): ) results = await self._collection.asearch( - embedding=embedding, limit=k, metadata=metadata, **kwargs + embedding=embedding, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, ) - return self._max_marginal_relevance_selection( - embedding, results, k=k, lambda_mult=lambda_mult - ) + return [Document(page_content=d.content, metadata=d.metadata) for d in results] @classmethod def from_texts(