Harrison/add top k (#4707)

Co-authored-by: blc16 <benlc@umich.edu>
2025-09-07 14:03:26 +00:00 · 2023-05-15 09:09:22 -07:00
parent 0551594722
commit dd95f0892d
6 changed files with 209 additions and 20 deletions
--- a/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb
+++ b/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb
@@ -32,7 +32,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "id": "cb4a5787",
   "metadata": {},
   "outputs": [],
@@ -46,7 +46,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "id": "bcbe04d9",
   "metadata": {},
   "outputs": [
@@ -83,7 +83,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "id": "86e34dbf",
   "metadata": {},
   "outputs": [],
@@ -138,7 +138,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "query='dinosaur' filter=None\n"
+      "query='dinosaur' filter=None limit=None\n"
     ]
    },
    {
@@ -170,7 +170,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "query=' ' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)\n"
+      "query=' ' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5) limit=None\n"
     ]
    },
    {
@@ -200,7 +200,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig')\n"
+      "query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig') limit=None\n"
     ]
    },
    {
@@ -229,7 +229,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='science fiction'), Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)])\n"
+      "query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='science fiction')]) limit=None\n"
     ]
    },
    {
@@ -258,7 +258,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "query='toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='year', value=1990), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2005), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated')])\n"
+      "query='toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='year', value=1990), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2005), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated')]) limit=None\n"
     ]
    },
    {
@@ -277,10 +277,69 @@
    "retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "87513116",
+   "metadata": {},
+   "source": [
+    "## Filter k\n",
+    "\n",
+    "We can also use the self-query retriever to specify `k`, the number of documents to fetch.\n",
+    "\n",
+    "We can do this by passing `enable_limit=True` to `SelfQueryRetriever.from_llm`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "73cfca56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = SelfQueryRetriever.from_llm(\n",
+    "    llm, \n",
+    "    vectorstore, \n",
+    "    document_content_description, \n",
+    "    metadata_field_info, \n",
+    "    enable_limit=True,\n",
+    "    verbose=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "60110338",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "query='dinosaur' filter=None limit=2\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
+       " Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# This example specifies a relevant query and a limit on the number of documents to return\n",
+    "retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "60110338",
+   "id": "f15d84b3",
   "metadata": {},
   "outputs": [],
   "source": []
--- a/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb
+++ b/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb
@@ -295,13 +295,45 @@
    "retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "6fe7536c",
+   "metadata": {},
+   "source": [
+    "## Filter k\n",
+    "\n",
+    "We can also use the self-query retriever to specify `k`, the number of documents to fetch.\n",
+    "\n",
+    "We can do this by passing `enable_limit=True` to `SelfQueryRetriever.from_llm`."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "69bbd809",
+   "id": "3a2937c2",
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
+    "retriever = SelfQueryRetriever.from_llm(\n",
+    "    llm, \n",
+    "    vectorstore, \n",
+    "    document_content_description, \n",
+    "    metadata_field_info, \n",
+    "    enable_limit=True,\n",
+    "    verbose=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "83d233aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example specifies a relevant query and a limit on the number of documents to return\n",
+    "retriever.get_relevant_documents(\"What are two movies about dinosaurs\")"
+   ]
  }
 ],
 "metadata": {