add filter to sklearn vector store functions (#8113)

# What - This is to add a filter option to the sklearn vector store functions  --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2025-09-12 12:59:07 +00:00 · 2023-08-04 15:06:41 +09:00
parent 2759e2d857
commit 0f0ccfe7f6
3 changed files with 131 additions and 26 deletions
--- a/docs/extras/integrations/vectorstores/sklearn.ipynb
+++ b/docs/extras/integrations/vectorstores/sklearn.ipynb
@@ -13,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -56,7 +56,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -65,7 +65,7 @@
    "from langchain.vectorstores import SKLearnVectorStore\n",
    "from langchain.document_loaders import TextLoader\n",
    "\n",
-    "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
+    "loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
@@ -81,7 +81,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
@@ -100,6 +100,7 @@
   ],
   "source": [
    "import tempfile\n",
+    "import os\n",
    "\n",
    "persist_path = os.path.join(tempfile.gettempdir(), \"union.parquet\")\n",
    "\n",
@@ -184,6 +185,32 @@
    "print(docs[0].page_content)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Filter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "_filter = {\"id\": \"c53e6eac-0070-403c-8435-a9e528539610\"}\n",
+    "docs = vector_store.similarity_search(query, filter=_filter)\n",
+    "print(len(docs))"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -217,7 +244,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.1"
  }
 },
 "nbformat": 4,