diff --git a/cookbook/oracleai_demo.ipynb b/cookbook/oracleai_demo.ipynb index 9dcb08f934d..3d0f0eaf3f6 100644 --- a/cookbook/oracleai_demo.ipynb +++ b/cookbook/oracleai_demo.ipynb @@ -47,10 +47,12 @@ "source": [ "### Prerequisites\n", "\n", - "Please install the Oracle Database [python-oracledb driver](https://pypi.org/project/oracledb/) to use LangChain with Oracle AI Vector Search:\n", + "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", + "\n", + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb.\n", "\n", "```\n", - "$ python -m pip install --upgrade oracledb\n", + "$ python -m pip install -U langchain-oracledb\n", "```" ] }, @@ -217,7 +219,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.embeddings.oracleai import OracleEmbeddings\n", + "from langchain_oracledb.embeddings.oracleai import OracleEmbeddings\n", "\n", "# please update with your related information\n", "# make sure that you have onnx file in the system\n", @@ -296,7 +298,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.document_loaders.oracleai import OracleDocLoader\n", + "from langchain_oracledb.document_loaders.oracleai import OracleDocLoader\n", "from langchain_core.documents import Document\n", "\n", "# loading from Oracle Database table\n", @@ -354,7 +356,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.utilities.oracleai import OracleSummary\n", + "from langchain_oracledb.utilities.oracleai import OracleSummary\n", "from langchain_core.documents import Document\n", "\n", "# using 'database' provider\n", @@ -395,7 +397,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.document_loaders.oracleai import OracleTextSplitter\n", + "from langchain_oracledb.document_loaders.oracleai import OracleTextSplitter\n", "from langchain_core.documents import Document\n", "\n", "# split 
by default parameters\n", @@ -452,7 +454,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.embeddings.oracleai import OracleEmbeddings\n", + "from langchain_oracledb.embeddings.oracleai import OracleEmbeddings\n", "from langchain_core.documents import Document\n", "\n", "# using ONNX model loaded to Oracle Database\n", @@ -498,14 +500,14 @@ "import sys\n", "\n", "import oracledb\n", - "from langchain_community.document_loaders.oracleai import (\n", + "from langchain_oracledb.document_loaders.oracleai import (\n", " OracleDocLoader,\n", " OracleTextSplitter,\n", ")\n", - "from langchain_community.embeddings.oracleai import OracleEmbeddings\n", - "from langchain_community.utilities.oracleai import OracleSummary\n", - "from langchain_community.vectorstores import oraclevs\n", - "from langchain_community.vectorstores.oraclevs import OracleVS\n", + "from langchain_oracledb.embeddings.oracleai import OracleEmbeddings\n", + "from langchain_oracledb.utilities.oracleai import OracleSummary\n", + "from langchain_oracledb.vectorstores import oraclevs\n", + "from langchain_oracledb.vectorstores.oraclevs import OracleVS\n", "from langchain_community.vectorstores.utils import DistanceStrategy\n", "from langchain_core.documents import Document" ] @@ -677,19 +679,19 @@ "outputs": [], "source": [ "query = \"What is Oracle AI Vector Store?\"\n", - "filter = {\"document_id\": [\"1\"]}\n", + "db_filter = {\"document_id\": \"1\"}\n", "\n", "# Similarity search without a filter\n", "print(vectorstore.similarity_search(query, 1))\n", "\n", "# Similarity search with a filter\n", - "print(vectorstore.similarity_search(query, 1, filter=filter))\n", + "print(vectorstore.similarity_search(query, 1, filter=db_filter))\n", "\n", "# Similarity search with relevance score\n", "print(vectorstore.similarity_search_with_score(query, 1))\n", "\n", "# Similarity search with relevance score with filter\n", - "print(vectorstore.similarity_search_with_score(query, 1, 
filter=filter))\n", + "print(vectorstore.similarity_search_with_score(query, 1, filter=db_filter))\n", "\n", "# Max marginal relevance search\n", "print(vectorstore.max_marginal_relevance_search(query, 1, fetch_k=20, lambda_mult=0.5))\n", @@ -697,7 +699,7 @@ "# Max marginal relevance search with filter\n", "print(\n", " vectorstore.max_marginal_relevance_search(\n", - " query, 1, fetch_k=20, lambda_mult=0.5, filter=filter\n", + " query, 1, fetch_k=20, lambda_mult=0.5, filter=db_filter\n", " )\n", ")" ] diff --git a/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb b/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb index 02405461cc3..7b710a98547 100644 --- a/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb +++ b/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb @@ -16,13 +16,7 @@ "This notebook covers how to load documents from Oracle Autonomous Database.\n", "\n", "## Prerequisites\n", - "1. Install python-oracledb:\n", - "\n", - " `pip install oracledb`\n", - " \n", - " See [Installing python-oracledb](https://python-oracledb.readthedocs.io/en/latest/user_guide/installation.html).\n", - "\n", - "2. A database that python-oracledb's default 'Thin' mode can connected to. This is true of Oracle Autonomous Database, see [python-oracledb Architecture](https://python-oracledb.readthedocs.io/en/latest/user_guide/introduction.html#architecture).\n" + "1. A database that python-oracledb's default 'Thin' mode can connect to. 
This is true of Oracle Autonomous Database, see [python-oracledb Architecture](https://python-oracledb.readthedocs.io/en/latest/user_guide/introduction.html#architecture).\n" ] }, { @@ -38,17 +32,12 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "pip install oracledb" + "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", + "\n", + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb." ] }, { @@ -62,7 +51,21 @@ }, "outputs": [], "source": [ - "from langchain_community.document_loaders import OracleAutonomousDatabaseLoader\n", + "# python -m pip install -U langchain-oracledb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "from langchain_oracledb.document_loaders import OracleAutonomousDatabaseLoader\n", "from settings import s" ] }, @@ -99,7 +102,7 @@ " config_dir=s.CONFIG_DIR,\n", " wallet_location=s.WALLET_LOCATION,\n", " wallet_password=s.PASSWORD,\n", - " tns_name=s.TNS_NAME,\n", + " dsn=s.DSN,\n", ")\n", "doc_1 = doc_loader_1.load()\n", "\n", @@ -108,7 +111,7 @@ " user=s.USERNAME,\n", " password=s.PASSWORD,\n", " schema=s.SCHEMA,\n", - " connection_string=s.CONNECTION_STRING,\n", + " dsn=s.DSN,\n", " wallet_location=s.WALLET_LOCATION,\n", " wallet_password=s.PASSWORD,\n", ")\n", @@ -147,7 +150,7 @@ " password=s.PASSWORD,\n", " schema=s.SCHEMA,\n", " config_dir=s.CONFIG_DIR,\n", - " tns_name=s.TNS_NAME,\n", + " dsn=s.DSN,\n", " parameters=[\"Direct Sales\"],\n", ")\n", "doc_3 = doc_loader_3.load()\n", @@ -157,7 +160,7 @@ " user=s.USERNAME,\n", " password=s.PASSWORD,\n", " schema=s.SCHEMA,\n", - " connection_string=s.CONNECTION_STRING,\n", + 
" dsn=s.DSN,\n", " parameters=[\"Direct Sales\"],\n", ")\n", "doc_4 = doc_loader_4.load()" diff --git a/docs/docs/integrations/document_loaders/oracleai.ipynb b/docs/docs/integrations/document_loaders/oracleai.ipynb index 0f521cf979b..832105d19b0 100644 --- a/docs/docs/integrations/document_loaders/oracleai.ipynb +++ b/docs/docs/integrations/document_loaders/oracleai.ipynb @@ -42,7 +42,9 @@ "source": [ "### Prerequisites\n", "\n", - "Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. " + "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", + "\n", + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb." ] }, { @@ -51,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "# pip install oracledb" + "# python -m pip install -U langchain-oracledb" ] }, { @@ -154,7 +156,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.document_loaders.oracleai import OracleDocLoader\n", + "from langchain_oracledb.document_loaders.oracleai import OracleDocLoader\n", "from langchain_core.documents import Document\n", "\n", "\"\"\"\n", @@ -199,7 +201,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.document_loaders.oracleai import OracleTextSplitter\n", + "from langchain_oracledb.document_loaders.oracleai import OracleTextSplitter\n", "from langchain_core.documents import Document\n", "\n", "\"\"\"\n", diff --git a/docs/docs/integrations/text_embedding/oracleai.ipynb b/docs/docs/integrations/text_embedding/oracleai.ipynb index 1cb2c2adca7..ee343ba5e30 100644 --- a/docs/docs/integrations/text_embedding/oracleai.ipynb +++ b/docs/docs/integrations/text_embedding/oracleai.ipynb @@ -42,7 +42,9 @@ "source": [ "### Prerequisites\n", "\n", - "Ensure you have the Oracle Python Client driver installed to facilitate the integration of Langchain with Oracle AI Vector Search." 
+ "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", + "\n", + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb." ] }, { @@ -51,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "# pip install oracledb" + "# python -m pip install -U langchain-oracledb" ] }, { @@ -113,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.embeddings.oracleai import OracleEmbeddings\n", + "from langchain_oracledb.embeddings.oracleai import OracleEmbeddings\n", "\n", "# Update the directory and file names for your ONNX model\n", "# make sure that you have onnx file in the system\n", @@ -223,7 +225,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.embeddings.oracleai import OracleEmbeddings\n", + "from langchain_oracledb.embeddings.oracleai import OracleEmbeddings\n", "from langchain_core.documents import Document\n", "\n", "\"\"\"\n", @@ -237,10 +239,10 @@ "\n", "# using huggingface\n", "embedder_params = {\n", - " \"provider\": \"huggingface\", \n", - " \"credential_name\": \"HF_CRED\", \n", - " \"url\": \"https://api-inference.huggingface.co/pipeline/feature-extraction/\", \n", - " \"model\": \"sentence-transformers/all-MiniLM-L6-v2\", \n", + " \"provider\": \"huggingface\",\n", + " \"credential_name\": \"HF_CRED\",\n", + " \"url\": \"https://api-inference.huggingface.co/pipeline/feature-extraction/\",\n", + " \"model\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"wait_for_model\": \"true\"\n", "}\n", "\"\"\"\n", diff --git a/docs/docs/integrations/tools/oracleai.ipynb b/docs/docs/integrations/tools/oracleai.ipynb index d857c1f73e7..97ecdd97f4e 100644 --- a/docs/docs/integrations/tools/oracleai.ipynb +++ b/docs/docs/integrations/tools/oracleai.ipynb @@ -42,7 +42,9 @@ "source": [ "### Prerequisites\n", "\n", - "Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. 
" + "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", + "\n", + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb." ] }, { @@ -51,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "# pip install oracledb" + "# python -m pip install -U langchain-oracledb" ] }, { @@ -123,7 +125,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.utilities.oracleai import OracleSummary\n", + "from langchain_oracledb.utilities.oracleai import OracleSummary\n", "from langchain_core.documents import Document\n", "\n", "\"\"\"\n", diff --git a/docs/docs/integrations/vectorstores/oracle.ipynb b/docs/docs/integrations/vectorstores/oracle.ipynb index fbdf8085b7e..f5c9541e9c0 100644 --- a/docs/docs/integrations/vectorstores/oracle.ipynb +++ b/docs/docs/integrations/vectorstores/oracle.ipynb @@ -43,9 +43,9 @@ "source": [ "### Prerequisites for using Langchain with Oracle AI Vector Search\n", "\n", - "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n", + "You'll need to install `langchain-oracledb` with `python -m pip install -U langchain-oracledb` to use this integration.\n", "\n", - "Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. " + "The `python-oracledb` driver is installed automatically as a dependency of langchain-oracledb." 
] }, { @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "# pip install oracledb" + "# python -m pip install -U langchain-oracledb" ] }, { @@ -103,8 +103,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.vectorstores import oraclevs\n", - "from langchain_community.vectorstores.oraclevs import OracleVS\n", + "from langchain_oracledb.vectorstores import oraclevs\n", + "from langchain_oracledb.vectorstores.oraclevs import OracleVS\n", "from langchain_community.vectorstores.utils import DistanceStrategy\n", "from langchain_core.documents import Document\n", "from langchain_huggingface import HuggingFaceEmbeddings" @@ -400,7 +400,111 @@ "id": "7223d048-5c0b-4e91-a91b-a7daa9f86758", "metadata": {}, "source": [ - "### Demonstrate advanced searches on all six vector stores, with and without attribute filtering – with filtering, we only select the document id 101 and nothing else" + "### Demonstrate advanced searches on all six vector stores, with and without attribute filtering – with filtering, we only select the document id 101 and nothing else.\n", + "\n", + "Oracle Database 23ai supports pre-filtering, in-filtering, and post-filtering to enhance AI Vector Search capabilities. These filtering mechanisms allow users to apply constraints before, during, and after performing vector similarity searches, improving search performance and accuracy.\n", + "\n", + "Key Points about Filtering in Oracle 23ai:\n", + "1. Pre-filtering\n", + " Applies traditional SQL filters to reduce the dataset before performing the vector similarity search.\n", + " Helps improve efficiency by limiting the amount of data processed by AI algorithms.\n", + "2. In-filtering\n", + " Utilizes AI Vector Search to perform similarity searches directly on vector embeddings, using optimized indexes and algorithms.\n", + " Efficiently filters results based on vector similarity without requiring full dataset scans.\n", + "3. 
Post-filtering\n", + " Applies additional SQL filtering to refine the results after the vector similarity search.\n", + " Allows further refinement based on business logic or additional metadata conditions.\n", + "\n", + "\n", + "**Why is this Important?**\n", + "- Performance Optimization: Pre-filtering significantly reduces query execution time, making searches on massive datasets more efficient.\n", + "- Accuracy Enhancement: In-filtering ensures that vector searches are semantically meaningful, improving the quality of search results.\n" + ] + }, + { + "cell_type": "markdown", + "id": "71406bf9", + "metadata": {}, + "source": [ + "#### Filter Details\n", + "\n", + "`OracleVS` supports a set of filters that can be applied to `metadata` fields using the `filter` parameter. These filters allow you to select and refine data based on various criteria. \n", + "\n", + "**Available Filter Operators:**\n", + "\n", + "| Operator | Description |\n", + "|--------------------------|--------------------------------------------------------------------------------------------------|\n", + "| \\$exists | Field exists. |\n", + "| \\$eq | Field value equals the operand value (`=`). |\n", + "| \\$ne | Field exists and value does not equal the operand value (`!=`). |\n", + "| \\$gt | Field value is greater than the operand value (`>`). |\n", + "| \\$lt | Field value is less than the operand value (`<`). |\n", + "| \\$gte | Field value is greater than or equal to the operand value (`>=`). |\n", + "| \\$lte | Field value is less than or equal to the operand value (`<=`). |\n", + "| \\$between | Field value is between (or equal to) two values in the operand array. |\n", + "| \\$startsWith | Field value starts with the operand value. |\n", + "| \\$hasSubstring | Field value contains the operand as a substring. |\n", + "| \\$instr | Field value contains the operand as a substring. |\n", + "| \\$regex | Field value matches the given regular expression pattern. 
|\n", + "| \\$like | Field value matches the operand pattern (using SQL-like syntax). |\n", + "| \\$in | Field value equals at least one value in the operand array. |\n", + "| \\$nin | Field exists, but its value is not equal to any in the operand array, or the field does not exist.|\n", + "| \\$all | Field value is an array containing all items from the operand array, or a scalar matching a single operand. |\n", + "\n", + "- You can combine these filters using logical operators:\n", + "\n", + "| Logical Operator | Description |\n", + "|------------------|----------------------|\n", + "| \\$and | Logical AND |\n", + "| \\$or | Logical OR |\n", + "| \\$nor | Logical NOR |\n", + "\n", + "**Example Filter:**\n", + "```json\n", + "{\n", + " \"age\": 65,\n", + " \"name\": {\"$regex\": \"*rk\"},\n", + " \"$or\": [\n", + " {\n", + " \"$and\": [\n", + " {\"name\": \"Jason\"},\n", + " {\"drinks\": {\"$in\": [\"tea\", \"soda\"]}}\n", + " ]\n", + " },\n", + " {\n", + " \"$nor\": [\n", + " {\"age\": {\"$lt\": 65}},\n", + " {\"name\": \"Jason\"}\n", + " ]\n", + " }\n", + " ]\n", + "}\n", + "```\n", + "\n", + "**Additional Usage Tips:**\n", + "- You can omit `$and` when all filters in an object must be satisfied. 
These two are equivalent:\n", + "```json\n", + "{ \"$and\": [\n", + " { \"name\": { \"$startsWith\": \"Fred\" } },\n", + " { \"salary\": { \"$gt\": 10000, \"$lte\": 20000 } }\n", + "]}\n", + "```\n", + "```json\n", + "{\n", + " \"name\": { \"$startsWith\": \"Fred\" },\n", + " \"salary\": { \"$gt\": 10000, \"$lte\": 20000 }\n", + "}\n", + "```\n", + "- The `$not` clause can negate a comparison operator:\n", + "```json\n", + "{ \"address.zip\": { \"$not\": { \"$eq\": \"90001\" } } }\n", + "```\n", + "- Using `field: scalar` is equivalent to `field: { \"$eq\": scalar }`:\n", + "```json\n", + "{ \"animal\": \"cat\" }\n", + "```\n", + "\n", + "For more filter examples, refer to the [test specification](https://github.com/oracle/langchain-oracle/blob/main/libs/oracledb/tests/integration_tests/vectorstores/test_oraclevs.py)." ] }, { @@ -415,7 +519,23 @@ " query = \"How are LOBS stored in Oracle Database\"\n", " # Constructing a filter for direct comparison against document metadata\n", " # This filter aims to include documents whose metadata 'id' is exactly '2'\n", - " filter_criteria = {\"id\": [\"101\"]} # Direct comparison filter\n", + " db_filter = {\n", + " \"$and\": [\n", + " {\"id\": \"101\"}, # FilterCondition\n", + " {\n", + " \"$or\": [ # FilterGroup\n", + " {\"status\": \"approved\"},\n", + " {\"link\": \"Document Example Test 2\"},\n", + " {\n", + " \"$and\": [ # Nested FilterGroup\n", + " {\"status\": \"approved\"},\n", + " {\"link\": \"Document Example Test 2\"},\n", + " ]\n", + " },\n", + " ]\n", + " },\n", + " ]\n", + " }\n", "\n", " for i, vs in enumerate(vector_stores, start=1):\n", " print(f\"\\n--- Vector Store {i} Advanced Searches ---\")\n", @@ -425,7 +545,7 @@ "\n", " # Similarity search with a filter\n", " print(\"\\nSimilarity search results with filter:\")\n", - " print(vs.similarity_search(query, 2, filter=filter_criteria))\n", + " print(vs.similarity_search(query, 2, filter=db_filter))\n", "\n", " # Similarity search with relevance score\n", " 
print(\"\\nSimilarity search with relevance score:\")\n", @@ -433,7 +553,7 @@ "\n", " # Similarity search with relevance score with filter\n", " print(\"\\nSimilarity search with relevance score with filter:\")\n", - " print(vs.similarity_search_with_score(query, 2, filter=filter_criteria))\n", + " print(vs.similarity_search_with_score(query, 2, filter=db_filter))\n", "\n", " # Max marginal relevance search\n", " print(\"\\nMax marginal relevance search results:\")\n", @@ -443,7 +563,7 @@ " print(\"\\nMax marginal relevance search results with filter:\")\n", " print(\n", " vs.max_marginal_relevance_search(\n", - " query, 2, fetch_k=20, lambda_mult=0.5, filter=filter_criteria\n", + " query, 2, fetch_k=20, lambda_mult=0.5, filter=db_filter\n", " )\n", " )\n", "\n", @@ -477,7 +597,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.13.5" } }, "nbformat": 4,