Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-16 18:24:31 +00:00)
Compare commits (24 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 5c000f8d79 | |
| | 8c13e8a79b | |
| | 7ecf9996f1 | |
| | fee91d43b7 | |
| | 62559b20b3 | |
| | 484a009012 | |
| | 27e73ebe57 | |
| | 6feddfae88 | |
| | 811e9cee8b | |
| | 144f2821af | |
| | 682d21c3de | |
| | ee6c922c91 | |
| | 5b6d1a907d | |
| | c038991590 | |
| | b868c78a12 | |
| | d310f9c71e | |
| | ba9dc04ffa | |
| | 8021d2a2ab | |
| | c9e9470c5a | |
| | ee1adaacaa | |
| | 9639457222 | |
| | 3ef8b24277 | |
| | 36c2ca3c8b | |
| | 6e1e0c7d5c | |
@@ -57,3 +57,4 @@ Notebook | Description
[two_agent_debate_tools.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/two_agent_debate_tools.ipynb) | Simulate multi-agent dialogues where the agents can utilize various tools.
[two_player_dnd.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/two_player_dnd.ipynb) | Simulate a two-player Dungeons & Dragons game, where a dialogue simulator class is used to coordinate the dialogue between the protagonist and the dungeon master.
[wikibase_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/wikibase_agent.ipynb) | Create a simple Wikibase agent that utilizes SPARQL generation, with testing done on http://wikidata.org.
[oracleai_demo.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) | This guide outlines how to utilize Oracle AI Vector Search alongside Langchain for an end-to-end RAG pipeline, providing step-by-step examples. The process includes loading documents from various sources using OracleDocLoader, summarizing them either within or outside the database with OracleSummary, and generating embeddings similarly through OracleEmbeddings. It also covers chunking documents according to specific requirements using Advanced Oracle Capabilities from OracleTextSplitter, and finally, storing and indexing these documents in a Vector Store for querying with OracleVS.
872	cookbook/oracleai_demo.ipynb (new file)
@@ -0,0 +1,872 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search with Document Processing\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords.\n",
|
||||
"One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"In addition, because Oracle has been building database technologies for so long, your vectors can benefit from all of Oracle Database's most powerful features, like the following:\n",
|
||||
"\n",
|
||||
" * Partitioning Support\n",
|
||||
" * Real Application Clusters scalability\n",
|
||||
" * Exadata smart scans\n",
|
||||
" * Shard processing across geographically distributed databases\n",
|
||||
" * Transactions\n",
|
||||
" * Parallel SQL\n",
|
||||
" * Disaster recovery\n",
|
||||
" * Security\n",
|
||||
" * Oracle Machine Learning\n",
|
||||
" * Oracle Graph Database\n",
|
||||
" * Oracle Spatial and Graph\n",
|
||||
" * Oracle Blockchain\n",
|
||||
" * JSON\n",
|
||||
"\n",
|
||||
"This guide demonstrates how Oracle AI Vector Search can be used with Langchain to serve an end-to-end RAG pipeline. This guide goes through examples of:\n",
|
||||
"\n",
|
||||
" * Loading the documents from various sources using OracleDocLoader\n",
|
||||
" * Summarizing them within/outside the database using OracleSummary\n",
|
||||
" * Generating embeddings for them within/outside the database using OracleEmbeddings\n",
|
||||
" * Chunking them according to different requirements using Advanced Oracle Capabilities from OracleTextSplitter\n",
|
||||
" * Storing and Indexing them in a Vector Store and querying them for queries in OracleVS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Demo User\n",
|
||||
"First, create a demo user with all the required privileges. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Connection successful!\n",
|
||||
"User setup done!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"# please make sure this user has sufficient privileges to perform all below\n",
|
||||
"username = \"\"\n",
|
||||
"password = \"\"\n",
|
||||
"dsn = \"\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"\n",
|
||||
" cursor = conn.cursor()\n",
|
||||
" cursor.execute(\n",
|
||||
" \"\"\"\n",
|
||||
" begin\n",
|
||||
" -- drop user\n",
|
||||
" begin\n",
|
||||
" execute immediate 'drop user testuser cascade';\n",
|
||||
" exception\n",
|
||||
" when others then\n",
|
||||
" dbms_output.put_line('Error setting up user.');\n",
|
||||
" end;\n",
|
||||
" execute immediate 'create user testuser identified by testuser';\n",
|
||||
" execute immediate 'grant connect, unlimited tablespace, create credential, create procedure, create any index to testuser';\n",
|
||||
" execute immediate 'create or replace directory DEMO_PY_DIR as ''/scratch/hroy/view_storage/hroy_devstorage/demo/orachain''';\n",
|
||||
" execute immediate 'grant read, write on directory DEMO_PY_DIR to public';\n",
|
||||
" execute immediate 'grant create mining model to testuser';\n",
|
||||
"\n",
|
||||
" -- network access\n",
|
||||
" begin\n",
|
||||
" DBMS_NETWORK_ACL_ADMIN.APPEND_HOST_ACE(\n",
|
||||
" host => '*',\n",
|
||||
" ace => xs$ace_type(privilege_list => xs$name_list('connect'),\n",
|
||||
" principal_name => 'testuser',\n",
|
||||
" principal_type => xs_acl.ptype_db));\n",
|
||||
" end;\n",
|
||||
" end;\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" print(\"User setup done!\")\n",
|
||||
" cursor.close()\n",
|
||||
" conn.close()\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"User setup failed!\")\n",
|
||||
" cursor.close()\n",
|
||||
" conn.close()\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Process Documents using Oracle AI\n",
|
||||
"Let's think about a scenario that the users have some documents in Oracle Database or in a file system. They want to use the data for Oracle AI Vector Search using Langchain.\n",
|
||||
"\n",
|
||||
"For that, the users need to do some document preprocessing. The first step would be to read the documents, generate their summary(if needed) and then chunk/split them if needed. After that, they need to generate the embeddings for those chunks and store into Oracle AI Vector Store. Finally, the users will perform some semantic queries on those data. \n",
|
||||
"\n",
|
||||
"Oracle AI Vector Search Langchain library provides a range of document processing functionalities including document loading, splitting, generating summary and embeddings.\n",
|
||||
"\n",
|
||||
"In the following sections, we will go through how to use Oracle AI Langchain APIs to achieve each of these functionalities individually. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Demo User\n",
|
||||
"The following sample code will show how to connect to Oracle Database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Connection successful!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"username = \"\"\n",
|
||||
"password = \"\"\n",
|
||||
"dsn = \"\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Populate a Demo Table\n",
|
||||
"Create a demo table and insert some sample documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Table created and populated.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" cursor = conn.cursor()\n",
|
||||
"\n",
|
||||
" drop_table_sql = \"\"\"drop table demo_tab\"\"\"\n",
|
||||
" cursor.execute(drop_table_sql)\n",
|
||||
"\n",
|
||||
" create_table_sql = \"\"\"create table demo_tab (id number, data clob)\"\"\"\n",
|
||||
" cursor.execute(create_table_sql)\n",
|
||||
"\n",
|
||||
" insert_row_sql = \"\"\"insert into demo_tab values (:1, :2)\"\"\"\n",
|
||||
" rows_to_insert = [\n",
|
||||
" (\n",
|
||||
" 1,\n",
|
||||
" \"If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.\",\n",
|
||||
" ),\n",
|
||||
" (\n",
|
||||
" 2,\n",
|
||||
" \"A tablespace can be online (accessible) or offline (not accessible) whenever the database is open.\\nA tablespace is usually online so that its data is available to users. The SYSTEM tablespace and temporary tablespaces cannot be taken offline.\",\n",
|
||||
" ),\n",
|
||||
" (\n",
|
||||
" 3,\n",
|
||||
" \"The database stores LOBs differently from other data types. Creating a LOB column implicitly creates a LOB segment and a LOB index. The tablespace containing the LOB segment and LOB index, which are always stored together, may be different from the tablespace containing the table.\\nSometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.\",\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" cursor.executemany(insert_row_sql, rows_to_insert)\n",
|
||||
"\n",
|
||||
" conn.commit()\n",
|
||||
"\n",
|
||||
" print(\"Table created and populated.\")\n",
|
||||
" cursor.close()\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Table creation failed.\")\n",
|
||||
" cursor.close()\n",
|
||||
" conn.close()\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Now that we have a demo user and a demo table with some data, we just need to do one more setup. For embedding and summary, we have a few provider options that the users can choose from such as database, 3rd party providers like ocigenai, huggingface, openai, etc. If the users choose to use 3rd party provider, they need to create a credential with corresponding authentication information. On the other hand, if the users choose to use 'database' as provider, they need to load an onnx model to Oracle Database for embeddings; however, for summary, they don't need to do anything."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load ONNX Model\n",
|
||||
"\n",
|
||||
"To generate embeddings, Oracle provides a few provider options for users to choose from. The users can choose 'database' provider or some 3rd party providers like OCIGENAI, HuggingFace, etc.\n",
|
||||
"\n",
|
||||
"***Note*** If the users choose database option, they need to load an ONNX model to Oracle Database. The users do not need to load an ONNX model to Oracle Database if they choose to use 3rd party provider to generate embeddings.\n",
|
||||
"\n",
|
||||
"One of the core benefits of using an ONNX model is that the users do not need to transfer their data to 3rd party to generate embeddings. And also, since it does not involve any network or REST API calls, it may provide better performance.\n",
|
||||
"\n",
|
||||
"Here is the sample code to load an ONNX model to Oracle Database:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ONNX model loaded.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.embeddings.oracleai import OracleEmbeddings\n",
|
||||
"\n",
|
||||
"# please update with your related information\n",
|
||||
"# make sure that you have onnx file in the system\n",
|
||||
"onnx_dir = \"DEMO_PY_DIR\"\n",
|
||||
"onnx_file = \"tinybert.onnx\"\n",
|
||||
"model_name = \"demo_model\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)\n",
|
||||
" print(\"ONNX model loaded.\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"ONNX model loading failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Credential\n",
|
||||
"\n",
|
||||
"On the other hand, if the users choose to use 3rd party provider to generate embeddings and summary, they need to create credential to access 3rd party provider's end points.\n",
|
||||
"\n",
|
||||
"***Note:*** The users do not need to create any credential if they choose to use 'database' provider to generate embeddings and summary. Should the users choose to 3rd party provider, they need to create credential for the 3rd party provider they want to use. \n",
|
||||
"\n",
|
||||
"Here is a sample example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" cursor = conn.cursor()\n",
|
||||
" cursor.execute(\n",
|
||||
" \"\"\"\n",
|
||||
" declare\n",
|
||||
" jo json_object_t;\n",
|
||||
" begin\n",
|
||||
" -- HuggingFace\n",
|
||||
" dbms_vector_chain.drop_credential(credential_name => 'HF_CRED');\n",
|
||||
" jo := json_object_t();\n",
|
||||
" jo.put('access_token', '<access_token>');\n",
|
||||
" dbms_vector_chain.create_credential(\n",
|
||||
" credential_name => 'HF_CRED',\n",
|
||||
" params => json(jo.to_string));\n",
|
||||
"\n",
|
||||
" -- OCIGENAI\n",
|
||||
" dbms_vector_chain.drop_credential(credential_name => 'OCI_CRED');\n",
|
||||
" jo := json_object_t();\n",
|
||||
" jo.put('user_ocid','<user_ocid>');\n",
|
||||
" jo.put('tenancy_ocid','<tenancy_ocid>');\n",
|
||||
" jo.put('compartment_ocid','<compartment_ocid>');\n",
|
||||
" jo.put('private_key','<private_key>');\n",
|
||||
" jo.put('fingerprint','<fingerprint>');\n",
|
||||
" dbms_vector_chain.create_credential(\n",
|
||||
" credential_name => 'OCI_CRED',\n",
|
||||
" params => json(jo.to_string));\n",
|
||||
" end;\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" cursor.close()\n",
|
||||
" print(\"Credentials created.\")\n",
|
||||
"except Exception as ex:\n",
|
||||
" cursor.close()\n",
|
||||
" raise"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Documents\n",
|
||||
"The users can load the documents from Oracle Database or a file system or both. They just need to set the loader parameters accordingly. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters.\n",
|
||||
"\n",
|
||||
"The main benefit of using OracleDocLoader is that it can handle 150+ different file formats. You don't need to use different types of loader for different file formats. Here is the list formats that we support: [Oracle Text Supported Document Formats](https://docs.oracle.com/en/database/oracle/oracle-database/23/ccref/oracle-text-supported-document-formats.html)\n",
|
||||
"\n",
|
||||
"The following sample code will show how to do that:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of docs loaded: 3\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.oracleai import OracleDocLoader\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"# loading from Oracle Database table\n",
|
||||
"# make sure you have the table with this specification\n",
|
||||
"loader_params = {}\n",
|
||||
"loader_params = {\n",
|
||||
" \"owner\": \"testuser\",\n",
|
||||
" \"tablename\": \"demo_tab\",\n",
|
||||
" \"colname\": \"data\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\"\"\" load the docs \"\"\"\n",
|
||||
"loader = OracleDocLoader(conn=conn, params=loader_params)\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of docs loaded: {len(docs)}\")\n",
|
||||
"# print(f\"Document-0: {docs[0].page_content}\") # content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate Summary\n",
|
||||
"Now that the user loaded the documents, they may want to generate a summary for each document. The Oracle AI Vector Search Langchain library provides an API to do that. There are a few summary generation provider options including Database, OCIGENAI, HuggingFace and so on. The users can choose their preferred provider to generate a summary. Like before, they just need to set the summary parameters accordingly. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***Note:*** The users may need to set proxy if they want to use some 3rd party summary generation providers other than Oracle's in-house and default provider: 'database'. If you don't have proxy, please remove the proxy parameter when you instantiate the OracleSummary."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# proxy to be used when we instantiate summary and embedder object\n",
|
||||
"proxy = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following sample code will show how to generate summary:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of Summaries: 3\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.utilities.oracleai import OracleSummary\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"# using 'database' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"database\",\n",
|
||||
" \"glevel\": \"S\",\n",
|
||||
" \"numParagraphs\": 1,\n",
|
||||
" \"language\": \"english\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# get the summary instance\n",
|
||||
"# Remove proxy if not required\n",
|
||||
"summ = OracleSummary(conn=conn, params=summary_params, proxy=proxy)\n",
|
||||
"\n",
|
||||
"list_summary = []\n",
|
||||
"for doc in docs:\n",
|
||||
" summary = summ.get_summary(doc.page_content)\n",
|
||||
" list_summary.append(summary)\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of Summaries: {len(list_summary)}\")\n",
|
||||
"# print(f\"Summary-0: {list_summary[0]}\") #content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split Documents\n",
|
||||
"The documents can be in different sizes: small, medium, large, or very large. The users like to split/chunk their documents into smaller pieces to generate embeddings. There are lots of different splitting customizations the users can do. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters.\n",
|
||||
"\n",
|
||||
"The following sample code will show how to do that:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of Chunks: 3\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.oracleai import OracleTextSplitter\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"# split by default parameters\n",
|
||||
"splitter_params = {\"normalize\": \"all\"}\n",
|
||||
"\n",
|
||||
"\"\"\" get the splitter instance \"\"\"\n",
|
||||
"splitter = OracleTextSplitter(conn=conn, params=splitter_params)\n",
|
||||
"\n",
|
||||
"list_chunks = []\n",
|
||||
"for doc in docs:\n",
|
||||
" chunks = splitter.split_text(doc.page_content)\n",
|
||||
" list_chunks.extend(chunks)\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of Chunks: {len(list_chunks)}\")\n",
|
||||
"# print(f\"Chunk-0: {list_chunks[0]}\") # content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate Embeddings\n",
|
||||
"Now that the documents are chunked as per requirements, the users may want to generate embeddings for these chunks. Oracle AI Vector Search provides a number of ways to generate embeddings. The users can load an ONNX embedding model to Oracle Database and use it to generate embeddings or use some 3rd party API's end points to generate embeddings. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***Note:*** The users may need to set proxy if they want to use some 3rd party embedding generation providers other than 'database' provider (aka using ONNX model)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# proxy to be used when we instantiate summary and embedder object\n",
|
||||
"proxy = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following sample code will show how to generate embeddings:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of embeddings: 3\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.embeddings.oracleai import OracleEmbeddings\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"# using ONNX model loaded to Oracle Database\n",
|
||||
"embedder_params = {\"provider\": \"database\", \"model\": \"demo_model\"}\n",
|
||||
"\n",
|
||||
"# get the embedding instance\n",
|
||||
"# Remove proxy if not required\n",
|
||||
"embedder = OracleEmbeddings(conn=conn, params=embedder_params, proxy=proxy)\n",
|
||||
"\n",
|
||||
"embeddings = []\n",
|
||||
"for doc in docs:\n",
|
||||
" chunks = splitter.split_text(doc.page_content)\n",
|
||||
" for chunk in chunks:\n",
|
||||
" embed = embedder.embed_query(chunk)\n",
|
||||
" embeddings.append(embed)\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of embeddings: {len(embeddings)}\")\n",
|
||||
"# print(f\"Embedding-0: {embeddings[0]}\") # content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Oracle AI Vector Store\n",
|
||||
"Now that you know how to use Oracle AI Langchain library APIs individually to process the documents, let us show how to integrate with Oracle AI Vector Store to facilitate the semantic searches."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's import all the dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"from langchain_community.document_loaders.oracleai import (\n",
|
||||
" OracleDocLoader,\n",
|
||||
" OracleTextSplitter,\n",
|
||||
")\n",
|
||||
"from langchain_community.embeddings.oracleai import OracleEmbeddings\n",
|
||||
"from langchain_community.utilities.oracleai import OracleSummary\n",
|
||||
"from langchain_community.vectorstores import oraclevs\n",
|
||||
"from langchain_community.vectorstores.oraclevs import OracleVS\n",
|
||||
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
||||
"from langchain_core.documents import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, let's combine all document processing stages together. Here is the sample code below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Connection successful!\n",
|
||||
"ONNX model loaded.\n",
|
||||
"Number of total chunks with metadata: 3\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"\n",
|
||||
"In this sample example, we will use 'database' provider for both summary and embeddings.\n",
|
||||
"So, we don't need to do the followings:\n",
|
||||
" - set proxy for 3rd party providers\n",
|
||||
" - create credential for 3rd party providers\n",
|
||||
"\n",
|
||||
"If you choose to use 3rd party provider, \n",
|
||||
"please follow the necessary steps for proxy and credential.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# oracle connection\n",
|
||||
"# please update with your username, password, hostname, and service_name\n",
|
||||
"username = \"\"\n",
|
||||
"password = \"\"\n",
|
||||
"dsn = \"\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# load onnx model\n",
|
||||
"# please update with your related information\n",
|
||||
"onnx_dir = \"DEMO_PY_DIR\"\n",
|
||||
"onnx_file = \"tinybert.onnx\"\n",
|
||||
"model_name = \"demo_model\"\n",
|
||||
"try:\n",
|
||||
" OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)\n",
|
||||
" print(\"ONNX model loaded.\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"ONNX model loading failed!\")\n",
|
||||
" sys.exit(1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# params\n",
|
||||
"# please update necessary fields with related information\n",
|
||||
"loader_params = {\n",
|
||||
" \"owner\": \"testuser\",\n",
|
||||
" \"tablename\": \"demo_tab\",\n",
|
||||
" \"colname\": \"data\",\n",
|
||||
"}\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"database\",\n",
|
||||
" \"glevel\": \"S\",\n",
|
||||
" \"numParagraphs\": 1,\n",
|
||||
" \"language\": \"english\",\n",
|
||||
"}\n",
|
||||
"splitter_params = {\"normalize\": \"all\"}\n",
|
||||
"embedder_params = {\"provider\": \"database\", \"model\": \"demo_model\"}\n",
|
||||
"\n",
|
||||
"# instantiate loader, summary, splitter, and embedder\n",
|
||||
"loader = OracleDocLoader(conn=conn, params=loader_params)\n",
|
||||
"summary = OracleSummary(conn=conn, params=summary_params)\n",
|
||||
"splitter = OracleTextSplitter(conn=conn, params=splitter_params)\n",
|
||||
"embedder = OracleEmbeddings(conn=conn, params=embedder_params)\n",
|
||||
"\n",
|
||||
"# process the documents\n",
|
||||
"chunks_with_mdata = []\n",
|
||||
"for id, doc in enumerate(docs, start=1):\n",
|
||||
" summ = summary.get_summary(doc.page_content)\n",
|
||||
" chunks = splitter.split_text(doc.page_content)\n",
|
||||
" for ic, chunk in enumerate(chunks, start=1):\n",
|
||||
" chunk_metadata = doc.metadata.copy()\n",
|
||||
" chunk_metadata[\"id\"] = chunk_metadata[\"_oid\"] + \"$\" + str(id) + \"$\" + str(ic)\n",
|
||||
" chunk_metadata[\"document_id\"] = str(id)\n",
|
||||
" chunk_metadata[\"document_summary\"] = str(summ[0])\n",
|
||||
" chunks_with_mdata.append(\n",
|
||||
" Document(page_content=str(chunk), metadata=chunk_metadata)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of total chunks with metadata: {len(chunks_with_mdata)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"At this point, we have processed the documents and generated chunks with metadata. Next, we will create Oracle AI Vector Store with those chunks.\n",
|
||||
"\n",
|
||||
"Here is the sample code how to do that:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Vector Store Table: oravs\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create Oracle AI Vector Store\n",
|
||||
"vectorstore = OracleVS.from_documents(\n",
|
||||
" chunks_with_mdata,\n",
|
||||
" embedder,\n",
|
||||
" client=conn,\n",
|
||||
" table_name=\"oravs\",\n",
|
||||
" distance_strategy=DistanceStrategy.DOT_PRODUCT,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Vector Store Table: {vectorstore.table_name}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above example creates a vector store with DOT_PRODUCT distance strategy. \n",
|
||||
"\n",
|
||||
"However, the users can create Oracle AI Vector Store provides different distance strategies. Please see the [comprehensive guide](/docs/integrations/vectorstores/oracle) for more information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that we have embeddings stored in vector stores, let's create an index on them to get better semantic search performance during query time.\n",
|
||||
"\n",
|
||||
"***Note*** If you are getting some insufficient memory error, please increase ***vector_memory_size*** in your database.\n",
|
||||
"\n",
|
||||
"Here is the sample code to create an index:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"oraclevs.create_index(\n",
|
||||
" conn, vectorstore, params={\"idx_name\": \"hnsw_oravs\", \"idx_type\": \"HNSW\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Index created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above example creates a default HNSW index on the embeddings stored in 'oravs' table. The users can set different parameters as per their requirements. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters.\n",
|
||||
"\n",
|
||||
"Also, there are different types of vector indices that the users can create. Please see the [comprehensive guide](/docs/integrations/vectorstores/oracle) for more information.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Perform Semantic Search\n",
|
||||
"All set!\n",
|
||||
"\n",
|
||||
"We have processed the documents, stored them to vector store, and then created index to get better query performance. Now let's do some semantic searches.\n",
|
||||
"\n",
|
||||
"Here is the sample code for this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='The database stores LOBs differently from other data types. Creating a LOB column implicitly creates a LOB segment and a LOB index. The tablespace containing the LOB segment and LOB index, which are always stored together, may be different from the tablespace containing the table. Sometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.', metadata={'_oid': '662f2f257677f3c2311a8ff999fd34e5', '_rowid': 'AAAR/xAAEAAAAAnAAC', 'id': '662f2f257677f3c2311a8ff999fd34e5$3$1', 'document_id': '3', 'document_summary': 'Sometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.\\n\\n'})]\n",
|
||||
"[]\n",
|
||||
"[(Document(page_content='The database stores LOBs differently from other data types. Creating a LOB column implicitly creates a LOB segment and a LOB index. The tablespace containing the LOB segment and LOB index, which are always stored together, may be different from the tablespace containing the table. Sometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.', metadata={'_oid': '662f2f257677f3c2311a8ff999fd34e5', '_rowid': 'AAAR/xAAEAAAAAnAAC', 'id': '662f2f257677f3c2311a8ff999fd34e5$3$1', 'document_id': '3', 'document_summary': 'Sometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.\\n\\n'}), 0.055675752460956573)]\n",
|
||||
"[]\n",
|
||||
"[Document(page_content='If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.', metadata={'_oid': '662f2f253acf96b33b430b88699490a2', '_rowid': 'AAAR/xAAEAAAAAnAAA', 'id': '662f2f253acf96b33b430b88699490a2$1$1', 'document_id': '1', 'document_summary': 'If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.\\n\\n'})]\n",
|
||||
"[Document(page_content='If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.', metadata={'_oid': '662f2f253acf96b33b430b88699490a2', '_rowid': 'AAAR/xAAEAAAAAnAAA', 'id': '662f2f253acf96b33b430b88699490a2$1$1', 'document_id': '1', 'document_summary': 'If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.\\n\\n'})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What is Oracle AI Vector Store?\"\n",
|
||||
"filter = {\"document_id\": [\"1\"]}\n",
|
||||
"\n",
|
||||
"# Similarity search without a filter\n",
|
||||
"print(vectorstore.similarity_search(query, 1))\n",
|
||||
"\n",
|
||||
"# Similarity search with a filter\n",
|
||||
"print(vectorstore.similarity_search(query, 1, filter=filter))\n",
|
||||
"\n",
|
||||
"# Similarity search with relevance score\n",
|
||||
"print(vectorstore.similarity_search_with_score(query, 1))\n",
|
||||
"\n",
|
||||
"# Similarity search with relevance score with filter\n",
|
||||
"print(vectorstore.similarity_search_with_score(query, 1, filter=filter))\n",
|
||||
"\n",
|
||||
"# Max marginal relevance search\n",
|
||||
"print(vectorstore.max_marginal_relevance_search(query, 1, fetch_k=20, lambda_mult=0.5))\n",
|
||||
"\n",
|
||||
"# Max marginal relevance search with filter\n",
|
||||
"print(\n",
|
||||
" vectorstore.max_marginal_relevance_search(\n",
|
||||
" query, 1, fetch_k=20, lambda_mult=0.5, filter=filter\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -30,13 +30,24 @@
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"from langchain_community.document_loaders.larksuite import LarkSuiteDocLoader\n",
|
||||
"from langchain_community.document_loaders.larksuite import (\n",
|
||||
" LarkSuiteDocLoader,\n",
|
||||
" LarkSuiteWikiLoader,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"DOMAIN = input(\"larksuite domain\")\n",
|
||||
"ACCESS_TOKEN = getpass(\"larksuite tenant_access_token or user_access_token\")\n",
|
||||
"DOCUMENT_ID = input(\"larksuite document id\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b6b9a66",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load From Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
@@ -65,6 +76,38 @@
|
||||
"pprint(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86f4a714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load From Wiki"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7332dfb9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='Test doc\\nThis is a test wiki doc.\\n', metadata={'document_id': 'TxOKdtMWaoSTDLxYS4ZcdEI7nwc', 'revision_id': 15, 'title': 'Test doc'})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"DOCUMENT_ID = input(\"larksuite wiki id\")\n",
|
||||
"larksuite_loader = LarkSuiteWikiLoader(DOMAIN, ACCESS_TOKEN, DOCUMENT_ID)\n",
|
||||
"docs = larksuite_loader.load()\n",
|
||||
"\n",
|
||||
"pprint(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
236	docs/docs/integrations/document_loaders/oracleai.ipynb (new file)
@@ -0,0 +1,236 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search: Document Processing\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords. One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"The guide demonstrates how to use Document Processing Capabilities within Oracle AI Vector Search to load and chunk documents using OracleDocLoader and OracleTextSplitter respectively."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Oracle Database\n",
|
||||
"The following sample code will show how to connect to Oracle Database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"username = \"<username>\"\n",
|
||||
"password = \"<password>\"\n",
|
||||
"dsn = \"<hostname>/<service_name>\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create a table and insert some sample docs to test."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" cursor = conn.cursor()\n",
|
||||
"\n",
|
||||
" drop_table_sql = \"\"\"drop table if exists demo_tab\"\"\"\n",
|
||||
" cursor.execute(drop_table_sql)\n",
|
||||
"\n",
|
||||
" create_table_sql = \"\"\"create table demo_tab (id number, data clob)\"\"\"\n",
|
||||
" cursor.execute(create_table_sql)\n",
|
||||
"\n",
|
||||
" insert_row_sql = \"\"\"insert into demo_tab values (:1, :2)\"\"\"\n",
|
||||
" rows_to_insert = [\n",
|
||||
" (\n",
|
||||
" 1,\n",
|
||||
" \"If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.\",\n",
|
||||
" ),\n",
|
||||
" (\n",
|
||||
" 2,\n",
|
||||
" \"A tablespace can be online (accessible) or offline (not accessible) whenever the database is open.\\nA tablespace is usually online so that its data is available to users. The SYSTEM tablespace and temporary tablespaces cannot be taken offline.\",\n",
|
||||
" ),\n",
|
||||
" (\n",
|
||||
" 3,\n",
|
||||
" \"The database stores LOBs differently from other data types. Creating a LOB column implicitly creates a LOB segment and a LOB index. The tablespace containing the LOB segment and LOB index, which are always stored together, may be different from the tablespace containing the table.\\nSometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.\",\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" cursor.executemany(insert_row_sql, rows_to_insert)\n",
|
||||
"\n",
|
||||
" conn.commit()\n",
|
||||
"\n",
|
||||
" print(\"Table created and populated.\")\n",
|
||||
" cursor.close()\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Table creation failed.\")\n",
|
||||
" cursor.close()\n",
|
||||
" conn.close()\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Documents\n",
|
||||
"The users can load the documents from Oracle Database or a file system or both. They just need to set the loader parameters accordingly. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters.\n",
|
||||
"\n",
|
||||
"The main benefit of using OracleDocLoader is that it can handle 150+ different file formats. You don't need to use different types of loader for different file formats. Here is the list of the formats that we support: [Oracle Text Supported Document Formats](https://docs.oracle.com/en/database/oracle/oracle-database/23/ccref/oracle-text-supported-document-formats.html)\n",
|
||||
"\n",
|
||||
"The following sample code will show how to do that:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.oracleai import OracleDocLoader\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# loading a local file\n",
|
||||
"loader_params = {}\n",
|
||||
"loader_params[\"file\"] = \"<file>\"\n",
|
||||
"\n",
|
||||
"# loading from a local directory\n",
|
||||
"loader_params = {}\n",
|
||||
"loader_params[\"dir\"] = \"<directory>\"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# loading from Oracle Database table\n",
|
||||
"loader_params = {\n",
|
||||
" \"owner\": \"<owner>\",\n",
|
||||
" \"tablename\": \"demo_tab\",\n",
|
||||
" \"colname\": \"data\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\"\"\" load the docs \"\"\"\n",
|
||||
"loader = OracleDocLoader(conn=conn, params=loader_params)\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of docs loaded: {len(docs)}\")\n",
|
||||
"# print(f\"Document-0: {docs[0].page_content}\") # content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split Documents\n",
|
||||
"The documents can be in different sizes: small, medium, large, or very large. The users like to split/chunk their documents into smaller pieces to generate embeddings. There are lots of different splitting customizations the users can do. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters.\n",
|
||||
"\n",
|
||||
"The following sample code will show how to do that:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.oracleai import OracleTextSplitter\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# Some examples\n",
|
||||
"# split by chars, max 500 chars\n",
|
||||
"splitter_params = {\"split\": \"chars\", \"max\": 500, \"normalize\": \"all\"}\n",
|
||||
"\n",
|
||||
"# split by words, max 100 words\n",
|
||||
"splitter_params = {\"split\": \"words\", \"max\": 100, \"normalize\": \"all\"}\n",
|
||||
"\n",
|
||||
"# split by sentence, max 20 sentences\n",
|
||||
"splitter_params = {\"split\": \"sentence\", \"max\": 20, \"normalize\": \"all\"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# split by default parameters\n",
|
||||
"splitter_params = {\"normalize\": \"all\"}\n",
|
||||
"\n",
|
||||
"# get the splitter instance\n",
|
||||
"splitter = OracleTextSplitter(conn=conn, params=splitter_params)\n",
|
||||
"\n",
|
||||
"list_chunks = []\n",
|
||||
"for doc in docs:\n",
|
||||
" chunks = splitter.split_text(doc.page_content)\n",
|
||||
" list_chunks.extend(chunks)\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Number of Chunks: {len(list_chunks)}\")\n",
|
||||
"# print(f\"Chunk-0: {list_chunks[0]}\") # content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End to End Demo\n",
|
||||
"Please refer to our complete demo guide [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) to build an end to end RAG pipeline with the help of Oracle AI Vector Search.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -54,7 +54,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -76,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -93,31 +93,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "897501860fe4452b836f816c72d955dd",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2024-04-24 21:20:12,461 - INFO - Converting the current model to sym_int4 format......\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = IpexLLM.from_model_id(\n",
|
||||
" model_id=\"lmsys/vicuna-7b-v1.5\",\n",
|
||||
@@ -134,29 +112,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/anaconda3/envs/shane-langchain-3.11/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n",
|
||||
" warn_deprecated(\n",
|
||||
"/opt/anaconda3/envs/shane-langchain-3.11/lib/python3.11/site-packages/transformers/generation/utils.py:1369: UserWarning: Using `max_length`'s default (4096) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"AI stands for \"Artificial Intelligence.\" It refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI can be achieved through a combination of techniques such as machine learning, natural language processing, computer vision, and robotics. The ultimate goal of AI research is to create machines that can think and learn like humans, and can even exceed human capabilities in certain areas.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"llm_chain = prompt | llm\n",
|
||||
"\n",
|
||||
"question = \"What is AI?\"\n",
|
||||
"output = llm_chain.invoke(question)"
|
||||
@@ -179,7 +139,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -198,17 +158,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2024-04-24 21:20:35,874 - INFO - Converting the current model to sym_int4 format......\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_lowbit = IpexLLM.from_model_id_low_bit(\n",
|
||||
" model_id=saved_lowbit_model_path,\n",
|
||||
@@ -227,27 +179,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/anaconda3/envs/shane-langchain-3.11/lib/python3.11/site-packages/transformers/generation/utils.py:1369: UserWarning: Using `max_length`'s default (4096) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"AI stands for \"Artificial Intelligence.\" It refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI can be achieved through a combination of techniques such as machine learning, natural language processing, computer vision, and robotics. The ultimate goal of AI research is to create machines that can think and learn like humans, and can even exceed human capabilities in certain areas.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm_lowbit)\n",
|
||||
"llm_chain = prompt | llm_lowbit\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"question = \"What is AI?\"\n",
|
||||
"output = llm_chain.invoke(question)"
|
||||
@@ -256,7 +193,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "shane-diffusion",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -270,7 +207,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
> [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) give users easy access to NVIDIA hosted API endpoints for
|
||||
> NVIDIA AI Foundation Models like `Mixtral 8x7B`, `Llama 2`, `Stable Diffusion`, etc. These models,
|
||||
> hosted on the [NVIDIA NGC catalog](https://catalog.ngc.nvidia.com/ai-foundation-models), are optimized, tested, and hosted on
|
||||
> hosted on the [NVIDIA API catalog](https://build.nvidia.com/), are optimized, tested, and hosted on
|
||||
> the NVIDIA AI platform, making them fast and easy to evaluate, further customize,
|
||||
> and seamlessly run at peak performance on any accelerated stack.
|
||||
>
|
||||
65	docs/docs/integrations/providers/oracleai.mdx (new file)
@@ -0,0 +1,65 @@
|
||||
# OracleAI Vector Search
|
||||
Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads and allows you to query data based on semantics rather than keywords. One of the biggest benefits of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system.
This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.
|
||||
|
||||
In addition, because Oracle has been building database technologies for so long, your vectors can benefit from all of Oracle Database's most powerful features, like the following:
|
||||
|
||||
* Partitioning Support
|
||||
* Real Application Clusters scalability
|
||||
* Exadata smart scans
|
||||
* Shard processing across geographically distributed databases
|
||||
* Transactions
|
||||
* Parallel SQL
|
||||
* Disaster recovery
|
||||
* Security
|
||||
* Oracle Machine Learning
|
||||
* Oracle Graph Database
|
||||
* Oracle Spatial and Graph
|
||||
* Oracle Blockchain
|
||||
* JSON
|
||||
|
||||
|
||||
## Document Loaders
|
||||
|
||||
Please check the [usage example](/docs/integrations/document_loaders/oracleai).
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders.oracleai import OracleDocLoader
|
||||
```
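
For orientation, here is a minimal sketch of loading documents stored in a database table, assuming `conn` is an open `oracledb` connection and the owner/table/column values are placeholders you supply:

```python
from langchain_community.document_loaders.oracleai import OracleDocLoader

# placeholder parameters: read documents from a column of a table you own
loader_params = {"owner": "<owner>", "tablename": "<table>", "colname": "<column>"}
loader = OracleDocLoader(conn=conn, params=loader_params)
docs = loader.load()
```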
|
||||
|
||||
## Text Splitter
|
||||
|
||||
Please check the [usage example](/docs/integrations/document_loaders/oracleai).
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders.oracleai import OracleTextSplitter
|
||||
```
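
A minimal sketch of splitting text with in-database chunking, assuming an open `oracledb` connection `conn`; the `normalize` setting is just one example of the split preferences covered in the usage example:

```python
from langchain_community.document_loaders.oracleai import OracleTextSplitter

splitter_params = {"normalize": "all"}  # example split preference
splitter = OracleTextSplitter(conn=conn, params=splitter_params)
chunks = splitter.split_text("Some long document text ...")
```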
|
||||
|
||||
## Embeddings
|
||||
|
||||
Please check the [usage example](/docs/integrations/text_embedding/oracleai).
|
||||
|
||||
```python
|
||||
from langchain_community.embeddings.oracleai import OracleEmbeddings
|
||||
```
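
A minimal sketch using an ONNX model previously loaded into the database (here named `demo_model`, as in the usage example):

```python
from langchain_community.embeddings.oracleai import OracleEmbeddings

embedder_params = {"provider": "database", "model": "demo_model"}
embedder = OracleEmbeddings(conn=conn, params=embedder_params)
vector = embedder.embed_query("Hello World!")
```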
|
||||
|
||||
## Summary
|
||||
|
||||
Please check the [usage example](/docs/integrations/tools/oracleai).
|
||||
|
||||
```python
|
||||
from langchain_community.utilities.oracleai import OracleSummary
|
||||
```
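
A minimal sketch using the in-database summary provider, mirroring the usage example:

```python
from langchain_community.utilities.oracleai import OracleSummary

summary_params = {"provider": "database", "glevel": "S", "numParagraphs": 1, "language": "english"}
summ = OracleSummary(conn=conn, params=summary_params)
summary = summ.get_summary("Some long document text ...")
```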
|
||||
|
||||
## Vector Store
|
||||
|
||||
Please check the [usage example](/docs/integrations/vectorstores/oracle).
|
||||
|
||||
```python
|
||||
from langchain_community.vectorstores.oraclevs import OracleVS
|
||||
```
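
A minimal sketch of ingesting documents and running a similarity search, assuming `docs` is a list of LangChain `Document` objects and `embedder` is any LangChain embeddings object:

```python
from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy

vector_store = OracleVS.from_documents(
    docs,
    embedder,
    client=conn,
    table_name="Documents_COSINE",
    distance_strategy=DistanceStrategy.COSINE,
)
results = vector_store.similarity_search("How are LOBs stored in Oracle Database", 2)
```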
|
||||
|
||||
## End to End Demo
|
||||
|
||||
Please check the [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/blob/master/cookbook/oracleai_demo.ipynb).
|
||||
|
||||
@@ -85,9 +85,6 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.\n",
|
||||
"## 10K free queries to any endpoint (which is a lot actually).\n",
|
||||
"\n",
|
||||
"# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n",
|
||||
"if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n",
|
||||
" print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n",
|
||||
@@ -112,11 +109,7 @@
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"The main requirement when initializing an embedding model is to provide the model name. An example is `nvolveqa_40k` below.\n",
|
||||
"\n",
|
||||
"For `nvovleqa_40k`, you can also specify the `model_type` as `passage` or `query`. When doing retrieval, you will get best results if you embed the source documents with the `passage` type and the user queries with the `query` type.\n",
|
||||
"\n",
|
||||
"If not provided, the `embed_query` method will default to the `query` type, and the `embed_documents` mehod will default to the `passage` type."
|
||||
"When initializing an embedding model you can select a model by passing it, e.g. `ai-embed-qa-4` below, or use the default by not passing any arguments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -129,10 +122,7 @@
|
||||
"source": [
|
||||
"from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n",
|
||||
"\n",
|
||||
"embedder = NVIDIAEmbeddings(model=\"nvolveqa_40k\")\n",
|
||||
"\n",
|
||||
"# Alternatively, if you want to specify whether it will use the query or passage type\n",
|
||||
"# embedder = NVIDIAEmbeddings(model=\"nvolveqa_40k\", model_type=\"passage\")"
|
||||
"embedder = NVIDIAEmbeddings(model=\"ai-embed-qa-4\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -156,7 +146,7 @@
|
||||
"id": "pcDu3v4CbmWk"
|
||||
},
|
||||
"source": [
|
||||
"### **Similarity/Speed Test**\n",
|
||||
"### **Similarity**\n",
|
||||
"\n",
|
||||
"The following is a quick test of the methods in terms of usage, format, and speed for the use case of embedding the following data points:\n",
|
||||
"\n",
|
||||
@@ -250,7 +240,7 @@
|
||||
"s = time.perf_counter()\n",
|
||||
"# To use the \"query\" mode, we have to add it as an instance arg\n",
|
||||
"q_embeddings = NVIDIAEmbeddings(\n",
|
||||
" model=\"nvolveqa_40k\", model_type=\"query\"\n",
|
||||
" model=\"ai-embed-qa-4\", model_type=\"query\"\n",
|
||||
").embed_documents(\n",
|
||||
" [\n",
|
||||
" \"What's the weather like in Komchatka?\",\n",
|
||||
@@ -501,7 +491,7 @@
|
||||
"source": [
|
||||
"vectorstore = FAISS.from_texts(\n",
|
||||
" [\"harrison worked at kensho\"],\n",
|
||||
" embedding=NVIDIAEmbeddings(model=\"nvolveqa_40k\"),\n",
|
||||
" embedding=NVIDIAEmbeddings(model=\"ai-embed-qa-4\"),\n",
|
||||
")\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
@@ -515,7 +505,7 @@
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"model = ChatNVIDIA(model=\"mixtral_8x7b\")\n",
|
||||
"model = ChatNVIDIA(model=\"ai-mixtral-8x7b-instruct\")\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||||
|
||||
262
docs/docs/integrations/text_embedding/oracleai.ipynb
Normal file
@@ -0,0 +1,262 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search: Generate Embeddings\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords. One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"The guide demonstrates how to use Embedding Capabilities within Oracle AI Vector Search to generate embeddings for your documents using OracleEmbeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Oracle Database\n",
|
||||
"The following sample code will show how to connect to Oracle Database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"username = \"<username>\"\n",
|
||||
"password = \"<password>\"\n",
|
||||
"dsn = \"<hostname>/<service_name>\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For embedding, we have a few provider options that the users can choose from such as database, 3rd party providers like ocigenai, huggingface, openai, etc. If the users choose to use 3rd party provider, they need to create a credential with corresponding authentication information. On the other hand, if the users choose to use 'database' as provider, they need to load an onnx model to Oracle Database for embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load ONNX Model\n",
|
||||
"\n",
|
||||
"To generate embeddings, Oracle provides a few provider options for users to choose from. The users can choose 'database' provider or some 3rd party providers like OCIGENAI, HuggingFace, etc.\n",
|
||||
"\n",
|
||||
"***Note*** If the users choose database option, they need to load an ONNX model to Oracle Database. The users do not need to load an ONNX model to Oracle Database if they choose to use 3rd party provider to generate embeddings.\n",
|
||||
"\n",
|
||||
"One of the core benefits of using an ONNX model is that the users do not need to transfer their data to 3rd party to generate embeddings. And also, since it does not involve any network or REST API calls, it may provide better performance.\n",
|
||||
"\n",
|
||||
"Here is the sample code to load an ONNX model to Oracle Database:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings.oracleai import OracleEmbeddings\n",
|
||||
"\n",
|
||||
"# please update with your related information\n",
|
||||
"# make sure that you have onnx file in the system\n",
|
||||
"onnx_dir = \"DEMO_DIR\"\n",
|
||||
"onnx_file = \"tinybert.onnx\"\n",
|
||||
"model_name = \"demo_model\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)\n",
|
||||
" print(\"ONNX model loaded.\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"ONNX model loading failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Credential\n",
|
||||
"\n",
|
||||
"On the other hand, if the users choose to use 3rd party provider to generate embeddings, they need to create credential to access 3rd party provider's end points.\n",
|
||||
"\n",
|
||||
"***Note:*** The users do not need to create any credential if they choose to use 'database' provider to generate embeddings. Should the users choose to 3rd party provider, they need to create credential for the 3rd party provider they want to use. \n",
|
||||
"\n",
|
||||
"Here is a sample example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" cursor = conn.cursor()\n",
|
||||
" cursor.execute(\n",
|
||||
" \"\"\"\n",
|
||||
" declare\n",
|
||||
" jo json_object_t;\n",
|
||||
" begin\n",
|
||||
" -- HuggingFace\n",
|
||||
" dbms_vector_chain.drop_credential(credential_name => 'HF_CRED');\n",
|
||||
" jo := json_object_t();\n",
|
||||
" jo.put('access_token', '<access_token>');\n",
|
||||
" dbms_vector_chain.create_credential(\n",
|
||||
" credential_name => 'HF_CRED',\n",
|
||||
" params => json(jo.to_string));\n",
|
||||
"\n",
|
||||
" -- OCIGENAI\n",
|
||||
" dbms_vector_chain.drop_credential(credential_name => 'OCI_CRED');\n",
|
||||
" jo := json_object_t();\n",
|
||||
" jo.put('user_ocid','<user_ocid>');\n",
|
||||
" jo.put('tenancy_ocid','<tenancy_ocid>');\n",
|
||||
" jo.put('compartment_ocid','<compartment_ocid>');\n",
|
||||
" jo.put('private_key','<private_key>');\n",
|
||||
" jo.put('fingerprint','<fingerprint>');\n",
|
||||
" dbms_vector_chain.create_credential(\n",
|
||||
" credential_name => 'OCI_CRED',\n",
|
||||
" params => json(jo.to_string));\n",
|
||||
" end;\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" cursor.close()\n",
|
||||
" print(\"Credentials created.\")\n",
|
||||
"except Exception as ex:\n",
|
||||
" cursor.close()\n",
|
||||
" raise"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate Embeddings\n",
|
||||
"Oracle AI Vector Search provides a number of ways to generate embeddings. The users can load an ONNX embedding model to Oracle Database and use it to generate embeddings or use some 3rd party API's end points to generate embeddings. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***Note:*** The users may need to set proxy if they want to use some 3rd party embedding generation providers other than 'database' provider (aka using ONNX model)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# proxy to be used when we instantiate summary and embedder object\n",
|
||||
"proxy = \"<proxy>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following sample code will show how to generate embeddings:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings.oracleai import OracleEmbeddings\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# using ocigenai\n",
|
||||
"embedder_params = {\n",
|
||||
" \"provider\": \"ocigenai\",\n",
|
||||
" \"credential_name\": \"OCI_CRED\",\n",
|
||||
" \"url\": \"https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText\",\n",
|
||||
" \"model\": \"cohere.embed-english-light-v3.0\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# using huggingface\n",
|
||||
"embedder_params = {\n",
|
||||
" \"provider\": \"huggingface\", \n",
|
||||
" \"credential_name\": \"HF_CRED\", \n",
|
||||
" \"url\": \"https://api-inference.huggingface.co/pipeline/feature-extraction/\", \n",
|
||||
" \"model\": \"sentence-transformers/all-MiniLM-L6-v2\", \n",
|
||||
" \"wait_for_model\": \"true\"\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# using ONNX model loaded to Oracle Database\n",
|
||||
"embedder_params = {\"provider\": \"database\", \"model\": \"demo_model\"}\n",
|
||||
"\n",
|
||||
"# Remove proxy if not required\n",
|
||||
"embedder = OracleEmbeddings(conn=conn, params=embedder_params, proxy=proxy)\n",
|
||||
"embed = embedder.embed_query(\"Hello World!\")\n",
|
||||
"\n",
|
||||
"\"\"\" verify \"\"\"\n",
|
||||
"print(f\"Embedding generated by OracleEmbeddings: {embed}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End to End Demo\n",
|
||||
"Please refer to our complete demo guide [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) to build an end to end RAG pipeline with the help of Oracle AI Vector Search.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -46,7 +46,7 @@
|
||||
"source": [
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type. Note that this is an alternative to the above."
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
174
docs/docs/integrations/tools/oracleai.ipynb
Normal file
@@ -0,0 +1,174 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search: Generate Summary\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords. One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"The guide demonstrates how to use Summary Capabilities within Oracle AI Vector Search to generate summary for your documents using OracleSummary."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Oracle Database\n",
|
||||
"The following sample code will show how to connect to Oracle Database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"username = \"<username>\"\n",
|
||||
"password = \"<password>\"\n",
|
||||
"dsn = \"<hostname>/<service_name>\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate Summary\n",
|
||||
"The Oracle AI Vector Search Langchain library provides APIs to generate summaries of documents. There are a few summary generation provider options including Database, OCIGENAI, HuggingFace and so on. The users can choose their preferred provider to generate a summary. They just need to set the summary parameters accordingly. Please refer to the Oracle AI Vector Search Guide book for complete information about these parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***Note:*** The users may need to set proxy if they want to use some 3rd party summary generation providers other than Oracle's in-house and default provider: 'database'. If you don't have proxy, please remove the proxy parameter when you instantiate the OracleSummary."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# proxy to be used when we instantiate summary and embedder object\n",
|
||||
"proxy = \"<proxy>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following sample code will show how to generate summary:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.utilities.oracleai import OracleSummary\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# using 'ocigenai' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"ocigenai\",\n",
|
||||
" \"credential_name\": \"OCI_CRED\",\n",
|
||||
" \"url\": \"https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/summarizeText\",\n",
|
||||
" \"model\": \"cohere.command\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# using 'huggingface' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"huggingface\",\n",
|
||||
" \"credential_name\": \"HF_CRED\",\n",
|
||||
" \"url\": \"https://api-inference.huggingface.co/models/\",\n",
|
||||
" \"model\": \"facebook/bart-large-cnn\",\n",
|
||||
" \"wait_for_model\": \"true\"\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# using 'database' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"database\",\n",
|
||||
" \"glevel\": \"S\",\n",
|
||||
" \"numParagraphs\": 1,\n",
|
||||
" \"language\": \"english\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# get the summary instance\n",
|
||||
"# Remove proxy if not required\n",
|
||||
"summ = OracleSummary(conn=conn, params=summary_params, proxy=proxy)\n",
|
||||
"summary = summ.get_summary(\n",
|
||||
" \"In the heart of the forest, \"\n",
|
||||
" + \"a lone fox ventured out at dusk, seeking a lost treasure. \"\n",
|
||||
" + \"With each step, memories flooded back, guiding its path. \"\n",
|
||||
" + \"As the moon rose high, illuminating the night, the fox unearthed \"\n",
|
||||
" + \"not gold, but a forgotten friendship, worth more than any riches.\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Summary generated by OracleSummary: {summary}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End to End Demo\n",
|
||||
"Please refer to our complete demo guide [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) to build an end to end RAG pipeline with the help of Oracle AI Vector Search.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
469
docs/docs/integrations/vectorstores/oracle.ipynb
Normal file
@@ -0,0 +1,469 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd33e9d5-9dba-4aac-9f7f-4cf9e6686593",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search: Vector Store\n",
|
||||
"\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords.\n",
|
||||
"One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system.\n",
|
||||
"This is not only powerful but also significantly more effective because you dont need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"In addition, because Oracle has been building database technologies for so long, your vectors can benefit from all of Oracle Database's most powerful features, like the following:\n",
|
||||
"\n",
|
||||
" * Partitioning Support\n",
|
||||
" * Real Application Clusters scalability\n",
|
||||
" * Exadata smart scans\n",
|
||||
" * Shard processing across geographically distributed databases\n",
|
||||
" * Transactions\n",
|
||||
" * Parallel SQL\n",
|
||||
" * Disaster recovery\n",
|
||||
" * Security\n",
|
||||
" * Oracle Machine Learning\n",
|
||||
" * Oracle Graph Database\n",
|
||||
" * Oracle Spatial and Graph\n",
|
||||
" * Oracle Blockchain\n",
|
||||
" * JSON"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7bd80054-c803-47e1-a259-c40ed073c37d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites for using Langchain with Oracle AI Vector Search\n",
|
||||
"\n",
|
||||
"Please install Oracle Python Client driver to use Langchain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2bbb989d-c6fb-4ab9-bafd-a95fd48538d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0fceaa5a-95da-4ebd-8b8d-5e73bb653172",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Oracle AI Vector Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4421e4b7-2c7e-4bcd-82b3-9576595edd0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"username = \"username\"\n",
|
||||
"password = \"password\"\n",
|
||||
"dsn = \"ipaddress:port/orclpdb1\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" connection = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b11cf362-01b0-485d-8527-31b0fbb5028e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import the required dependencies to play with Oracle AI Vector Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "43ea59e3-2910-45a6-b195-5f06094bb7c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
|
||||
"from langchain_community.vectorstores import oraclevs\n",
|
||||
"from langchain_community.vectorstores.oraclevs import OracleVS\n",
|
||||
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
||||
"from langchain_core.documents import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0aac10dc-a9cc-4fdb-901c-1b7a4bbbe5a7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70ac6982-b13a-4e8c-9c47-57c6d136ac60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define a list of documents (These dummy examples are 5 random documents from Oracle Concepts Manual )\n",
|
||||
"\n",
|
||||
"documents_json_list = [\n",
|
||||
" {\n",
|
||||
" \"id\": \"cncpt_15.5.3.2.2_P4\",\n",
|
||||
" \"text\": \"If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.\",\n",
|
||||
" \"link\": \"https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/logical-storage-structures.html#GUID-5387D7B2-C0CA-4C1E-811B-C7EB9B636442\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"id\": \"cncpt_15.5.5_P1\",\n",
|
||||
" \"text\": \"A tablespace can be online (accessible) or offline (not accessible) whenever the database is open.\\nA tablespace is usually online so that its data is available to users. The SYSTEM tablespace and temporary tablespaces cannot be taken offline.\",\n",
|
||||
" \"link\": \"https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/logical-storage-structures.html#GUID-D02B2220-E6F5-40D9-AFB5-BC69BCEF6CD4\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"id\": \"cncpt_22.3.4.3.1_P2\",\n",
|
||||
" \"text\": \"The database stores LOBs differently from other data types. Creating a LOB column implicitly creates a LOB segment and a LOB index. The tablespace containing the LOB segment and LOB index, which are always stored together, may be different from the tablespace containing the table.\\nSometimes the database can store small amounts of LOB data in the table itself rather than in a separate LOB segment.\",\n",
|
||||
" \"link\": \"https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/concepts-for-database-developers.html#GUID-3C50EAB8-FC39-4BB3-B680-4EACCE49E866\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"id\": \"cncpt_22.3.4.3.1_P3\",\n",
|
||||
" \"text\": \"The LOB segment stores data in pieces called chunks. A chunk is a logically contiguous set of data blocks and is the smallest unit of allocation for a LOB. A row in the table stores a pointer called a LOB locator, which points to the LOB index. When the table is queried, the database uses the LOB index to quickly locate the LOB chunks.\",\n",
|
||||
" \"link\": \"https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/concepts-for-database-developers.html#GUID-3C50EAB8-FC39-4BB3-B680-4EACCE49E866\",\n",
|
||||
" },\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eaa942d6-5954-4898-8c32-3627b923a3a5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create Langchain Documents\n",
|
||||
"\n",
|
||||
"documents_langchain = []\n",
|
||||
"\n",
|
||||
"for doc in documents_json_list:\n",
|
||||
" metadata = {\"id\": doc[\"id\"], \"link\": doc[\"link\"]}\n",
|
||||
" doc_langchain = Document(page_content=doc[\"text\"], metadata=metadata)\n",
|
||||
" documents_langchain.append(doc_langchain)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6823f5e6-997c-4f15-927b-bd44c61f105f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using AI Vector Search Create a bunch of Vector Stores with different distance strategies\n",
|
||||
"\n",
|
||||
"First we will create three vector stores each with different distance functions. Since we have not created indices in them yet, they will just create tables for now. Later we will use these vector stores to create HNSW indicies.\n",
|
||||
"\n",
|
||||
"You can manually connect to the Oracle Database and will see three tables \n",
|
||||
"Documents_DOT, Documents_COSINE and Documents_EUCLIDEAN. \n",
|
||||
"\n",
|
||||
"We will then create three additional tables Documents_DOT_IVF, Documents_COSINE_IVF and Documents_EUCLIDEAN_IVF which will be used\n",
|
||||
"to create IVF indicies on the tables instead of HNSW indices. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ed1b253e-5f5c-4a81-983c-74645213a170",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Ingest documents into Oracle Vector Store using different distance strategies\n",
|
||||
"\n",
|
||||
"model = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
|
||||
"\n",
|
||||
"vector_store_dot = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_DOT\",\n",
|
||||
" distance_strategy=DistanceStrategy.DOT_PRODUCT,\n",
|
||||
")\n",
|
||||
"vector_store_max = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_COSINE\",\n",
|
||||
" distance_strategy=DistanceStrategy.COSINE,\n",
|
||||
")\n",
|
||||
"vector_store_euclidean = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_EUCLIDEAN\",\n",
|
||||
" distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Ingest documents into Oracle Vector Store using different distance strategies\n",
|
||||
"vector_store_dot_ivf = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_DOT_IVF\",\n",
|
||||
" distance_strategy=DistanceStrategy.DOT_PRODUCT,\n",
|
||||
")\n",
|
||||
"vector_store_max_ivf = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_COSINE_IVF\",\n",
|
||||
" distance_strategy=DistanceStrategy.COSINE,\n",
|
||||
")\n",
|
||||
"vector_store_euclidean_ivf = OracleVS.from_documents(\n",
|
||||
" documents_langchain,\n",
|
||||
" model,\n",
|
||||
" client=connection,\n",
|
||||
" table_name=\"Documents_EUCLIDEAN_IVF\",\n",
|
||||
" distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77c29505-8688-4b87-9a99-e648fbb2d425",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Demonstrating add, delete operations for texts, and basic similarity search\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "306563ae-577b-4bc7-8a92-3dd6a59310f5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def manage_texts(vector_stores):\n",
|
||||
" \"\"\"\n",
|
||||
" Adds texts to each vector store, demonstrates error handling for duplicate additions,\n",
|
||||
" and performs deletion of texts. Showcases similarity searches and index creation for each vector store.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" - vector_stores (list): A list of OracleVS instances.\n",
|
||||
" \"\"\"\n",
|
||||
" texts = [\"Rohan\", \"Shailendra\"]\n",
|
||||
" metadata = [\n",
|
||||
" {\"id\": \"100\", \"link\": \"Document Example Test 1\"},\n",
|
||||
" {\"id\": \"101\", \"link\": \"Document Example Test 2\"},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" for i, vs in enumerate(vector_stores, start=1):\n",
|
||||
" # Adding texts\n",
|
||||
" try:\n",
|
||||
" vs.add_texts(texts, metadata)\n",
|
||||
" print(f\"\\n\\n\\nAdd texts complete for vector store {i}\\n\\n\\n\")\n",
|
||||
" except Exception as ex:\n",
|
||||
" print(f\"\\n\\n\\nExpected error on duplicate add for vector store {i}\\n\\n\\n\")\n",
|
||||
"\n",
|
||||
" # Deleting texts using the value of 'id'\n",
|
||||
" vs.delete([metadata[0][\"id\"]])\n",
|
||||
" print(f\"\\n\\n\\nDelete texts complete for vector store {i}\\n\\n\\n\")\n",
|
||||
"\n",
|
||||
" # Similarity search\n",
|
||||
" results = vs.similarity_search(\"How are LOBS stored in Oracle Database\", 2)\n",
|
||||
" print(f\"\\n\\n\\nSimilarity search results for vector store {i}: {results}\\n\\n\\n\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"vector_store_list = [\n",
|
||||
" vector_store_dot,\n",
|
||||
" vector_store_max,\n",
|
||||
" vector_store_euclidean,\n",
|
||||
" vector_store_dot_ivf,\n",
|
||||
" vector_store_max_ivf,\n",
|
||||
" vector_store_euclidean_ivf,\n",
|
||||
"]\n",
|
||||
"manage_texts(vector_store_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0980cb33-69cf-4547-842a-afdc4d6fa7d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Demonstrating index creation with specific parameters for each distance strategy\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46298a27-e309-456e-b2b8-771d9cb3be29",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_search_indices(connection):\n",
|
||||
" \"\"\"\n",
|
||||
" Creates search indices for the vector stores, each with specific parameters tailored to their distance strategy.\n",
|
||||
" \"\"\"\n",
|
||||
" # Index for DOT_PRODUCT strategy\n",
|
||||
" # Notice we are creating a HNSW index with default parameters\n",
|
||||
" # This will default to creating a HNSW index with 8 Parallel Workers and use the Default Accuracy used by Oracle AI Vector Search\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_dot,\n",
|
||||
" params={\"idx_name\": \"hnsw_idx1\", \"idx_type\": \"HNSW\"},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Index for COSINE strategy with specific parameters\n",
|
||||
" # Notice we are creating a HNSW index with parallel 16 and Target Accuracy Specification as 97 percent\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_max,\n",
|
||||
" params={\n",
|
||||
" \"idx_name\": \"hnsw_idx2\",\n",
|
||||
" \"idx_type\": \"HNSW\",\n",
|
||||
" \"accuracy\": 97,\n",
|
||||
" \"parallel\": 16,\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Index for EUCLIDEAN_DISTANCE strategy with specific parameters\n",
|
||||
" # Notice we are creating a HNSW index by specifying Power User Parameters which are neighbors = 64 and efConstruction = 100\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_euclidean,\n",
|
||||
" params={\n",
|
||||
" \"idx_name\": \"hnsw_idx3\",\n",
|
||||
" \"idx_type\": \"HNSW\",\n",
|
||||
" \"neighbors\": 64,\n",
|
||||
" \"efConstruction\": 100,\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Index for DOT_PRODUCT strategy with specific parameters\n",
|
||||
" # Notice we are creating an IVF index with default parameters\n",
|
||||
" # This will default to creating an IVF index with 8 Parallel Workers and use the Default Accuracy used by Oracle AI Vector Search\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_dot_ivf,\n",
|
||||
" params={\n",
|
||||
" \"idx_name\": \"ivf_idx1\",\n",
|
||||
" \"idx_type\": \"IVF\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Index for COSINE strategy with specific parameters\n",
|
||||
" # Notice we are creating an IVF index with parallel 32 and Target Accuracy Specification as 90 percent\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_max_ivf,\n",
|
||||
" params={\n",
|
||||
" \"idx_name\": \"ivf_idx2\",\n",
|
||||
" \"idx_type\": \"IVF\",\n",
|
||||
" \"accuracy\": 90,\n",
|
||||
" \"parallel\": 32,\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Index for EUCLIDEAN_DISTANCE strategy with specific parameters\n",
|
||||
" # Notice we are creating an IVF index by specifying Power User Parameters which is neighbor_part = 64\n",
|
||||
" oraclevs.create_index(\n",
|
||||
" connection,\n",
|
||||
" vector_store_euclidean_ivf,\n",
|
||||
" params={\"idx_name\": \"ivf_idx3\", \"idx_type\": \"IVF\", \"neighbor_part\": 64},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(\"Index creation complete.\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"create_search_indices(connection)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7223d048-5c0b-4e91-a91b-a7daa9f86758",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Now we will conduct a bunch of advanced searches on all six vector stores. Each of these three searches have a with and without filter version. The filter only selects the document with id 101 out and filters out everything else"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "37ca2e7d-9803-4260-95e7-62776d4fb820",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Conduct advanced searches after creating the indices\n",
|
||||
"def conduct_advanced_searches(vector_stores):\n",
|
||||
" query = \"How are LOBS stored in Oracle Database\"\n",
|
||||
" # Constructing a filter for direct comparison against document metadata\n",
|
||||
" # This filter aims to include documents whose metadata 'id' is exactly '2'\n",
|
||||
" filter_criteria = {\"id\": [\"101\"]} # Direct comparison filter\n",
|
||||
"\n",
|
||||
" for i, vs in enumerate(vector_stores, start=1):\n",
|
||||
" print(f\"\\n--- Vector Store {i} Advanced Searches ---\")\n",
|
||||
" # Similarity search without a filter\n",
|
||||
" print(\"\\nSimilarity search results without filter:\")\n",
|
||||
" print(vs.similarity_search(query, 2))\n",
|
||||
"\n",
|
||||
" # Similarity search with a filter\n",
|
||||
" print(\"\\nSimilarity search results with filter:\")\n",
|
||||
" print(vs.similarity_search(query, 2, filter=filter_criteria))\n",
|
||||
"\n",
|
||||
" # Similarity search with relevance score\n",
|
||||
" print(\"\\nSimilarity search with relevance score:\")\n",
|
||||
" print(vs.similarity_search_with_score(query, 2))\n",
|
||||
"\n",
|
||||
" # Similarity search with relevance score with filter\n",
|
||||
" print(\"\\nSimilarity search with relevance score with filter:\")\n",
|
||||
" print(vs.similarity_search_with_score(query, 2, filter=filter_criteria))\n",
|
||||
"\n",
|
||||
" # Max marginal relevance search\n",
|
||||
" print(\"\\nMax marginal relevance search results:\")\n",
|
||||
" print(vs.max_marginal_relevance_search(query, 2, fetch_k=20, lambda_mult=0.5))\n",
|
||||
"\n",
|
||||
" # Max marginal relevance search with filter\n",
|
||||
" print(\"\\nMax marginal relevance search results with filter:\")\n",
|
||||
" print(\n",
|
||||
" vs.max_marginal_relevance_search(\n",
|
||||
" query, 2, fetch_k=20, lambda_mult=0.5, filter=filter_criteria\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"conduct_advanced_searches(vector_store_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0da8c7e2-0db0-4363-b31b-a7a5e3f83717",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End to End Demo\n",
|
||||
"Please refer to our complete demo guide [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) to build an end to end RAG pipeline with the help of Oracle AI Vector Search.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -20,18 +20,15 @@
|
||||
"source": [
|
||||
"Chains refer to sequences of calls - whether to an LLM, a tool, or a data preprocessing step. The primary supported way to do this is with [LCEL](/docs/expression_language). \n",
|
||||
"\n",
|
||||
"LCEL is great for constructing your own chains, but it's also nice to have chains that you can use off-the-shelf. There are two types of off-the-shelf chains that LangChain supports:\n",
|
||||
"LCEL is great for constructing your chains, but it's also nice to have chains used off the shelf. There are two types of off-the-shelf chains that LangChain supports:\n",
|
||||
"\n",
|
||||
"- Chains that are built with LCEL. In this case, LangChain offers a higher-level constructor method. However, all that is being done under the hood is constructing a chain with LCEL. \n",
|
||||
"- [Legacy] Chains constructed by subclassing from a legacy `Chain` class. These chains do not use LCEL under the hood but are the standalone classes.\n",
|
||||
"\n",
|
||||
"- [Legacy] Chains constructed by subclassing from a legacy `Chain` class. These chains do not use LCEL under the hood but are rather standalone classes.\n",
|
||||
"\n",
|
||||
"We are working creating methods that create LCEL versions of all chains. We are doing this for a few reasons.\n",
|
||||
"We are working on creating methods that create LCEL versions of all chains. We are doing this for a few reasons.\n",
|
||||
"\n",
|
||||
"1. Chains constructed in this way are nice because if you want to modify the internals of a chain you can simply modify the LCEL.\n",
|
||||
"\n",
|
||||
"2. These chains natively support streaming, async, and batch out of the box.\n",
|
||||
"\n",
|
||||
"3. These chains automatically get observability at each step.\n",
|
||||
"\n",
|
||||
"This page contains two lists. First, a list of all LCEL chain constructors. Second, a list of all legacy Chains."
|
||||
@@ -44,32 +41,22 @@
|
||||
"source": [
|
||||
"## LCEL Chains\n",
|
||||
"\n",
|
||||
"Below is a table of all LCEL chain constructors. In addition, we report on:\n",
|
||||
"Below is a table of all `LCEL chain constructors`. \n",
|
||||
"\n",
|
||||
"**Chain Constructor**\n",
|
||||
"\n",
|
||||
"The constructor function for this chain. These are all methods that return LCEL runnables. We also link to the API documentation.\n",
|
||||
"\n",
|
||||
"**Function Calling**\n",
|
||||
"\n",
|
||||
"Whether this requires OpenAI function calling.\n",
|
||||
"\n",
|
||||
"**Other Tools**\n",
|
||||
"\n",
|
||||
"What other tools (if any) are used in this chain.\n",
|
||||
"\n",
|
||||
"**When to Use**\n",
|
||||
"\n",
|
||||
"Our commentary on when to use this chain.\n",
|
||||
"Table columns:\n",
|
||||
"\n",
|
||||
"- **Chain Constructor:** The constructor function for this chain. These are all methods that return LCEL Runnables. We also link to the API documentation.\n",
|
||||
"- **Function Calling:** Whether this requires OpenAI function calling.\n",
|
||||
"- **Other Tools:** Other tools (if any) used in this chain.\n",
|
||||
"- **When to Use:** Our commentary on when to use this chain.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Chain Constructor | Function Calling | Other Tools | When to Use |\n",
|
||||
"|----------------------------------|-------------------------|--------------|--------------------------------------------------------------------------------|\n",
|
||||
"| [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html#langchain.chains.combine_documents.stuff.create_stuff_documents_chain) | | | This chain takes a list of documents and formats them all into a prompt, then passes that prompt to an LLM. It passes ALL documents, so you should make sure it fits within the context window the LLM you are using. |\n",
|
||||
"| [create_openai_fn_runnable](https://api.python.langchain.com/en/latest/chains/langchain.chains.structured_output.base.create_openai_fn_runnable.html#langchain.chains.structured_output.base.create_openai_fn_runnable) | ✅ | | If you want to use OpenAI function calling to OPTIONALLY structured an output response. You may pass in multiple functions for it call, but it does not have to call it. |\n",
|
||||
"| [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html#langchain.chains.combine_documents.stuff.create_stuff_documents_chain) | | | This chain takes a list of documents and formats them all into a prompt, then passes that prompt to an LLM. It passes ALL documents, so you should make sure it fits within the context window of the LLM you are using. |\n",
|
||||
"| [create_openai_fn_runnable](https://api.python.langchain.com/en/latest/chains/langchain.chains.structured_output.base.create_openai_fn_runnable.html#langchain.chains.structured_output.base.create_openai_fn_runnable) | ✅ | | If you want to use OpenAI function calling to OPTIONALLY structured an output response. You may pass in multiple functions for its call, but it does not have to call it. |\n",
|
||||
"| [create_structured_output_runnable](https://api.python.langchain.com/en/latest/chains/langchain.chains.structured_output.base.create_structured_output_runnable.html#langchain.chains.structured_output.base.create_structured_output_runnable) | ✅ | | If you want to use OpenAI function calling to FORCE the LLM to respond with a certain function. You may only pass in one function, and the chain will ALWAYS return this response. |\n",
|
||||
"| [load_query_constructor_runnable](https://api.python.langchain.com/en/latest/chains/langchain.chains.query_constructor.base.load_query_constructor_runnable.html#langchain.chains.query_constructor.base.load_query_constructor_runnable) | | | Can be used to generate queries. You must specify a list of allowed operations, and then will return a runnable that converts a natural language query into those allowed operations. |\n",
|
||||
"| [load_query_constructor_runnable](https://api.python.langchain.com/en/latest/chains/langchain.chains.query_constructor.base.load_query_constructor_runnable.html#langchain.chains.query_constructor.base.load_query_constructor_runnable) | | | Can be used to generate queries. You must specify a list of allowed operations and then return a runnable that converts a natural language query into those allowed operations. |\n",
|
||||
"| [create_sql_query_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.sql_database.query.create_sql_query_chain.html#langchain.chains.sql_database.query.create_sql_query_chain) | | SQL Database | If you want to construct a query for a SQL database from natural language. |\n",
|
||||
"| [create_history_aware_retriever](https://api.python.langchain.com/en/latest/chains/langchain.chains.history_aware_retriever.create_history_aware_retriever.html#langchain.chains.history_aware_retriever.create_history_aware_retriever) | | Retriever | This chain takes in conversation history and then uses that to generate a search query which is passed to the underlying retriever. |\n",
|
||||
"| [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html#langchain.chains.retrieval.create_retrieval_chain) | | Retriever | This chain takes in a user inquiry, which is then passed to the retriever to fetch relevant documents. Those documents (and original inputs) are then passed to an LLM to generate a response |"
|
||||
@@ -82,48 +69,39 @@
|
||||
"source": [
|
||||
"## Legacy Chains\n",
|
||||
"\n",
|
||||
"Below we report on the legacy chain types that exist. We will maintain support for these until we are able to create a LCEL alternative. We report on:\n",
|
||||
"Below are the `legacy chains`. We will maintain support for these until we create an LCEL alternative. \n",
|
||||
"\n",
|
||||
"**Chain**\n",
|
||||
"Table columns:\n",
|
||||
"\n",
|
||||
"Name of the chain, or name of the constructor method. If constructor method, this will return a `Chain` subclass.\n",
|
||||
"\n",
|
||||
"**Function Calling**\n",
|
||||
"\n",
|
||||
"Whether this requires OpenAI Function Calling.\n",
|
||||
"\n",
|
||||
"**Other Tools**\n",
|
||||
"\n",
|
||||
"Other tools used in the chain.\n",
|
||||
"\n",
|
||||
"**When to Use**\n",
|
||||
"\n",
|
||||
"Our commentary on when to use.\n",
|
||||
"- **Chain:** Name of the chain or name of the constructor method. If constructor method, this will return a `Chain` subclass.\n",
|
||||
"- **Function Calling:** Whether chain requires OpenAI Function Calling.\n",
|
||||
"- **Other Tools:** Other tools used in the chain.\n",
|
||||
"- **When to Use:** Our commentary on when to use.\n",
|
||||
"\n",
|
||||
"| Chain | Function Calling | Other Tools | When to Use |\n",
|
||||
"|------------------------------|--------------------|------------------------|-------------|\n",
|
||||
"| [APIChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.api.base.APIChain.html#langchain.chains.api.base.APIChain) | | Requests Wrapper | This chain uses an LLM to convert a query into an API request, then executes that request, gets back a response, and then passes that request to an LLM to respond |\n",
|
||||
"| [OpenAPIEndpointChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.api.openapi.chain.OpenAPIEndpointChain.html#langchain.chains.api.openapi.chain.OpenAPIEndpointChain) | | OpenAPI Spec | Similar to APIChain, this chain is designed to interact with APIs. The main difference is this is optimized for ease of use with OpenAPI endpoints |\n",
|
||||
"| [ConversationalRetrievalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html#langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain) | | Retriever |This chain can be used to have **conversations** with a document. It takes in a question and (optional) previous conversation history. If there is previous conversation history, it uses an LLM to rewrite the conversation into a query to send to a retriever (otherwise it just uses the newest user input). It then fetches those documents and passes them (along with the conversation) to an LLM to respond. |\n",
|
||||
"| [StuffDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.StuffDocumentsChain.html#langchain.chains.combine_documents.stuff.StuffDocumentsChain) | | |This chain takes a list of documents and formats them all into a prompt, then passes that prompt to an LLM. It passes ALL documents, so you should make sure it fits within the context window the LLM you are using. |\n",
|
||||
"| [ReduceDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.reduce.ReduceDocumentsChain.html#langchain.chains.combine_documents.reduce.ReduceDocumentsChain) | | |This chain combines documents by iterative reducing them. It groups documents into chunks (less than some context length) then passes them into an LLM. It then takes the responses and continues to do this until it can fit everything into one final LLM call. Useful when you have a lot of documents, you want to have the LLM run over all of them, and you can do in parallel. |\n",
|
||||
"| [MapReduceDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain.html#langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain) | | |This chain first passes each document through an LLM, then reduces them using the ReduceDocumentsChain. Useful in the same situations as ReduceDocumentsChain, but does an initial LLM call before trying to reduce the documents. |\n",
|
||||
"| [RefineDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.refine.RefineDocumentsChain.html#langchain.chains.combine_documents.refine.RefineDocumentsChain) | | |This chain collapses documents by generating an initial answer based on the first document and then looping over the remaining documents to *refine* its answer. This operates sequentially, so it cannot be parallelized. It is useful in similar situatations as MapReduceDocuments Chain, but for cases where you want to build up an answer by refining the previous answer (rather than parallelizing calls). | |\n",
|
||||
"| [ConversationalRetrievalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html#langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain) | | Retriever |This chain can be used to have **conversations** with a document. It takes in a question and (optional) previous conversation history. If there is a previous conversation history, it uses an LLM to rewrite the conversation into a query to send to a retriever (otherwise it just uses the newest user input). It then fetches those documents and passes them (along with the conversation) to an LLM to respond. |\n",
|
||||
"| [StuffDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.StuffDocumentsChain.html#langchain.chains.combine_documents.stuff.StuffDocumentsChain) | | |This chain takes a list of documents and formats them all into a prompt, then passes that prompt to an LLM. It passes ALL documents, so you should make sure it fits within the context window of the LLM you are using. |\n",
|
||||
"| [ReduceDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.reduce.ReduceDocumentsChain.html#langchain.chains.combine_documents.reduce.ReduceDocumentsChain) | | |This chain combines documents by iterative reducing them. It groups documents into chunks (less than some context length) and then passes them into an LLM. It then takes the responses and continues to do this until it can fit everything into one final LLM call. It is useful when you have a lot of documents, you want to have the LLM run over all of them, and you can do it in parallel. |\n",
|
||||
"| [MapReduceDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain.html#langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain) | | |This chain first passes each document through an LLM, then reduces them using the `ReduceDocumentsChain`. It is useful in the same situations as `ReduceDocumentsChain`, but does an initial LLM call before trying to reduce the documents. |\n",
|
||||
"| [RefineDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.refine.RefineDocumentsChain.html#langchain.chains.combine_documents.refine.RefineDocumentsChain) | | |This chain collapses documents by generating an initial answer based on the first document and then looping over the remaining documents to *refine* its answer. This operates sequentially, so it cannot be parallelized. It is useful in similar situations as MapReduceDocuments Chain, but for cases where you want to build up an answer by refining the previous answer (rather than parallelizing calls). | |\n",
|
||||
"| [MapRerankDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.map_rerank.MapRerankDocumentsChain.html#langchain.chains.combine_documents.map_rerank.MapRerankDocumentsChain) | | | This calls on LLM on each document, asking it to not only answer but also produce a score of how confident it is. The answer with the highest confidence is then returned. This is useful when you have a lot of documents, but only want to answer based on a single document, rather than trying to combine answers (like Refine and Reduce methods do).|\n",
|
||||
"| [ConstitutionalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.constitutional_ai.base.ConstitutionalChain.html#langchain.chains.constitutional_ai.base.ConstitutionalChain) | | |This chain answers, then attempts to refine its answer based on constitutional principles that are provided. Use this when you want to enforce that a chain's answer follows some principles. |\n",
|
||||
"| [LLMChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm.LLMChain.html#langchain.chains.llm.LLMChain) | | | |This chain simply combines a prompt with an LLM and an output parser. The recommended way to do this is just to use LCEL. |\n",
|
||||
"| [ElasticsearchDatabaseChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.elasticsearch_database.base.ElasticsearchDatabaseChain.html#langchain.chains.elasticsearch_database.base.ElasticsearchDatabaseChain) | | ElasticSearch Instance |This chain converts a natural language question to an ElasticSearch query, and then runs it, and then summarizes the response. This is useful for when you want to ask natural language questions of an Elastic Search database |\n",
|
||||
"| [ConstitutionalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.constitutional_ai.base.ConstitutionalChain.html#langchain.chains.constitutional_ai.base.ConstitutionalChain) | | |This chain answers, then attempts to refine its answer based on constitutional principles that are provided. Use this to enforce that a chain's answer follows some principles. |\n",
|
||||
"| [LLMChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm.LLMChain.html#langchain.chains.llm.LLMChain) | | | |This chain simply combines a prompt with an LLM and an output parser. The recommended way to do this is to use LCEL. |\n",
|
||||
"| [ElasticsearchDatabaseChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.elasticsearch_database.base.ElasticsearchDatabaseChain.html#langchain.chains.elasticsearch_database.base.ElasticsearchDatabaseChain) | | Elasticsearch Instance |This chain converts a natural language question to an `Elasticsearch` query, and then runs it, and then summarizes the response. This is useful for when you want to ask natural language questions of an `Elasticsearch` database |\n",
|
||||
"| [FlareChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.flare.base.FlareChain.html#langchain.chains.flare.base.FlareChain) | | |This implements [FLARE](https://arxiv.org/abs/2305.06983), an advanced retrieval technique. It is primarily meant as an exploratory advanced retrieval method. |\n",
|
||||
"| [ArangoGraphQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.arangodb.ArangoGraphQAChain.html#langchain.chains.graph_qa.arangodb.ArangoGraphQAChain) | |Arango Graph |This chain constructs an Arango query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[GraphCypherQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.cypher.GraphCypherQAChain.html#langchain.chains.graph_qa.cypher.GraphCypherQAChain) | |A graph that works with Cypher query language |This chain constructs an Cypher query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[GraphCypherQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.cypher.GraphCypherQAChain.html#langchain.chains.graph_qa.cypher.GraphCypherQAChain) | |A graph that works with Cypher query language |This chain constructs a Cypher query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[FalkorDBGraphQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.falkordb.FalkorDBQAChain.html#langchain.chains.graph_qa.falkordb.FalkorDBQAChain) | |Falkor Database | This chain constructs a FalkorDB query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[HugeGraphQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.hugegraph.HugeGraphQAChain.html#langchain.chains.graph_qa.hugegraph.HugeGraphQAChain) | |HugeGraph |This chain constructs an HugeGraph query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[KuzuQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.kuzu.KuzuQAChain.html#langchain.chains.graph_qa.kuzu.KuzuQAChain) | |Kuzu Graph |This chain constructs a Kuzu Graph query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[NebulaGraphQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.nebulagraph.NebulaGraphQAChain.html#langchain.chains.graph_qa.nebulagraph.NebulaGraphQAChain) | |Nebula Graph |This chain constructs a Nebula Graph query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[NeptuneOpenCypherQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.neptune_cypher.NeptuneOpenCypherQAChain.html#langchain.chains.graph_qa.neptune_cypher.NeptuneOpenCypherQAChain) | |Neptune Graph |This chain constructs an Neptune Graph query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[GraphSparqlChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.sparql.GraphSparqlQAChain.html#langchain.chains.graph_qa.sparql.GraphSparqlQAChain) | |Graph that works with SparQL |This chain constructs an SparQL query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[NeptuneOpenCypherQAChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.neptune_cypher.NeptuneOpenCypherQAChain.html#langchain.chains.graph_qa.neptune_cypher.NeptuneOpenCypherQAChain) | |Neptune Graph |This chain constructs a Neptune Graph query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[GraphSparqlChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.sparql.GraphSparqlQAChain.html#langchain.chains.graph_qa.sparql.GraphSparqlQAChain) | |Graph that works with SparQL |This chain constructs a SparQL query from natural language, executes that query against the graph, and then passes the results back to an LLM to respond. |\n",
|
||||
"|[LLMMath](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm_math.base.LLMMathChain.html#langchain.chains.llm_math.base.LLMMathChain) | | |This chain converts a user question to a math problem and then executes it (using [numexpr](https://github.com/pydata/numexpr)) |\n",
|
||||
"|[LLMCheckerChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm_checker.base.LLMCheckerChain.html#langchain.chains.llm_checker.base.LLMCheckerChain) | | |This chain uses a second LLM call to varify its initial answer. Use this when you to have an extra layer of validation on the initial LLM call. |\n",
|
||||
"|[LLMCheckerChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm_checker.base.LLMCheckerChain.html#langchain.chains.llm_checker.base.LLMCheckerChain) | | |This chain uses a second LLM call to verify its initial answer. Use this when you have an extra layer of validation on the initial LLM call. |\n",
|
||||
"|[LLMSummarizationChecker](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm_summarization_checker.base.LLMSummarizationCheckerChain.html#langchain.chains.llm_summarization_checker.base.LLMSummarizationCheckerChain) | | |This chain creates a summary using a sequence of LLM calls to make sure it is extra correct. Use this over the normal summarization chain when you are okay with multiple LLM calls (eg you care more about accuracy than speed/cost). |\n",
|
||||
"|[create_citation_fuzzy_match_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.citation_fuzzy_match.create_citation_fuzzy_match_chain.html#langchain.chains.openai_functions.citation_fuzzy_match.create_citation_fuzzy_match_chain) |✅ | |Uses OpenAI function calling to answer questions and cite its sources. |\n",
|
||||
"|[create_extraction_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.extraction.create_extraction_chain.html#langchain.chains.openai_functions.extraction.create_extraction_chain) | ✅ | |Uses OpenAI Function calling to extract information from text. |\n",
|
||||
@@ -131,7 +109,7 @@
|
||||
"|[get_openapi_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.openapi.get_openapi_chain.html#langchain.chains.openai_functions.openapi.get_openapi_chain) | ✅ |OpenAPI Spec |Uses OpenAI function calling to query an OpenAPI. |\n",
|
||||
"|[create_qa_with_structure_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.qa_with_structure.create_qa_with_structure_chain.html#langchain.chains.openai_functions.qa_with_structure.create_qa_with_structure_chain) | ✅ | |Uses OpenAI function calling to do question answering over text and respond in a specific format. |\n",
|
||||
"|[create_qa_with_sources_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.qa_with_structure.create_qa_with_sources_chain.html#langchain.chains.openai_functions.qa_with_structure.create_qa_with_sources_chain) | ✅ | |Uses OpenAI function calling to answer questions with citations. |\n",
|
||||
"|[QAGenerationChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_generation.base.QAGenerationChain.html#langchain.chains.qa_generation.base.QAGenerationChain) | | |Creates both questions and answers from documents. Can be used to generate question/answer pairs for evaluation of retrieval projects. | \n",
|
||||
"|[QAGenerationChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_generation.base.QAGenerationChain.html#langchain.chains.qa_generation.base.QAGenerationChain) | | |Creates both questions and answers from documents. Used to generate question/answer pairs for evaluation of retrieval projects. | \n",
|
||||
"|[RetrievalQAWithSourcesChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_with_sources.retrieval.RetrievalQAWithSourcesChain.html#langchain.chains.qa_with_sources.retrieval.RetrievalQAWithSourcesChain) | | Retriever |Does question answering over retrieved documents, and cites it sources. Use this when you want the answer response to have sources in the text response. Use this over `load_qa_with_sources_chain` when you want to use a retriever to fetch the relevant document as part of the chain (rather than pass them in).| \n",
|
||||
"|[load_qa_with_sources_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_with_sources.loading.load_qa_with_sources_chain.html#langchain.chains.qa_with_sources.loading.load_qa_with_sources_chain) | |Retriever |Does question answering over documents you pass in, and cites it sources. Use this when you want the answer response to have sources in the text response. Use this over RetrievalQAWithSources when you want to pass in the documents directly (rather than rely on a retriever to get them).| \n",
|
||||
"|[RetrievalQA](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html#langchain.chains.retrieval_qa.base.RetrievalQA) | |Retriever |This chain first does a retrieval step to fetch relevant documents, then passes those documents into an LLM to generate a response.|\n",
|
||||
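"\n",
"As a quick illustration of the LCEL pattern recommended above in place of `LLMChain`, here is a minimal sketch; the prompt text and model choice are placeholders, not part of the table above:\n",
"\n",
"```python\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# prompt | model | output parser is the LCEL equivalent of LLMChain\n",
"chain = (\n",
"    ChatPromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
"    | ChatOpenAI()\n",
"    | StrOutputParser()\n",
")\n",
"chain.invoke({\"topic\": \"bears\"})\n",
"```\n",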
@@ -168,7 +146,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
392
docs/docs/use_cases/question_answering/hybrid.ipynb
Normal file
@@ -0,0 +1,392 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14d3fd06",
|
||||
"metadata": {
|
||||
"id": "14d3fd06"
|
||||
},
|
||||
"source": [
|
||||
"# Hybrid Search\n",
|
||||
"\n",
|
||||
"The standard search in LangChain is done by vector similarity. However, a number of vectorstores implementations (Astra DB, ElasticSearch, Neo4J, AzureSearch, ...) also support more advanced search combining vector similarity search and other search techniques (full-text, BM25, and so on). This is generally referred to as \"Hybrid\" search.\n",
|
||||
"\n",
|
||||
"**Step 1: Make sure the vectorstore you are using supports hybrid search**\n",
|
||||
"\n",
|
||||
"At the moment, there is no unified way to perform hybrid search in LangChain. Each vectorstore may have their own way to do it. This is generally exposed as a keyword argument that is passed in during `similarity_search`. By reading the documentation or source code, figure out whether the vectorstore you are using supports hybrid search, and, if so, how to use it.\n",
|
||||
"\n",
|
||||
"**Step 2: Add that parameter as a configurable field for the chain**\n",
|
||||
"\n",
|
||||
"This will let you easily call the chain and configure any relevant flags at runtime. See [this documentation](/docs/expression_language/primitives/configure) for more information on configuration.\n",
|
||||
"\n",
|
||||
"**Step 3: Call the chain with that configurable field**\n",
|
||||
"\n",
|
||||
"Now, at runtime you can call this chain with configurable field.\n",
|
||||
"\n",
|
||||
"## Code Example\n",
|
||||
"\n",
|
||||
"Let's see a concrete example of what this looks like in code. We will use the Cassandra/CQL interface of Astra DB for this example.\n",
|
||||
"\n",
|
||||
"Install the following Python package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2efe35eea197769",
|
||||
"metadata": {
|
||||
"id": "c2efe35eea197769",
|
||||
"outputId": "527275b4-076e-4b22-945c-e41a59188116"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.1.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4ef96d44341cd84",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"id": "b4ef96d44341cd84"
|
||||
},
|
||||
"source": [
|
||||
"Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).\n",
|
||||
"\n",
|
||||
"Initialize cassio:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb2cef097277c32e",
|
||||
"metadata": {
|
||||
"id": "cb2cef097277c32e",
|
||||
"outputId": "4c3d05a0-319a-44a0-8ec3-0a9c78453132"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cassio\n",
|
||||
"\n",
|
||||
"cassio.init(\n",
|
||||
" database_id=\"Your database ID\",\n",
|
||||
" token=\"Your application token\",\n",
|
||||
" keyspace=\"Your key space\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1e51444877f45eb",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"id": "e1e51444877f45eb"
|
||||
},
|
||||
"source": [
|
||||
"Create the Cassandra VectorStore with a standard [index analyzer](https://docs.datastax.com/en/astra/astra-db-vector/cql/use-analyzers-with-cql.html). The index analyzer is needed to enable term matching."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7345de3c",
|
||||
"metadata": {
|
||||
"id": "7345de3c",
|
||||
"outputId": "d38bcee0-0134-4ac6-8d35-afcce282481b"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cassio.table.cql import STANDARD_ANALYZER\n",
|
||||
"from langchain_community.vectorstores import Cassandra\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectorstore = Cassandra(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" table_name=\"test_hybrid\",\n",
|
||||
" body_index_options=[STANDARD_ANALYZER],\n",
|
||||
" session=None,\n",
|
||||
" keyspace=None,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vectorstore.add_texts(\n",
|
||||
" [\n",
|
||||
" \"In 2023, I visited Paris\",\n",
|
||||
" \"In 2022, I visited New York\",\n",
|
||||
" \"In 2021, I visited New Orleans\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73887f23bbab978c",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"id": "73887f23bbab978c"
|
||||
},
|
||||
"source": [
|
||||
"If we do a standard similarity search, we get all the documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3c2a39fa",
|
||||
"metadata": {
|
||||
"id": "3c2a39fa",
|
||||
"outputId": "5290085b-896c-4c81-9b40-c315331b7009"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='In 2022, I visited New York'),\n",
|
||||
"Document(page_content='In 2023, I visited Paris'),\n",
|
||||
"Document(page_content='In 2021, I visited New Orleans')]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.as_retriever().invoke(\"What city did I visit last?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78d4c3c79e67d8c3",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"id": "78d4c3c79e67d8c3"
|
||||
},
|
||||
"source": [
|
||||
"The Astra DB vectorstore `body_search` argument can be used to filter the search on the term `new`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "56393baa",
|
||||
"metadata": {
|
||||
"id": "56393baa",
|
||||
"outputId": "d1c939f3-342f-4df4-94a3-d25429b5a25e"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='In 2022, I visited New York'),\n",
|
||||
"Document(page_content='In 2021, I visited New Orleans')]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.as_retriever(search_kwargs={\"body_search\": \"new\"}).invoke(\n",
|
||||
" \"What city did I visit last?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ae97ed",
|
||||
"metadata": {
|
||||
"id": "88ae97ed"
|
||||
},
|
||||
"source": [
|
||||
"We can now create the chain that we will use to do question-answering over"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "62707b4f",
|
||||
"metadata": {
|
||||
"id": "62707b4f"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import (\n",
|
||||
" ConfigurableField,\n",
|
||||
" RunnablePassthrough,\n",
|
||||
")\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b6778ffa",
|
||||
"metadata": {
|
||||
"id": "b6778ffa"
|
||||
},
|
||||
"source": [
|
||||
"This is basic question-answering chain set up."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "44a865f6",
|
||||
"metadata": {
|
||||
"id": "44a865f6"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Answer the question based only on the following context:\n",
|
||||
"{context}\n",
|
||||
"Question: {question}\n",
|
||||
"\"\"\"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"retriever = vectorstore.as_retriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72125166",
|
||||
"metadata": {
|
||||
"id": "72125166"
|
||||
},
|
||||
"source": [
|
||||
"Here we mark the retriever as having a configurable field. All vectorstore retrievers have `search_kwargs` as a field. This is just a dictionary, with vectorstore specific fields"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "babbadff",
|
||||
"metadata": {
|
||||
"id": "babbadff"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"configurable_retriever = retriever.configurable_fields(\n",
|
||||
" search_kwargs=ConfigurableField(\n",
|
||||
" id=\"search_kwargs\",\n",
|
||||
" name=\"Search Kwargs\",\n",
|
||||
" description=\"The search kwargs to use\",\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2d481b70",
|
||||
"metadata": {
|
||||
"id": "2d481b70"
|
||||
},
|
||||
"source": [
|
||||
"We can now create the chain using our configurable retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "210b0446",
|
||||
"metadata": {
|
||||
"id": "210b0446"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = (\n",
|
||||
" {\"context\": configurable_retriever, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | model\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a38037b2",
|
||||
"metadata": {
|
||||
"id": "a38037b2",
|
||||
"outputId": "1ea14996-5965-4a5e-9678-b9c35ce5c6de"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Paris"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"What city did I visit last?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f6458c3",
|
||||
"metadata": {
|
||||
"id": "7f6458c3"
|
||||
},
|
||||
"source": [
|
||||
"We can now invoke the chain with configurable options. `search_kwargs` is the id of the configurable field. The value is the search kwargs to use for Astra DB."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9gYLqBTH8BFz",
|
||||
"metadata": {
|
||||
"id": "9gYLqBTH8BFz",
|
||||
"outputId": "4358a2e6-f306-48f1-dd5c-781ac8a33e89"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"New York"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\n",
|
||||
" \"What city did I visit last?\",\n",
|
||||
" config={\"configurable\": {\"search_kwargs\": {\"body_search\": \"new\"}}},\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -80,6 +80,7 @@
|
||||
"- [Returning sources](/docs/use_cases/question_answering/sources): How to return the source documents used in a particular generation.\n",
|
||||
"- [Streaming](/docs/use_cases/question_answering/streaming): How to stream final answers as well as intermediate steps.\n",
|
||||
"- [Adding chat history](/docs/use_cases/question_answering/chat_history): How to add chat history to a Q&A app.\n",
|
||||
"- [Hybrid search](/docs/use_cases/question_answering/hybrid): How to do hybrid search.\n",
|
||||
"- [Per-user retrieval](/docs/use_cases/question_answering/per_user): How to do retrieval when each user has their own private data.\n",
|
||||
"- [Using agents](/docs/use_cases/question_answering/conversational_retrieval_agents): How to use agents for Q&A.\n",
|
||||
"- [Using local models](/docs/use_cases/question_answering/local_retrieval_qa): How to use local models for Q&A."
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
label: "More",
|
||||
collapsed: true,
|
||||
items: [
|
||||
"use_cases/question_answering/hybrid",
|
||||
"use_cases/question_answering/per_user",
|
||||
"use_cases/question_answering/conversational_retrieval_agents",
|
||||
"use_cases/question_answering/local_retrieval_qa",
|
||||
|
||||
@@ -193,7 +193,7 @@ def create_sql_agent(
|
||||
]
|
||||
prompt = ChatPromptTemplate.from_messages(messages)
|
||||
agent = RunnableAgent(
|
||||
runnable=create_openai_functions_agent(llm, tools, prompt),
|
||||
input_keys_arg=["input"],
|
||||
return_keys_arg=["output"],
|
||||
**kwargs,
|
||||
@@ -208,10 +208,10 @@ def create_sql_agent(
|
||||
]
|
||||
prompt = ChatPromptTemplate.from_messages(messages)
|
||||
if agent_type == "openai-tools":
|
||||
runnable = create_openai_tools_agent(llm, tools, prompt)
|
||||
else:
|
||||
runnable = create_tool_calling_agent(llm, tools, prompt)
|
||||
agent = RunnableMultiActionAgent(
|
||||
runnable=runnable,
|
||||
input_keys_arg=["input"],
|
||||
return_keys_arg=["output"],
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
from langchain.chains.ernie_functions.base import (
|
||||
convert_to_ernie_function,
|
||||
create_ernie_fn_chain,
|
||||
create_ernie_fn_runnable,
|
||||
create_structured_output_chain,
|
||||
create_structured_output_runnable,
|
||||
get_ernie_output_parser,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"convert_to_ernie_function",
|
||||
"create_structured_output_chain",
|
||||
"create_ernie_fn_chain",
|
||||
"create_structured_output_runnable",
|
||||
"create_ernie_fn_runnable",
|
||||
"get_ernie_output_parser",
|
||||
]
|
||||
@@ -1,551 +0,0 @@
|
||||
"""Methods for creating chains that use Ernie function-calling APIs."""
|
||||
import inspect
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Type,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.output_parsers import (
|
||||
BaseGenerationOutputParser,
|
||||
BaseLLMOutputParser,
|
||||
BaseOutputParser,
|
||||
)
|
||||
from langchain_core.prompts import BasePromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.runnables import Runnable
|
||||
|
||||
from langchain_community.output_parsers.ernie_functions import (
|
||||
JsonOutputFunctionsParser,
|
||||
PydanticAttrOutputFunctionsParser,
|
||||
PydanticOutputFunctionsParser,
|
||||
)
|
||||
from langchain_community.utils.ernie_functions import convert_pydantic_to_ernie_function
|
||||
|
||||
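# Minimal mapping from Python primitive type annotations to JSON Schema types;
# note that both int and float map to "number".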
PYTHON_TO_JSON_TYPES = {
|
||||
"str": "string",
|
||||
"int": "number",
|
||||
"float": "number",
|
||||
"bool": "boolean",
|
||||
}
|
||||
|
||||
|
||||
def _get_python_function_name(function: Callable) -> str:
|
||||
"""Get the name of a Python function."""
|
||||
return function.__name__
|
||||
|
||||
|
||||
def _parse_python_function_docstring(function: Callable) -> Tuple[str, dict]:
|
||||
"""Parse the function and argument descriptions from the docstring of a function.
|
||||
|
||||
Assumes the function docstring follows Google Python style guide.
|
||||
"""
|
||||
docstring = inspect.getdoc(function)
|
||||
if docstring:
|
||||
docstring_blocks = docstring.split("\n\n")
|
||||
descriptors = []
|
||||
args_block = None
|
||||
past_descriptors = False
|
||||
for block in docstring_blocks:
|
||||
if block.startswith("Args:"):
|
||||
args_block = block
|
||||
break
|
||||
elif block.startswith("Returns:") or block.startswith("Example:"):
|
||||
# Don't break in case Args come after
|
||||
past_descriptors = True
|
||||
elif not past_descriptors:
|
||||
descriptors.append(block)
|
||||
else:
|
||||
continue
|
||||
description = " ".join(descriptors)
|
||||
else:
|
||||
description = ""
|
||||
args_block = None
|
||||
arg_descriptions = {}
|
||||
if args_block:
|
||||
arg = None
|
||||
for line in args_block.split("\n")[1:]:
|
||||
if ":" in line:
|
||||
arg, desc = line.split(":")
|
||||
arg_descriptions[arg.strip()] = desc.strip()
|
||||
elif arg:
|
||||
arg_descriptions[arg.strip()] += " " + line.strip()
|
||||
return description, arg_descriptions
|
||||
|
||||
|
||||
def _get_python_function_arguments(function: Callable, arg_descriptions: dict) -> dict:
|
||||
"""Get JsonSchema describing a Python functions arguments.
|
||||
|
||||
Assumes all function arguments are of primitive types (int, float, str, bool) or
|
||||
are subclasses of pydantic.BaseModel.
|
||||
"""
|
||||
properties = {}
|
||||
annotations = inspect.getfullargspec(function).annotations
|
||||
for arg, arg_type in annotations.items():
|
||||
if arg == "return":
|
||||
continue
|
||||
if isinstance(arg_type, type) and issubclass(arg_type, BaseModel):
|
||||
# Mypy error:
|
||||
# "type" has no attribute "schema"
|
||||
properties[arg] = arg_type.schema() # type: ignore[attr-defined]
|
||||
elif arg_type.__name__ in PYTHON_TO_JSON_TYPES:
|
||||
properties[arg] = {"type": PYTHON_TO_JSON_TYPES[arg_type.__name__]}
|
||||
if arg in arg_descriptions:
|
||||
if arg not in properties:
|
||||
properties[arg] = {}
|
||||
properties[arg]["description"] = arg_descriptions[arg]
|
||||
return properties
|
||||
|
||||
|
||||
def _get_python_function_required_args(function: Callable) -> List[str]:
|
||||
"""Get the required arguments for a Python function."""
|
||||
spec = inspect.getfullargspec(function)
|
||||
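# Defaults align with the tail of spec.args, so every positional arg before
# the last len(spec.defaults) entries has no default and is therefore required.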
required = spec.args[: -len(spec.defaults)] if spec.defaults else spec.args
|
||||
required += [k for k in spec.kwonlyargs if k not in (spec.kwonlydefaults or {})]
|
||||
|
||||
is_class = type(function) is type
|
||||
if is_class and required[0] == "self":
|
||||
required = required[1:]
|
||||
return required
|
||||
|
||||
|
||||
def convert_python_function_to_ernie_function(
|
||||
function: Callable,
|
||||
) -> Dict[str, Any]:
|
||||
"""Convert a Python function to an Ernie function-calling API compatible dict.
|
||||
|
||||
Assumes the Python function has type hints and a docstring with a description. If
|
||||
the docstring has Google Python style argument descriptions, these will be
|
||||
included as well.
|
||||
"""
|
||||
description, arg_descriptions = _parse_python_function_docstring(function)
|
||||
return {
|
||||
"name": _get_python_function_name(function),
|
||||
"description": description,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": _get_python_function_arguments(function, arg_descriptions),
|
||||
"required": _get_python_function_required_args(function),
|
||||
},
|
||||
}
|
||||
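# Illustrative (hypothetical) example of the conversion: a function
#     def add(a: int, b: int) -> int:
#         """Add two integers.
#
#         Args:
#             a: First addend.
#             b: Second addend.
#         """
# becomes
#     {"name": "add", "description": "Add two integers.",
#      "parameters": {"type": "object",
#                     "properties": {"a": {"type": "number", "description": "First addend."},
#                                    "b": {"type": "number", "description": "Second addend."}},
#                     "required": ["a", "b"]}}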
|
||||
|
||||
def convert_to_ernie_function(
|
||||
function: Union[Dict[str, Any], Type[BaseModel], Callable],
|
||||
) -> Dict[str, Any]:
|
||||
"""Convert a raw function/class to an Ernie function.
|
||||
|
||||
Args:
|
||||
function: Either a dictionary, a pydantic.BaseModel class, or a Python function.
|
||||
If a dictionary is passed in, it is assumed to already be a valid Ernie
|
||||
function.
|
||||
|
||||
Returns:
|
||||
A dict version of the passed in function which is compatible with the
|
||||
Ernie function-calling API.
|
||||
"""
|
||||
if isinstance(function, dict):
|
||||
return function
|
||||
elif isinstance(function, type) and issubclass(function, BaseModel):
|
||||
return cast(Dict, convert_pydantic_to_ernie_function(function))
|
||||
elif callable(function):
|
||||
return convert_python_function_to_ernie_function(function)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported function type {type(function)}. Functions must be passed in"
|
||||
f" as Dict, pydantic.BaseModel, or Callable."
|
||||
)
|
||||
|
||||
|
||||
def get_ernie_output_parser(
|
||||
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
|
||||
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
|
||||
"""Get the appropriate function output parser given the user functions.
|
||||
|
||||
Args:
|
||||
functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
|
||||
or a Python function. If a dictionary is passed in, it is assumed to
|
||||
already be a valid Ernie function.
|
||||
|
||||
Returns:
|
||||
A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
|
||||
a JsonOutputFunctionsParser. If there's only one function and it is
|
||||
not a Pydantic class, then the output parser will automatically extract
|
||||
only the function arguments and not the function name.
|
||||
"""
|
||||
function_names = [convert_to_ernie_function(f)["name"] for f in functions]
|
||||
if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
|
||||
if len(functions) > 1:
|
||||
pydantic_schema: Union[Dict, Type[BaseModel]] = {
|
||||
name: fn for name, fn in zip(function_names, functions)
|
||||
}
|
||||
else:
|
||||
pydantic_schema = functions[0]
|
||||
output_parser: Union[
|
||||
BaseOutputParser, BaseGenerationOutputParser
|
||||
] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
|
||||
else:
|
||||
output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
|
||||
return output_parser
|
||||
|
||||
|
||||
def create_ernie_fn_runnable(
|
||||
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
|
||||
llm: Runnable,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Runnable:
|
||||
"""Create a runnable sequence that uses Ernie functions.
|
||||
|
||||
Args:
|
||||
functions: A sequence of either dictionaries, pydantic.BaseModel classes, or
|
||||
Python functions. If dictionaries are passed in, they are assumed to
|
||||
already be valid Ernie functions. If only a single
|
||||
function is passed in, then it will be enforced that the model use that
|
||||
function. pydantic.BaseModels and Python functions should have docstrings
|
||||
describing what the function does. For best results, pydantic.BaseModels
|
||||
should have descriptions of the parameters and Python functions should have
|
||||
Google Python style args descriptions in the docstring. Additionally,
|
||||
Python functions should only use primitive types (str, int, float, bool) or
|
||||
pydantic.BaseModels for arguments.
|
||||
llm: Language model to use, assumed to support the Ernie function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON. If multiple functions are
|
||||
passed in and they are not pydantic.BaseModels, the chain output will
|
||||
include both the name of the function that was returned and the arguments
|
||||
to pass to the function.
|
||||
|
||||
Returns:
|
||||
A runnable sequence that will pass in the given functions to the model when run.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.ernie_functions import create_ernie_fn_chain
|
||||
from langchain_community.chat_models import ErnieBotChat
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
|
||||
|
||||
class RecordPerson(BaseModel):
|
||||
\"\"\"Record some identifying information about a person.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The person's name")
|
||||
age: int = Field(..., description="The person's age")
|
||||
fav_food: Optional[str] = Field(None, description="The person's favorite food")
|
||||
|
||||
|
||||
class RecordDog(BaseModel):
|
||||
\"\"\"Record some identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
|
||||
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("user", "Make calls to the relevant function to record the entities in the following input: {input}"),
|
||||
("assistant", "OK!"),
|
||||
("user", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_ernie_fn_runnable([RecordPerson, RecordDog], llm, prompt)
|
||||
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
|
||||
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if not functions:
|
||||
raise ValueError("Need to pass in at least one function. Received zero.")
|
||||
ernie_functions = [convert_to_ernie_function(f) for f in functions]
|
||||
llm_kwargs: Dict[str, Any] = {"functions": ernie_functions, **kwargs}
|
||||
if len(ernie_functions) == 1:
|
||||
llm_kwargs["function_call"] = {"name": ernie_functions[0]["name"]}
|
||||
output_parser = output_parser or get_ernie_output_parser(functions)
|
||||
return prompt | llm.bind(**llm_kwargs) | output_parser
|
||||
|
||||
|
||||
def create_structured_output_runnable(
|
||||
output_schema: Union[Dict[str, Any], Type[BaseModel]],
|
||||
llm: Runnable,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Runnable:
|
||||
"""Create a runnable that uses an Ernie function to get a structured output.
|
||||
|
||||
Args:
|
||||
output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
|
||||
is passed in, it's assumed to already be a valid JsonSchema.
|
||||
For best results, pydantic.BaseModels should have docstrings describing what
|
||||
the schema represents and descriptions for the parameters.
|
||||
llm: Language model to use, assumed to support the Ernie function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON.
|
||||
|
||||
Returns:
|
||||
A runnable sequence that will pass the given function to the model when run.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.ernie_functions import create_structured_output_chain
|
||||
from langchain_community.chat_models import ErnieBotChat
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
|
||||
class Dog(BaseModel):
|
||||
\"\"\"Identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("user", "Use the given format to extract information from the following input: {input}"),
|
||||
("assistant", "OK!"),
|
||||
("user", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_structured_output_chain(Dog, llm, prompt)
|
||||
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
|
||||
# -> Dog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if isinstance(output_schema, dict):
|
||||
function: Any = {
|
||||
"name": "output_formatter",
|
||||
"description": (
|
||||
"Output formatter. Should always be used to format your response to the"
|
||||
" user."
|
||||
),
|
||||
"parameters": output_schema,
|
||||
}
|
||||
else:
|
||||
|
||||
class _OutputFormatter(BaseModel):
|
||||
"""Output formatter. Should always be used to format your response to the user.""" # noqa: E501
|
||||
|
||||
output: output_schema # type: ignore
|
||||
|
||||
function = _OutputFormatter
|
||||
output_parser = output_parser or PydanticAttrOutputFunctionsParser(
|
||||
pydantic_schema=_OutputFormatter, attr_name="output"
|
||||
)
|
||||
return create_ernie_fn_runnable(
|
||||
[function],
|
||||
llm,
|
||||
prompt,
|
||||
output_parser=output_parser,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
""" --- Legacy --- """
|
||||
|
||||
|
||||
def create_ernie_fn_chain(
|
||||
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
|
||||
llm: BaseLanguageModel,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
output_key: str = "function",
|
||||
output_parser: Optional[BaseLLMOutputParser] = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMChain:
|
||||
"""[Legacy] Create an LLM chain that uses Ernie functions.
|
||||
|
||||
Args:
|
||||
functions: A sequence of either dictionaries, pydantic.BaseModel classes, or
|
||||
Python functions. If dictionaries are passed in, they are assumed to
|
||||
already be valid Ernie functions. If only a single
|
||||
function is passed in, then it will be enforced that the model use that
|
||||
function. pydantic.BaseModels and Python functions should have docstrings
|
||||
describing what the function does. For best results, pydantic.BaseModels
|
||||
should have descriptions of the parameters and Python functions should have
|
||||
Google Python style args descriptions in the docstring. Additionally,
|
||||
Python functions should only use primitive types (str, int, float, bool) or
|
||||
pydantic.BaseModels for arguments.
|
||||
llm: Language model to use, assumed to support the Ernie function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
output_key: The key to use when returning the output in LLMChain.__call__.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON. If multiple functions are
|
||||
passed in and they are not pydantic.BaseModels, the chain output will
|
||||
include both the name of the function that was returned and the arguments
|
||||
to pass to the function.
|
||||
|
||||
Returns:
|
||||
An LLMChain that will pass in the given functions to the model when run.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.ernie_functions import create_ernie_fn_chain
|
||||
from langchain_community.chat_models import ErnieBotChat
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
|
||||
|
||||
class RecordPerson(BaseModel):
|
||||
\"\"\"Record some identifying information about a person.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The person's name")
|
||||
age: int = Field(..., description="The person's age")
|
||||
fav_food: Optional[str] = Field(None, description="The person's favorite food")
|
||||
|
||||
|
||||
class RecordDog(BaseModel):
|
||||
\"\"\"Record some identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
|
||||
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("user", "Make calls to the relevant function to record the entities in the following input: {input}"),
|
||||
("assistant", "OK!"),
|
||||
("user", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_ernie_fn_chain([RecordPerson, RecordDog], llm, prompt)
|
||||
chain.run("Harry was a chubby brown beagle who loved chicken")
|
||||
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if not functions:
|
||||
raise ValueError("Need to pass in at least one function. Received zero.")
|
||||
ernie_functions = [convert_to_ernie_function(f) for f in functions]
|
||||
output_parser = output_parser or get_ernie_output_parser(functions)
|
||||
llm_kwargs: Dict[str, Any] = {
|
||||
"functions": ernie_functions,
|
||||
}
|
||||
if len(ernie_functions) == 1:
|
||||
llm_kwargs["function_call"] = {"name": ernie_functions[0]["name"]}
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
output_parser=output_parser,
|
||||
llm_kwargs=llm_kwargs,
|
||||
output_key=output_key,
|
||||
**kwargs,
|
||||
)
|
||||
return llm_chain
|
||||
|
||||
|
||||
def create_structured_output_chain(
|
||||
output_schema: Union[Dict[str, Any], Type[BaseModel]],
|
||||
llm: BaseLanguageModel,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
output_key: str = "function",
|
||||
output_parser: Optional[BaseLLMOutputParser] = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMChain:
|
||||
"""[Legacy] Create an LLMChain that uses an Ernie function to get a structured output.
|
||||
|
||||
Args:
|
||||
output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
|
||||
is passed in, it's assumed to already be a valid JsonSchema.
|
||||
For best results, pydantic.BaseModels should have docstrings describing what
|
||||
the schema represents and descriptions for the parameters.
|
||||
llm: Language model to use, assumed to support the Ernie function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
output_key: The key to use when returning the output in LLMChain.__call__.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON.
|
||||
|
||||
Returns:
|
||||
An LLMChain that will pass the given function to the model.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.ernie_functions import create_structured_output_chain
|
||||
from langchain_community.chat_models import ErnieBotChat
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
|
||||
class Dog(BaseModel):
|
||||
\"\"\"Identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("user", "Use the given format to extract information from the following input: {input}"),
|
||||
("assistant", "OK!"),
|
||||
("user", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_structured_output_chain(Dog, llm, prompt)
|
||||
chain.run("Harry was a chubby brown beagle who loved chicken")
|
||||
# -> Dog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if isinstance(output_schema, dict):
|
||||
function: Any = {
|
||||
"name": "output_formatter",
|
||||
"description": (
|
||||
"Output formatter. Should always be used to format your response to the"
|
||||
" user."
|
||||
),
|
||||
"parameters": output_schema,
|
||||
}
|
||||
else:
|
||||
|
||||
class _OutputFormatter(BaseModel):
|
||||
"""Output formatter. Should always be used to format your response to the user.""" # noqa: E501
|
||||
|
||||
output: output_schema # type: ignore
|
||||
|
||||
function = _OutputFormatter
|
||||
output_parser = output_parser or PydanticAttrOutputFunctionsParser(
|
||||
pydantic_schema=_OutputFormatter, attr_name="output"
|
||||
)
|
||||
return create_ernie_fn_chain(
|
||||
[function],
|
||||
llm,
|
||||
prompt,
|
||||
output_key=output_key,
|
||||
output_parser=output_parser,
|
||||
**kwargs,
|
||||
)
|
||||
@@ -1 +0,0 @@
|
||||
"""Question answering over a knowledge graph."""
|
||||
@@ -1,241 +0,0 @@
|
||||
"""Question answering over a graph."""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain_core.callbacks import CallbackManagerForChainRun
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.prompts import BasePromptTemplate
|
||||
from langchain_core.pydantic_v1 import Field
|
||||
|
||||
from langchain_community.chains.graph_qa.prompts import (
|
||||
AQL_FIX_PROMPT,
|
||||
AQL_GENERATION_PROMPT,
|
||||
AQL_QA_PROMPT,
|
||||
)
|
||||
from langchain_community.graphs.arangodb_graph import ArangoGraph
|
||||
|
||||
|
||||
class ArangoGraphQAChain(Chain):
|
||||
"""Chain for question-answering against a graph by generating AQL statements.
|
||||
|
||||
*Security note*: Make sure that the database connection uses credentials
|
||||
that are narrowly-scoped to only include necessary permissions.
|
||||
Failure to do so may result in data corruption or loss, since the calling
|
||||
code may attempt commands that would result in deletion, mutation
|
||||
of data if appropriately prompted or reading sensitive data if such
|
||||
data is present in the database.
|
||||
The best way to guard against such negative outcomes is to (as appropriate)
|
||||
limit the permissions granted to the credentials used with this tool.
|
||||
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
"""
|
||||
|
||||
graph: ArangoGraph = Field(exclude=True)
|
||||
aql_generation_chain: LLMChain
|
||||
aql_fix_chain: LLMChain
|
||||
qa_chain: LLMChain
|
||||
input_key: str = "query" #: :meta private:
|
||||
output_key: str = "result" #: :meta private:
|
||||
|
||||
# Specifies the maximum number of AQL Query Results to return
|
||||
top_k: int = 10
|
||||
|
||||
# Specifies the set of AQL Query Examples that promote few-shot learning
|
||||
aql_examples: str = ""
|
||||
|
||||
# Specify whether to return the AQL Query in the output dictionary
|
||||
return_aql_query: bool = False
|
||||
|
||||
# Specify whether to return the AQL JSON Result in the output dictionary
|
||||
return_aql_result: bool = False
|
||||
|
||||
# Specify the maximum number of AQL Generation attempts that should be made
|
||||
max_aql_generation_attempts: int = 3
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
return [self.output_key]
|
||||
|
||||
@property
|
||||
def _chain_type(self) -> str:
|
||||
return "graph_aql_chain"
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
*,
|
||||
qa_prompt: BasePromptTemplate = AQL_QA_PROMPT,
|
||||
aql_generation_prompt: BasePromptTemplate = AQL_GENERATION_PROMPT,
|
||||
aql_fix_prompt: BasePromptTemplate = AQL_FIX_PROMPT,
|
||||
**kwargs: Any,
|
||||
) -> ArangoGraphQAChain:
|
||||
"""Initialize from LLM."""
|
||||
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
|
||||
aql_generation_chain = LLMChain(llm=llm, prompt=aql_generation_prompt)
|
||||
aql_fix_chain = LLMChain(llm=llm, prompt=aql_fix_prompt)
|
||||
|
||||
return cls(
|
||||
qa_chain=qa_chain,
|
||||
aql_generation_chain=aql_generation_chain,
|
||||
aql_fix_chain=aql_fix_chain,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate an AQL statement from user input, use it to retrieve a response
|
||||
from an ArangoDB Database instance, and respond to the user input
|
||||
in natural language.
|
||||
|
||||
Users can modify the following ArangoGraphQAChain Class Variables:
|
||||
|
||||
:var top_k: The maximum number of AQL Query Results to return
|
||||
:type top_k: int
|
||||
|
||||
:var aql_examples: A set of AQL Query Examples that are passed to
|
||||
the AQL Generation Prompt Template to promote few-shot learning.
|
||||
Defaults to an empty string.
|
||||
:type aql_examples: str
|
||||
|
||||
:var return_aql_query: Whether to return the AQL Query in the
|
||||
output dictionary. Defaults to False.
|
||||
:type return_aql_query: bool
|
||||
|
||||
:var return_aql_result: Whether to return the AQL JSON result in the
|
||||
output dictionary. Defaults to False.
|
||||
:type return_aql_result: bool
|
||||
|
||||
:var max_aql_generation_attempts: The maximum number of AQL
|
||||
Generation attempts to be made prior to raising the last
|
||||
AQL Query Execution Error. Defaults to 3.
|
||||
:type max_aql_generation_attempts: int
|
||||
"""
|
||||
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
|
||||
callbacks = _run_manager.get_child()
|
||||
user_input = inputs[self.input_key]
|
||||
|
||||
#########################
|
||||
# Generate AQL Query #
|
||||
aql_generation_output = self.aql_generation_chain.run(
|
||||
{
|
||||
"adb_schema": self.graph.schema,
|
||||
"aql_examples": self.aql_examples,
|
||||
"user_input": user_input,
|
||||
},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
#########################
|
||||
|
||||
aql_query = ""
|
||||
aql_error = ""
|
||||
aql_result = None
|
||||
aql_generation_attempt = 1
|
||||
|
||||
while (
|
||||
aql_result is None
|
||||
and aql_generation_attempt < self.max_aql_generation_attempts + 1
|
||||
):
|
||||
#####################
|
||||
# Extract AQL Query #
|
||||
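# Take the first fenced code block from the LLM output; the "aql" language
# tag is optional and matched case-insensitively.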
pattern = r"```(?i:aql)?(.*?)```"
|
||||
matches = re.findall(pattern, aql_generation_output, re.DOTALL)
|
||||
if not matches:
|
||||
_run_manager.on_text(
|
||||
"Invalid Response: ", end="\n", verbose=self.verbose
|
||||
)
|
||||
_run_manager.on_text(
|
||||
aql_generation_output, color="red", end="\n", verbose=self.verbose
|
||||
)
|
||||
raise ValueError(f"Response is Invalid: {aql_generation_output}")
|
||||
|
||||
aql_query = matches[0]
|
||||
#####################
|
||||
|
||||
_run_manager.on_text(
|
||||
f"AQL Query ({aql_generation_attempt}):", verbose=self.verbose
|
||||
)
|
||||
_run_manager.on_text(
|
||||
aql_query, color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
|
||||
#####################
|
||||
# Execute AQL Query #
|
||||
from arango import AQLQueryExecuteError
|
||||
|
||||
try:
|
||||
aql_result = self.graph.query(aql_query, self.top_k)
|
||||
except AQLQueryExecuteError as e:
|
||||
aql_error = e.error_message
|
||||
|
||||
_run_manager.on_text(
|
||||
"AQL Query Execution Error: ", end="\n", verbose=self.verbose
|
||||
)
|
||||
_run_manager.on_text(
|
||||
aql_error, color="yellow", end="\n\n", verbose=self.verbose
|
||||
)
|
||||
|
||||
########################
|
||||
# Retry AQL Generation #
|
||||
aql_generation_output = self.aql_fix_chain.run(
|
||||
{
|
||||
"adb_schema": self.graph.schema,
|
||||
"aql_query": aql_query,
|
||||
"aql_error": aql_error,
|
||||
},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
########################
|
||||
|
||||
#####################
|
||||
|
||||
aql_generation_attempt += 1
|
||||
|
||||
if aql_result is None:
|
||||
m = f"""
|
||||
Maximum amount of AQL Query Generation attempts reached.
|
||||
Unable to execute the AQL Query due to the following error:
|
||||
{aql_error}
|
||||
"""
|
||||
raise ValueError(m)
|
||||
|
||||
_run_manager.on_text("AQL Result:", end="\n", verbose=self.verbose)
|
||||
_run_manager.on_text(
|
||||
str(aql_result), color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
|
||||
########################
|
||||
# Interpret AQL Result #
|
||||
result = self.qa_chain(
|
||||
{
|
||||
"adb_schema": self.graph.schema,
|
||||
"user_input": user_input,
|
||||
"aql_query": aql_query,
|
||||
"aql_result": aql_result,
|
||||
},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
########################
|
||||
|
||||
# Return results #
|
||||
result = {self.output_key: result[self.qa_chain.output_key]}
|
||||
|
||||
if self.return_aql_query:
|
||||
result["aql_query"] = aql_query
|
||||
|
||||
if self.return_aql_result:
|
||||
result["aql_result"] = aql_result
|
||||
|
||||
return result
|
||||
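# A minimal usage sketch, with ArangoGraphQAChain from the module above in
# scope. The endpoint, credentials, database name, model choice, and question
# are hypothetical placeholders.
from arango import ArangoClient
from langchain_community.graphs import ArangoGraph
from langchain_openai import ChatOpenAI

client = ArangoClient(hosts="http://localhost:8529")  # assumed local ArangoDB
db = client.db("_system", username="root", password="password")
graph = ArangoGraph(db)

chain = ArangoGraphQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)
chain.return_aql_query = True  # also surface the generated AQL in the output
result = chain.invoke({"query": "Who starred in Pulp Fiction?"})
print(result["result"], result["aql_query"])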
@@ -1,103 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    ENTITY_EXTRACTION_PROMPT,
    GRAPH_QA_PROMPT,
)
from langchain_community.graphs.networkx_graph import NetworkxEntityGraph, get_entities


class GraphQAChain(Chain):
    """Chain for question-answering against a graph.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: NetworkxEntityGraph = Field(exclude=True)
    entity_extraction_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        qa_prompt: BasePromptTemplate = GRAPH_QA_PROMPT,
        entity_prompt: BasePromptTemplate = ENTITY_EXTRACTION_PROMPT,
        **kwargs: Any,
    ) -> GraphQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        entity_chain = LLMChain(llm=llm, prompt=entity_prompt)

        return cls(
            qa_chain=qa_chain,
            entity_extraction_chain=entity_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Extract entities, look up info and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        question = inputs[self.input_key]

        entity_string = self.entity_extraction_chain.run(question)

        _run_manager.on_text("Entities Extracted:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            entity_string, color="green", end="\n", verbose=self.verbose
        )
        entities = get_entities(entity_string)
        context = ""
        all_triplets = []
        for entity in entities:
            all_triplets.extend(self.graph.get_entity_knowledge(entity))
        context = "\n".join(all_triplets)
        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        _run_manager.on_text(context, color="green", end="\n", verbose=self.verbose)
        result = self.qa_chain(
            {"question": question, "context": context},
            callbacks=_run_manager.get_child(),
        )
        return {self.output_key: result[self.qa_chain.output_key]}
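# A minimal usage sketch, with GraphQAChain from the module above in scope,
# assuming an in-memory NetworkX entity graph; the triples, model choice, and
# question are illustrative placeholders.
from langchain_community.graphs.networkx_graph import (
    KnowledgeTriple,
    NetworkxEntityGraph,
)
from langchain_openai import ChatOpenAI

graph = NetworkxEntityGraph()
graph.add_triple(KnowledgeTriple("Neo4j", "is a", "graph database"))
graph.add_triple(KnowledgeTriple("Neo4j", "uses", "Cypher"))

chain = GraphQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
print(chain.run("What query language does Neo4j use?"))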
@@ -1,298 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

import re
from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.cypher_utils import (
    CypherQueryCorrector,
    Schema,
)
from langchain_community.chains.graph_qa.prompts import (
    CYPHER_GENERATION_PROMPT,
    CYPHER_QA_PROMPT,
)
from langchain_community.graphs.graph_store import GraphStore

INTERMEDIATE_STEPS_KEY = "intermediate_steps"


def extract_cypher(text: str) -> str:
    """Extract Cypher code from a text.

    Args:
        text: Text to extract Cypher code from.

    Returns:
        Cypher code extracted from the text.
    """
    # The pattern to find Cypher code enclosed in triple backticks
    pattern = r"```(.*?)```"

    # Find all matches in the input text
    matches = re.findall(pattern, text, re.DOTALL)

    return matches[0] if matches else text


def construct_schema(
    structured_schema: Dict[str, Any],
    include_types: List[str],
    exclude_types: List[str],
) -> str:
    """Filter the schema based on included or excluded types"""

    def filter_func(x: str) -> bool:
        return x in include_types if include_types else x not in exclude_types

    filtered_schema: Dict[str, Any] = {
        "node_props": {
            k: v
            for k, v in structured_schema.get("node_props", {}).items()
            if filter_func(k)
        },
        "rel_props": {
            k: v
            for k, v in structured_schema.get("rel_props", {}).items()
            if filter_func(k)
        },
        "relationships": [
            r
            for r in structured_schema.get("relationships", [])
            if all(filter_func(r[t]) for t in ["start", "end", "type"])
        ],
    }

    # Format node properties
    formatted_node_props = []
    for label, properties in filtered_schema["node_props"].items():
        props_str = ", ".join(
            [f"{prop['property']}: {prop['type']}" for prop in properties]
        )
        formatted_node_props.append(f"{label} {{{props_str}}}")

    # Format relationship properties
    formatted_rel_props = []
    for rel_type, properties in filtered_schema["rel_props"].items():
        props_str = ", ".join(
            [f"{prop['property']}: {prop['type']}" for prop in properties]
        )
        formatted_rel_props.append(f"{rel_type} {{{props_str}}}")

    # Format relationships
    formatted_rels = [
        f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
        for el in filtered_schema["relationships"]
    ]

    return "\n".join(
        [
            "Node properties are the following:",
            ",".join(formatted_node_props),
            "Relationship properties are the following:",
            ",".join(formatted_rel_props),
            "The relationships are the following:",
            ",".join(formatted_rels),
        ]
    )


class GraphCypherQAChain(Chain):
    """Chain for question-answering against a graph by generating Cypher statements.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: GraphStore = Field(exclude=True)
    cypher_generation_chain: LLMChain
    qa_chain: LLMChain
    graph_schema: str
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    top_k: int = 10
    """Number of results to return from the query"""
    return_intermediate_steps: bool = False
    """Whether or not to return the intermediate steps along with the final answer."""
    return_direct: bool = False
    """Whether or not to return the result of querying the graph directly."""
    cypher_query_corrector: Optional[CypherQueryCorrector] = None
    """Optional cypher validation tool"""

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @property
    def _chain_type(self) -> str:
        return "graph_cypher_chain"

    @classmethod
    def from_llm(
        cls,
        llm: Optional[BaseLanguageModel] = None,
        *,
        qa_prompt: Optional[BasePromptTemplate] = None,
        cypher_prompt: Optional[BasePromptTemplate] = None,
        cypher_llm: Optional[BaseLanguageModel] = None,
        qa_llm: Optional[BaseLanguageModel] = None,
        exclude_types: List[str] = [],
        include_types: List[str] = [],
        validate_cypher: bool = False,
        qa_llm_kwargs: Optional[Dict[str, Any]] = None,
        cypher_llm_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> GraphCypherQAChain:
        """Initialize from LLM."""

        if not cypher_llm and not llm:
            raise ValueError("Either `llm` or `cypher_llm` parameters must be provided")
        if not qa_llm and not llm:
            raise ValueError("Either `llm` or `qa_llm` parameters must be provided")
        if cypher_llm and qa_llm and llm:
            raise ValueError(
                "You can specify up to two of 'cypher_llm', 'qa_llm'"
                ", and 'llm', but not all three simultaneously."
            )
        if cypher_prompt and cypher_llm_kwargs:
            raise ValueError(
                "Specifying cypher_prompt and cypher_llm_kwargs together is"
                " not allowed. Please pass prompt via cypher_llm_kwargs."
            )
        if qa_prompt and qa_llm_kwargs:
            raise ValueError(
                "Specifying qa_prompt and qa_llm_kwargs together is"
                " not allowed. Please pass prompt via qa_llm_kwargs."
            )
        use_qa_llm_kwargs = qa_llm_kwargs if qa_llm_kwargs is not None else {}
        use_cypher_llm_kwargs = (
            cypher_llm_kwargs if cypher_llm_kwargs is not None else {}
        )
        if "prompt" not in use_qa_llm_kwargs:
            use_qa_llm_kwargs["prompt"] = (
                qa_prompt if qa_prompt is not None else CYPHER_QA_PROMPT
            )
        if "prompt" not in use_cypher_llm_kwargs:
            use_cypher_llm_kwargs["prompt"] = (
                cypher_prompt if cypher_prompt is not None else CYPHER_GENERATION_PROMPT
            )

        qa_chain = LLMChain(llm=qa_llm or llm, **use_qa_llm_kwargs)  # type: ignore[arg-type]

        cypher_generation_chain = LLMChain(
            llm=cypher_llm or llm,  # type: ignore[arg-type]
            **use_cypher_llm_kwargs,  # type: ignore[arg-type]
        )

        if exclude_types and include_types:
            raise ValueError(
                "Either `exclude_types` or `include_types` "
                "can be provided, but not both"
            )

        graph_schema = construct_schema(
            kwargs["graph"].get_structured_schema, include_types, exclude_types
        )

        cypher_query_corrector = None
        if validate_cypher:
            corrector_schema = [
                Schema(el["start"], el["type"], el["end"])
                for el in kwargs["graph"].structured_schema.get("relationships")
            ]
            cypher_query_corrector = CypherQueryCorrector(corrector_schema)

        return cls(
            graph_schema=graph_schema,
            qa_chain=qa_chain,
            cypher_generation_chain=cypher_generation_chain,
            cypher_query_corrector=cypher_query_corrector,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        intermediate_steps: List = []

        generated_cypher = self.cypher_generation_chain.run(
            {"question": question, "schema": self.graph_schema}, callbacks=callbacks
        )

        # Extract Cypher code if it is wrapped in backticks
        generated_cypher = extract_cypher(generated_cypher)

        # Correct Cypher query if enabled
        if self.cypher_query_corrector:
            generated_cypher = self.cypher_query_corrector(generated_cypher)

        _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_cypher})

        # Retrieve and limit the number of results
        # Generated Cypher may be null if the query corrector identifies an
        # invalid schema
        if generated_cypher:
            context = self.graph.query(generated_cypher)[: self.top_k]
        else:
            context = []

        if self.return_direct:
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain(
                {"question": question, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result
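# A minimal usage sketch, with GraphCypherQAChain from the module above in
# scope, against a Neo4j instance. The URL, credentials, model choice, and
# question are placeholders; `validate_cypher=True` enables the
# CypherQueryCorrector defined in cypher_utils.
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="password")
chain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    graph=graph,
    validate_cypher=True,
    return_intermediate_steps=True,
)
out = chain.invoke({"query": "Who acted in Top Gun?"})
print(out["result"], out["intermediate_steps"])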
@@ -1,260 +0,0 @@
import re
from collections import namedtuple
from typing import Any, Dict, List, Optional, Tuple

Schema = namedtuple("Schema", ["left_node", "relation", "right_node"])


class CypherQueryCorrector:
    """
    Used to correct relationship direction in generated Cypher statements.
    This code is copied from the winner's submission to the Cypher competition:
    https://github.com/sakusaku-rich/cypher-direction-competition
    """

    property_pattern = re.compile(r"\{.+?\}")
    node_pattern = re.compile(r"\(.+?\)")
    path_pattern = re.compile(
        r"(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))(<?-)(\[.*?\])?(->?)(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))"
    )
    node_relation_node_pattern = re.compile(
        r"(\()+(?P<left_node>[^()]*?)\)(?P<relation>.*?)\((?P<right_node>[^()]*?)(\))+"
    )
    relation_type_pattern = re.compile(r":(?P<relation_type>.+?)?(\{.+\})?]")

    def __init__(self, schemas: List[Schema]):
        """
        Args:
            schemas: list of schemas
        """
        self.schemas = schemas

    def clean_node(self, node: str) -> str:
        """
        Args:
            node: node in string format
        """
        node = re.sub(self.property_pattern, "", node)
        node = node.replace("(", "")
        node = node.replace(")", "")
        node = node.strip()
        return node

    def detect_node_variables(self, query: str) -> Dict[str, List[str]]:
        """
        Args:
            query: cypher query
        """
        nodes = re.findall(self.node_pattern, query)
        nodes = [self.clean_node(node) for node in nodes]
        res: Dict[str, Any] = {}
        for node in nodes:
            parts = node.split(":")
            # split() always returns a list, so compare against the empty-node
            # form rather than the empty string
            if parts == [""]:
                continue
            variable = parts[0]
            if variable not in res:
                res[variable] = []
            res[variable] += parts[1:]
        return res

    def extract_paths(self, query: str) -> "List[str]":
        """
        Args:
            query: cypher query
        """
        paths = []
        idx = 0
        while matched := self.path_pattern.findall(query[idx:]):
            matched = matched[0]
            matched = [
                m for i, m in enumerate(matched) if i not in [1, len(matched) - 1]
            ]
            path = "".join(matched)
            idx = query.find(path) + len(path) - len(matched[-1])
            paths.append(path)
        return paths

    def judge_direction(self, relation: str) -> str:
        """
        Args:
            relation: relation in string format
        """
        direction = "BIDIRECTIONAL"
        if relation[0] == "<":
            direction = "INCOMING"
        if relation[-1] == ">":
            direction = "OUTGOING"
        return direction

    def extract_node_variable(self, part: str) -> Optional[str]:
        """
        Args:
            part: node in string format
        """
        part = part.lstrip("(").rstrip(")")
        idx = part.find(":")
        if idx != -1:
            part = part[:idx]
        return None if part == "" else part

    def detect_labels(
        self, str_node: str, node_variable_dict: Dict[str, Any]
    ) -> List[str]:
        """
        Args:
            str_node: node in string format
            node_variable_dict: dictionary of node variables
        """
        splitted_node = str_node.split(":")
        variable = splitted_node[0]
        labels = []
        if variable in node_variable_dict:
            labels = node_variable_dict[variable]
        elif variable == "" and len(splitted_node) > 1:
            labels = splitted_node[1:]
        return labels

    def verify_schema(
        self,
        from_node_labels: List[str],
        relation_types: List[str],
        to_node_labels: List[str],
    ) -> bool:
        """
        Args:
            from_node_labels: labels of the from node
            relation_types: types of the relation
            to_node_labels: labels of the to node
        """
        valid_schemas = self.schemas
        if from_node_labels != []:
            from_node_labels = [label.strip("`") for label in from_node_labels]
            valid_schemas = [
                schema for schema in valid_schemas if schema[0] in from_node_labels
            ]
        if to_node_labels != []:
            to_node_labels = [label.strip("`") for label in to_node_labels]
            valid_schemas = [
                schema for schema in valid_schemas if schema[2] in to_node_labels
            ]
        if relation_types != []:
            relation_types = [rel_type.strip("`") for rel_type in relation_types]
            valid_schemas = [
                schema for schema in valid_schemas if schema[1] in relation_types
            ]
        return valid_schemas != []

    def detect_relation_types(self, str_relation: str) -> Tuple[str, List[str]]:
        """
        Args:
            str_relation: relation in string format
        """
        relation_direction = self.judge_direction(str_relation)
        relation_type = self.relation_type_pattern.search(str_relation)
        if relation_type is None or relation_type.group("relation_type") is None:
            return relation_direction, []
        relation_types = [
            t.strip().strip("!")
            for t in relation_type.group("relation_type").split("|")
        ]
        return relation_direction, relation_types

    def correct_query(self, query: str) -> str:
        """
        Args:
            query: cypher query
        """
        node_variable_dict = self.detect_node_variables(query)
        paths = self.extract_paths(query)
        for path in paths:
            original_path = path
            start_idx = 0
            while start_idx < len(path):
                match_res = re.match(self.node_relation_node_pattern, path[start_idx:])
                if match_res is None:
                    break
                start_idx += match_res.start()
                match_dict = match_res.groupdict()
                left_node_labels = self.detect_labels(
                    match_dict["left_node"], node_variable_dict
                )
                right_node_labels = self.detect_labels(
                    match_dict["right_node"], node_variable_dict
                )
                end_idx = (
                    start_idx
                    + 4
                    + len(match_dict["left_node"])
                    + len(match_dict["relation"])
                    + len(match_dict["right_node"])
                )
                original_partial_path = original_path[start_idx : end_idx + 1]
                relation_direction, relation_types = self.detect_relation_types(
                    match_dict["relation"]
                )

                if relation_types != [] and "".join(relation_types).find("*") != -1:
                    start_idx += (
                        len(match_dict["left_node"]) + len(match_dict["relation"]) + 2
                    )
                    continue

                if relation_direction == "OUTGOING":
                    is_legal = self.verify_schema(
                        left_node_labels, relation_types, right_node_labels
                    )
                    if not is_legal:
                        is_legal = self.verify_schema(
                            right_node_labels, relation_types, left_node_labels
                        )
                        if is_legal:
                            corrected_relation = "<" + match_dict["relation"][:-1]
                            corrected_partial_path = original_partial_path.replace(
                                match_dict["relation"], corrected_relation
                            )
                            query = query.replace(
                                original_partial_path, corrected_partial_path
                            )
                        else:
                            return ""
                elif relation_direction == "INCOMING":
                    is_legal = self.verify_schema(
                        right_node_labels, relation_types, left_node_labels
                    )
                    if not is_legal:
                        is_legal = self.verify_schema(
                            left_node_labels, relation_types, right_node_labels
                        )
                        if is_legal:
                            corrected_relation = match_dict["relation"][1:] + ">"
                            corrected_partial_path = original_partial_path.replace(
                                match_dict["relation"], corrected_relation
                            )
                            query = query.replace(
                                original_partial_path, corrected_partial_path
                            )
                        else:
                            return ""
                else:
                    is_legal = self.verify_schema(
                        left_node_labels, relation_types, right_node_labels
                    )
                    is_legal |= self.verify_schema(
                        right_node_labels, relation_types, left_node_labels
                    )
                    if not is_legal:
                        return ""

                start_idx += (
                    len(match_dict["left_node"]) + len(match_dict["relation"]) + 2
                )
        return query

    def __call__(self, query: str) -> str:
        """Correct the query to make it valid.

        Args:
            query: cypher query
        """
        return self.correct_query(query)
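# The corrector can be exercised on its own. Given the one-relationship schema
# below (an illustrative example), a query written against the wrong
# relationship direction is flipped to match the schema.
corrector = CypherQueryCorrector([Schema("Person", "ACTED_IN", "Movie")])
fixed = corrector("MATCH (m:Movie)-[:ACTED_IN]->(p:Person) RETURN p.name")
# -> "MATCH (m:Movie)<-[:ACTED_IN]-(p:Person) RETURN p.name"
print(fixed)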
@@ -1,157 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

import re
from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    CYPHER_GENERATION_PROMPT,
    CYPHER_QA_PROMPT,
)
from langchain_community.graphs import FalkorDBGraph

INTERMEDIATE_STEPS_KEY = "intermediate_steps"


def extract_cypher(text: str) -> str:
    """
    Extract Cypher code from a text.
    Args:
        text: Text to extract Cypher code from.

    Returns:
        Cypher code extracted from the text.
    """
    # The pattern to find Cypher code enclosed in triple backticks
    pattern = r"```(.*?)```"

    # Find all matches in the input text
    matches = re.findall(pattern, text, re.DOTALL)

    return matches[0] if matches else text


class FalkorDBQAChain(Chain):
    """Chain for question-answering against a graph by generating Cypher statements.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: FalkorDBGraph = Field(exclude=True)
    cypher_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    top_k: int = 10
    """Number of results to return from the query"""
    return_intermediate_steps: bool = False
    """Whether or not to return the intermediate steps along with the final answer."""
    return_direct: bool = False
    """Whether or not to return the result of querying the graph directly."""

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @property
    def _chain_type(self) -> str:
        return "graph_cypher_chain"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        cypher_prompt: BasePromptTemplate = CYPHER_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> FalkorDBQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)

        return cls(
            qa_chain=qa_chain,
            cypher_generation_chain=cypher_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        intermediate_steps: List = []

        generated_cypher = self.cypher_generation_chain.run(
            {"question": question, "schema": self.graph.schema}, callbacks=callbacks
        )

        # Extract Cypher code if it is wrapped in backticks
        generated_cypher = extract_cypher(generated_cypher)

        _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_cypher})

        # Retrieve and limit the number of results
        context = self.graph.query(generated_cypher)[: self.top_k]

        if self.return_direct:
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain(
                {"question": question, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result
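# A minimal usage sketch, with FalkorDBQAChain from the module above in scope;
# the database name, host/port, model choice, and question are placeholders
# for a running FalkorDB instance.
from langchain_community.graphs import FalkorDBGraph
from langchain_openai import ChatOpenAI

graph = FalkorDBGraph(database="movies", host="localhost", port=6379)
chain = FalkorDBQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
print(chain.run("Who directed The Matrix?"))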
@@ -1,221 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManager, CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    CYPHER_QA_PROMPT,
    GRAPHDB_SPARQL_FIX_TEMPLATE,
    GREMLIN_GENERATION_PROMPT,
)
from langchain_community.graphs import GremlinGraph

INTERMEDIATE_STEPS_KEY = "intermediate_steps"


def extract_gremlin(text: str) -> str:
    """Extract Gremlin code from a text.

    Args:
        text: Text to extract Gremlin code from.

    Returns:
        Gremlin code extracted from the text.
    """
    text = text.replace("`", "")
    if text.startswith("gremlin"):
        text = text[len("gremlin") :]
    return text.replace("\n", "")


class GremlinQAChain(Chain):
    """Chain for question-answering against a graph by generating gremlin statements.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: GremlinGraph = Field(exclude=True)
    gremlin_generation_chain: LLMChain
    qa_chain: LLMChain
    gremlin_fix_chain: LLMChain
    max_fix_retries: int = 3
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    top_k: int = 100
    return_direct: bool = False
    return_intermediate_steps: bool = False

    @property
    def input_keys(self) -> List[str]:
        """Input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        gremlin_fix_prompt: BasePromptTemplate = PromptTemplate(
            input_variables=["error_message", "generated_sparql", "schema"],
            template=GRAPHDB_SPARQL_FIX_TEMPLATE.replace("SPARQL", "Gremlin").replace(
                "in Turtle format", ""
            ),
        ),
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        gremlin_prompt: BasePromptTemplate = GREMLIN_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> GremlinQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        gremlin_generation_chain = LLMChain(llm=llm, prompt=gremlin_prompt)
        gremlin_fix_chain = LLMChain(llm=llm, prompt=gremlin_fix_prompt)
        return cls(
            qa_chain=qa_chain,
            gremlin_generation_chain=gremlin_generation_chain,
            gremlin_fix_chain=gremlin_fix_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Generate gremlin statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        intermediate_steps: List = []

        chain_response = self.gremlin_generation_chain.invoke(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        generated_gremlin = extract_gremlin(
            chain_response[self.gremlin_generation_chain.output_key]
        )

        _run_manager.on_text("Generated gremlin:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_gremlin, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_gremlin})

        if generated_gremlin:
            context = self.execute_with_retry(
                _run_manager, callbacks, generated_gremlin
            )[: self.top_k]
        else:
            context = []

        if self.return_direct:
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain.invoke(
                {"question": question, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result

    def execute_query(self, query: str) -> List[Any]:
        try:
            return self.graph.query(query)
        except Exception as e:
            if hasattr(e, "status_message"):
                raise ValueError(e.status_message)
            else:
                raise ValueError(str(e))

    def execute_with_retry(
        self,
        _run_manager: CallbackManagerForChainRun,
        callbacks: CallbackManager,
        generated_gremlin: str,
    ) -> List[Any]:
        try:
            return self.execute_query(generated_gremlin)
        except Exception as e:
            retries = 0
            error_message = str(e)
            # keep the variable bound in case the fix chain itself fails below
            fixed_gremlin = generated_gremlin
            self.log_invalid_query(_run_manager, generated_gremlin, error_message)

            while retries < self.max_fix_retries:
                try:
                    fix_chain_result = self.gremlin_fix_chain.invoke(
                        {
                            "error_message": error_message,
                            # we are borrowing the template from sparql
                            "generated_sparql": generated_gremlin,
                            "schema": self.graph.get_schema,
                        },
                        callbacks=callbacks,
                    )
                    fixed_gremlin = fix_chain_result[self.gremlin_fix_chain.output_key]
                    return self.execute_query(fixed_gremlin)
                except Exception as e:
                    retries += 1
                    parse_exception = str(e)
                    self.log_invalid_query(_run_manager, fixed_gremlin, parse_exception)

            raise ValueError("The generated Gremlin query is invalid.")

    def log_invalid_query(
        self,
        _run_manager: CallbackManagerForChainRun,
        generated_query: str,
        error_message: str,
    ) -> None:
        _run_manager.on_text("Invalid Gremlin query: ", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_query, color="red", end="\n", verbose=self.verbose
        )
        _run_manager.on_text(
            "Gremlin Query Parse Error: ", end="\n", verbose=self.verbose
        )
        _run_manager.on_text(
            error_message, color="red", end="\n\n", verbose=self.verbose
        )
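# A minimal usage sketch, with GremlinQAChain from the module above in scope,
# assuming a Gremlin endpoint such as Azure Cosmos DB; the URL, credentials,
# model choice, and question are all placeholders.
from langchain_community.graphs import GremlinGraph
from langchain_openai import ChatOpenAI

graph = GremlinGraph(
    url="wss://example.gremlin.cosmos.azure.com:443/",  # placeholder endpoint
    username="/dbs/sample/colls/graph",  # placeholder Cosmos-style username
    password="<access-key>",
)
chain = GremlinQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
print(chain.invoke({"query": "How many vertices are in the graph?"}))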
@@ -1,106 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    CYPHER_QA_PROMPT,
    GREMLIN_GENERATION_PROMPT,
)
from langchain_community.graphs.hugegraph import HugeGraph


class HugeGraphQAChain(Chain):
    """Chain for question-answering against a graph by generating gremlin statements.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: HugeGraph = Field(exclude=True)
    gremlin_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        gremlin_prompt: BasePromptTemplate = GREMLIN_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> HugeGraphQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        gremlin_generation_chain = LLMChain(llm=llm, prompt=gremlin_prompt)

        return cls(
            qa_chain=qa_chain,
            gremlin_generation_chain=gremlin_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Generate gremlin statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        generated_gremlin = self.gremlin_generation_chain.run(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        _run_manager.on_text("Generated gremlin:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_gremlin, color="green", end="\n", verbose=self.verbose
        )
        context = self.graph.query(generated_gremlin)

        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            str(context), color="green", end="\n", verbose=self.verbose
        )

        result = self.qa_chain(
            {"question": question, "context": context},
            callbacks=callbacks,
        )
        return {self.output_key: result[self.qa_chain.output_key]}
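# A minimal usage sketch, with HugeGraphQAChain from the module above in
# scope; connection parameters, model choice, and the question are
# placeholders for a local HugeGraph server.
from langchain_community.graphs.hugegraph import HugeGraph
from langchain_openai import ChatOpenAI

graph = HugeGraph(
    username="admin",
    password="admin",
    address="localhost",
    port=8080,
    graph="hugegraph",
)
chain = HugeGraphQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
print(chain.run("Who is Al Pacino connected to?"))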
@@ -1,143 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

import re
from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    CYPHER_QA_PROMPT,
    KUZU_GENERATION_PROMPT,
)
from langchain_community.graphs.kuzu_graph import KuzuGraph


def remove_prefix(text: str, prefix: str) -> str:
    """Remove a prefix from a text.

    Args:
        text: Text to remove the prefix from.
        prefix: Prefix to remove from the text.

    Returns:
        Text with the prefix removed.
    """
    if text.startswith(prefix):
        return text[len(prefix) :]
    return text


def extract_cypher(text: str) -> str:
    """Extract Cypher code from a text.

    Args:
        text: Text to extract Cypher code from.

    Returns:
        Cypher code extracted from the text.
    """
    # The pattern to find Cypher code enclosed in triple backticks
    pattern = r"```(.*?)```"

    # Find all matches in the input text
    matches = re.findall(pattern, text, re.DOTALL)

    return matches[0] if matches else text


class KuzuQAChain(Chain):
    """Question-answering against a graph by generating Cypher statements for Kùzu.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: KuzuGraph = Field(exclude=True)
    cypher_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        cypher_prompt: BasePromptTemplate = KUZU_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> KuzuQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)

        return cls(
            qa_chain=qa_chain,
            cypher_generation_chain=cypher_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        generated_cypher = self.cypher_generation_chain.run(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )
        # Extract Cypher code if it is wrapped in triple backticks
        # with the language marker "cypher"
        generated_cypher = remove_prefix(extract_cypher(generated_cypher), "cypher")

        _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )
        context = self.graph.query(generated_cypher)

        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            str(context), color="green", end="\n", verbose=self.verbose
        )

        result = self.qa_chain(
            {"question": question, "context": context},
            callbacks=callbacks,
        )
        return {self.output_key: result[self.qa_chain.output_key]}
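# A minimal usage sketch, with KuzuQAChain from the module above in scope,
# over an embedded Kùzu database; the database path, model choice, and
# question are placeholders.
import kuzu

from langchain_community.graphs.kuzu_graph import KuzuGraph
from langchain_openai import ChatOpenAI

db = kuzu.Database("test_db")  # placeholder on-disk database
graph = KuzuGraph(db)
chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
print(chain.run("Who acted in The Godfather?"))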
@@ -1,106 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    CYPHER_QA_PROMPT,
    NGQL_GENERATION_PROMPT,
)
from langchain_community.graphs.nebula_graph import NebulaGraph


class NebulaGraphQAChain(Chain):
    """Chain for question-answering against a graph by generating nGQL statements.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    graph: NebulaGraph = Field(exclude=True)
    ngql_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        ngql_prompt: BasePromptTemplate = NGQL_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> NebulaGraphQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        ngql_generation_chain = LLMChain(llm=llm, prompt=ngql_prompt)

        return cls(
            qa_chain=qa_chain,
            ngql_generation_chain=ngql_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Generate nGQL statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        generated_ngql = self.ngql_generation_chain.run(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        _run_manager.on_text("Generated nGQL:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_ngql, color="green", end="\n", verbose=self.verbose
        )
        context = self.graph.query(generated_ngql)

        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            str(context), color="green", end="\n", verbose=self.verbose
        )

        result = self.qa_chain(
            {"question": question, "context": context},
            callbacks=callbacks,
        )
        return {self.output_key: result[self.qa_chain.output_key]}
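# A minimal usage sketch, with NebulaGraphQAChain from the module above in
# scope; the space name, credentials, address, model choice, and question are
# placeholders for a running NebulaGraph cluster.
from langchain_community.graphs.nebula_graph import NebulaGraph
from langchain_openai import ChatOpenAI

graph = NebulaGraph(
    space="basketballplayer",
    username="root",
    password="nebula",
    address="127.0.0.1",
    port=9669,
)
chain = NebulaGraphQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)
print(chain.run("How many players are in the graph?"))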
@@ -1,217 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chains.prompt_selector import ConditionalPromptSelector
|
||||
from langchain_core.callbacks import CallbackManagerForChainRun
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.prompts.base import BasePromptTemplate
|
||||
from langchain_core.pydantic_v1 import Field
|
||||
|
||||
from langchain_community.chains.graph_qa.prompts import (
|
||||
CYPHER_QA_PROMPT,
|
||||
NEPTUNE_OPENCYPHER_GENERATION_PROMPT,
|
||||
NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT,
|
||||
)
|
||||
from langchain_community.graphs import BaseNeptuneGraph
|
||||
|
||||
INTERMEDIATE_STEPS_KEY = "intermediate_steps"
|
||||
|
||||
|
||||
def trim_query(query: str) -> str:
|
||||
"""Trim the query to only include Cypher keywords."""
|
||||
keywords = (
|
||||
"CALL",
|
||||
"CREATE",
|
||||
"DELETE",
|
||||
"DETACH",
|
||||
"LIMIT",
|
||||
"MATCH",
|
||||
"MERGE",
|
||||
"OPTIONAL",
|
||||
"ORDER",
|
||||
"REMOVE",
|
||||
"RETURN",
|
||||
"SET",
|
||||
"SKIP",
|
||||
"UNWIND",
|
||||
"WITH",
|
||||
"WHERE",
|
||||
"//",
|
||||
)
|
||||
|
||||
lines = query.split("\n")
|
||||
new_query = ""
|
||||
|
||||
for line in lines:
|
||||
if line.strip().upper().startswith(keywords):
|
||||
new_query += line + "\n"
|
||||
|
||||
return new_query
|
||||
|
||||
|
||||
def extract_cypher(text: str) -> str:
|
||||
"""Extract Cypher code from text using Regex."""
|
||||
# The pattern to find Cypher code enclosed in triple backticks
|
||||
pattern = r"```(.*?)```"
|
||||
|
||||
# Find all matches in the input text
|
||||
matches = re.findall(pattern, text, re.DOTALL)
|
||||
|
||||
return matches[0] if matches else text
|
||||
|
||||
|
||||
def use_simple_prompt(llm: BaseLanguageModel) -> bool:
|
||||
"""Decides whether to use the simple prompt"""
|
||||
if llm._llm_type and "anthropic" in llm._llm_type: # type: ignore
|
||||
return True
|
||||
|
||||
# Bedrock anthropic
|
||||
if hasattr(llm, "model_id") and "anthropic" in llm.model_id: # type: ignore
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
PROMPT_SELECTOR = ConditionalPromptSelector(
|
||||
default_prompt=NEPTUNE_OPENCYPHER_GENERATION_PROMPT,
|
||||
conditionals=[(use_simple_prompt, NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT)],
|
||||
)
|
||||
|
||||
|
||||
class NeptuneOpenCypherQAChain(Chain):
|
||||
"""Chain for question-answering against a Neptune graph
|
||||
by generating openCypher statements.
|
||||
|
||||
*Security note*: Make sure that the database connection uses credentials
|
||||
that are narrowly-scoped to only include necessary permissions.
|
||||
Failure to do so may result in data corruption or loss, since the calling
|
||||
code may attempt commands that would result in deletion, mutation
|
||||
of data if appropriately prompted or reading sensitive data if such
|
||||
data is present in the database.
|
||||
The best way to guard against such negative outcomes is to (as appropriate)
|
||||
limit the permissions granted to the credentials used with this tool.
|
||||
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
chain = NeptuneOpenCypherQAChain.from_llm(
|
||||
llm=llm,
|
||||
graph=graph
|
||||
)
|
||||
response = chain.run(query)
|
||||
"""
|
||||
|
||||
graph: BaseNeptuneGraph = Field(exclude=True)
|
||||
cypher_generation_chain: LLMChain
|
||||
qa_chain: LLMChain
|
||||
input_key: str = "query" #: :meta private:
|
||||
output_key: str = "result" #: :meta private:
|
||||
top_k: int = 10
|
||||
return_intermediate_steps: bool = False
|
||||
"""Whether or not to return the intermediate steps along with the final answer."""
|
||||
return_direct: bool = False
|
||||
"""Whether or not to return the result of querying the graph directly."""
|
||||
extra_instructions: Optional[str] = None
|
||||
"""Extra instructions by the appended to the query generation prompt."""
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the input keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return the output keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
_output_keys = [self.output_key]
|
||||
return _output_keys
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
*,
|
||||
qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
|
||||
cypher_prompt: Optional[BasePromptTemplate] = None,
|
||||
extra_instructions: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> NeptuneOpenCypherQAChain:
|
||||
"""Initialize from LLM."""
|
||||
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
|
||||
|
||||
_cypher_prompt = cypher_prompt or PROMPT_SELECTOR.get_prompt(llm)
|
||||
cypher_generation_chain = LLMChain(llm=llm, prompt=_cypher_prompt)
|
||||
|
||||
return cls(
|
||||
qa_chain=qa_chain,
|
||||
cypher_generation_chain=cypher_generation_chain,
|
||||
extra_instructions=extra_instructions,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate Cypher statement, use it to look up in db and answer question."""
|
||||
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
|
||||
callbacks = _run_manager.get_child()
|
||||
question = inputs[self.input_key]
|
||||
|
||||
intermediate_steps: List = []
|
||||
|
||||
generated_cypher = self.cypher_generation_chain.run(
|
||||
{
|
||||
"question": question,
|
||||
"schema": self.graph.get_schema,
|
||||
"extra_instructions": self.extra_instructions or "",
|
||||
},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
|
||||
# Extract Cypher code if it is wrapped in backticks
|
||||
generated_cypher = extract_cypher(generated_cypher)
|
||||
generated_cypher = trim_query(generated_cypher)
|
||||
|
||||
_run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
|
||||
_run_manager.on_text(
|
||||
generated_cypher, color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
|
||||
intermediate_steps.append({"query": generated_cypher})
|
||||
|
||||
context = self.graph.query(generated_cypher)
|
||||
|
||||
if self.return_direct:
|
||||
final_result = context
|
||||
else:
|
||||
_run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
|
||||
_run_manager.on_text(
|
||||
str(context), color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
|
||||
intermediate_steps.append({"context": context})
|
||||
|
||||
result = self.qa_chain(
|
||||
{"question": question, "context": context},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
final_result = result[self.qa_chain.output_key]
|
||||
|
||||
chain_result: Dict[str, Any] = {self.output_key: final_result}
|
||||
if self.return_intermediate_steps:
|
||||
chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
|
||||
|
||||
return chain_result
|
||||
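
# Editor's note: a minimal, illustrative sketch (not part of the diff above) of
# how PROMPT_SELECTOR routes between the two generation prompts. FakeListLLM is
# used only so the snippet runs without credentials; its `_llm_type` is not
# "anthropic" and it has no `model_id`, so use_simple_prompt() returns False and
# the default prompt is chosen. An Anthropic (or Bedrock Anthropic) model would
# select NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT instead.
from langchain_community.llms.fake import FakeListLLM

_fake_llm = FakeListLLM(responses=["MATCH (n) RETURN n LIMIT 1"])
assert use_simple_prompt(_fake_llm) is False
assert PROMPT_SELECTOR.get_prompt(_fake_llm) is NEPTUNE_OPENCYPHER_GENERATION_PROMPT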
@@ -1,204 +0,0 @@
"""
Question answering over an RDF or OWL graph using SPARQL.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import SPARQL_QA_PROMPT
from langchain_community.graphs import NeptuneRdfGraph

INTERMEDIATE_STEPS_KEY = "intermediate_steps"

SPARQL_GENERATION_TEMPLATE = """
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following
query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {{
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}}
```
Instructions:
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.

Examples:

Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than
for you to construct a SPARQL query.
Do not include any text except the SPARQL query generated.

The question is:
{prompt}"""

SPARQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE
)


def extract_sparql(query: str) -> str:
    """Extract SPARQL code from a text.

    Args:
        query: Text to extract SPARQL code from.

    Returns:
        SPARQL code extracted from the text.
    """
    query = query.strip()
    querytoks = query.split("```")
    if len(querytoks) == 3:
        query = querytoks[1]

        if query.startswith("sparql"):
            query = query[6:]
    elif query.startswith("<sparql>") and query.endswith("</sparql>"):
        query = query[8:-9]
    return query

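# Editor's note: an illustrative sketch (not part of the diff above) of the
# shapes extract_sparql() handles: a ```-fenced block with an optional "sparql"
# language tag, and a <sparql>...</sparql> wrapper.
assert (
    extract_sparql("```sparql\nSELECT ?s WHERE { ?s ?p ?o }\n```").strip()
    == "SELECT ?s WHERE { ?s ?p ?o }"
)
assert (
    extract_sparql("<sparql>SELECT ?s WHERE { ?s ?p ?o }</sparql>")
    == "SELECT ?s WHERE { ?s ?p ?o }"
)
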
class NeptuneSparqlQAChain(Chain):
    """Chain for question-answering against a Neptune graph
    by generating SPARQL statements.

    *Security note*: Make sure that the database connection uses credentials
    that are narrowly-scoped to only include necessary permissions.
    Failure to do so may result in data corruption or loss, since the calling
    code may attempt commands that would result in deletion, mutation
    of data if appropriately prompted or reading sensitive data if such
    data is present in the database.
    The best way to guard against such negative outcomes is to (as appropriate)
    limit the permissions granted to the credentials used with this tool.

    See https://python.langchain.com/docs/security for more information.

    Example:
        .. code-block:: python

        chain = NeptuneSparqlQAChain.from_llm(
            llm=llm,
            graph=graph
        )
        response = chain.invoke(query)
    """

    graph: NeptuneRdfGraph = Field(exclude=True)
    sparql_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    top_k: int = 10
    return_intermediate_steps: bool = False
    """Whether or not to return the intermediate steps along with the final answer."""
    return_direct: bool = False
    """Whether or not to return the result of querying the graph directly."""
    extra_instructions: Optional[str] = None
    """Extra instructions to be appended to the query generation prompt."""

    @property
    def input_keys(self) -> List[str]:
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT,
        sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT,
        examples: Optional[str] = None,
        **kwargs: Any,
    ) -> NeptuneSparqlQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        template_to_use = SPARQL_GENERATION_TEMPLATE
        if examples:
            template_to_use = template_to_use.replace(
                "Examples:", "Examples: " + examples
            )
            sparql_prompt = PromptTemplate(
                input_variables=["schema", "prompt"], template=template_to_use
            )
        sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt)

        return cls(  # type: ignore[call-arg]
            qa_chain=qa_chain,
            sparql_generation_chain=sparql_generation_chain,
            examples=examples,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """
        Generate SPARQL query, use it to retrieve a response from the gdb and answer
        the question.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        prompt = inputs[self.input_key]

        intermediate_steps: List = []

        generated_sparql = self.sparql_generation_chain.run(
            {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        # Extract SPARQL
        generated_sparql = extract_sparql(generated_sparql)

        _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_sparql, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_sparql})

        context = self.graph.query(generated_sparql)

        if self.return_direct:
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain(
                {"prompt": prompt, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result
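
# Editor's note: an illustrative sketch (not part of the diff above). from_llm()
# splices an optional few-shot `examples` string into the generation template via
# a plain str.replace on the "Examples:" marker, which can be checked without a
# live Neptune endpoint (the example text is a placeholder):
_examples = 'Q: "How many nodes?" -> SELECT (COUNT(?s) AS ?n) WHERE { ?s ?p ?o }'
_patched = SPARQL_GENERATION_TEMPLATE.replace("Examples:", "Examples: " + _examples)
assert _examples in _patched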
@@ -1,190 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, List, Optional

if TYPE_CHECKING:
    import rdflib

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManager, CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    GRAPHDB_QA_PROMPT,
    GRAPHDB_SPARQL_FIX_PROMPT,
    GRAPHDB_SPARQL_GENERATION_PROMPT,
)
from langchain_community.graphs import OntotextGraphDBGraph


class OntotextGraphDBQAChain(Chain):
    """Question-answering against Ontotext GraphDB
    https://graphdb.ontotext.com/ by generating SPARQL queries.

    *Security note*: Make sure that the database connection uses credentials
    that are narrowly-scoped to only include necessary permissions.
    Failure to do so may result in data corruption or loss, since the calling
    code may attempt commands that would result in deletion, mutation
    of data if appropriately prompted or reading sensitive data if such
    data is present in the database.
    The best way to guard against such negative outcomes is to (as appropriate)
    limit the permissions granted to the credentials used with this tool.

    See https://python.langchain.com/docs/security for more information.
    """

    graph: OntotextGraphDBGraph = Field(exclude=True)
    sparql_generation_chain: LLMChain
    sparql_fix_chain: LLMChain
    max_fix_retries: int
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        sparql_generation_prompt: BasePromptTemplate = GRAPHDB_SPARQL_GENERATION_PROMPT,
        sparql_fix_prompt: BasePromptTemplate = GRAPHDB_SPARQL_FIX_PROMPT,
        max_fix_retries: int = 5,
        qa_prompt: BasePromptTemplate = GRAPHDB_QA_PROMPT,
        **kwargs: Any,
    ) -> OntotextGraphDBQAChain:
        """Initialize from LLM."""
        sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_generation_prompt)
        sparql_fix_chain = LLMChain(llm=llm, prompt=sparql_fix_prompt)
        max_fix_retries = max_fix_retries
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        return cls(
            qa_chain=qa_chain,
            sparql_generation_chain=sparql_generation_chain,
            sparql_fix_chain=sparql_fix_chain,
            max_fix_retries=max_fix_retries,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """
        Generate a SPARQL query, use it to retrieve a response from GraphDB and answer
        the question.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        prompt = inputs[self.input_key]
        ontology_schema = self.graph.get_schema

        sparql_generation_chain_result = self.sparql_generation_chain.invoke(
            {"prompt": prompt, "schema": ontology_schema}, callbacks=callbacks
        )
        generated_sparql = sparql_generation_chain_result[
            self.sparql_generation_chain.output_key
        ]

        generated_sparql = self._get_prepared_sparql_query(
            _run_manager, callbacks, generated_sparql, ontology_schema
        )
        query_results = self._execute_query(generated_sparql)

        qa_chain_result = self.qa_chain.invoke(
            {"prompt": prompt, "context": query_results}, callbacks=callbacks
        )
        result = qa_chain_result[self.qa_chain.output_key]
        return {self.output_key: result}

    def _get_prepared_sparql_query(
        self,
        _run_manager: CallbackManagerForChainRun,
        callbacks: CallbackManager,
        generated_sparql: str,
        ontology_schema: str,
    ) -> str:
        try:
            return self._prepare_sparql_query(_run_manager, generated_sparql)
        except Exception as e:
            retries = 0
            error_message = str(e)
            self._log_invalid_sparql_query(
                _run_manager, generated_sparql, error_message
            )

            while retries < self.max_fix_retries:
                try:
                    sparql_fix_chain_result = self.sparql_fix_chain.invoke(
                        {
                            "error_message": error_message,
                            "generated_sparql": generated_sparql,
                            "schema": ontology_schema,
                        },
                        callbacks=callbacks,
                    )
                    generated_sparql = sparql_fix_chain_result[
                        self.sparql_fix_chain.output_key
                    ]
                    return self._prepare_sparql_query(_run_manager, generated_sparql)
                except Exception as e:
                    retries += 1
                    parse_exception = str(e)
                    self._log_invalid_sparql_query(
                        _run_manager, generated_sparql, parse_exception
                    )

        raise ValueError("The generated SPARQL query is invalid.")

    def _prepare_sparql_query(
        self, _run_manager: CallbackManagerForChainRun, generated_sparql: str
    ) -> str:
        from rdflib.plugins.sparql import prepareQuery

        prepareQuery(generated_sparql)
        self._log_prepared_sparql_query(_run_manager, generated_sparql)
        return generated_sparql

    def _log_prepared_sparql_query(
        self, _run_manager: CallbackManagerForChainRun, generated_query: str
    ) -> None:
        _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_query, color="green", end="\n", verbose=self.verbose
        )

    def _log_invalid_sparql_query(
        self,
        _run_manager: CallbackManagerForChainRun,
        generated_query: str,
        error_message: str,
    ) -> None:
        _run_manager.on_text("Invalid SPARQL query: ", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_query, color="red", end="\n", verbose=self.verbose
        )
        _run_manager.on_text(
            "SPARQL Query Parse Error: ", end="\n", verbose=self.verbose
        )
        _run_manager.on_text(
            error_message, color="red", end="\n\n", verbose=self.verbose
        )

    def _execute_query(self, query: str) -> List[rdflib.query.ResultRow]:
        try:
            return self.graph.query(query)
        except Exception:
            raise ValueError("Failed to execute the generated SPARQL query.")
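
# Editor's note: an illustrative usage sketch (not part of the diff above). The
# repository URL, ontology query, and model are placeholders and a running
# GraphDB instance is required, so the snippet is shown commented out:
#
#     from langchain_community.graphs import OntotextGraphDBGraph
#     from langchain_openai import ChatOpenAI
#
#     graph = OntotextGraphDBGraph(
#         query_endpoint="http://localhost:7200/repositories/my-repo",
#         query_ontology="CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}",
#     )
#     chain = OntotextGraphDBQAChain.from_llm(
#         ChatOpenAI(temperature=0), graph=graph, max_fix_retries=3
#     )
#     chain.invoke({chain.input_key: "How many entities are in the repository?"})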
@@ -1,415 +0,0 @@
# flake8: noqa
from langchain_core.prompts.prompt import PromptTemplate

_DEFAULT_ENTITY_EXTRACTION_TEMPLATE = """Extract all entities from the following text. As a guideline, a proper noun is generally capitalized. You should definitely extract all names and places.

Return the output as a single comma-separated list, or NONE if there is nothing of note to return.

EXAMPLE
i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff.
Output: Langchain
END OF EXAMPLE

EXAMPLE
i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. I'm working with Sam.
Output: Langchain, Sam
END OF EXAMPLE

Begin!

{input}
Output:"""
ENTITY_EXTRACTION_PROMPT = PromptTemplate(
    input_variables=["input"], template=_DEFAULT_ENTITY_EXTRACTION_TEMPLATE
)

_DEFAULT_GRAPH_QA_TEMPLATE = """Use the following knowledge triplets to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
GRAPH_QA_PROMPT = PromptTemplate(
    template=_DEFAULT_GRAPH_QA_TEMPLATE, input_variables=["context", "question"]
)

CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

NEBULAGRAPH_EXTRA_INSTRUCTIONS = """
Instructions:

First, generate cypher then convert it to NebulaGraph Cypher dialect(rather than standard):
1. it requires explicit label specification only when referring to node properties: v.`Foo`.name
2. note explicit label specification is not needed for edge properties, so it's e.name instead of e.`Bar`.name
3. it uses double equals sign for comparison: `==` rather than `=`
For instance:
```diff
< MATCH (p:person)-[e:directed]->(m:movie) WHERE m.name = 'The Godfather II'
< RETURN p.name, e.year, m.name;
---
> MATCH (p:`person`)-[e:directed]->(m:`movie`) WHERE m.`movie`.`name` == 'The Godfather II'
> RETURN p.`person`.`name`, e.year, m.`movie`.`name`;
```\n"""

NGQL_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
    "Generate Cypher", "Generate NebulaGraph Cypher"
).replace("Instructions:", NEBULAGRAPH_EXTRA_INSTRUCTIONS)

NGQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=NGQL_GENERATION_TEMPLATE
)
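
# Editor's note: an illustrative check (not part of the diff above). The dialect
# templates above are derived from CYPHER_GENERATION_TEMPLATE with plain
# str.replace calls, so the splice can be verified directly:
assert "Generate NebulaGraph Cypher" in NGQL_GENERATION_TEMPLATE
assert "double equals sign" in NGQL_GENERATION_TEMPLATE  # extra instructions spliced in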
KUZU_EXTRA_INSTRUCTIONS = """
Instructions:

Generate the Kùzu dialect of Cypher with the following rules in mind:

1. Do not use a `WHERE EXISTS` clause to check the existence of a property.
2. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
3. Do not include any notes or comments even if the statement does not produce the expected result.
```\n"""

KUZU_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
    "Generate Cypher", "Generate Kùzu Cypher"
).replace("Instructions:", KUZU_EXTRA_INSTRUCTIONS)

KUZU_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=KUZU_GENERATION_TEMPLATE
)

GREMLIN_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace("Cypher", "Gremlin")

GREMLIN_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=GREMLIN_GENERATION_TEMPLATE
)

CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
Here is an example:

Question: Which managers own Neo4j stocks?
Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.

Follow this example when generating answers.
If the provided information is empty, say that you don't know the answer.
Information:
{context}

Question: {question}
Helpful Answer:"""
CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"], template=CYPHER_QA_TEMPLATE
)

SPARQL_INTENT_TEMPLATE = """Task: Identify the intent of a prompt and return the appropriate SPARQL query type.
You are an assistant that distinguishes different types of prompts and returns the corresponding SPARQL query types.
Consider only the following query types:
* SELECT: this query type corresponds to questions
* UPDATE: this query type corresponds to all requests for deleting, inserting, or changing triples
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than for you to identify a SPARQL query type.
Do not include any unnecessary whitespaces or any text except the query type, i.e., either return 'SELECT' or 'UPDATE'.

The prompt is:
{prompt}
Helpful Answer:"""
SPARQL_INTENT_PROMPT = PromptTemplate(
    input_variables=["prompt"], template=SPARQL_INTENT_TEMPLATE
)

SPARQL_GENERATION_SELECT_TEMPLATE = """Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {{
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}}
```
Instructions:
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.
Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than for you to construct a SPARQL query.
Do not include any text except the SPARQL query generated.

The question is:
{prompt}"""
SPARQL_GENERATION_SELECT_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"], template=SPARQL_GENERATION_SELECT_TEMPLATE
)

SPARQL_GENERATION_UPDATE_TEMPLATE = """Task: Generate a SPARQL UPDATE statement for updating a graph database.
For instance, to add 'jane.doe@foo.bar' as a new email address for Jane Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
INSERT {{
    ?person foaf:mbox <mailto:jane.doe@foo.bar> .
}}
WHERE {{
    ?person foaf:name "Jane Doe" .
}}
```
Instructions:
Make the query as short as possible and avoid adding unnecessary triples.
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.
Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than for you to construct a SPARQL query.
Return only the generated SPARQL query, nothing else.

The information to be inserted is:
{prompt}"""
SPARQL_GENERATION_UPDATE_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"], template=SPARQL_GENERATION_UPDATE_TEMPLATE
)

SPARQL_QA_TEMPLATE = """Task: Generate a natural language response from the results of a SPARQL query.
You are an assistant that creates well-written and human understandable answers.
The information part contains the information provided, which you can use to construct an answer.
The information provided is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make your response sound like the information is coming from an AI assistant, but don't add any information.
Information:
{context}

Question: {prompt}
Helpful Answer:"""
SPARQL_QA_PROMPT = PromptTemplate(
    input_variables=["context", "prompt"], template=SPARQL_QA_TEMPLATE
)

GRAPHDB_SPARQL_GENERATION_TEMPLATE = """
Write a SPARQL SELECT query for querying a graph database.
The ontology schema delimited by triple backticks in Turtle format is:
```
{schema}
```
Use only the classes and properties provided in the schema to construct the SPARQL query.
Do not use any classes or properties that are not explicitly provided in the SPARQL query.
Include all necessary prefixes.
Do not include any explanations or apologies in your responses.
Do not wrap the query in backticks.
Do not include any text except the SPARQL query generated.
The question delimited by triple backticks is:
```
{prompt}
```
"""
GRAPHDB_SPARQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"],
    template=GRAPHDB_SPARQL_GENERATION_TEMPLATE,
)

GRAPHDB_SPARQL_FIX_TEMPLATE = """
This following SPARQL query delimited by triple backticks
```
{generated_sparql}
```
is not valid.
The error delimited by triple backticks is
```
{error_message}
```
Give me a correct version of the SPARQL query.
Do not change the logic of the query.
Do not include any explanations or apologies in your responses.
Do not wrap the query in backticks.
Do not include any text except the SPARQL query generated.
The ontology schema delimited by triple backticks in Turtle format is:
```
{schema}
```
"""

GRAPHDB_SPARQL_FIX_PROMPT = PromptTemplate(
    input_variables=["error_message", "generated_sparql", "schema"],
    template=GRAPHDB_SPARQL_FIX_TEMPLATE,
)

GRAPHDB_QA_TEMPLATE = """Task: Generate a natural language response from the results of a SPARQL query.
You are an assistant that creates well-written and human understandable answers.
The information part contains the information provided, which you can use to construct an answer.
The information provided is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make your response sound like the information is coming from an AI assistant, but don't add any information.
Don't use internal knowledge to answer the question, just say you don't know if no information is available.
Information:
{context}

Question: {prompt}
Helpful Answer:"""
GRAPHDB_QA_PROMPT = PromptTemplate(
    input_variables=["context", "prompt"], template=GRAPHDB_QA_TEMPLATE
)

AQL_GENERATION_TEMPLATE = """Task: Generate an ArangoDB Query Language (AQL) query from a User Input.

You are an ArangoDB Query Language (AQL) expert responsible for translating a `User Input` into an ArangoDB Query Language (AQL) query.

You are given an `ArangoDB Schema`. It is a JSON Object containing:
1. `Graph Schema`: Lists all Graphs within the ArangoDB Database Instance, along with their Edge Relationships.
2. `Collection Schema`: Lists all Collections within the ArangoDB Database Instance, along with their document/edge properties and a document/edge example.

You may also be given a set of `AQL Query Examples` to help you create the `AQL Query`. If provided, the `AQL Query Examples` should be used as a reference, similar to how `ArangoDB Schema` should be used.

Things you should do:
- Think step by step.
- Rely on `ArangoDB Schema` and `AQL Query Examples` (if provided) to generate the query.
- Begin the `AQL Query` by the `WITH` AQL keyword to specify all of the ArangoDB Collections required.
- Return the `AQL Query` wrapped in 3 backticks (```).
- Use only the provided relationship types and properties in the `ArangoDB Schema` and any `AQL Query Examples` queries.
- Only answer to requests related to generating an AQL Query.
- If a request is unrelated to generating AQL Query, say that you cannot help the user.

Things you should not do:
- Do not use any properties/relationships that can't be inferred from the `ArangoDB Schema` or the `AQL Query Examples`.
- Do not include any text except the generated AQL Query.
- Do not provide explanations or apologies in your responses.
- Do not generate an AQL Query that removes or deletes any data.

Under no circumstance should you generate an AQL Query that deletes any data whatsoever.

ArangoDB Schema:
{adb_schema}

AQL Query Examples (Optional):
{aql_examples}

User Input:
{user_input}

AQL Query:
"""

AQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["adb_schema", "aql_examples", "user_input"],
    template=AQL_GENERATION_TEMPLATE,
)

AQL_FIX_TEMPLATE = """Task: Address the ArangoDB Query Language (AQL) error message of an ArangoDB Query Language query.

You are an ArangoDB Query Language (AQL) expert responsible for correcting the provided `AQL Query` based on the provided `AQL Error`.

The `AQL Error` explains why the `AQL Query` could not be executed in the database.
The `AQL Error` may also contain the position of the error relative to the total number of lines of the `AQL Query`.
For example, 'error X at position 2:5' denotes that the error X occurs on line 2, column 5 of the `AQL Query`.

You are also given the `ArangoDB Schema`. It is a JSON Object containing:
1. `Graph Schema`: Lists all Graphs within the ArangoDB Database Instance, along with their Edge Relationships.
2. `Collection Schema`: Lists all Collections within the ArangoDB Database Instance, along with their document/edge properties and a document/edge example.

You will output the `Corrected AQL Query` wrapped in 3 backticks (```). Do not include any text except the Corrected AQL Query.

Remember to think step by step.

ArangoDB Schema:
{adb_schema}

AQL Query:
{aql_query}

AQL Error:
{aql_error}

Corrected AQL Query:
"""

AQL_FIX_PROMPT = PromptTemplate(
    input_variables=[
        "adb_schema",
        "aql_query",
        "aql_error",
    ],
    template=AQL_FIX_TEMPLATE,
)

AQL_QA_TEMPLATE = """Task: Generate a natural language `Summary` from the results of an ArangoDB Query Language query.

You are an ArangoDB Query Language (AQL) expert responsible for creating a well-written `Summary` from the `User Input` and associated `AQL Result`.

A user has executed an ArangoDB Query Language query, which has returned the AQL Result in JSON format.
You are responsible for creating an `Summary` based on the AQL Result.

You are given the following information:
- `ArangoDB Schema`: contains a schema representation of the user's ArangoDB Database.
- `User Input`: the original question/request of the user, which has been translated into an AQL Query.
- `AQL Query`: the AQL equivalent of the `User Input`, translated by another AI Model. Should you deem it to be incorrect, suggest a different AQL Query.
- `AQL Result`: the JSON output returned by executing the `AQL Query` within the ArangoDB Database.

Remember to think step by step.

Your `Summary` should sound like it is a response to the `User Input`.
Your `Summary` should not include any mention of the `AQL Query` or the `AQL Result`.

ArangoDB Schema:
{adb_schema}

User Input:
{user_input}

AQL Query:
{aql_query}

AQL Result:
{aql_result}
"""
AQL_QA_PROMPT = PromptTemplate(
    input_variables=["adb_schema", "user_input", "aql_query", "aql_result"],
    template=AQL_QA_TEMPLATE,
)


NEPTUNE_OPENCYPHER_EXTRA_INSTRUCTIONS = """
Instructions:
Generate the query in openCypher format and follow these rules:
Do not use `NONE`, `ALL` or `ANY` predicate functions, rather use list comprehensions.
Do not use `REDUCE` function. Rather use a combination of list comprehension and the `UNWIND` clause to achieve similar results.
Do not use `FOREACH` clause. Rather use a combination of `WITH` and `UNWIND` clauses to achieve similar results.{extra_instructions}
\n"""

NEPTUNE_OPENCYPHER_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
    "Instructions:", NEPTUNE_OPENCYPHER_EXTRA_INSTRUCTIONS
)

NEPTUNE_OPENCYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question", "extra_instructions"],
    template=NEPTUNE_OPENCYPHER_GENERATION_TEMPLATE,
)

NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_TEMPLATE = """
Write an openCypher query to answer the following question. Do not explain the answer. Only return the query.{extra_instructions}
Question: "{question}".
Here is the property graph schema:
{schema}
\n"""

NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT = PromptTemplate(
    input_variables=["schema", "question", "extra_instructions"],
    template=NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_TEMPLATE,
)
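
# Editor's note: an illustrative check (not part of the diff above). Each
# PromptTemplate above renders by supplying its declared input_variables; the
# schema and question here are placeholders:
_rendered = NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT.format(
    schema="(:Person)-[:KNOWS]->(:Person)",
    question="Who does Alice know?",
    extra_instructions="",
)
assert "Who does Alice know?" in _rendered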
@@ -1,152 +0,0 @@
"""
Question answering over an RDF or OWL graph using SPARQL.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import (
    SPARQL_GENERATION_SELECT_PROMPT,
    SPARQL_GENERATION_UPDATE_PROMPT,
    SPARQL_INTENT_PROMPT,
    SPARQL_QA_PROMPT,
)
from langchain_community.graphs.rdf_graph import RdfGraph


class GraphSparqlQAChain(Chain):
    """Question-answering against an RDF or OWL graph by generating SPARQL statements.

    *Security note*: Make sure that the database connection uses credentials
    that are narrowly-scoped to only include necessary permissions.
    Failure to do so may result in data corruption or loss, since the calling
    code may attempt commands that would result in deletion, mutation
    of data if appropriately prompted or reading sensitive data if such
    data is present in the database.
    The best way to guard against such negative outcomes is to (as appropriate)
    limit the permissions granted to the credentials used with this tool.

    See https://python.langchain.com/docs/security for more information.
    """

    graph: RdfGraph = Field(exclude=True)
    sparql_generation_select_chain: LLMChain
    sparql_generation_update_chain: LLMChain
    sparql_intent_chain: LLMChain
    qa_chain: LLMChain
    return_sparql_query: bool = False
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    sparql_query_key: str = "sparql_query"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT,
        sparql_select_prompt: BasePromptTemplate = SPARQL_GENERATION_SELECT_PROMPT,
        sparql_update_prompt: BasePromptTemplate = SPARQL_GENERATION_UPDATE_PROMPT,
        sparql_intent_prompt: BasePromptTemplate = SPARQL_INTENT_PROMPT,
        **kwargs: Any,
    ) -> GraphSparqlQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        sparql_generation_select_chain = LLMChain(llm=llm, prompt=sparql_select_prompt)
        sparql_generation_update_chain = LLMChain(llm=llm, prompt=sparql_update_prompt)
        sparql_intent_chain = LLMChain(llm=llm, prompt=sparql_intent_prompt)

        return cls(
            qa_chain=qa_chain,
            sparql_generation_select_chain=sparql_generation_select_chain,
            sparql_generation_update_chain=sparql_generation_update_chain,
            sparql_intent_chain=sparql_intent_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """
        Generate SPARQL query, use it to retrieve a response from the gdb and answer
        the question.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        prompt = inputs[self.input_key]

        _intent = self.sparql_intent_chain.run({"prompt": prompt}, callbacks=callbacks)
        intent = _intent.strip()

        if "SELECT" in intent and "UPDATE" not in intent:
            sparql_generation_chain = self.sparql_generation_select_chain
            intent = "SELECT"
        elif "UPDATE" in intent and "SELECT" not in intent:
            sparql_generation_chain = self.sparql_generation_update_chain
            intent = "UPDATE"
        else:
            raise ValueError(
                "I am sorry, but this prompt seems to fit none of the currently "
                "supported SPARQL query types, i.e., SELECT and UPDATE."
            )

        _run_manager.on_text("Identified intent:", end="\n", verbose=self.verbose)
        _run_manager.on_text(intent, color="green", end="\n", verbose=self.verbose)

        generated_sparql = sparql_generation_chain.run(
            {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_sparql, color="green", end="\n", verbose=self.verbose
        )

        if intent == "SELECT":
            context = self.graph.query(generated_sparql)

            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )
            result = self.qa_chain(
                {"prompt": prompt, "context": context},
                callbacks=callbacks,
            )
            res = result[self.qa_chain.output_key]
        elif intent == "UPDATE":
            self.graph.update(generated_sparql)
            res = "Successfully inserted triples into the graph."
        else:
            raise ValueError("Unsupported SPARQL query type.")

        chain_result: Dict[str, Any] = {self.output_key: res}
        if self.return_sparql_query:
            chain_result[self.sparql_query_key] = generated_sparql
        return chain_result
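
# Editor's note: an illustrative usage sketch (not part of the diff above). The
# RDF source and model are placeholders; rdflib and network access are required,
# so the snippet is shown commented out:
#
#     from langchain_community.graphs import RdfGraph
#     from langchain_openai import ChatOpenAI
#
#     graph = RdfGraph(
#         source_file="http://www.w3.org/People/Berners-Lee/card", standard="rdf"
#     )
#     chain = GraphSparqlQAChain.from_llm(
#         ChatOpenAI(temperature=0), graph=graph, return_sparql_query=True
#     )
#     out = chain.invoke({chain.input_key: "What is Tim Berners-Lee's work homepage?"})
#     print(out[chain.output_key])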
@@ -1,97 +0,0 @@
"""Chain that hits a URL and then uses an LLM to parse results."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains import LLMChain
from langchain.chains.base import Chain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.pydantic_v1 import Extra, Field, root_validator

from langchain_community.utilities.requests import TextRequestsWrapper

DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"  # noqa: E501
}


class LLMRequestsChain(Chain):
    """Chain that requests a URL and then uses an LLM to parse results.

    **Security Note**: This chain can make GET requests to arbitrary URLs,
        including internal URLs.

        Control access to who can run this chain and what network access
        this chain has.

        See https://python.langchain.com/docs/security for more information.
    """

    llm_chain: LLMChain  # type: ignore[valid-type]
    requests_wrapper: TextRequestsWrapper = Field(
        default_factory=lambda: TextRequestsWrapper(headers=DEFAULT_HEADERS),
        exclude=True,
    )
    text_length: int = 8000
    requests_key: str = "requests_result"  #: :meta private:
    input_key: str = "url"  #: :meta private:
    output_key: str = "output"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return [self.output_key]

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        try:
            from bs4 import BeautifulSoup  # noqa: F401

        except ImportError:
            raise ImportError(
                "Could not import bs4 python package. "
                "Please install it with `pip install bs4`."
            )
        return values

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        from bs4 import BeautifulSoup

        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        # Other keys are assumed to be needed for LLM prediction
        other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
        url = inputs[self.input_key]
        res = self.requests_wrapper.get(url)
        # extract the text from the html
        soup = BeautifulSoup(res, "html.parser")
        other_keys[self.requests_key] = soup.get_text()[: self.text_length]
        result = self.llm_chain.predict(  # type: ignore[attr-defined]
            callbacks=_run_manager.get_child(), **other_keys
        )
        return {self.output_key: result}

    @property
    def _chain_type(self) -> str:
        return "llm_requests_chain"
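
# Editor's note: an illustrative usage sketch (not part of the diff above). The
# prompt must expect the chain's `requests_key` ("requests_result"); the model,
# URL, and question are placeholders, so the snippet is shown commented out:
#
#     from langchain_core.prompts import PromptTemplate
#     from langchain_openai import OpenAI
#
#     template = """Between >>> and <<< is the raw text of a web page.
#     Extract the answer to the question '{query}' or say "not found".
#     >>> {requests_result} <<<
#     Extracted:"""
#     prompt = PromptTemplate(
#         input_variables=["query", "requests_result"], template=template
#     )
#     chain = LLMRequestsChain(
#         llm_chain=LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
#     )
#     chain.invoke({"query": "What is this page about?", "url": "https://example.com"})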
@@ -1,229 +0,0 @@
"""Chain that makes API calls and summarizes the responses to answer a question."""
from __future__ import annotations

import json
from typing import Any, Dict, List, NamedTuple, Optional, cast

from langchain.chains.api.openapi.requests_chain import APIRequesterChain
from langchain.chains.api.openapi.response_chain import APIResponderChain
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks import CallbackManagerForChainRun, Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.pydantic_v1 import BaseModel, Field
from requests import Response

from langchain_community.tools.openapi.utils.api_models import APIOperation
from langchain_community.utilities.requests import Requests


class _ParamMapping(NamedTuple):
    """Mapping from parameter name to parameter value."""

    query_params: List[str]
    body_params: List[str]
    path_params: List[str]


class OpenAPIEndpointChain(Chain, BaseModel):
    """Chain interacts with an OpenAPI endpoint using natural language."""

    api_request_chain: LLMChain
    api_response_chain: Optional[LLMChain]
    api_operation: APIOperation
    requests: Requests = Field(exclude=True, default_factory=Requests)
    param_mapping: _ParamMapping = Field(alias="param_mapping")
    return_intermediate_steps: bool = False
    instructions_key: str = "instructions"  #: :meta private:
    output_key: str = "output"  #: :meta private:
    max_text_length: Optional[int] = Field(ge=0)  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        return [self.instructions_key]

    @property
    def output_keys(self) -> List[str]:
        """Expect output key.

        :meta private:
        """
        if not self.return_intermediate_steps:
            return [self.output_key]
        else:
            return [self.output_key, "intermediate_steps"]

    def _construct_path(self, args: Dict[str, str]) -> str:
        """Construct the path from the deserialized input."""
        path = self.api_operation.base_url + self.api_operation.path
        for param in self.param_mapping.path_params:
            path = path.replace(f"{{{param}}}", str(args.pop(param, "")))
        return path

    def _extract_query_params(self, args: Dict[str, str]) -> Dict[str, str]:
        """Extract the query params from the deserialized input."""
        query_params = {}
        for param in self.param_mapping.query_params:
            if param in args:
                query_params[param] = args.pop(param)
        return query_params

    def _extract_body_params(self, args: Dict[str, str]) -> Optional[Dict[str, str]]:
        """Extract the request body params from the deserialized input."""
        body_params = None
        if self.param_mapping.body_params:
            body_params = {}
            for param in self.param_mapping.body_params:
                if param in args:
                    body_params[param] = args.pop(param)
        return body_params

    def deserialize_json_input(self, serialized_args: str) -> dict:
        """Use the serialized typescript dictionary.

        Resolve the path, query params dict, and optional requestBody dict.
        """
        args: dict = json.loads(serialized_args)
        path = self._construct_path(args)
        body_params = self._extract_body_params(args)
        query_params = self._extract_query_params(args)
        return {
            "url": path,
            "data": body_params,
            "params": query_params,
        }

    def _get_output(self, output: str, intermediate_steps: dict) -> dict:
        """Return the output from the API call."""
        if self.return_intermediate_steps:
            return {
                self.output_key: output,
                "intermediate_steps": intermediate_steps,
            }
        else:
            return {self.output_key: output}

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        intermediate_steps = {}
        instructions = inputs[self.instructions_key]
        instructions = instructions[: self.max_text_length]
        _api_arguments = self.api_request_chain.predict_and_parse(
            instructions=instructions, callbacks=_run_manager.get_child()
        )
        api_arguments = cast(str, _api_arguments)
        intermediate_steps["request_args"] = api_arguments
        _run_manager.on_text(
            api_arguments, color="green", end="\n", verbose=self.verbose
        )
        if api_arguments.startswith("ERROR"):
            return self._get_output(api_arguments, intermediate_steps)
        elif api_arguments.startswith("MESSAGE:"):
            return self._get_output(
                api_arguments[len("MESSAGE:") :], intermediate_steps
            )
        try:
            request_args = self.deserialize_json_input(api_arguments)
            method = getattr(self.requests, self.api_operation.method.value)
            api_response: Response = method(**request_args)
            if api_response.status_code != 200:
                method_str = str(self.api_operation.method.value)
                response_text = (
                    f"{api_response.status_code}: {api_response.reason}"
                    + f"\nFor {method_str.upper()} {request_args['url']}\n"
                    + f"Called with args: {request_args['params']}"
                )
            else:
                response_text = api_response.text
        except Exception as e:
            response_text = f"Error with message {str(e)}"
        response_text = response_text[: self.max_text_length]
        intermediate_steps["response_text"] = response_text
        _run_manager.on_text(
            response_text, color="blue", end="\n", verbose=self.verbose
        )
        if self.api_response_chain is not None:
            _answer = self.api_response_chain.predict_and_parse(
                response=response_text,
                instructions=instructions,
                callbacks=_run_manager.get_child(),
            )
            answer = cast(str, _answer)
            _run_manager.on_text(answer, color="yellow", end="\n", verbose=self.verbose)
            return self._get_output(answer, intermediate_steps)
        else:
            return self._get_output(response_text, intermediate_steps)

    @classmethod
    def from_url_and_method(
        cls,
        spec_url: str,
        path: str,
        method: str,
        llm: BaseLanguageModel,
        requests: Optional[Requests] = None,
        return_intermediate_steps: bool = False,
        **kwargs: Any,
        # TODO: Handle async
    ) -> "OpenAPIEndpointChain":
        """Create an OpenAPIEndpoint from a spec at the specified url."""
        operation = APIOperation.from_openapi_url(spec_url, path, method)
        return cls.from_api_operation(
            operation,
            requests=requests,
            llm=llm,
            return_intermediate_steps=return_intermediate_steps,
            **kwargs,
        )

    @classmethod
    def from_api_operation(
        cls,
        operation: APIOperation,
        llm: BaseLanguageModel,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        return_intermediate_steps: bool = False,
        raw_response: bool = False,
        callbacks: Callbacks = None,
        **kwargs: Any,
        # TODO: Handle async
    ) -> "OpenAPIEndpointChain":
        """Create an OpenAPIEndpointChain from an operation and a spec."""
        param_mapping = _ParamMapping(
            query_params=operation.query_params,
            body_params=operation.body_params,
            path_params=operation.path_params,
        )
        requests_chain = APIRequesterChain.from_llm_and_typescript(
            llm,
            typescript_definition=operation.to_typescript(),
            verbose=verbose,
            callbacks=callbacks,
        )
        if raw_response:
            response_chain = None
        else:
            response_chain = APIResponderChain.from_llm(
                llm, verbose=verbose, callbacks=callbacks
            )
        _requests = requests or Requests()
        return cls(
            api_request_chain=requests_chain,
            api_response_chain=response_chain,
            api_operation=operation,
            requests=_requests,
            param_mapping=param_mapping,
            verbose=verbose,
            return_intermediate_steps=return_intermediate_steps,
            callbacks=callbacks,
            **kwargs,
        )
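
# Editor's note: an illustrative usage sketch (not part of the diff above). The
# spec URL, path, and model are placeholders (the endpoint must be reachable),
# so the snippet is shown commented out:
#
#     from langchain_openai import OpenAI
#
#     chain = OpenAPIEndpointChain.from_url_and_method(
#         spec_url="https://example.com/openapi.yaml",  # hypothetical spec
#         path="/search",
#         method="get",
#         llm=OpenAI(temperature=0),
#         return_intermediate_steps=True,
#     )
#     chain.invoke({chain.instructions_key: "Find items matching 'graph databases'"})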
@@ -1,57 +0,0 @@
# flake8: noqa
REQUEST_TEMPLATE = """You are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.

API_SCHEMA: ```typescript
{schema}
```

USER_INSTRUCTIONS: "{instructions}"

Your arguments must be plain json provided in a markdown block:

ARGS: ```json
{{valid json conforming to API_SCHEMA}}
```

Example
-----

ARGS: ```json
{{"foo": "bar", "baz": {{"qux": "quux"}}}}
```

The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.
You MUST strictly comply to the types indicated by the provided schema, including all required args.

If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:

Message: ```text
Concise response requesting the additional information that would make calling the function successful.
```

Begin
-----
ARGS:
"""
RESPONSE_TEMPLATE = """You are a helpful AI assistant trained to answer user queries from API responses.
You attempted to call an API, which resulted in:
API_RESPONSE: {response}

USER_COMMENT: "{instructions}"


If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:
Response: ```json
{{"response": "Human-understandable synthesis of the API_RESPONSE"}}
```

Otherwise respond with the following markdown json block:
Response Error: ```json
{{"response": "What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion."}}
```

You MUST respond as a markdown json code block. The person you are responding to CANNOT see the API_RESPONSE, so if there is any relevant information there you must include it in your response.

Begin:
---
"""
@@ -1,62 +0,0 @@
"""request parser."""

import json
import re
from typing import Any

from langchain.chains.api.openapi.prompts import REQUEST_TEMPLATE
from langchain.chains.llm import LLMChain
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts.prompt import PromptTemplate


class APIRequesterOutputParser(BaseOutputParser):
    """Parse the request and error tags."""

    def _load_json_block(self, serialized_block: str) -> str:
        try:
            return json.dumps(json.loads(serialized_block, strict=False))
        except json.JSONDecodeError:
            return "ERROR serializing request."

    def parse(self, llm_output: str) -> str:
        """Parse the request and error tags."""

        json_match = re.search(r"```json(.*?)```", llm_output, re.DOTALL)
        if json_match:
            return self._load_json_block(json_match.group(1).strip())
        message_match = re.search(r"```text(.*?)```", llm_output, re.DOTALL)
        if message_match:
            return f"MESSAGE: {message_match.group(1).strip()}"
        return "ERROR making request"

    @property
    def _type(self) -> str:
        return "api_requester"


class APIRequesterChain(LLMChain):
    """Get the request parser."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @classmethod
    def from_llm_and_typescript(
        cls,
        llm: BaseLanguageModel,
        typescript_definition: str,
        verbose: bool = True,
        **kwargs: Any,
    ) -> LLMChain:
        """Get the request parser."""
        output_parser = APIRequesterOutputParser()
        prompt = PromptTemplate(
            template=REQUEST_TEMPLATE,
            output_parser=output_parser,
            partial_variables={"schema": typescript_definition},
            input_variables=["instructions"],
        )
        return cls(prompt=prompt, llm=llm, verbose=verbose, **kwargs)
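A hedged usage sketch of the removed chain (llm stands for any BaseLanguageModel instance, schema_def for a TypeScript schema string; the instruction text is invented):

chain = APIRequesterChain.from_llm_and_typescript(
    llm=llm,  # placeholder model instance
    typescript_definition=schema_def,  # placeholder TypeScript schema
)
raw = chain.run(instructions="Create a user named Ada")
# Yields a serialized JSON args string, a "MESSAGE: ..." reply, or an error marker.
args = APIRequesterOutputParser().parse(raw)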
@@ -1,57 +0,0 @@
"""Response parser."""

import json
import re
from typing import Any

from langchain.chains.api.openapi.prompts import RESPONSE_TEMPLATE
from langchain.chains.llm import LLMChain
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts.prompt import PromptTemplate


class APIResponderOutputParser(BaseOutputParser):
    """Parse the response and error tags."""

    def _load_json_block(self, serialized_block: str) -> str:
        try:
            response_content = json.loads(serialized_block, strict=False)
            return response_content.get("response", "ERROR parsing response.")
        except json.JSONDecodeError:
            return "ERROR parsing response."
        except:
            raise

    def parse(self, llm_output: str) -> str:
        """Parse the response and error tags."""
        json_match = re.search(r"```json(.*?)```", llm_output, re.DOTALL)
        if json_match:
            return self._load_json_block(json_match.group(1).strip())
        else:
            raise ValueError(f"No response found in output: {llm_output}.")

    @property
    def _type(self) -> str:
        return "api_responder"


class APIResponderChain(LLMChain):
    """Get the response parser."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @classmethod
    def from_llm(
        cls, llm: BaseLanguageModel, verbose: bool = True, **kwargs: Any
    ) -> LLMChain:
        """Get the response parser."""
        output_parser = APIResponderOutputParser()
        prompt = PromptTemplate(
            template=RESPONSE_TEMPLATE,
            output_parser=output_parser,
            input_variables=["response", "instructions"],
        )
        return cls(prompt=prompt, llm=llm, verbose=verbose, **kwargs)
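For reference, the parser accepts only a fenced json block; a small illustrative check (values invented):

parser = APIResponderOutputParser()
ok = parser.parse('Response: ```json\n{"response": "It is 72F in Tokyo."}\n```')
# ok == "It is 72F in Tokyo."; output without a ```json block raises ValueError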
@@ -89,7 +89,7 @@ class ChatBaichuan(BaseChatModel):
 
     baichuan_api_base: str = Field(default=DEFAULT_API_BASE)
     """Baichuan custom endpoints"""
-    baichuan_api_key: Optional[SecretStr] = None
+    baichuan_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
     """Baichuan API Key"""
     baichuan_secret_key: Optional[SecretStr] = None
     """[DEPRECATED, keeping it for backward compatibility] Baichuan Secret Key"""
@@ -100,7 +100,7 @@ class ChatBaichuan(BaseChatModel):
     model = "Baichuan2-Turbo-192K"
     """model name of Baichuan, default is `Baichuan2-Turbo-192K`,
     other options include `Baichuan2-Turbo`"""
-    temperature: float = 0.3
+    temperature: Optional[float] = Field(default=0.3)
     """What sampling temperature to use."""
     top_k: int = 5
     """What search sampling control to use."""
@@ -1,3 +1,17 @@
-from langchain.retrievers.document_compressors.cross_encoder import BaseCrossEncoder
+from abc import ABC, abstractmethod
+from typing import List, Tuple
 
 __all__ = ["BaseCrossEncoder"]
 
+
+class BaseCrossEncoder(ABC):
+    """Interface for cross encoder models."""
+
+    @abstractmethod
+    def score(self, text_pairs: List[Tuple[str, str]]) -> List[float]:
+        """Score pairs' similarity.
+
+        Args:
+            text_pairs: List of pairs of texts.
+
+        Returns:
+            List of scores.
+        """
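A minimal sketch of a concrete implementation of this interface (the sentence_transformers dependency and the model name are assumptions, not part of this hunk):

from typing import List, Tuple

class SentenceTransformerCrossEncoder(BaseCrossEncoder):
    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        from sentence_transformers import CrossEncoder  # assumed dependency
        self._model = CrossEncoder(model_name)

    def score(self, text_pairs: List[Tuple[str, str]]) -> List[float]:
        # One relevance score per (query, passage) pair.
        return [float(s) for s in self._model.predict(text_pairs)]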
@@ -331,6 +331,10 @@ if TYPE_CHECKING:
     from langchain_community.document_loaders.oracleadb_loader import (
         OracleAutonomousDatabaseLoader,
     )
+    from langchain_community.document_loaders.oracleai import (
+        OracleDocLoader,  # noqa: F401
+        OracleTextSplitter,  # noqa: F401
+    )
     from langchain_community.document_loaders.org_mode import (
         UnstructuredOrgModeLoader,
     )
@@ -624,6 +628,8 @@ _module_lookup = {
     "OnlinePDFLoader": "langchain_community.document_loaders.pdf",
     "OpenCityDataLoader": "langchain_community.document_loaders.open_city_data",
     "OracleAutonomousDatabaseLoader": "langchain_community.document_loaders.oracleadb_loader",  # noqa: E501
+    "OracleDocLoader": "langchain_community.document_loaders.oracleai",
+    "OracleTextSplitter": "langchain_community.document_loaders.oracleai",
     "OutlookMessageLoader": "langchain_community.document_loaders.email",
     "PDFMinerLoader": "langchain_community.document_loaders.pdf",
     "PDFMinerPDFasHTMLLoader": "langchain_community.document_loaders.pdf",
@@ -822,6 +828,8 @@ __all__ = [
     "OnlinePDFLoader",
     "OpenCityDataLoader",
     "OracleAutonomousDatabaseLoader",
+    "OracleDocLoader",
+    "OracleTextSplitter",
     "OutlookMessageLoader",
     "PDFMinerLoader",
     "PDFMinerPDFasHTMLLoader",
447 libs/community/langchain_community/document_loaders/oracleai.py Normal file
@@ -0,0 +1,447 @@
# Authors:
#   Harichandan Roy (hroy)
#   David Jiang (ddjiang)
#
# -----------------------------------------------------------------------------
# oracleai.py
# -----------------------------------------------------------------------------

from __future__ import annotations

import hashlib
import json
import logging
import os
import random
import struct
import time
import traceback
from html.parser import HTMLParser
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import TextSplitter

if TYPE_CHECKING:
    from oracledb import Connection

logger = logging.getLogger(__name__)

"""ParseOracleDocMetadata class"""


class ParseOracleDocMetadata(HTMLParser):
    """Parse Oracle doc metadata..."""

    def __init__(self) -> None:
        super().__init__()
        self.reset()
        self.match = False
        self.metadata: Dict[str, Any] = {}

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        if tag == "meta":
            entry: Optional[str] = ""
            for name, value in attrs:
                if name == "name":
                    entry = value
                if name == "content":
                    if entry:
                        self.metadata[entry] = value
        elif tag == "title":
            self.match = True

    def handle_data(self, data: str) -> None:
        if self.match:
            self.metadata["title"] = data
            self.match = False

    def get_metadata(self) -> Dict[str, Any]:
        return self.metadata


"""OracleDocReader class"""


class OracleDocReader:
    """Read a file"""

    @staticmethod
    def generate_object_id(input_string: Union[str, None] = None) -> str:
        out_length = 32  # output length
        hash_len = 8  # hash value length

        if input_string is None:
            input_string = "".join(
                random.choices(
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
                    k=16,
                )
            )

        # timestamp
        timestamp = int(time.time())
        timestamp_bin = struct.pack(">I", timestamp)  # 4 bytes

        # hash_value
        hashval_bin = hashlib.sha256(input_string.encode()).digest()
        hashval_bin = hashval_bin[:hash_len]  # 8 bytes

        # counter
        counter_bin = struct.pack(">I", random.getrandbits(32))  # 4 bytes

        # binary object id
        object_id = timestamp_bin + hashval_bin + counter_bin  # 16 bytes
        object_id_hex = object_id.hex()  # 32 bytes
        object_id_hex = object_id_hex.zfill(
            out_length
        )  # fill with zeros if less than 32 bytes

        object_id_hex = object_id_hex[:out_length]

        return object_id_hex

    @staticmethod
    def read_file(
        conn: Connection, file_path: str, params: dict
    ) -> Union[Document, None]:
        """Read a file using OracleDocReader
        Args:
            conn: Oracle Connection,
            file_path: path of the file to read,
            params: document parsing parameters.
        Returns:
            Plain text and metadata as Langchain Document.
        """

        metadata: Dict[str, Any] = {}
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e
        try:
            oracledb.defaults.fetch_lobs = False
            cursor = conn.cursor()

            with open(file_path, "rb") as f:
                data = f.read()

            if data is None:
                return Document(page_content="", metadata=metadata)

            mdata = cursor.var(oracledb.DB_TYPE_CLOB)
            text = cursor.var(oracledb.DB_TYPE_CLOB)
            cursor.execute(
                """
                declare
                    input blob;
                begin
                    input := :blob;
                    :mdata := dbms_vector_chain.utl_to_text(input, json(:pref));
                    :text := dbms_vector_chain.utl_to_text(input);
                end;""",
                blob=data,
                pref=json.dumps(params),
                mdata=mdata,
                text=text,
            )
            cursor.close()

            if mdata is None:
                metadata = {}
            else:
                doc_data = str(mdata.getvalue())
                if doc_data.startswith("<!DOCTYPE html") or doc_data.startswith(
                    "<HTML>"
                ):
                    p = ParseOracleDocMetadata()
                    p.feed(doc_data)
                    metadata = p.get_metadata()

            doc_id = OracleDocReader.generate_object_id(conn.username + "$" + file_path)
            metadata["_oid"] = doc_id
            metadata["_file"] = file_path

            if text is None:
                return Document(page_content="", metadata=metadata)
            else:
                return Document(page_content=str(text.getvalue()), metadata=metadata)

        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            logger.info(f"Skip processing {file_path}")
            cursor.close()
            return None


"""OracleDocLoader class"""


class OracleDocLoader(BaseLoader):
    """Read documents using OracleDocLoader
    Args:
        conn: Oracle Connection,
        params: Loader parameters.
    """

    def __init__(self, conn: Connection, params: Dict[str, Any], **kwargs: Any):
        self.conn = conn
        self.params = json.loads(json.dumps(params))
        super().__init__(**kwargs)

    def load(self) -> List[Document]:
        """Load data into LangChain Document objects..."""
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        ncols = 0
        results: List[Document] = []
        metadata: Dict[str, Any] = {}
        m_params = {"plaintext": "false"}
        try:
            # extract the parameters
            if self.params is not None:
                self.file = self.params.get("file")
                self.dir = self.params.get("dir")
                self.owner = self.params.get("owner")
                self.tablename = self.params.get("tablename")
                self.colname = self.params.get("colname")
            else:
                raise Exception("Missing loader parameters")

            oracledb.defaults.fetch_lobs = False

            if self.file:
                doc = OracleDocReader.read_file(self.conn, self.file, m_params)

                if doc is None:
                    return results

                results.append(doc)

            if self.dir:
                skip_count = 0
                for file_name in os.listdir(self.dir):
                    file_path = os.path.join(self.dir, file_name)
                    if os.path.isfile(file_path):
                        doc = OracleDocReader.read_file(self.conn, file_path, m_params)

                        if doc is None:
                            skip_count = skip_count + 1
                            logger.info(f"Total skipped: {skip_count}\n")
                        else:
                            results.append(doc)

            if self.tablename:
                try:
                    if self.owner is None or self.colname is None:
                        raise Exception("Missing owner or column name or both.")

                    cursor = self.conn.cursor()
                    self.mdata_cols = self.params.get("mdata_cols")
                    if self.mdata_cols is not None:
                        if len(self.mdata_cols) > 3:
                            raise Exception(
                                "Exceeds the max number of columns "
                                + "you can request for metadata."
                            )

                        # execute a query to get column data types
                        sql = (
                            "select column_name, data_type from all_tab_columns "
                            + "where owner = :ownername and "
                            + "table_name = :tablename"
                        )
                        cursor.execute(
                            sql,
                            ownername=self.owner.upper(),
                            tablename=self.tablename.upper(),
                        )

                        rows = cursor.fetchall()
                        for row in rows:
                            if row[0] in self.mdata_cols:
                                if row[1] not in [
                                    "NUMBER",
                                    "BINARY_DOUBLE",
                                    "BINARY_FLOAT",
                                    "LONG",
                                    "DATE",
                                    "TIMESTAMP",
                                    "VARCHAR2",
                                ]:
                                    raise Exception(
                                        "The datatype for the column requested "
                                        + "for metadata is not supported."
                                    )

                    self.mdata_cols_sql = ", rowid"
                    if self.mdata_cols is not None:
                        for col in self.mdata_cols:
                            self.mdata_cols_sql = self.mdata_cols_sql + ", " + col

                    # [TODO] use bind variables
                    sql = (
                        "select dbms_vector_chain.utl_to_text(t."
                        + self.colname
                        + ", json('"
                        + json.dumps(m_params)
                        + "')) mdata, dbms_vector_chain.utl_to_text(t."
                        + self.colname
                        + ") text"
                        + self.mdata_cols_sql
                        + " from "
                        + self.owner
                        + "."
                        + self.tablename
                        + " t"
                    )

                    cursor.execute(sql)
                    for row in cursor:
                        metadata = {}

                        if row is None:
                            doc_id = OracleDocReader.generate_object_id(
                                self.conn.username
                                + "$"
                                + self.owner
                                + "$"
                                + self.tablename
                                + "$"
                                + self.colname
                            )
                            metadata["_oid"] = doc_id
                            results.append(Document(page_content="", metadata=metadata))
                        else:
                            if row[0] is not None:
                                data = str(row[0])
                                if data.startswith("<!DOCTYPE html") or data.startswith(
                                    "<HTML>"
                                ):
                                    p = ParseOracleDocMetadata()
                                    p.feed(data)
                                    metadata = p.get_metadata()

                            doc_id = OracleDocReader.generate_object_id(
                                self.conn.username
                                + "$"
                                + self.owner
                                + "$"
                                + self.tablename
                                + "$"
                                + self.colname
                                + "$"
                                + str(row[2])
                            )
                            metadata["_oid"] = doc_id
                            metadata["_rowid"] = row[2]

                            # process projected metadata cols
                            if self.mdata_cols is not None:
                                ncols = len(self.mdata_cols)

                                for i in range(0, ncols):
                                    metadata[self.mdata_cols[i]] = row[i + 2]

                            if row[1] is None:
                                results.append(
                                    Document(page_content="", metadata=metadata)
                                )
                            else:
                                results.append(
                                    Document(
                                        page_content=str(row[1]), metadata=metadata
                                    )
                                )
                except Exception as ex:
                    logger.info(f"An exception occurred :: {ex}")
                    traceback.print_exc()
                    cursor.close()
                    raise

            return results
        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            traceback.print_exc()
            raise


class OracleTextSplitter(TextSplitter):
    """Splitting text using Oracle chunker."""

    def __init__(self, conn: Connection, params: Dict[str, Any], **kwargs: Any) -> None:
        """Initialize."""
        self.conn = conn
        self.params = params
        super().__init__(**kwargs)
        try:
            import json

            try:
                import oracledb
            except ImportError as e:
                raise ImportError(
                    "Unable to import oracledb, please install with "
                    "`pip install -U oracledb`."
                ) from e

            self._oracledb = oracledb
            self._json = json
        except ImportError:
            raise ImportError(
                "oracledb or json or both are not installed. "
                + "Please install them. "
                + "Recommendations: `pip install oracledb`. "
            )

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks."""

        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        splits = []

        try:
            # returns strings or bytes instead of a locator
            self._oracledb.defaults.fetch_lobs = False

            cursor = self.conn.cursor()

            cursor.setinputsizes(content=oracledb.CLOB)
            cursor.execute(
                "select t.column_value from "
                + "dbms_vector_chain.utl_to_chunks(:content, json(:params)) t",
                content=text,
                params=self._json.dumps(self.params),
            )

            while True:
                row = cursor.fetchone()
                if row is None:
                    break
                d = self._json.loads(row[0])
                splits.append(d["chunk_data"])

            return splits

        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            traceback.print_exc()
            raise
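A hedged end-to-end sketch of the new loader and splitter (connection details, the directory path, and the chunker parameters are placeholders):

import oracledb

conn = oracledb.connect(user="...", password="...", dsn="...")  # placeholders

# Load every file in a local directory via DBMS_VECTOR_CHAIN.UTL_TO_TEXT.
loader = OracleDocLoader(conn=conn, params={"dir": "/data/docs"})
docs = loader.load()

# Chunk server-side; "by"/"max" are sample utl_to_chunks preferences.
splitter = OracleTextSplitter(conn=conn, params={"by": "words", "max": "100"})
chunks = splitter.split_text(docs[0].page_content)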
@@ -169,6 +169,9 @@ if TYPE_CHECKING:
     from langchain_community.embeddings.optimum_intel import (
         QuantizedBiEncoderEmbeddings,
     )
+    from langchain_community.embeddings.oracleai import (
+        OracleEmbeddings,  # noqa: F401
+    )
     from langchain_community.embeddings.premai import (
         PremAIEmbeddings,
     )
@@ -267,6 +270,7 @@ __all__ = [
     "OpenAIEmbeddings",
     "OpenVINOBgeEmbeddings",
     "OpenVINOEmbeddings",
+    "OracleEmbeddings",
     "PremAIEmbeddings",
     "QianfanEmbeddingsEndpoint",
     "QuantizedBgeEmbeddings",
@@ -344,6 +348,7 @@ _module_lookup = {
     "QianfanEmbeddingsEndpoint": "langchain_community.embeddings.baidu_qianfan_endpoint",  # noqa: E501
     "QuantizedBgeEmbeddings": "langchain_community.embeddings.itrex",
     "QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel",
+    "OracleEmbeddings": "langchain_community.embeddings.oracleai",
     "SagemakerEndpointEmbeddings": "langchain_community.embeddings.sagemaker_endpoint",
     "SelfHostedEmbeddings": "langchain_community.embeddings.self_hosted",
     "SelfHostedHuggingFaceEmbeddings": "langchain_community.embeddings.self_hosted_hugging_face",  # noqa: E501
182 libs/community/langchain_community/embeddings/oracleai.py Normal file
@@ -0,0 +1,182 @@
# Authors:
#   Harichandan Roy (hroy)
#   David Jiang (ddjiang)
#
# -----------------------------------------------------------------------------
# oracleai.py
# -----------------------------------------------------------------------------

from __future__ import annotations

import json
import logging
import traceback
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra

if TYPE_CHECKING:
    from oracledb import Connection

logger = logging.getLogger(__name__)

"""OracleEmbeddings class"""


class OracleEmbeddings(BaseModel, Embeddings):
    """Get Embeddings"""

    """Oracle Connection"""
    conn: Any
    """Embedding Parameters"""
    params: Dict[str, Any]
    """Proxy"""
    proxy: Optional[str] = None

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    """
    1 - user needs to have create procedure,
        create mining model, create any directory privilege.
    2 - grant create procedure, create mining model,
        create any directory to <user>;
    """

    @staticmethod
    def load_onnx_model(
        conn: Connection, dir: str, onnx_file: str, model_name: str
    ) -> None:
        """Load an ONNX model to Oracle Database.
        Args:
            conn: Oracle Connection,
            dir: Oracle Directory,
            onnx_file: ONNX file name,
            model_name: Name of the model.
        """

        try:
            if conn is None or dir is None or onnx_file is None or model_name is None:
                raise Exception("Invalid input")

            cursor = conn.cursor()
            cursor.execute(
                """
                begin
                    dbms_data_mining.drop_model(model_name => :model, force => true);
                    SYS.DBMS_VECTOR.load_onnx_model(:path, :filename, :model,
                        json('{"function" : "embedding",
                               "embeddingOutput" : "embedding",
                               "input": {"input": ["DATA"]}}'));
                end;""",
                path=dir,
                filename=onnx_file,
                model=model_name,
            )

            cursor.close()

        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            traceback.print_exc()
            cursor.close()
            raise

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using an OracleEmbeddings.
        Args:
            texts: The list of texts to embed.
        Returns:
            List of embeddings, one for each input text.
        """

        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        if texts is None:
            return None

        embeddings: List[List[float]] = []
        try:
            # returns strings or bytes instead of a locator
            oracledb.defaults.fetch_lobs = False
            cursor = self.conn.cursor()

            if self.proxy:
                cursor.execute(
                    "begin utl_http.set_proxy(:proxy); end;", proxy=self.proxy
                )

            for text in texts:
                cursor.execute(
                    "select t.* "
                    + "from dbms_vector_chain.utl_to_embeddings(:content, "
                    + "json(:params)) t",
                    content=text,
                    params=json.dumps(self.params),
                )

                for row in cursor:
                    if row is None:
                        embeddings.append([])
                    else:
                        rdata = json.loads(row[0])
                        # dereference string as array
                        vec = json.loads(rdata["embed_vector"])
                        embeddings.append(vec)

            cursor.close()
            return embeddings
        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            traceback.print_exc()
            cursor.close()
            raise

    def embed_query(self, text: str) -> List[float]:
        """Compute query embedding using an OracleEmbeddings.
        Args:
            text: The text to embed.
        Returns:
            Embedding for the text.
        """
        return self.embed_documents([text])[0]


# uncomment the following code block to run the test

"""
# A sample unit test.

''' get the Oracle connection '''
conn = oracledb.connect(
    user="",
    password="",
    dsn="")
print("Oracle connection is established...")

''' params '''
embedder_params = {"provider":"database", "model":"demo_model"}
proxy = ""

''' instance '''
embedder = OracleEmbeddings(conn=conn, params=embedder_params, proxy=proxy)

embed = embedder.embed_query("Hello World!")
print(f"Embedding generated by OracleEmbeddings: {embed}")

conn.close()
print("Connection is closed.")

"""
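A short sketch of staging an ONNX embedding model before embedding (directory, file, and model names are placeholders; the params mirror the sample test above):

# The ONNX file must already be in an Oracle directory object, e.g.
#   create or replace directory ONNX_DIR as '/opt/onnx';
OracleEmbeddings.load_onnx_model(conn, "ONNX_DIR", "model.onnx", "demo_model")
embedder = OracleEmbeddings(
    conn=conn, params={"provider": "database", "model": "demo_model"}
)
vectors = embedder.embed_documents(["first text", "second text"])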
@@ -1,70 +0,0 @@
"""Logic for converting internal query language to a valid AstraDB query."""
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN]


class AstraDBTranslator(Visitor):
    """Translate AstraDB internal query language elements to valid filters."""

    """Subset of allowed logical comparators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.IN,
        Comparator.NIN,
    ]

    """Subset of allowed logical operators."""
    allowed_operators = [Operator.AND, Operator.OR]

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        map_dict = {
            Operator.AND: "$and",
            Operator.OR: "$or",
            Comparator.EQ: "$eq",
            Comparator.NE: "$ne",
            Comparator.GTE: "$gte",
            Comparator.LTE: "$lte",
            Comparator.LT: "$lt",
            Comparator.GT: "$gt",
            Comparator.IN: "$in",
            Comparator.NIN: "$nin",
        }
        return map_dict[func]

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {self._format_func(operation.operator): args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        if comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance(
            comparison.value, list
        ):
            comparison.value = [comparison.value]

        comparator = self._format_func(comparison.comparator)
        return {comparison.attribute: {comparator: comparison.value}}

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
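Illustrative input and output for the removed translator (all values invented):

translator = AstraDBTranslator()
sq = StructuredQuery(
    query="dystopia",
    filter=Comparison(comparator=Comparator.GT, attribute="year", value=1990),
    limit=None,
)
query, kwargs = translator.visit_structured_query(sq)
# query == "dystopia"; kwargs == {"filter": {"year": {"$gt": 1990}}}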
@@ -1,50 +0,0 @@
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class ChromaTranslator(Visitor):
    """Translate `Chroma` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    """Subset of allowed logical operators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
    ]
    """Subset of allowed logical comparators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        return f"${func.value}"

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {self._format_func(operation.operator): args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        return {
            comparison.attribute: {
                self._format_func(comparison.comparator): comparison.value
            }
        }

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
@@ -1,64 +0,0 @@
"""Logic for converting internal query language to a valid DashVector query."""
from typing import Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class DashvectorTranslator(Visitor):
    """Logic for converting internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.LIKE,
    ]

    map_dict = {
        Operator.AND: " AND ",
        Operator.OR: " OR ",
        Comparator.EQ: " = ",
        Comparator.GT: " > ",
        Comparator.GTE: " >= ",
        Comparator.LT: " < ",
        Comparator.LTE: " <= ",
        Comparator.LIKE: " LIKE ",
    }

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        return self.map_dict[func]

    def visit_operation(self, operation: Operation) -> str:
        args = [arg.accept(self) for arg in operation.arguments]
        return self._format_func(operation.operator).join(args)

    def visit_comparison(self, comparison: Comparison) -> str:
        value = comparison.value
        if isinstance(value, str):
            if comparison.comparator == Comparator.LIKE:
                value = f"'%{value}%'"
            else:
                value = f"'{value}'"
        return (
            f"{comparison.attribute}{self._format_func(comparison.comparator)}{value}"
        )

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
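Unlike the dict-based translators, this one emits SQL-like filter strings; an invented example:

translator = DashvectorTranslator()
flt = Operation(
    operator=Operator.AND,
    arguments=[
        Comparison(comparator=Comparator.EQ, attribute="genre", value="sci-fi"),
        Comparison(comparator=Comparator.LIKE, attribute="title", value="star"),
    ],
)
print(flt.accept(translator))  # -> "genre = 'sci-fi' AND title LIKE '%star%'"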
@@ -1,94 +0,0 @@
from collections import ChainMap
from itertools import chain
from typing import Dict, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

_COMPARATOR_TO_SYMBOL = {
    Comparator.EQ: "",
    Comparator.GT: " >",
    Comparator.GTE: " >=",
    Comparator.LT: " <",
    Comparator.LTE: " <=",
    Comparator.IN: "",
    Comparator.LIKE: " LIKE",
}


class DatabricksVectorSearchTranslator(Visitor):
    """Translate `Databricks vector search` internal query language elements to
    valid filters."""

    """Subset of allowed logical operators."""
    allowed_operators = [Operator.AND, Operator.NOT, Operator.OR]

    """Subset of allowed logical comparators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.IN,
        Comparator.LIKE,
    ]

    def _visit_and_operation(self, operation: Operation) -> Dict:
        return dict(ChainMap(*[arg.accept(self) for arg in operation.arguments]))

    def _visit_or_operation(self, operation: Operation) -> Dict:
        filter_args = [arg.accept(self) for arg in operation.arguments]
        flattened_args = list(
            chain.from_iterable(filter_arg.items() for filter_arg in filter_args)
        )
        return {
            " OR ".join(key for key, _ in flattened_args): [
                value for _, value in flattened_args
            ]
        }

    def _visit_not_operation(self, operation: Operation) -> Dict:
        if len(operation.arguments) > 1:
            raise ValueError(
                f'"{operation.operator.value}" can have only one argument '
                f"in Databricks vector search"
            )
        filter_arg = operation.arguments[0].accept(self)
        return {
            f"{column_with_bool_expression} NOT": value
            for column_with_bool_expression, value in filter_arg.items()
        }

    def visit_operation(self, operation: Operation) -> Dict:
        self._validate_func(operation.operator)
        if operation.operator == Operator.AND:
            return self._visit_and_operation(operation)
        elif operation.operator == Operator.OR:
            return self._visit_or_operation(operation)
        elif operation.operator == Operator.NOT:
            return self._visit_not_operation(operation)
        else:
            raise NotImplementedError(
                f'Operator "{operation.operator}" is not supported'
            )

    def visit_comparison(self, comparison: Comparison) -> Dict:
        self._validate_func(comparison.comparator)
        comparator_symbol = _COMPARATOR_TO_SYMBOL[comparison.comparator]
        return {f"{comparison.attribute}{comparator_symbol}": comparison.value}

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filters": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
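Note how the OR branch flattens its children into one joined key; an invented example:

translator = DatabricksVectorSearchTranslator()
flt = Operation(
    operator=Operator.OR,
    arguments=[
        Comparison(comparator=Comparator.EQ, attribute="color", value="red"),
        Comparison(comparator=Comparator.EQ, attribute="size", value=10),
    ],
)
print(translator.visit_operation(flt))  # -> {"color OR size": ["red", 10]}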
@@ -1,88 +0,0 @@
"""Logic for converting internal query language to a valid DeepLake query."""
from typing import Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

COMPARATOR_TO_TQL = {
    Comparator.EQ: "==",
    Comparator.GT: ">",
    Comparator.GTE: ">=",
    Comparator.LT: "<",
    Comparator.LTE: "<=",
}


OPERATOR_TO_TQL = {
    Operator.AND: "and",
    Operator.OR: "or",
    Operator.NOT: "NOT",
}


def can_cast_to_float(string: str) -> bool:
    """Check if a string can be cast to a float."""
    try:
        float(string)
        return True
    except ValueError:
        return False


class DeepLakeTranslator(Visitor):
    """Translate `DeepLake` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
    """Subset of allowed logical operators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
    ]
    """Subset of allowed logical comparators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        if isinstance(func, Operator):
            value = OPERATOR_TO_TQL[func.value]  # type: ignore
        elif isinstance(func, Comparator):
            value = COMPARATOR_TO_TQL[func.value]  # type: ignore
        return f"{value}"

    def visit_operation(self, operation: Operation) -> str:
        args = [arg.accept(self) for arg in operation.arguments]
        operator = self._format_func(operation.operator)
        return "(" + (" " + operator + " ").join(args) + ")"

    def visit_comparison(self, comparison: Comparison) -> str:
        comparator = self._format_func(comparison.comparator)
        values = comparison.value
        if isinstance(values, list):
            tql = []
            for value in values:
                comparison.value = value
                tql.append(self.visit_comparison(comparison))

            return "(" + (" or ").join(tql) + ")"

        if not can_cast_to_float(comparison.value):
            values = f"'{values}'"
        return f"metadata['{comparison.attribute}'] {comparator} {values}"

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            tql = f"SELECT * WHERE {structured_query.filter.accept(self)}"
            kwargs = {"tql": tql}
        return structured_query.query, kwargs
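The end result is a TQL string passed through kwargs; an invented example:

translator = DeepLakeTranslator()
sq = StructuredQuery(
    query="space opera",
    filter=Comparison(comparator=Comparator.EQ, attribute="author", value="Banks"),
    limit=None,
)
query, kwargs = translator.visit_structured_query(sq)
# kwargs == {"tql": "SELECT * WHERE metadata['author'] == 'Banks'"}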
@@ -1,49 +0,0 @@
from typing import Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class DingoDBTranslator(Visitor):
    """Translate `DingoDB` internal query language elements to valid filters."""

    allowed_comparators = (
        Comparator.EQ,
        Comparator.NE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
    )
    """Subset of allowed logical comparators."""
    allowed_operators = (Operator.AND, Operator.OR)
    """Subset of allowed logical operators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        return f"${func.value}"

    def visit_operation(self, operation: Operation) -> Operation:
        return operation

    def visit_comparison(self, comparison: Comparison) -> Comparison:
        return comparison

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {
                "search_params": {
                    "langchain_expr": structured_query.filter.accept(self)
                }
            }
        return structured_query.query, kwargs
@@ -1,100 +0,0 @@
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class ElasticsearchTranslator(Visitor):
    """Translate `Elasticsearch` internal query language elements to valid filters."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.CONTAIN,
        Comparator.LIKE,
    ]
    """Subset of allowed logical comparators."""

    allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
    """Subset of allowed logical operators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        map_dict = {
            Operator.OR: "should",
            Operator.NOT: "must_not",
            Operator.AND: "must",
            Comparator.EQ: "term",
            Comparator.GT: "gt",
            Comparator.GTE: "gte",
            Comparator.LT: "lt",
            Comparator.LTE: "lte",
            Comparator.CONTAIN: "match",
            Comparator.LIKE: "match",
        }
        return map_dict[func]

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]

        return {"bool": {self._format_func(operation.operator): args}}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        # ElasticsearchStore filters need to target
        # the metadata object field
        field = f"metadata.{comparison.attribute}"

        is_range_comparator = comparison.comparator in [
            Comparator.GT,
            Comparator.GTE,
            Comparator.LT,
            Comparator.LTE,
        ]

        if is_range_comparator:
            value = comparison.value
            if isinstance(comparison.value, dict) and "date" in comparison.value:
                value = comparison.value["date"]
            return {"range": {field: {self._format_func(comparison.comparator): value}}}

        if comparison.comparator == Comparator.CONTAIN:
            return {
                self._format_func(comparison.comparator): {
                    field: {"query": comparison.value}
                }
            }

        if comparison.comparator == Comparator.LIKE:
            return {
                self._format_func(comparison.comparator): {
                    field: {"query": comparison.value, "fuzziness": "AUTO"}
                }
            }

        # we assume that if the value is a string,
        # we want to use the keyword field
        field = f"{field}.keyword" if isinstance(comparison.value, str) else field

        if isinstance(comparison.value, dict):
            if "date" in comparison.value:
                comparison.value = comparison.value["date"]

        return {self._format_func(comparison.comparator): {field: comparison.value}}

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": [structured_query.filter.accept(self)]}
        return structured_query.query, kwargs
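String equality targets the .keyword subfield while range comparisons unwrap {"date": ...} values; invented examples:

translator = ElasticsearchTranslator()
eq = Comparison(comparator=Comparator.EQ, attribute="genre", value="sci-fi")
print(translator.visit_comparison(eq))
# -> {"term": {"metadata.genre.keyword": "sci-fi"}}
gt = Comparison(
    comparator=Comparator.GT,
    attribute="published",
    value={"date": "1990-01-01", "type": "date"},
)
print(translator.visit_comparison(gt))
# -> {"range": {"metadata.published": {"gt": "1990-01-01"}}}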
@@ -1,103 +0,0 @@
"""Logic for converting internal query language to a valid Milvus query."""
from typing import Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

COMPARATOR_TO_BER = {
    Comparator.EQ: "==",
    Comparator.GT: ">",
    Comparator.GTE: ">=",
    Comparator.LT: "<",
    Comparator.LTE: "<=",
    Comparator.IN: "in",
    Comparator.LIKE: "like",
}

UNARY_OPERATORS = [Operator.NOT]


def process_value(value: Union[int, float, str], comparator: Comparator) -> str:
    """Convert a value to a string and add double quotes if it is a string.

    It is required for comparators involving strings.

    Args:
        value: The value to convert.
        comparator: The comparator.

    Returns:
        The converted value as a string.
    """
    if isinstance(value, str):
        if comparator is Comparator.LIKE:
            # If the comparator is LIKE, add a percent sign after it for prefix matching
            # and add double quotes
            return f'"{value}%"'
        else:
            # If the value is already a string, add double quotes
            return f'"{value}"'
    else:
        # If the value is not a string, convert it to a string without double quotes
        return str(value)


class MilvusTranslator(Visitor):
    """Translate Milvus internal query language elements to valid filters."""

    """Subset of allowed logical operators."""
    allowed_operators = [Operator.AND, Operator.NOT, Operator.OR]

    """Subset of allowed logical comparators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.IN,
        Comparator.LIKE,
    ]

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        value = func.value
        if isinstance(func, Comparator):
            value = COMPARATOR_TO_BER[func]
        return f"{value}"

    def visit_operation(self, operation: Operation) -> str:
        if operation.operator in UNARY_OPERATORS and len(operation.arguments) == 1:
            operator = self._format_func(operation.operator)
            return operator + "(" + operation.arguments[0].accept(self) + ")"
        elif operation.operator in UNARY_OPERATORS:
            raise ValueError(
                f'"{operation.operator.value}" can have only one argument in Milvus'
            )
        else:
            args = [arg.accept(self) for arg in operation.arguments]
            operator = self._format_func(operation.operator)
            return "(" + (" " + operator + " ").join(args) + ")"

    def visit_comparison(self, comparison: Comparison) -> str:
        comparator = self._format_func(comparison.comparator)
        processed_value = process_value(comparison.value, comparison.comparator)
        attribute = comparison.attribute

        return "( " + attribute + " " + comparator + " " + processed_value + " )"

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"expr": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
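process_value quotes strings (appending % for LIKE prefix matching) and comparisons are parenthesized; an invented example:

translator = MilvusTranslator()
like = Comparison(comparator=Comparator.LIKE, attribute="title", value="star")
print(translator.visit_comparison(like))  # -> '( title like "star%" )'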
@@ -1,74 +0,0 @@
"""Logic for converting internal query language to a valid MongoDB Atlas query."""
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN]


class MongoDBAtlasTranslator(Visitor):
    """Translate Mongo internal query language elements to valid filters."""

    """Subset of allowed logical comparators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.IN,
        Comparator.NIN,
    ]

    """Subset of allowed logical operators."""
    allowed_operators = [Operator.AND, Operator.OR]

    ## Convert an operator or a comparator to Mongo Query Format
    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        map_dict = {
            Operator.AND: "$and",
            Operator.OR: "$or",
            Comparator.EQ: "$eq",
            Comparator.NE: "$ne",
            Comparator.GTE: "$gte",
            Comparator.LTE: "$lte",
            Comparator.LT: "$lt",
            Comparator.GT: "$gt",
            Comparator.IN: "$in",
            Comparator.NIN: "$nin",
        }
        return map_dict[func]

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {self._format_func(operation.operator): args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        if comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance(
            comparison.value, list
        ):
            comparison.value = [comparison.value]

        comparator = self._format_func(comparison.comparator)

        attribute = comparison.attribute

        return {attribute: {comparator: comparison.value}}

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"pre_filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
@@ -1,125 +0,0 @@
import re
from typing import Any, Callable, Dict, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


def _DEFAULT_COMPOSER(op_name: str) -> Callable:
    """
    Default composer for logical operators.

    Args:
        op_name: Name of the operator.

    Returns:
        Callable that takes a list of arguments and returns a string.
    """

    def f(*args: Any) -> str:
        args_: map[str] = map(str, args)
        return f" {op_name} ".join(args_)

    return f


def _FUNCTION_COMPOSER(op_name: str) -> Callable:
    """
    Composer for functions.

    Args:
        op_name: Name of the function.

    Returns:
        Callable that takes a list of arguments and returns a string.
    """

    def f(*args: Any) -> str:
        args_: map[str] = map(str, args)
        return f"{op_name}({','.join(args_)})"

    return f


class MyScaleTranslator(Visitor):
    """Translate `MyScale` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
    """Subset of allowed logical operators."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.CONTAIN,
        Comparator.LIKE,
    ]

    map_dict = {
        Operator.AND: _DEFAULT_COMPOSER("AND"),
        Operator.OR: _DEFAULT_COMPOSER("OR"),
        Operator.NOT: _DEFAULT_COMPOSER("NOT"),
        Comparator.EQ: _DEFAULT_COMPOSER("="),
        Comparator.GT: _DEFAULT_COMPOSER(">"),
        Comparator.GTE: _DEFAULT_COMPOSER(">="),
        Comparator.LT: _DEFAULT_COMPOSER("<"),
        Comparator.LTE: _DEFAULT_COMPOSER("<="),
        Comparator.CONTAIN: _FUNCTION_COMPOSER("has"),
        Comparator.LIKE: _DEFAULT_COMPOSER("ILIKE"),
    }

    def __init__(self, metadata_key: str = "metadata") -> None:
        super().__init__()
        self.metadata_key = metadata_key

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        func = operation.operator
        self._validate_func(func)
        return self.map_dict[func](*args)

    def visit_comparison(self, comparison: Comparison) -> Dict:
        regex = r"\((.*?)\)"
        matched = re.search(r"\(\w+\)", comparison.attribute)

        # If arbitrary function is applied to an attribute
        if matched:
            attr = re.sub(
                regex,
                f"({self.metadata_key}.{matched.group(0)[1:-1]})",
                comparison.attribute,
            )
        else:
            attr = f"{self.metadata_key}.{comparison.attribute}"
        value = comparison.value
        comp = comparison.comparator

        value = f"'{value}'" if isinstance(value, str) else value

        # convert timestamp for datetime objects
        if isinstance(value, dict) and value.get("type") == "date":
            attr = f"parseDateTime32BestEffort({attr})"
            value = f"parseDateTime32BestEffort('{value['date']}')"

        # string pattern match
        if comp is Comparator.LIKE:
            value = f"'%{value[1:-1]}%'"
        return self.map_dict[comp](attr, value)

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        print(structured_query)  # noqa: T201
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"where_str": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
@@ -1,104 +0,0 @@
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class OpenSearchTranslator(Visitor):
    """Translate `OpenSearch` internal query domain-specific
    language elements to valid filters."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.CONTAIN,
        Comparator.LIKE,
    ]
    """Subset of allowed logical comparators."""

    allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
    """Subset of allowed logical operators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        comp_operator_map = {
            Comparator.EQ: "term",
            Comparator.LT: "lt",
            Comparator.LTE: "lte",
            Comparator.GT: "gt",
            Comparator.GTE: "gte",
            Comparator.CONTAIN: "match",
            Comparator.LIKE: "fuzzy",
            Operator.AND: "must",
            Operator.OR: "should",
            Operator.NOT: "must_not",
        }
        return comp_operator_map[func]

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]

        return {"bool": {self._format_func(operation.operator): args}}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        field = f"metadata.{comparison.attribute}"

        if comparison.comparator in [
            Comparator.LT,
            Comparator.LTE,
            Comparator.GT,
            Comparator.GTE,
        ]:
            if isinstance(comparison.value, dict):
                if "date" in comparison.value:
                    return {
                        "range": {
                            field: {
                                self._format_func(
                                    comparison.comparator
                                ): comparison.value["date"]
                            }
                        }
                    }
            else:
                return {
                    "range": {
                        field: {
                            self._format_func(comparison.comparator): comparison.value
                        }
                    }
                }

        if comparison.comparator == Comparator.LIKE:
            return {
                self._format_func(comparison.comparator): {
                    field: {"value": comparison.value}
                }
            }

        field = f"{field}.keyword" if isinstance(comparison.value, str) else field

        if isinstance(comparison.value, dict):
            if "date" in comparison.value:
                comparison.value = comparison.value["date"]

        return {self._format_func(comparison.comparator): {field: comparison.value}}

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}

        return structured_query.query, kwargs
@@ -1,52 +0,0 @@
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class PGVectorTranslator(Visitor):
    """Translate `PGVector` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    """Subset of allowed logical operators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.LT,
        Comparator.IN,
        Comparator.NIN,
        Comparator.CONTAIN,
        Comparator.LIKE,
    ]
    """Subset of allowed logical comparators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        return f"{func.value}"

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {self._format_func(operation.operator): args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        return {
            comparison.attribute: {
                self._format_func(comparison.comparator): comparison.value
            }
        }

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
@@ -1,57 +0,0 @@
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class PineconeTranslator(Visitor):
    """Translate `Pinecone` internal query language elements to valid filters."""

    allowed_comparators = (
        Comparator.EQ,
        Comparator.NE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.IN,
        Comparator.NIN,
    )
    """Subset of allowed logical comparators."""
    allowed_operators = (Operator.AND, Operator.OR)
    """Subset of allowed logical operators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        return f"${func.value}"

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {self._format_func(operation.operator): args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        if comparison.comparator in (Comparator.IN, Comparator.NIN) and not isinstance(
            comparison.value, list
        ):
            comparison.value = [comparison.value]

        return {
            comparison.attribute: {
                self._format_func(comparison.comparator): comparison.value
            }
        }

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
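For contrast, a short sketch (editor's addition) of the Pinecone translator on a single comparison: `_format_func` adds the Mongo-style `$` prefix, and scalar values are coerced to lists for IN/NIN.

```python
from langchain_core.structured_query import Comparator, Comparison

translator = PineconeTranslator()
flt = translator.visit_comparison(
    Comparison(comparator=Comparator.IN, attribute="genre", value="comedy")
)
# flt == {"genre": {"$in": ["comedy"]}}
```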
@@ -1,98 +0,0 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

if TYPE_CHECKING:
    from qdrant_client.http import models as rest


class QdrantTranslator(Visitor):
    """Translate `Qdrant` internal query language elements to valid filters."""

    allowed_operators = (
        Operator.AND,
        Operator.OR,
        Operator.NOT,
    )
    """Subset of allowed logical operators."""

    allowed_comparators = (
        Comparator.EQ,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LIKE,
    )
    """Subset of allowed logical comparators."""

    def __init__(self, metadata_key: str):
        self.metadata_key = metadata_key

    def visit_operation(self, operation: Operation) -> rest.Filter:
        try:
            from qdrant_client.http import models as rest
        except ImportError as e:
            raise ImportError(
                "Cannot import qdrant_client. Please install with `pip install "
                "qdrant-client`."
            ) from e

        args = [arg.accept(self) for arg in operation.arguments]
        operator = {
            Operator.AND: "must",
            Operator.OR: "should",
            Operator.NOT: "must_not",
        }[operation.operator]
        return rest.Filter(**{operator: args})

    def visit_comparison(self, comparison: Comparison) -> rest.FieldCondition:
        try:
            from qdrant_client.http import models as rest
        except ImportError as e:
            raise ImportError(
                "Cannot import qdrant_client. Please install with `pip install "
                "qdrant-client`."
            ) from e

        self._validate_func(comparison.comparator)
        attribute = self.metadata_key + "." + comparison.attribute
        if comparison.comparator == Comparator.EQ:
            return rest.FieldCondition(
                key=attribute, match=rest.MatchValue(value=comparison.value)
            )
        if comparison.comparator == Comparator.LIKE:
            return rest.FieldCondition(
                key=attribute, match=rest.MatchText(text=comparison.value)
            )
        kwargs = {comparison.comparator.value: comparison.value}
        return rest.FieldCondition(key=attribute, range=rest.Range(**kwargs))

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        try:
            from qdrant_client.http import models as rest
        except ImportError as e:
            raise ImportError(
                "Cannot import qdrant_client. Please install with `pip install "
                "qdrant-client`."
            ) from e

        if structured_query.filter is None:
            kwargs = {}
        else:
            filter = structured_query.filter.accept(self)
            if isinstance(filter, rest.FieldCondition):
                filter = rest.Filter(must=[filter])
            kwargs = {"filter": filter}
        return structured_query.query, kwargs
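A sketch of the Qdrant translator's output (editor's addition, assuming `qdrant-client` is installed): attributes are prefixed with the payload metadata key and the result is a qdrant REST model rather than a plain dict.

```python
from langchain_core.structured_query import Comparator, Comparison

translator = QdrantTranslator(metadata_key="metadata")
cond = translator.visit_comparison(
    Comparison(comparator=Comparator.EQ, attribute="genre", value="comedy")
)
# cond == rest.FieldCondition(key="metadata.genre",
#                             match=rest.MatchValue(value="comedy"))
```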
@@ -1,103 +0,0 @@
from __future__ import annotations

from typing import Any, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

from langchain_community.vectorstores.redis import Redis
from langchain_community.vectorstores.redis.filters import (
    RedisFilterExpression,
    RedisFilterField,
    RedisFilterOperator,
    RedisNum,
    RedisTag,
    RedisText,
)
from langchain_community.vectorstores.redis.schema import RedisModel

_COMPARATOR_TO_BUILTIN_METHOD = {
    Comparator.EQ: "__eq__",
    Comparator.NE: "__ne__",
    Comparator.LT: "__lt__",
    Comparator.GT: "__gt__",
    Comparator.LTE: "__le__",
    Comparator.GTE: "__ge__",
    Comparator.CONTAIN: "__eq__",
    Comparator.LIKE: "__mod__",
}


class RedisTranslator(Visitor):
    """Visitor for translating structured queries to Redis filter expressions."""

    allowed_comparators = (
        Comparator.EQ,
        Comparator.NE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.CONTAIN,
        Comparator.LIKE,
    )
    """Subset of allowed logical comparators."""
    allowed_operators = (Operator.AND, Operator.OR)
    """Subset of allowed logical operators."""

    def __init__(self, schema: RedisModel) -> None:
        self._schema = schema

    def _attribute_to_filter_field(self, attribute: str) -> RedisFilterField:
        if attribute in [tf.name for tf in self._schema.text]:
            return RedisText(attribute)
        elif attribute in [tf.name for tf in self._schema.tag or []]:
            return RedisTag(attribute)
        elif attribute in [tf.name for tf in self._schema.numeric or []]:
            return RedisNum(attribute)
        else:
            raise ValueError(
                f"Invalid attribute {attribute} not in vector store schema. Schema is:"
                f"\n{self._schema.as_dict()}"
            )

    def visit_comparison(self, comparison: Comparison) -> RedisFilterExpression:
        filter_field = self._attribute_to_filter_field(comparison.attribute)
        comparison_method = _COMPARATOR_TO_BUILTIN_METHOD[comparison.comparator]
        return getattr(filter_field, comparison_method)(comparison.value)

    def visit_operation(self, operation: Operation) -> Any:
        left = operation.arguments[0].accept(self)
        if len(operation.arguments) > 2:
            right = self.visit_operation(
                Operation(
                    operator=operation.operator, arguments=operation.arguments[1:]
                )
            )
        else:
            right = operation.arguments[1].accept(self)
        redis_operator = (
            RedisFilterOperator.OR
            if operation.operator == Operator.OR
            else RedisFilterOperator.AND
        )
        return RedisFilterExpression(operator=redis_operator, left=left, right=right)

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs

    @classmethod
    def from_vectorstore(cls, vectorstore: Redis) -> RedisTranslator:
        return cls(vectorstore._schema)
@@ -1,97 +0,0 @@
from typing import Any, Dict, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class SupabaseVectorTranslator(Visitor):
    """Translate Langchain filters to Supabase PostgREST filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    """Subset of allowed logical operators."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.LIKE,
    ]
    """Subset of allowed logical comparators."""

    metadata_column = "metadata"

    def _map_comparator(self, comparator: Comparator) -> str:
        """
        Maps Langchain comparator to PostgREST comparator:

        https://postgrest.org/en/stable/references/api/tables_views.html#operators
        """
        postgrest_comparator = {
            Comparator.EQ: "eq",
            Comparator.NE: "neq",
            Comparator.GT: "gt",
            Comparator.GTE: "gte",
            Comparator.LT: "lt",
            Comparator.LTE: "lte",
            Comparator.LIKE: "like",
        }.get(comparator)

        if postgrest_comparator is None:
            raise Exception(
                f"Comparator '{comparator}' is not currently "
                "supported in Supabase Vector"
            )

        return postgrest_comparator

    def _get_json_operator(self, value: Any) -> str:
        if isinstance(value, str):
            return "->>"
        else:
            return "->"

    def visit_operation(self, operation: Operation) -> str:
        args = [arg.accept(self) for arg in operation.arguments]
        return f"{operation.operator.value}({','.join(args)})"

    def visit_comparison(self, comparison: Comparison) -> str:
        if isinstance(comparison.value, list):
            return self.visit_operation(
                Operation(
                    operator=Operator.AND,
                    arguments=[
                        Comparison(
                            comparator=comparison.comparator,
                            attribute=comparison.attribute,
                            value=value,
                        )
                        for value in comparison.value
                    ],
                )
            )

        return ".".join(
            [
                f"{self.metadata_column}{self._get_json_operator(comparison.value)}{comparison.attribute}",
                f"{self._map_comparator(comparison.comparator)}",
                f"{comparison.value}",
            ]
        )

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, Dict[str, str]]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"postgrest_filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
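Unlike the dict-producing translators, this one flattens filters into a single PostgREST string. A quick sketch (editor's addition): string values route through the `->>` JSON operator, numbers through `->`.

```python
from langchain_core.structured_query import Comparator, Comparison

translator = SupabaseVectorTranslator()
flt = translator.visit_comparison(
    Comparison(comparator=Comparator.EQ, attribute="genre", value="comedy")
)
# flt == "metadata->>genre.eq.comedy"
```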
@@ -1,116 +0,0 @@
from __future__ import annotations

from typing import Optional, Sequence, Tuple

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class TencentVectorDBTranslator(Visitor):
    """Translate StructuredQuery to Tencent VectorDB query."""

    COMPARATOR_MAP = {
        Comparator.EQ: "=",
        Comparator.NE: "!=",
        Comparator.GT: ">",
        Comparator.GTE: ">=",
        Comparator.LT: "<",
        Comparator.LTE: "<=",
        Comparator.IN: "in",
        Comparator.NIN: "not in",
    }

    allowed_comparators: Optional[Sequence[Comparator]] = list(COMPARATOR_MAP.keys())
    allowed_operators: Optional[Sequence[Operator]] = [
        Operator.AND,
        Operator.OR,
        Operator.NOT,
    ]

    def __init__(self, meta_keys: Optional[Sequence[str]] = None):
        """Initialize the translator.

        Args:
            meta_keys: List of meta keys to be used in the query. Default: [].
        """
        self.meta_keys = meta_keys or []

    def visit_operation(self, operation: Operation) -> str:
        """Visit an operation node and return the translated query.

        Args:
            operation: Operation node to be visited.

        Returns:
            Translated query.
        """
        if operation.operator in (Operator.AND, Operator.OR):
            ret = f" {operation.operator.value} ".join(
                [arg.accept(self) for arg in operation.arguments]
            )
            if operation.operator == Operator.OR:
                ret = f"({ret})"
            return ret
        else:
            return f"not ({operation.arguments[0].accept(self)})"

    def visit_comparison(self, comparison: Comparison) -> str:
        """Visit a comparison node and return the translated query.

        Args:
            comparison: Comparison node to be visited.

        Returns:
            Translated query.
        """
        if self.meta_keys and comparison.attribute not in self.meta_keys:
            raise ValueError(
                f"Expr Filtering found Unsupported attribute: {comparison.attribute}"
            )

        if comparison.comparator in self.COMPARATOR_MAP:
            if comparison.comparator in [Comparator.IN, Comparator.NIN]:
                value = map(
                    lambda x: f'"{x}"' if isinstance(x, str) else x, comparison.value
                )
                return (
                    f"{comparison.attribute}"
                    f" {self.COMPARATOR_MAP[comparison.comparator]} "
                    f"({', '.join(value)})"
                )
            if isinstance(comparison.value, str):
                return (
                    f"{comparison.attribute} "
                    f"{self.COMPARATOR_MAP[comparison.comparator]}"
                    f' "{comparison.value}"'
                )
            return (
                f"{comparison.attribute}"
                f" {self.COMPARATOR_MAP[comparison.comparator]} "
                f"{comparison.value}"
            )
        else:
            raise ValueError(f"Unsupported comparator {comparison.comparator}")

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        """Visit a structured query node and return the translated query.

        Args:
            structured_query: StructuredQuery node to be visited.

        Returns:
            Translated query and query kwargs.
        """
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"expr": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
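This translator emits a SQL-like expression string (passed on as the `expr` keyword). A brief sketch (editor's addition) of the IN handling, which quotes string members:

```python
from langchain_core.structured_query import Comparator, Comparison

translator = TencentVectorDBTranslator(meta_keys=["genre"])
expr = translator.visit_comparison(
    Comparison(comparator=Comparator.IN, attribute="genre",
               value=["comedy", "drama"])
)
# expr == 'genre in ("comedy", "drama")'
```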
@@ -1,84 +0,0 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)

if TYPE_CHECKING:
    from timescale_vector import client


class TimescaleVectorTranslator(Visitor):
    """Translate the internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
    """Subset of allowed logical operators."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
    ]

    COMPARATOR_MAP = {
        Comparator.EQ: "==",
        Comparator.GT: ">",
        Comparator.GTE: ">=",
        Comparator.LT: "<",
        Comparator.LTE: "<=",
    }

    OPERATOR_MAP = {Operator.AND: "AND", Operator.OR: "OR", Operator.NOT: "NOT"}

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        if isinstance(func, Operator):
            value = self.OPERATOR_MAP[func.value]  # type: ignore
        elif isinstance(func, Comparator):
            value = self.COMPARATOR_MAP[func.value]  # type: ignore
        return f"{value}"

    def visit_operation(self, operation: Operation) -> client.Predicates:
        try:
            from timescale_vector import client
        except ImportError as e:
            raise ImportError(
                "Cannot import timescale-vector. Please install with `pip install "
                "timescale-vector`."
            ) from e
        args = [arg.accept(self) for arg in operation.arguments]
        return client.Predicates(*args, operator=self._format_func(operation.operator))

    def visit_comparison(self, comparison: Comparison) -> client.Predicates:
        try:
            from timescale_vector import client
        except ImportError as e:
            raise ImportError(
                "Cannot import timescale-vector. Please install with `pip install "
                "timescale-vector`."
            ) from e
        return client.Predicates(
            (
                comparison.attribute,
                self._format_func(comparison.comparator),
                comparison.value,
            )
        )

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"predicates": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
@@ -1,70 +0,0 @@
from typing import Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


def process_value(value: Union[int, float, str]) -> str:
    """Convert a value to a string and add single quotes if it is a string."""
    if isinstance(value, str):
        return f"'{value}'"
    else:
        return str(value)


class VectaraTranslator(Visitor):
    """Translate `Vectara` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    """Subset of allowed logical operators."""
    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.LT,
        Comparator.LTE,
    ]
    """Subset of allowed logical comparators."""

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        map_dict = {
            Operator.AND: " and ",
            Operator.OR: " or ",
            Comparator.EQ: "=",
            Comparator.NE: "!=",
            Comparator.GT: ">",
            Comparator.GTE: ">=",
            Comparator.LT: "<",
            Comparator.LTE: "<=",
        }
        self._validate_func(func)
        return map_dict[func]

    def visit_operation(self, operation: Operation) -> str:
        args = [arg.accept(self) for arg in operation.arguments]
        operator = self._format_func(operation.operator)
        return "( " + operator.join(args) + " )"

    def visit_comparison(self, comparison: Comparison) -> str:
        comparator = self._format_func(comparison.comparator)
        processed_value = process_value(comparison.value)
        attribute = comparison.attribute
        return (
            "( " + "doc." + attribute + " " + comparator + " " + processed_value + " )"
        )

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
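A one-line sketch of the Vectara output format (editor's addition): attributes are prefixed with `doc.` and numbers are left unquoted by `process_value`.

```python
from langchain_core.structured_query import Comparator, Comparison

translator = VectaraTranslator()
flt = translator.visit_comparison(
    Comparison(comparator=Comparator.GTE, attribute="year", value=1990)
)
# flt == "( doc.year >= 1990 )"
```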
@@ -1,79 +0,0 @@
from datetime import datetime
from typing import Dict, Tuple, Union

from langchain_core.structured_query import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
    Visitor,
)


class WeaviateTranslator(Visitor):
    """Translate `Weaviate` internal query language elements to valid filters."""

    allowed_operators = [Operator.AND, Operator.OR]
    """Subset of allowed logical operators."""

    allowed_comparators = [
        Comparator.EQ,
        Comparator.NE,
        Comparator.GTE,
        Comparator.LTE,
        Comparator.LT,
        Comparator.GT,
    ]

    def _format_func(self, func: Union[Operator, Comparator]) -> str:
        self._validate_func(func)
        # https://weaviate.io/developers/weaviate/api/graphql/filters
        map_dict = {
            Operator.AND: "And",
            Operator.OR: "Or",
            Comparator.EQ: "Equal",
            Comparator.NE: "NotEqual",
            Comparator.GTE: "GreaterThanEqual",
            Comparator.LTE: "LessThanEqual",
            Comparator.LT: "LessThan",
            Comparator.GT: "GreaterThan",
        }
        return map_dict[func]

    def visit_operation(self, operation: Operation) -> Dict:
        args = [arg.accept(self) for arg in operation.arguments]
        return {"operator": self._format_func(operation.operator), "operands": args}

    def visit_comparison(self, comparison: Comparison) -> Dict:
        value_type = "valueText"
        value = comparison.value
        if isinstance(comparison.value, bool):
            value_type = "valueBoolean"
        elif isinstance(comparison.value, float):
            value_type = "valueNumber"
        elif isinstance(comparison.value, int):
            value_type = "valueInt"
        elif (
            isinstance(comparison.value, dict)
            and comparison.value.get("type") == "date"
        ):
            value_type = "valueDate"
            # ISO 8601 timestamp, formatted as RFC3339
            date = datetime.strptime(comparison.value["date"], "%Y-%m-%d")
            value = date.strftime("%Y-%m-%dT%H:%M:%SZ")
        filter = {
            "path": [comparison.attribute],
            "operator": self._format_func(comparison.comparator),
            value_type: value,
        }
        return filter

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"where_filter": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
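A sketch of the date branch above (editor's addition): `{"type": "date", ...}` values are re-emitted as RFC3339 timestamps under the typed `valueDate` key of a Weaviate GraphQL `where` filter.

```python
from langchain_core.structured_query import Comparator, Comparison

translator = WeaviateTranslator()
flt = translator.visit_comparison(
    Comparison(
        comparator=Comparator.GT,
        attribute="release",
        value={"type": "date", "date": "2020-01-01"},
    )
)
# flt == {"path": ["release"], "operator": "GreaterThan",
#         "valueDate": "2020-01-01T00:00:00Z"}
```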
@@ -123,7 +123,6 @@ if TYPE_CHECKING:
    from langchain_community.retrievers.weaviate_hybrid_search import (
        WeaviateHybridSearchRetriever,
    )
    from langchain_community.retrievers.web_research import WebResearchRetriever
    from langchain_community.retrievers.wikipedia import (
        WikipediaRetriever,
    )
@@ -175,7 +174,6 @@ _module_lookup = {
    "TavilySearchAPIRetriever": "langchain_community.retrievers.tavily_search_api",
    "VespaRetriever": "langchain_community.retrievers.vespa_retriever",
    "WeaviateHybridSearchRetriever": "langchain_community.retrievers.weaviate_hybrid_search",  # noqa: E501
    "WebResearchRetriever": "langchain_community.retrievers.web_research",
    "WikipediaRetriever": "langchain_community.retrievers.wikipedia",
    "YouRetriever": "langchain_community.retrievers.you",
    "ZepRetriever": "langchain_community.retrievers.zep",
@@ -196,8 +194,8 @@ __all__ = [
    "AmazonKnowledgeBasesRetriever",
    "ArceeRetriever",
    "ArxivRetriever",
    "AzureAISearchRetriever",
    "AzureCognitiveSearchRetriever",
    "AzureAISearchRetriever",
    "BM25Retriever",
    "BreebsRetriever",
    "ChaindeskRetriever",
@@ -211,8 +209,8 @@ __all__ = [
    "GoogleDocumentAIWarehouseRetriever",
    "GoogleVertexAIMultiTurnSearchRetriever",
    "GoogleVertexAISearchRetriever",
    "KayAiRetriever",
    "KNNRetriever",
    "KayAiRetriever",
    "LlamaIndexGraphRetriever",
    "LlamaIndexRetriever",
    "MetalRetriever",
@@ -225,11 +223,10 @@ __all__ = [
    "RememberizerRetriever",
    "RemoteLangChainRetriever",
    "SVMRetriever",
    "TavilySearchAPIRetriever",
    "TFIDFRetriever",
    "TavilySearchAPIRetriever",
    "VespaRetriever",
    "WeaviateHybridSearchRetriever",
    "WebResearchRetriever",
    "WikipediaRetriever",
    "YouRetriever",
    "ZepRetriever",
@@ -1,223 +0,0 @@
import logging
import re
from typing import List, Optional

from langchain.chains import LLMChain
from langchain.chains.prompt_selector import ConditionalPromptSelector
from langchain_core.callbacks import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.language_models import BaseLLM
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import BasePromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores import VectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter

from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import Html2TextTransformer
from langchain_community.llms import LlamaCpp
from langchain_community.utilities import GoogleSearchAPIWrapper

logger = logging.getLogger(__name__)


class SearchQueries(BaseModel):
    """Search queries to research for the user's goal."""

    queries: List[str] = Field(
        ..., description="List of search queries to look up on Google"
    )


DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""<<SYS>> \n You are an assistant tasked with improving Google search \
results. \n <</SYS>> \n\n [INST] Generate THREE Google search queries that \
are similar to this question. The output should be a numbered list of questions \
and each should have a question mark at the end: \n\n {question} [/INST]""",
)

DEFAULT_SEARCH_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an assistant tasked with improving Google search \
results. Generate THREE Google search queries that are similar to \
this question. The output should be a numbered list of questions and each \
should have a question mark at the end: {question}""",
)


class QuestionListOutputParser(BaseOutputParser[List[str]]):
    """Output parser for a list of numbered questions."""

    def parse(self, text: str) -> List[str]:
        lines = re.findall(r"\d+\..*?(?:\n|$)", text)
        return lines


class WebResearchRetriever(BaseRetriever):
    """`Google Search API` retriever."""

    # Inputs
    vectorstore: VectorStore = Field(
        ..., description="Vector store for storing web pages"
    )
    llm_chain: LLMChain
    search: GoogleSearchAPIWrapper = Field(..., description="Google Search API Wrapper")
    num_search_results: int = Field(1, description="Number of pages per Google search")
    text_splitter: TextSplitter = Field(
        RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50),
        description="Text splitter for splitting web pages into chunks",
    )
    url_database: List[str] = Field(
        default_factory=list, description="List of processed URLs"
    )

    @classmethod
    def from_llm(
        cls,
        vectorstore: VectorStore,
        llm: BaseLLM,
        search: GoogleSearchAPIWrapper,
        prompt: Optional[BasePromptTemplate] = None,
        num_search_results: int = 1,
        text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
            chunk_size=1500, chunk_overlap=150
        ),
    ) -> "WebResearchRetriever":
        """Initialize from llm using default template.

        Args:
            vectorstore: Vector store for storing web pages
            llm: llm for search question generation
            search: GoogleSearchAPIWrapper
            prompt: prompt to generating search questions
            num_search_results: Number of pages per Google search
            text_splitter: Text splitter for splitting web pages into chunks

        Returns:
            WebResearchRetriever
        """

        if not prompt:
            QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(
                default_prompt=DEFAULT_SEARCH_PROMPT,
                conditionals=[
                    (lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)
                ],
            )
            prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)

        # Use chat model prompt
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            output_parser=QuestionListOutputParser(),
        )

        return cls(
            vectorstore=vectorstore,
            llm_chain=llm_chain,
            search=search,
            num_search_results=num_search_results,
            text_splitter=text_splitter,
        )

    def clean_search_query(self, query: str) -> str:
        # Some search tools (e.g., Google) will
        # fail to return results if query has a
        # leading digit: 1. "LangCh..."
        # Check if the first character is a digit
        if query[0].isdigit():
            # Find the position of the first quote
            first_quote_pos = query.find('"')
            if first_quote_pos != -1:
                # Extract the part of the string after the quote
                query = query[first_quote_pos + 1 :]
                # Remove the trailing quote if present
                if query.endswith('"'):
                    query = query[:-1]
        return query.strip()

    def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
        """Returns num_search_results pages per Google search."""
        query_clean = self.clean_search_query(query)
        result = self.search.results(query_clean, num_search_results)
        return result

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> List[Document]:
        """Search Google for documents related to the query input.

        Args:
            query: user query

        Returns:
            Relevant documents from all various urls.
        """

        # Get search questions
        logger.info("Generating questions for Google Search ...")
        result = self.llm_chain({"question": query})
        logger.info(f"Questions for Google Search (raw): {result}")
        questions = result["text"]
        logger.info(f"Questions for Google Search: {questions}")

        # Get urls
        logger.info("Searching for relevant urls...")
        urls_to_look = []
        for query in questions:
            # Google search
            search_results = self.search_tool(query, self.num_search_results)
            logger.info("Searching for relevant urls...")
            logger.info(f"Search results: {search_results}")
            for res in search_results:
                if res.get("link", None):
                    urls_to_look.append(res["link"])

        # Relevant urls
        urls = set(urls_to_look)

        # Check for any new urls that we have not processed
        new_urls = list(urls.difference(self.url_database))

        logger.info(f"New URLs to load: {new_urls}")
        # Load, split, and add new urls to vectorstore
        if new_urls:
            loader = AsyncHtmlLoader(new_urls, ignore_load_errors=True)
            html2text = Html2TextTransformer()
            logger.info("Indexing new urls...")
            docs = loader.load()
            docs = list(html2text.transform_documents(docs))
            docs = self.text_splitter.split_documents(docs)
            self.vectorstore.add_documents(docs)
            self.url_database.extend(new_urls)

        # Search for relevant splits
        # TODO: make this async
        logger.info("Grabbing most relevant splits from urls...")
        docs = []
        for query in questions:
            docs.extend(self.vectorstore.similarity_search(query))

        # Get unique docs
        unique_documents_dict = {
            (doc.page_content, tuple(sorted(doc.metadata.items()))): doc for doc in docs
        }
        unique_documents = list(unique_documents_dict.values())
        return unique_documents

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
    ) -> List[Document]:
        raise NotImplementedError
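For reference, a minimal sketch (editor's addition, not part of the diff) of how this retriever was typically wired up before its removal here. The embedding model, LLM, and vector store are placeholders, and `GoogleSearchAPIWrapper` assumes the `GOOGLE_API_KEY` / `GOOGLE_CSE_ID` environment variables are set.

```python
# Hypothetical setup: all component choices below are illustrative placeholders.
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.vectorstores import Chroma

vectorstore = Chroma(embedding_function=OpenAIEmbeddings())
retriever = WebResearchRetriever.from_llm(
    vectorstore=vectorstore,
    llm=OpenAI(temperature=0),
    search=GoogleSearchAPIWrapper(),  # needs GOOGLE_API_KEY / GOOGLE_CSE_ID
)
docs = retriever.get_relevant_documents("How do LLM agents work?")
```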
@@ -1,10 +1,13 @@
"""Gmail tool utils."""

from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING, List, Optional, Tuple

from langchain_core.utils import guard_import

if TYPE_CHECKING:
    from google.auth.transport.requests import Request
    from google.oauth2.credentials import Credentials
@@ -21,16 +24,15 @@ def import_google() -> Tuple[Request, Credentials]:
    Returns:
        Tuple[Request, Credentials]: Request and Credentials classes.
    """
    # google-auth-httplib2
    try:
        from google.auth.transport.requests import Request
        from google.oauth2.credentials import Credentials
    except ImportError:
        raise ImportError(
            "You need to install google-auth-httplib2 to use this toolkit. "
            "Try running pip install --upgrade google-auth-httplib2"
        )
    return Request, Credentials
    return (
        guard_import(
            module_name="google.auth.transport.requests",
            pip_name="google-auth-httplib2",
        ).Request,
        guard_import(
            module_name="google.oauth2.credentials", pip_name="google-auth-httplib2"
        ).Credentials,
    )


def import_installed_app_flow() -> InstalledAppFlow:
@@ -39,14 +41,9 @@ def import_installed_app_flow() -> InstalledAppFlow:
    Returns:
        InstalledAppFlow: InstalledAppFlow class.
    """
    try:
        from google_auth_oauthlib.flow import InstalledAppFlow
    except ImportError:
        raise ImportError(
            "You need to install google-auth-oauthlib to use this toolkit. "
            "Try running pip install --upgrade google-auth-oauthlib"
        )
    return InstalledAppFlow
    return guard_import(
        module_name="google_auth_oauthlib.flow", pip_name="google-auth-oauthlib"
    ).InstalledAppFlow


def import_googleapiclient_resource_builder() -> build_resource:
@@ -55,14 +52,9 @@ def import_googleapiclient_resource_builder() -> build_resource:
    Returns:
        build_resource: googleapiclient.discovery.build function.
    """
    try:
        from googleapiclient.discovery import build
    except ImportError:
        raise ImportError(
            "You need to install googleapiclient to use this toolkit. "
            "Try running pip install --upgrade google-api-python-client"
        )
    return build
    return guard_import(
        module_name="googleapiclient.discovery", pip_name="google-api-python-client"
    ).build


DEFAULT_SCOPES = ["https://mail.google.com/"]
@@ -77,8 +69,19 @@ def get_gmail_credentials(
) -> Credentials:
    """Get credentials."""
    # From https://developers.google.com/gmail/api/quickstart/python
    Request, Credentials = import_google()
    InstalledAppFlow = import_installed_app_flow()
    Request, Credentials = (
        guard_import(
            module_name="google.auth.transport.requests",
            pip_name="google-auth-httplib2",
        ).Request,
        guard_import(
            module_name="google.oauth2.credentials", pip_name="google-auth-httplib2"
        ).Credentials,
    )

    InstalledAppFlow = guard_import(
        module_name="google_auth_oauthlib.flow", pip_name="google-auth-oauthlib"
    ).InstalledAppFlow
    creds = None
    scopes = scopes or DEFAULT_SCOPES
    token_file = token_file or DEFAULT_CREDS_TOKEN_FILE
@@ -111,7 +114,9 @@ def build_resource_service(
) -> Resource:
    """Build a Gmail service."""
    credentials = credentials or get_gmail_credentials()
    builder = import_googleapiclient_resource_builder()
    builder = guard_import(
        module_name="googleapiclient.discovery", pip_name="google-api-python-client"
    ).build
    return builder(service_name, service_version, credentials=credentials)
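The refactor above replaces hand-rolled try/except ImportError blocks with `langchain_core.utils.guard_import`. A short sketch of the pattern, as used throughout this diff:

```python
# guard_import imports a module by name and raises a uniform ImportError naming
# the pip package to install when the import fails.
from langchain_core.utils import guard_import

# Equivalent to `from googleapiclient.discovery import build`, but with a
# consistent "pip install google-api-python-client" error message on failure.
build = guard_import(
    module_name="googleapiclient.discovery", pip_name="google-api-python-client"
).build
```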
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Type

from langchain_core.pydantic_v1 import root_validator
from langchain_core.tools import BaseTool
from langchain_core.utils import guard_import

if TYPE_CHECKING:
    from playwright.async_api import Browser as AsyncBrowser
@@ -25,15 +26,10 @@ def lazy_import_playwright_browsers() -> Tuple[Type[AsyncBrowser], Type[SyncBrow
        Tuple[Type[AsyncBrowser], Type[SyncBrowser]]:
            AsyncBrowser and SyncBrowser classes.
    """
    try:
        from playwright.async_api import Browser as AsyncBrowser
        from playwright.sync_api import Browser as SyncBrowser
    except ImportError:
        raise ImportError(
            "The 'playwright' package is required to use the playwright tools."
            " Please install it with 'pip install playwright'."
        )
    return AsyncBrowser, SyncBrowser
    return (
        guard_import(module_name="playwright.async_api").AsyncBrowser,
        guard_import(module_name="playwright.sync_api").SyncBrowser,
    )


class BaseBrowserTool(BaseTool):
@@ -45,7 +41,8 @@ class BaseBrowserTool(BaseTool):
    @root_validator
    def validate_browser_provided(cls, values: dict) -> dict:
        """Check that the arguments are valid."""
        lazy_import_playwright_browsers()
        guard_import(module_name="playwright.async_api").AsyncBrowser
        guard_import(module_name="playwright.sync_api").SyncBrowser
        if values.get("async_browser") is None and values.get("sync_browser") is None:
            raise ValueError("Either async_browser or sync_browser must be specified.")
        return values
@@ -57,5 +54,6 @@ class BaseBrowserTool(BaseTool):
        async_browser: Optional[AsyncBrowser] = None,
    ) -> BaseBrowserTool:
        """Instantiate the tool."""
        lazy_import_playwright_browsers()
        guard_import(module_name="playwright.async_api").AsyncBrowser
        guard_import(module_name="playwright.sync_api").SyncBrowser
        return cls(sync_browser=sync_browser, async_browser=async_browser)
@@ -99,6 +99,9 @@ if TYPE_CHECKING:
    from langchain_community.utilities.openweathermap import (
        OpenWeatherMapAPIWrapper,
    )
    from langchain_community.utilities.oracleai import (
        OracleSummary,  # noqa: F401
    )
    from langchain_community.utilities.outline import (
        OutlineAPIWrapper,
    )
@@ -199,6 +202,7 @@ __all__ = [
    "NasaAPIWrapper",
    "NutritionAIAPI",
    "OpenWeatherMapAPIWrapper",
    "OracleSummary",
    "OutlineAPIWrapper",
    "Portkey",
    "PowerBIDataset",
@@ -260,6 +264,7 @@ _module_lookup = {
    "NasaAPIWrapper": "langchain_community.utilities.nasa",
    "NutritionAIAPI": "langchain_community.utilities.passio_nutrition_ai",
    "OpenWeatherMapAPIWrapper": "langchain_community.utilities.openweathermap",
    "OracleSummary": "langchain_community.utilities.oracleai",
    "OutlineAPIWrapper": "langchain_community.utilities.outline",
    "Portkey": "langchain_community.utilities.portkey",
    "PowerBIDataset": "langchain_community.utilities.powerbi",
201 libs/community/langchain_community/utilities/oracleai.py Normal file
@@ -0,0 +1,201 @@
# Authors:
#   Harichandan Roy (hroy)
#   David Jiang (ddjiang)
#
# -----------------------------------------------------------------------------
# oracleai.py
# -----------------------------------------------------------------------------

from __future__ import annotations

import json
import logging
import traceback
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from langchain_core.documents import Document

if TYPE_CHECKING:
    from oracledb import Connection

logger = logging.getLogger(__name__)

"""OracleSummary class"""


class OracleSummary:
    """Get Summary
    Args:
        conn: Oracle Connection,
        params: Summary parameters,
        proxy: Proxy
    """

    def __init__(
        self, conn: Connection, params: Dict[str, Any], proxy: Optional[str] = None
    ):
        self.conn = conn
        self.proxy = proxy
        self.summary_params = params

    def get_summary(self, docs: Any) -> List[str]:
        """Get the summary of the input docs.
        Args:
            docs: The documents to generate summary for.
                Allowed input types: str, Document, List[str], List[Document]
        Returns:
            List of summary text, one for each input doc.
        """

        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        if docs is None:
            return []

        results: List[str] = []
        try:
            oracledb.defaults.fetch_lobs = False
            cursor = self.conn.cursor()

            if self.proxy:
                cursor.execute(
                    "begin utl_http.set_proxy(:proxy); end;", proxy=self.proxy
                )

            if isinstance(docs, str):
                results = []

                summary = cursor.var(oracledb.DB_TYPE_CLOB)
                cursor.execute(
                    """
                    declare
                        input clob;
                    begin
                        input := :data;
                        :summ := dbms_vector_chain.utl_to_summary(input, json(:params));
                    end;""",
                    data=docs,
                    params=json.dumps(self.summary_params),
                    summ=summary,
                )

                if summary is None:
                    results.append("")
                else:
                    results.append(str(summary.getvalue()))

            elif isinstance(docs, Document):
                results = []

                summary = cursor.var(oracledb.DB_TYPE_CLOB)
                cursor.execute(
                    """
                    declare
                        input clob;
                    begin
                        input := :data;
                        :summ := dbms_vector_chain.utl_to_summary(input, json(:params));
                    end;""",
                    data=docs.page_content,
                    params=json.dumps(self.summary_params),
                    summ=summary,
                )

                if summary is None:
                    results.append("")
                else:
                    results.append(str(summary.getvalue()))

            elif isinstance(docs, List):
                results = []

                for doc in docs:
                    summary = cursor.var(oracledb.DB_TYPE_CLOB)
                    if isinstance(doc, str):
                        cursor.execute(
                            """
                            declare
                                input clob;
                            begin
                                input := :data;
                                :summ := dbms_vector_chain.utl_to_summary(input,
                                    json(:params));
                            end;""",
                            data=doc,
                            params=json.dumps(self.summary_params),
                            summ=summary,
                        )

                    elif isinstance(doc, Document):
                        cursor.execute(
                            """
                            declare
                                input clob;
                            begin
                                input := :data;
                                :summ := dbms_vector_chain.utl_to_summary(input,
                                    json(:params));
                            end;""",
                            data=doc.page_content,
                            params=json.dumps(self.summary_params),
                            summ=summary,
                        )

                    else:
                        raise Exception("Invalid input type")

                    if summary is None:
                        results.append("")
                    else:
                        results.append(str(summary.getvalue()))

            else:
                raise Exception("Invalid input type")

            cursor.close()
            return results

        except Exception as ex:
            logger.info(f"An exception occurred :: {ex}")
            traceback.print_exc()
            cursor.close()
            raise


# uncomment the following code block to run the test

"""
# A sample unit test.

''' get the Oracle connection '''
conn = oracledb.connect(
    user="",
    password="",
    dsn="")
print("Oracle connection is established...")

''' params '''
summary_params = {"provider": "database","glevel": "S",
                  "numParagraphs": 1,"language": "english"}
proxy = ""

''' instance '''
summ = OracleSummary(conn=conn, params=summary_params, proxy=proxy)

summary = summ.get_summary("In the heart of the forest, " +
    "a lone fox ventured out at dusk, seeking a lost treasure. " +
    "With each step, memories flooded back, guiding its path. " +
    "As the moon rose high, illuminating the night, the fox unearthed " +
    "not gold, but a forgotten friendship, worth more than any riches.")
print(f"Summary generated by OracleSummary: {summary}")

conn.close()
print("Connection is closed.")

"""
@@ -178,6 +178,9 @@ if TYPE_CHECKING:
    from langchain_community.vectorstores.opensearch_vector_search import (
        OpenSearchVectorSearch,
    )
    from langchain_community.vectorstores.oraclevs import (
        OracleVS,  # noqa: F401
    )
    from langchain_community.vectorstores.pathway import (
        PathwayVectorClient,
    )
@@ -343,6 +346,7 @@ __all__ = [
    "MyScaleSettings",
    "Neo4jVector",
    "NeuralDBVectorStore",
    "OracleVS",
    "OpenSearchVectorSearch",
    "PGEmbedding",
    "PGVector",
@@ -439,6 +443,7 @@ _module_lookup = {
    "Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
    "NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
    "OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search",  # noqa: E501
    "OracleVS": "langchain_community.vectorstores.oraclevs",
    "PathwayVectorClient": "langchain_community.vectorstores.pathway",
    "PGEmbedding": "langchain_community.vectorstores.pgembedding",
    "PGVector": "langchain_community.vectorstores.pgvector",
@@ -31,8 +31,6 @@ from langchain_community.vectorstores.utils import maximal_marginal_relevance

CVST = TypeVar("CVST", bound="Cassandra")

_NOT_SET = object()


class Cassandra(VectorStore):
    """Apache Cassandra(R) for vector-store workloads.
@@ -56,9 +54,9 @@ class Cassandra(VectorStore):

    Args:
        embedding: Embedding function to use.
        session: Cassandra driver session.
        keyspace: Cassandra key space.
        table_name: Cassandra table.
        session: Cassandra driver session. If not provided, it is resolved from cassio.
        keyspace: Cassandra key space. If not provided, it is resolved from cassio.
        table_name: Cassandra table (required).
        ttl_seconds: Optional time-to-live for the added texts.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
@@ -83,9 +81,9 @@ class Cassandra(VectorStore):
    def __init__(
        self,
        embedding: Embeddings,
        session: Session,
        keyspace: str,
        table_name: str,
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        table_name: str = "",
        ttl_seconds: Optional[int] = None,
        *,
        body_index_options: Optional[List[Tuple[str, Any]]] = None,
@@ -98,7 +96,8 @@ class Cassandra(VectorStore):
                "Could not import cassio python package. "
                "Please install it with `pip install cassio`."
            )
        """Create a vector table."""
        if not table_name:
            raise ValueError("Missing required parameter 'table_name'.")
        self.embedding = embedding
        self.session = session
        self.keyspace = keyspace
@@ -779,8 +778,8 @@ class Cassandra(VectorStore):
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        *,
        session: Session = _NOT_SET,
        keyspace: str = "",
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        table_name: str = "",
        ids: Optional[List[str]] = None,
        batch_size: int = 16,
@@ -794,8 +793,10 @@ class Cassandra(VectorStore):
            texts: Texts to add to the vectorstore.
            embedding: Embedding function to use.
            metadatas: Optional list of metadatas associated with the texts.
            session: Cassandra driver session (required).
            keyspace: Cassandra key space (required).
            session: Cassandra driver session.
                If not provided, it is resolved from cassio.
            keyspace: Cassandra key space.
                If not provided, it is resolved from cassio.
            table_name: Cassandra table (required).
            ids: Optional list of IDs associated with the texts.
            batch_size: Number of concurrent requests to send to the server.
@@ -807,12 +808,6 @@ class Cassandra(VectorStore):
        Returns:
            a Cassandra vectorstore.
        """
        if session is _NOT_SET:
            raise ValueError("session parameter is required")
        if not keyspace:
            raise ValueError("keyspace parameter is required")
        if not table_name:
            raise ValueError("table_name parameter is required")
        store = cls(
            embedding=embedding,
            session=session,
@@ -833,8 +828,8 @@ class Cassandra(VectorStore):
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        *,
        session: Session = _NOT_SET,
        keyspace: str = "",
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        table_name: str = "",
        ids: Optional[List[str]] = None,
        concurrency: int = 16,
@@ -848,8 +843,10 @@ class Cassandra(VectorStore):
            texts: Texts to add to the vectorstore.
            embedding: Embedding function to use.
            metadatas: Optional list of metadatas associated with the texts.
            session: Cassandra driver session (required).
            keyspace: Cassandra key space (required).
            session: Cassandra driver session.
                If not provided, it is resolved from cassio.
            keyspace: Cassandra key space.
                If not provided, it is resolved from cassio.
            table_name: Cassandra table (required).
            ids: Optional list of IDs associated with the texts.
            concurrency: Number of concurrent queries to send to the database.
@@ -861,12 +858,6 @@ class Cassandra(VectorStore):
        Returns:
            a Cassandra vectorstore.
        """
        if session is _NOT_SET:
            raise ValueError("session parameter is required")
        if not keyspace:
            raise ValueError("keyspace parameter is required")
        if not table_name:
            raise ValueError("table_name parameter is required")
        store = cls(
            embedding=embedding,
            session=session,
@@ -887,8 +878,8 @@ class Cassandra(VectorStore):
        documents: List[Document],
        embedding: Embeddings,
        *,
        session: Session = _NOT_SET,
        keyspace: str = "",
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        table_name: str = "",
        ids: Optional[List[str]] = None,
        batch_size: int = 16,
@@ -901,8 +892,10 @@ class Cassandra(VectorStore):
        Args:
            documents: Documents to add to the vectorstore.
            embedding: Embedding function to use.
            session: Cassandra driver session (required).
            keyspace: Cassandra key space (required).
            session: Cassandra driver session.
                If not provided, it is resolved from cassio.
            keyspace: Cassandra key space.
                If not provided, it is resolved from cassio.
            table_name: Cassandra table (required).
            ids: Optional list of IDs associated with the documents.
            batch_size: Number of concurrent requests to send to the server.
@@ -936,8 +929,8 @@ class Cassandra(VectorStore):
        documents: List[Document],
        embedding: Embeddings,
        *,
        session: Session = _NOT_SET,
        keyspace: str = "",
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        table_name: str = "",
        ids: Optional[List[str]] = None,
        concurrency: int = 16,
@@ -950,8 +943,10 @@ class Cassandra(VectorStore):
        Args:
            documents: Documents to add to the vectorstore.
            embedding: Embedding function to use.
            session: Cassandra driver session (required).
            keyspace: Cassandra key space (required).
            session: Cassandra driver session.
                If not provided, it is resolved from cassio.
            keyspace: Cassandra key space.
                If not provided, it is resolved from cassio.
            table_name: Cassandra table (required).
            ids: Optional list of IDs associated with the documents.
            concurrency: Number of concurrent queries to send to the database.
930 libs/community/langchain_community/vectorstores/oraclevs.py Normal file
@@ -0,0 +1,930 @@
from __future__ import annotations

import array
import functools
import hashlib
import json
import logging
import os
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
    cast,
)

if TYPE_CHECKING:
    from oracledb import Connection

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

from langchain_community.vectorstores.utils import (
    DistanceStrategy,
    maximal_marginal_relevance,
)

logger = logging.getLogger(__name__)
log_level = os.getenv("LOG_LEVEL", "ERROR").upper()
logging.basicConfig(
    level=getattr(logging, log_level),
    format="%(asctime)s - %(levelname)s - %(message)s",
)


# Define a type variable that can be any kind of function
T = TypeVar("T", bound=Callable[..., Any])


def _handle_exceptions(func: T) -> T:
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        try:
            return func(*args, **kwargs)
        except RuntimeError as db_err:
            # Handle a known type of error (e.g., DB-related) specifically
            logger.exception("DB-related error occurred.")
            raise RuntimeError(
                "Failed due to a DB issue: {}".format(db_err)
            ) from db_err
        except ValueError as val_err:
            # Handle another known type of error specifically
            logger.exception("Validation error.")
            raise ValueError("Validation failed: {}".format(val_err)) from val_err
        except Exception as e:
            # Generic handler for all other exceptions
            logger.exception("An unexpected error occurred: {}".format(e))
            raise RuntimeError("Unexpected error: {}".format(e)) from e

    return cast(T, wrapper)
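A quick sketch (editor's addition) of what the decorator above does for the helpers that follow: wrapped functions surface DB failures and unexpected exceptions as RuntimeError and validation problems as ValueError, with logging in between.

```python
@_handle_exceptions
def _demo(x: int) -> int:  # hypothetical helper, not part of the diff
    if x < 0:
        raise ValueError("x must be non-negative")
    return x * 2

_demo(2)    # returns 4
# _demo(-1) raises ValueError("Validation failed: x must be non-negative")
```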
|
||||
|
||||
def _table_exists(client: Connection, table_name: str) -> bool:
|
||||
try:
|
||||
import oracledb
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Unable to import oracledb, please install with "
|
||||
"`pip install -U oracledb`."
|
||||
) from e
|
||||
|
||||
try:
|
||||
with client.cursor() as cursor:
|
||||
cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
|
||||
return True
|
||||
except oracledb.DatabaseError as ex:
|
||||
err_obj = ex.args
|
||||
if err_obj[0].code == 942:
|
||||
return False
|
||||
raise
|
||||
|
||||
|
||||
@_handle_exceptions
|
||||
def _index_exists(client: Connection, index_name: str) -> bool:
|
||||
# Check if the index exists
|
||||
query = """
|
||||
SELECT index_name
|
||||
FROM all_indexes
|
||||
WHERE upper(index_name) = upper(:idx_name)
|
||||
"""
|
||||
|
||||
with client.cursor() as cursor:
|
||||
# Execute the query
|
||||
cursor.execute(query, idx_name=index_name.upper())
|
||||
result = cursor.fetchone()
|
||||
|
||||
# Check if the index exists
|
||||
return result is not None
|
||||
|
||||
|
||||
def _get_distance_function(distance_strategy: DistanceStrategy) -> str:
|
||||
# Dictionary to map distance strategies to their corresponding function
|
||||
# names
|
||||
distance_strategy2function = {
|
||||
DistanceStrategy.EUCLIDEAN_DISTANCE: "EUCLIDEAN",
|
||||
DistanceStrategy.DOT_PRODUCT: "DOT",
|
||||
DistanceStrategy.COSINE: "COSINE",
|
||||
}
|
||||
|
||||
# Attempt to return the corresponding distance function
|
||||
if distance_strategy in distance_strategy2function:
|
||||
return distance_strategy2function[distance_strategy]
|
||||
|
||||
# If it's an unsupported distance strategy, raise an error
|
||||
raise ValueError(f"Unsupported distance strategy: {distance_strategy}")
|
||||
|
||||
|
||||
def _get_index_name(base_name: str) -> str:
|
||||
unique_id = str(uuid.uuid4()).replace("-", "")
|
||||
return f"{base_name}_{unique_id}"
|
||||
|
||||
|
||||
@_handle_exceptions
|
||||
def _create_table(client: Connection, table_name: str, embedding_dim: int) -> None:
|
||||
cols_dict = {
|
||||
"id": "RAW(16) DEFAULT SYS_GUID() PRIMARY KEY",
|
||||
"text": "CLOB",
|
||||
"metadata": "CLOB",
|
||||
"embedding": f"vector({embedding_dim}, FLOAT32)",
|
||||
}
|
||||
|
||||
if not _table_exists(client, table_name):
|
||||
with client.cursor() as cursor:
|
||||
ddl_body = ", ".join(
|
||||
f"{col_name} {col_type}" for col_name, col_type in cols_dict.items()
|
||||
)
|
||||
ddl = f"CREATE TABLE {table_name} ({ddl_body})"
|
||||
cursor.execute(ddl)
|
||||
logger.info("Table created successfully...")
|
||||
else:
|
||||
logger.info("Table already exists...")
|
||||
|
||||
|
||||
@_handle_exceptions
|
||||
def create_index(
|
||||
client: Connection,
|
||||
vector_store: OracleVS,
|
||||
params: Optional[dict[str, Any]] = None,
|
||||
) -> None:
|
||||
if params:
|
||||
if params["idx_type"] == "HNSW":
|
||||
_create_hnsw_index(
|
||||
client, vector_store.table_name, vector_store.distance_strategy, params
|
||||
)
|
||||
elif params["idx_type"] == "IVF":
|
||||
_create_ivf_index(
|
||||
client, vector_store.table_name, vector_store.distance_strategy, params
|
||||
)
|
||||
else:
|
||||
_create_hnsw_index(
|
||||
client, vector_store.table_name, vector_store.distance_strategy, params
|
||||
)
|
||||
else:
|
||||
_create_hnsw_index(
|
||||
client, vector_store.table_name, vector_store.distance_strategy, params
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
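For orientation, here is a minimal sketch of driving `create_index` from user code. `conn` (an open `oracledb` connection) and `vs` (an `OracleVS` instance) are placeholder names, and the parameter keys simply mirror the `defaults` dictionaries in `_create_hnsw_index` and `_create_ivf_index` below:

```python
# Sketch only: `conn` and `vs` are assumed to already exist.
from langchain_community.vectorstores.oraclevs import create_index

# HNSW index; this is also what you get when no idx_type is supplied.
create_index(conn, vs, params={"idx_type": "HNSW", "neighbors": 64, "accuracy": 95})

# IVF index, which uses neighbor partitions instead of a neighbor graph.
create_index(conn, vs, params={"idx_type": "IVF", "neighbor_part": 64})
```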
@_handle_exceptions
def _create_hnsw_index(
    client: Connection,
    table_name: str,
    distance_strategy: DistanceStrategy,
    params: Optional[dict[str, Any]] = None,
) -> None:
    defaults = {
        "idx_name": "HNSW",
        "idx_type": "HNSW",
        "neighbors": 32,
        "efConstruction": 200,
        "accuracy": 90,
        "parallel": 8,
    }

    if params:
        config = params.copy()
        # Ensure compulsory parts are included
        for compulsory_key in ["idx_name", "parallel"]:
            if compulsory_key not in config:
                if compulsory_key == "idx_name":
                    config[compulsory_key] = _get_index_name(
                        str(defaults[compulsory_key])
                    )
                else:
                    config[compulsory_key] = defaults[compulsory_key]

        # Validate keys in config against defaults
        for key in config:
            if key not in defaults:
                raise ValueError(f"Invalid parameter: {key}")
    else:
        config = defaults

    # Base SQL statement
    idx_name = config["idx_name"]
    base_sql = (
        f"create vector index {idx_name} on {table_name}(embedding) "
        f"ORGANIZATION INMEMORY NEIGHBOR GRAPH"
    )

    # Optional parts depending on parameters; the {placeholders} below are
    # filled in by str.format(**config) once the DDL is assembled.
    accuracy_part = " WITH TARGET ACCURACY {accuracy}" if ("accuracy" in config) else ""
    distance_part = f" DISTANCE {_get_distance_function(distance_strategy)}"

    parameters_part = ""
    if "neighbors" in config or "efConstruction" in config:
        # Fill whichever of the pair is missing from the defaults, along with
        # idx_type, which the placeholder string below requires.
        config.setdefault("neighbors", defaults["neighbors"])
        config.setdefault("efConstruction", defaults["efConstruction"])
        config.setdefault("idx_type", defaults["idx_type"])
        parameters_part = (
            " parameters (type {idx_type}, neighbors {"
            "neighbors}, efConstruction {efConstruction})"
        )

    # Always included part for parallel
    parallel_part = " parallel {parallel}"

    # Combine all parts
    ddl_assembly = (
        base_sql + accuracy_part + distance_part + parameters_part + parallel_part
    )
    # Format the SQL with values from the config dictionary
    ddl = ddl_assembly.format(**config)

    # Check if the index exists
    if not _index_exists(client, config["idx_name"]):
        with client.cursor() as cursor:
            cursor.execute(ddl)
            logger.info("Index created successfully...")
    else:
        logger.info("Index already exists...")


@_handle_exceptions
def _create_ivf_index(
    client: Connection,
    table_name: str,
    distance_strategy: DistanceStrategy,
    params: Optional[dict[str, Any]] = None,
) -> None:
    # Default configuration
    defaults = {
        "idx_name": "IVF",
        "idx_type": "IVF",
        "neighbor_part": 32,
        "accuracy": 90,
        "parallel": 8,
    }

    if params:
        config = params.copy()
        # Ensure compulsory parts are included
        for compulsory_key in ["idx_name", "parallel"]:
            if compulsory_key not in config:
                if compulsory_key == "idx_name":
                    config[compulsory_key] = _get_index_name(
                        str(defaults[compulsory_key])
                    )
                else:
                    config[compulsory_key] = defaults[compulsory_key]

        # Validate keys in config against defaults
        for key in config:
            if key not in defaults:
                raise ValueError(f"Invalid parameter: {key}")
    else:
        config = defaults

    # Base SQL statement
    idx_name = config["idx_name"]
    base_sql = (
        f"CREATE VECTOR INDEX {idx_name} ON {table_name}(embedding) "
        f"ORGANIZATION NEIGHBOR PARTITIONS"
    )

    # Optional parts depending on parameters
    accuracy_part = " WITH TARGET ACCURACY {accuracy}" if ("accuracy" in config) else ""
    distance_part = f" DISTANCE {_get_distance_function(distance_strategy)}"

    parameters_part = ""
    if "idx_type" in config and "neighbor_part" in config:
        parameters_part = (
            f" PARAMETERS (type {config['idx_type']}, neighbor"
            f" partitions {config['neighbor_part']})"
        )

    # Always included part for parallel
    parallel_part = f" PARALLEL {config['parallel']}"

    # Combine all parts
    ddl_assembly = (
        base_sql + accuracy_part + distance_part + parameters_part + parallel_part
    )
    # Format the SQL with values from the config dictionary
    ddl = ddl_assembly.format(**config)

    # Check if the index exists
    if not _index_exists(client, config["idx_name"]):
        with client.cursor() as cursor:
            cursor.execute(ddl)
            logger.info("Index created successfully...")
    else:
        logger.info("Index already exists...")


@_handle_exceptions
def drop_table_purge(client: Connection, table_name: str) -> None:
    if _table_exists(client, table_name):
        with client.cursor() as cursor:
            ddl = f"DROP TABLE {table_name} PURGE"
            cursor.execute(ddl)
            logger.info("Table dropped successfully...")
    else:
        logger.info("Table not found...")


@_handle_exceptions
def drop_index_if_exists(client: Connection, index_name: str) -> None:
    if _index_exists(client, index_name):
        drop_query = f"DROP INDEX {index_name}"
        with client.cursor() as cursor:
            cursor.execute(drop_query)
            logger.info(f"Index {index_name} has been dropped.")
    else:
        logger.exception(f"Index {index_name} does not exist.")


class OracleVS(VectorStore):
    """`OracleVS` vector store.

    To use, you should have both:
    - the ``oracledb`` python package installed
    - a connection string associated with an OracleDBCluster that has a
      deployed Search index

    Example:
        .. code-block:: python

            from langchain.vectorstores import OracleVS
            from langchain.embeddings.openai import OpenAIEmbeddings
            import oracledb

            with oracledb.connect(user=user, password=pwd, dsn=dsn) as connection:
                print("Database version:", connection.version)
                embeddings = OpenAIEmbeddings()
                query = ""
                vectors = OracleVS(connection, table_name, embeddings, query)
    """

    def __init__(
        self,
        client: Connection,
        embedding_function: Union[
            Callable[[str], List[float]],
            Embeddings,
        ],
        table_name: str,
        distance_strategy: DistanceStrategy = DistanceStrategy.EUCLIDEAN_DISTANCE,
        query: Optional[str] = "What is an Oracle database",
        params: Optional[Dict[str, Any]] = None,
    ):
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        try:
            # Initialize with the oracledb client and necessary components.
            self.client = client
            if not isinstance(embedding_function, Embeddings):
                logger.warning(
                    "`embedding_function` is expected to be an Embeddings "
                    "object; support for passing in a plain function "
                    "will soon be removed."
                )
            self.embedding_function = embedding_function
            self.query = query
            # The dimension is probed by embedding `query` once.
            embedding_dim = self.get_embedding_dimension()

            self.table_name = table_name
            self.distance_strategy = distance_strategy
            self.params = params

            _create_table(client, table_name, embedding_dim)
        except oracledb.DatabaseError as db_err:
            logger.exception(f"Database error occurred while creating table: {db_err}")
            raise RuntimeError(
                "Failed to create table due to a database error."
            ) from db_err
        except ValueError as val_err:
            logger.exception(f"Validation error: {val_err}")
            raise RuntimeError(
                "Failed to create table due to a validation error."
            ) from val_err
        except Exception as ex:
            logger.exception("An unexpected error occurred while creating the table.")
            raise RuntimeError(
                "Failed to create table due to an unexpected error."
            ) from ex

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Return the embedding function if it is an instance of
        Embeddings, otherwise return None.

        Returns:
            Optional[Embeddings]: The embedding function if it's an instance of
            Embeddings, otherwise None.
        """
        return (
            self.embedding_function
            if isinstance(self.embedding_function, Embeddings)
            else None
        )

    def get_embedding_dimension(self) -> int:
        # Embed the single document by wrapping it in a list
        embedded_document = self._embed_documents(
            [self.query if self.query is not None else ""]
        )

        # Get the first (and only) embedding's dimension
        return len(embedded_document[0])

    def _embed_documents(self, texts: List[str]) -> List[List[float]]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_documents(texts)
        elif callable(self.embedding_function):
            return [self.embedding_function(text) for text in texts]
        else:
            raise TypeError(
                "The embedding_function is neither Embeddings nor callable."
            )

    def _embed_query(self, text: str) -> List[float]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_query(text)
        else:
            return self.embedding_function(text)

    @_handle_exceptions
    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[Any, Any]]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add more texts to the vectorstore index.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids for the texts that are being added to
                the vector store.
            kwargs: vectorstore specific parameters
        """

        texts = list(texts)
        if ids:
            # If ids are provided, hash them to maintain consistency
            processed_ids = [
                hashlib.sha256(_id.encode()).hexdigest()[:16].upper() for _id in ids
            ]
        elif metadatas and all("id" in metadata for metadata in metadatas):
            # If no ids are provided but metadatas with ids are, generate
            # ids from metadatas
            processed_ids = [
                hashlib.sha256(metadata["id"].encode()).hexdigest()[:16].upper()
                for metadata in metadatas
            ]
        else:
            # Generate new ids if none are provided
            generated_ids = [
                str(uuid.uuid4()) for _ in texts
            ]  # uuid4 is more standard for random UUIDs
            processed_ids = [
                hashlib.sha256(_id.encode()).hexdigest()[:16].upper()
                for _id in generated_ids
            ]

        embeddings = self._embed_documents(texts)
        if not metadatas:
            metadatas = [{} for _ in texts]
        docs = [
            (id_, text, json.dumps(metadata), array.array("f", embedding))
            for id_, text, metadata, embedding in zip(
                processed_ids, texts, metadatas, embeddings
            )
        ]

        with self.client.cursor() as cursor:
            cursor.executemany(
                f"INSERT INTO {self.table_name} (id, text, metadata, "
                f"embedding) VALUES (:1, :2, :3, :4)",
                docs,
            )
            self.client.commit()
        return processed_ids
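Worth noting from `add_texts` above: caller-supplied ids are never stored verbatim. Each id is reduced to the first 16 hex characters of its uppercased SHA-256 digest, and `delete` (further down) applies the same transformation. A standalone sketch of the mapping, using only the standard library:

```python
import hashlib

def stored_id(raw_id: str) -> str:
    # The same transformation add_texts (and delete) applies before hitting the table
    return hashlib.sha256(raw_id.encode()).hexdigest()[:16].upper()

print(stored_id("doc-1"))  # -> 16 uppercase hex characters
```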
    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query."""
        # _embed_query handles both Embeddings objects and plain callables,
        # so `embedding` is always defined here.
        embedding = self._embed_query(query)
        documents = self.similarity_search_by_vector(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return documents

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        docs_and_scores = self.similarity_search_by_vector_with_relevance_scores(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query, with their distance scores."""
        embedding = self._embed_query(query)
        docs_and_scores = self.similarity_search_by_vector_with_relevance_scores(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return docs_and_scores

    @_handle_exceptions
    def _get_clob_value(self, result: Any) -> str:
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        clob_value = ""
        if result:
            if isinstance(result, oracledb.LOB):
                raw_data = result.read()
                if isinstance(raw_data, bytes):
                    # Specify the correct encoding
                    clob_value = raw_data.decode("utf-8")
                else:
                    clob_value = raw_data
            elif isinstance(result, str):
                clob_value = result
            else:
                raise Exception("Unexpected type:", type(result))
        return clob_value

    @_handle_exceptions
    def similarity_search_by_vector_with_relevance_scores(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        docs_and_scores = []
        embedding_arr = array.array("f", embedding)

        query = f"""
        SELECT id,
          text,
          metadata,
          vector_distance(embedding, :embedding,
          {_get_distance_function(self.distance_strategy)}) as distance
        FROM {self.table_name}
        ORDER BY distance
        FETCH APPROX FIRST :k ROWS ONLY
        """

        # Execute the query
        with self.client.cursor() as cursor:
            cursor.execute(query, embedding=embedding_arr, k=k)
            results = cursor.fetchall()

            # The metadata filter is applied after the top-k fetch, so fewer
            # than k rows may be returned when a filter is supplied.
            for result in results:
                metadata = json.loads(
                    self._get_clob_value(result[2]) if result[2] is not None else "{}"
                )

                # Keep the row when there is no filter, or when every filter
                # key maps to a container holding the row's metadata value.
                if not filter or all(
                    metadata.get(key) in value for key, value in filter.items()
                ):
                    doc = Document(
                        page_content=(
                            self._get_clob_value(result[1])
                            if result[1] is not None
                            else ""
                        ),
                        metadata=metadata,
                    )
                    distance = result[3]
                    docs_and_scores.append((doc, distance))

        return docs_and_scores
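One subtlety in the method above: the check `metadata.get(key) in value` treats each filter value as a container of acceptable values, so a bare scalar will not match the way you might expect. A hedged sketch, with `vs` again standing in for an `OracleVS` instance:

```python
# Rows are kept when metadata["category"] is one of the listed values;
# note the values are containers, not scalars.
docs = vs.similarity_search(
    "quarterly revenue",
    k=5,
    filter={"category": ["finance", "legal"]},
)
```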
    @_handle_exceptions
    def similarity_search_by_vector_returning_embeddings(
        self,
        embedding: List[float],
        k: int,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float, np.ndarray[np.float32, Any]]]:
        documents = []
        embedding_arr = array.array("f", embedding)

        query = f"""
        SELECT id,
          text,
          metadata,
          vector_distance(embedding, :embedding,
          {_get_distance_function(self.distance_strategy)}) as distance,
          embedding
        FROM {self.table_name}
        ORDER BY distance
        FETCH APPROX FIRST :k ROWS ONLY
        """

        # Execute the query
        with self.client.cursor() as cursor:
            cursor.execute(query, embedding=embedding_arr, k=k)
            results = cursor.fetchall()

            for result in results:
                page_content_str = self._get_clob_value(result[1])
                metadata_str = self._get_clob_value(result[2])
                metadata = json.loads(metadata_str)

                # Apply the filter if provided; otherwise, add all documents
                if not filter or all(
                    metadata.get(key) in value for key, value in filter.items()
                ):
                    document = Document(
                        page_content=page_content_str, metadata=metadata
                    )
                    distance = result[3]
                    # Assuming result[4] is already in the correct format;
                    # adjust if necessary
                    current_embedding = (
                        np.array(result[4], dtype=np.float32)
                        if result[4]
                        else np.empty(0, dtype=np.float32)
                    )
                    documents.append((document, distance, current_embedding))
        return documents  # type: ignore

    @_handle_exceptions
    def max_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: List[float],
        *,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
    ) -> List[Tuple[Document, float]]:
        """Return docs and their similarity scores selected using the
        maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to the query AND
        diversity among the selected documents.

        Args:
            self: An instance of the class
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to the MMR algorithm.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults
                    to None.
            lambda_mult: Number between 0 and 1 that determines the degree
                         of diversity among the results, with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal
            marginal relevance, with a score for each.
        """

        # Fetch documents, their scores, and their stored embeddings
        docs_scores_embeddings = self.similarity_search_by_vector_returning_embeddings(
            embedding, fetch_k, filter=filter
        )

        # Split documents, scores, and embeddings for the MMR calculation
        documents, scores, embeddings = (
            zip(*docs_scores_embeddings) if docs_scores_embeddings else ([], [], [])
        )

        # maximal_marginal_relevance returns the indices of the selected docs
        mmr_selected_indices = maximal_marginal_relevance(
            np.array(embedding, dtype=np.float32),
            list(embeddings),
            k=k,
            lambda_mult=lambda_mult,
        )

        # Filter documents based on MMR-selected indices and map scores
        mmr_selected_documents_with_scores = [
            (documents[i], scores[i]) for i in mmr_selected_indices
        ]

        return mmr_selected_documents_with_scores

    @_handle_exceptions
    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to the query AND
        diversity among the selected documents.

        Args:
            self: An instance of the class
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to the MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                         of diversity among the results, with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
            filter: Optional[Dict[str, Any]]
            **kwargs: Any
        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        docs_and_scores = self.max_marginal_relevance_search_with_score_by_vector(
            embedding, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult, filter=filter
        )
        return [doc for doc, _ in docs_and_scores]

    @_handle_exceptions
    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to the query AND
        diversity among the selected documents.

        Args:
            self: An instance of the class
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to the MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                         of diversity among the results, with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
            filter: Optional[Dict[str, Any]]
            **kwargs
        Returns:
            List of Documents selected by maximal marginal relevance.

        `max_marginal_relevance_search` requires that `query` returns matched
        embeddings alongside the matched documents.
        """
        embedding = self._embed_query(query)
        documents = self.max_marginal_relevance_search_by_vector(
            embedding,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            filter=filter,
            **kwargs,
        )
        return documents

    @_handle_exceptions
    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Delete by vector IDs.
        Args:
            self: An instance of the class
            ids: List of ids to delete.
            **kwargs
        """

        if ids is None:
            raise ValueError("No ids provided to delete.")

        # Compute SHA-256 hashes of the ids and truncate them, matching the
        # transformation applied in add_texts
        hashed_ids = [
            hashlib.sha256(_id.encode()).hexdigest()[:16].upper() for _id in ids
        ]

        # Construct the SQL statement with individual placeholders
        placeholders = ", ".join([":id" + str(i + 1) for i in range(len(hashed_ids))])

        ddl = f"DELETE FROM {self.table_name} WHERE id IN ({placeholders})"

        # Prepare bind variables
        bind_vars = {
            f"id{i}": hashed_id for i, hashed_id in enumerate(hashed_ids, start=1)
        }

        with self.client.cursor() as cursor:
            cursor.execute(ddl, bind_vars)
            self.client.commit()

    @classmethod
    @_handle_exceptions
    def from_texts(
        cls: Type[OracleVS],
        texts: Iterable[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> OracleVS:
        """Return VectorStore initialized from texts and embeddings."""
        client = kwargs.get("client")
        if client is None:
            raise ValueError("client parameter is required...")
        params = kwargs.get("params", {})

        table_name = str(kwargs.get("table_name", "langchain"))

        distance_strategy = cast(
            DistanceStrategy, kwargs.get("distance_strategy", None)
        )
        if not isinstance(distance_strategy, DistanceStrategy):
            raise TypeError(
                f"Expected DistanceStrategy, got {type(distance_strategy).__name__}"
            )

        query = kwargs.get("query", "What is an Oracle database")

        # Note: any existing table with this name is dropped first.
        drop_table_purge(client, table_name)

        vss = cls(
            client=client,
            embedding_function=embedding,
            table_name=table_name,
            distance_strategy=distance_strategy,
            query=query,
            params=params,
        )
        vss.add_texts(texts=list(texts), metadatas=metadatas)
        return vss
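Putting the pieces together, here is a minimal end-to-end sketch of the class above. The connection credentials and the embedding model are placeholders (substitute your own), and keep in mind that `from_texts` calls `drop_table_purge` first, so an existing table of the same name is destroyed:

```python
import oracledb
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy

# Placeholder credentials and DSN -- replace with your own database.
connection = oracledb.connect(user="scott", password="tiger", dsn="localhost/freepdb1")
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  # any Embeddings works

# Builds the table, embeds the texts, and inserts them.
vs = OracleVS.from_texts(
    ["Oracle AI Vector Search", "LangChain integration test"],
    embedder,
    client=connection,                               # required kwarg
    table_name="DEMO_VS",
    distance_strategy=DistanceStrategy.DOT_PRODUCT,  # must be a DistanceStrategy
)

print(vs.similarity_search("vector search", k=1))
print(vs.max_marginal_relevance_search("vector search", k=1, fetch_k=2))
```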
105
libs/community/poetry.lock
generated
@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

[[package]]
name = "aenum"
@@ -3454,7 +3454,6 @@ files = [
    {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"},
    {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"},
    {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"},
    {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"},
    {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"},
    {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"},
    {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"},
@@ -3963,51 +3962,9 @@
    {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"},
]

[[package]]
name = "langchain"
version = "0.2.0rc1"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = []
develop = true

[package.dependencies]
aiohttp = "^3.8.3"
async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">= 0.5.7, < 0.7"
langchain-core = "^0.1.48"
langchain-text-splitters = ">=0.0.1,<0.1"
langsmith = "^0.1.17"
numpy = "^1"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
requests = "^2"
SQLAlchemy = ">=1.4,<3"
tenacity = "^8.1.0"

[package.extras]
all = []
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
clarifai = ["clarifai (>=9.1.0)"]
cli = ["typer (>=0.9.0,<0.10.0)"]
cohere = ["cohere (>=4,<6)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<6)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<6)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]

[package.source]
type = "directory"
url = "../langchain"

[[package]]
name = "langchain-core"
version = "0.1.50"
version = "0.1.51"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
@@ -5485,6 +5442,49 @@ text = ["spacy", "wordcloud (>=1.8.1)"]
torch = ["oracle_ads[viz]", "torch", "torchvision"]
viz = ["bokeh (>=3.0.0,<3.2.0)", "folium (>=0.12.1)", "graphviz (<0.17)", "scipy (>=1.5.4)", "seaborn (>=0.11.0)"]

[[package]]
name = "oracledb"
version = "2.2.0"
description = "Python interface to Oracle Database"
optional = true
python-versions = ">=3.7"
files = [
    {file = "oracledb-2.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:253a85eef53d97815b4d838e5275d0a99e33ec340eb4b945cd2371e2bcede46b"},
    {file = "oracledb-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa5c2982076366f59dade28b554b43a257ad426e55359124bc37f191f51c2d46"},
    {file = "oracledb-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19408844bd4af5b4d40f06c3e5b88c6bfce4a749f61ab766f41b22c4070c5c15"},
    {file = "oracledb-2.2.0-cp310-cp310-win32.whl", hash = "sha256:c2e2e3f00d7eb7f4dabfa8996dc70db03bd7dbe474d2d1dc381daeff54cfdeff"},
    {file = "oracledb-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:efed536635b0fec5c1484eda55fad4affa57672b87596ec6273123a3133ba5b6"},
    {file = "oracledb-2.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4b7e14b04dc2af4697ca561f9bcac110a67a7be2ccf868d789e92771017feca"},
    {file = "oracledb-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61bbf9cd64a2f3b65a12550329b2f0caed7d9aa5e892c0ce69d9ea7b3cb3cb8e"},
    {file = "oracledb-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e461d1c7ef4d3f03d84595a13754390a62300976782d7c29efc07fcc915e1b3"},
    {file = "oracledb-2.2.0-cp311-cp311-win32.whl", hash = "sha256:6c7da69d18cf02e469e15215af9c6f219256972a172c0e544a2ecc2a5cab9aa5"},
    {file = "oracledb-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0245f677e27ee0990eb0213485031dacdc837a89569563f1594b82ccb362255"},
    {file = "oracledb-2.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:10d2cd354a15e2b7e191256a0179874068fc64fa6543b2e20c9c1c38f0dd0839"},
    {file = "oracledb-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fbf07e0e88c9ff1555c9301d95c69e0d48263cf7df63172043fe0a042539e687"},
    {file = "oracledb-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6a1365d3e05ca73b638ef939f9a609fed0ae5da75d13b2cfb75601ab8b85fce"},
    {file = "oracledb-2.2.0-cp312-cp312-win32.whl", hash = "sha256:3fe57091a1463efac692b352e99f9daeab5ab375bab2060c5caba9a3a7743c15"},
    {file = "oracledb-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5ca9c050e18b2b1005b40d44a2098155445836071253ee5d547c7f285fc7729"},
    {file = "oracledb-2.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b5ad105aabc8ff32e3d3a343a92cf84976cf2454b6a6ff02065383fc3863e68d"},
    {file = "oracledb-2.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a7f2572c358604186d857c80f384ad03226e372731770911856541a06bdd34"},
    {file = "oracledb-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa1fe78ed0cbf98593c1f3f620f751b725b189f8c845577e39a372f44b2bf384"},
    {file = "oracledb-2.2.0-cp37-cp37m-win32.whl", hash = "sha256:bcef115bd147d6f267e3b09cbc3fc04189bff69e94d05c1e266c698668061e8d"},
    {file = "oracledb-2.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1272bf562bcd6ff5e23b1e1fe8c3363d7a66fe8f48b1e00c4fb081d5436e1df5"},
    {file = "oracledb-2.2.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:e0010aee0ed0a57964ce9f6cb0e2315a4ffce947121e0bb1c618e5091e64bab4"},
    {file = "oracledb-2.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:437d7c5a36f7e72ca36e1ac3f1a7c087bffa1cd0ba3a84471e54506c8572a5ad"},
    {file = "oracledb-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:581b7067283910a53b1ac1a50c0046058a21bd5c073d529bf695113db6d25f62"},
    {file = "oracledb-2.2.0-cp38-cp38-win32.whl", hash = "sha256:97fdc27a15f6441434a7ef563f522c8ceac19c2933f2da1082125670a2e2fc6b"},
    {file = "oracledb-2.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:c22a2052997a01e59a4c9c33c9c0593eebcb1d893addeda9cd57003c2e088a85"},
    {file = "oracledb-2.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b924ee3e7d41edb367e5bb4cbb30990ad447fedda9ef0fe29b691d36a8d338c2"},
    {file = "oracledb-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3f9fa10b5f5c5dbe80dc7bdea5e5746abd411217e812fae66cc61c68f3f8f6"},
    {file = "oracledb-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba96a450275bceb5e0928e0dc01b5fb200e81ba04e99499d4930ccba681fd88a"},
    {file = "oracledb-2.2.0-cp39-cp39-win32.whl", hash = "sha256:35b6524b57979dbe8463af06648ad9972bce06e014a292ad96fec34c62665a8b"},
    {file = "oracledb-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0b4968f39871d501ab16a2fe05b5b4ae954e338e6b9dcefeb9bced998ddd4c4b"},
    {file = "oracledb-2.2.0.tar.gz", hash = "sha256:f52c7df38b13243b5ce583457b80748a34682b9bb8370da2497868b71976798b"},
]

[package.dependencies]
cryptography = ">=3.2.1"

[[package]]
name = "orjson"
version = "3.9.15"
@@ -6064,6 +6064,8 @@ files = [
    {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"},
    {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"},
    {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"},
    {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"},
    {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"},
    {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"},
    {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"},
    {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"},
@@ -6106,6 +6108,7 @@ files = [
    {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"},
    {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"},
@@ -6114,6 +6117,8 @@ files = [
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"},
@@ -6646,31 +6651,26 @@ python-versions = ">=3.8"
files = [
    {file = "PyMuPDF-1.23.26-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:645a05321aecc8c45739f71f0eb574ce33138d19189582ffa5241fea3a8e2549"},
    {file = "PyMuPDF-1.23.26-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2dfc9e010669ae92fade6fb72aaea49ebe3b8dcd7ee4dcbbe50115abcaa4d3fe"},
    {file = "PyMuPDF-1.23.26-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:734ee380b3abd038602be79114194a3cb74ac102b7c943bcb333104575922c50"},
    {file = "PyMuPDF-1.23.26-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:b22f8d854f8196ad5b20308c1cebad3d5189ed9f0988acbafa043947ea7e6c55"},
    {file = "PyMuPDF-1.23.26-cp310-none-win32.whl", hash = "sha256:cc0f794e3466bc96b5bf79d42fbc1551428751e3fef38ebc10ac70396b676144"},
    {file = "PyMuPDF-1.23.26-cp310-none-win_amd64.whl", hash = "sha256:2eb701247d8e685a24e45899d1175f01a3ce5fc792a4431c91fbb68633b29298"},
    {file = "PyMuPDF-1.23.26-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:e2804a64bb57da414781e312fb0561f6be67658ad57ed4a73dce008b23fc70a6"},
    {file = "PyMuPDF-1.23.26-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:97b40bb22e3056874634617a90e0ed24a5172cf71791b9e25d1d91c6743bc567"},
    {file = "PyMuPDF-1.23.26-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:fab8833559bc47ab26ce736f915b8fc1dd37c108049b90396f7cd5e1004d7593"},
    {file = "PyMuPDF-1.23.26-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:f25aafd3e7fb9d7761a22acf2b67d704f04cc36d4dc33a3773f0eb3f4ec3606f"},
    {file = "PyMuPDF-1.23.26-cp311-none-win32.whl", hash = "sha256:05e672ed3e82caca7ef02a88ace30130b1dd392a1190f03b2b58ffe7aa331400"},
    {file = "PyMuPDF-1.23.26-cp311-none-win_amd64.whl", hash = "sha256:92b3c4dd4d0491d495f333be2d41f4e1c155a409bc9d04b5ff29655dccbf4655"},
    {file = "PyMuPDF-1.23.26-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:a217689ede18cc6991b4e6a78afee8a440b3075d53b9dec4ba5ef7487d4547e9"},
    {file = "PyMuPDF-1.23.26-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:42ad2b819b90ce1947e11b90ec5085889df0a2e3aa0207bc97ecacfc6157cabc"},
    {file = "PyMuPDF-1.23.26-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:99607649f89a02bba7d8ebe96e2410664316adc95e9337f7dfeff6a154f93049"},
    {file = "PyMuPDF-1.23.26-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:bb42d4b8407b4de7cb58c28f01449f16f32a6daed88afb41108f1aeb3552bdd4"},
    {file = "PyMuPDF-1.23.26-cp312-none-win32.whl", hash = "sha256:c40d044411615e6f0baa7d3d933b3032cf97e168c7fa77d1be8a46008c109aee"},
    {file = "PyMuPDF-1.23.26-cp312-none-win_amd64.whl", hash = "sha256:3f876533aa7f9a94bcd9a0225ce72571b7808260903fec1d95c120bc842fb52d"},
    {file = "PyMuPDF-1.23.26-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:52df831d46beb9ff494f5fba3e5d069af6d81f49abf6b6e799ee01f4f8fa6799"},
    {file = "PyMuPDF-1.23.26-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:0bbb0cf6593e53524f3fc26fb5e6ead17c02c64791caec7c4afe61b677dedf80"},
    {file = "PyMuPDF-1.23.26-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:5ef4360f20015673c20cf59b7e19afc97168795188c584254ed3778cde43ce77"},
    {file = "PyMuPDF-1.23.26-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:d7cd88842b2e7f4c71eef4d87c98c35646b80b60e6375392d7ce40e519261f59"},
    {file = "PyMuPDF-1.23.26-cp38-none-win32.whl", hash = "sha256:6577e2f473625e2d0df5f5a3bf1e4519e94ae749733cc9937994d1b256687bfa"},
    {file = "PyMuPDF-1.23.26-cp38-none-win_amd64.whl", hash = "sha256:fbe1a3255b2cd0d769b2da2c4efdd0c0f30d4961a1aac02c0f75cf951b337aa4"},
    {file = "PyMuPDF-1.23.26-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:73fce034f2afea886a59ead2d0caedf27e2b2a8558b5da16d0286882e0b1eb82"},
    {file = "PyMuPDF-1.23.26-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:b3de8618b7cb5b36db611083840b3bcf09b11a893e2d8262f4e042102c7e65de"},
    {file = "PyMuPDF-1.23.26-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:879e7f5ad35709d8760ab6103c3d5dac8ab8043a856ab3653fd324af7358ee87"},
    {file = "PyMuPDF-1.23.26-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:deee96c2fd415ded7b5070d8d5b2c60679aee6ed0e28ac0d2cb998060d835c2c"},
    {file = "PyMuPDF-1.23.26-cp39-none-win32.whl", hash = "sha256:9f7f4ef99dd8ac97fb0b852efa3dcbee515798078b6c79a6a13c7b1e7c5d41a4"},
    {file = "PyMuPDF-1.23.26-cp39-none-win_amd64.whl", hash = "sha256:ba9a54552c7afb9ec85432c765e2fa9a81413acfaa7d70db7c9b528297749e5b"},
@@ -7111,6 +7111,7 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -10038,9 +10039,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p

[extras]
cli = ["typer"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "azure-identity", "azure-search-documents", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "azure-identity", "azure-search-documents", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "oracledb", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"]

[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "380deeac82f6dd4abee0baa9a1040082efe93f354d3bc6661e43771fc17d57a0"
content-hash = "ca64e52a60e8ee6f2f4ea303e1779a4508f401e283f63861161cb6a9560e2178"
libs/community/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-community"
version = "0.0.37rc1"
version = "0.0.37"
description = "Community contributed LangChain integrations."
authors = []
license = "MIT"
@@ -9,8 +9,7 @@ repository = "https://github.com/langchain-ai/langchain"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = "^0.1.48"
langchain = "~0.2.0rc1"
langchain-core = "^0.1.51"
SQLAlchemy = ">=1.4,<3"
requests = "^2"
PyYAML = ">=5.3"
@@ -103,6 +102,7 @@ premai = {version = "^0.3.25", optional = true}
vdms = {version = "^0.0.20", optional = true}
httpx-sse = {version = "^0.4.0", optional = true}
pyjwt = {version = "^2.8.0", optional = true}
oracledb = {version = "^2.2.0", optional = true}

[tool.poetry.group.test]
optional = true
@@ -126,7 +126,6 @@ pytest-socket = "^0.6.0"
syrupy = "^4.0.2"
requests-mock = "^1.11.0"
langchain-core = {path = "../core", develop = true}
langchain = {path = "../langchain", develop = true}

[tool.poetry.group.codespell]
optional = true
@@ -161,7 +160,6 @@ cassio = "^0.1.6"
tiktoken = ">=0.3.2,<0.6.0"
anthropic = "^0.3.11"
langchain-core = { path = "../core", develop = true }
langchain = {path = "../langchain", develop = true}
fireworks-ai = "^0.9.0"
vdms = "^0.0.20"
exllamav2 = "^0.0.18"
@@ -183,7 +181,6 @@ types-redis = "^4.3.21.6"
mypy-protobuf = "^3.0.0"
langchain-core = {path = "../core", develop = true}
langchain-text-splitters = {path = "../text-splitters", develop = true}
langchain = {path = "../langchain", develop = true}

[tool.poetry.group.dev]
optional = true
@@ -283,7 +280,8 @@ extended_testing = [
    "premai",
    "vdms",
    "httpx-sse",
    "pyjwt"
    "pyjwt",
    "oracledb"
]

[tool.ruff]
@@ -1,81 +0,0 @@
"""Fake Embedding class for testing purposes."""
import math
from typing import List

from langchain_core.embeddings import Embeddings

fake_texts = ["foo", "bar", "baz"]


class FakeEmbeddings(Embeddings):
    """Fake embeddings functionality for testing."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return simple embeddings.
        Embeddings encode each text as its index."""
        return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        return self.embed_documents(texts)

    def embed_query(self, text: str) -> List[float]:
        """Return constant query embeddings.
        Embeddings are identical to embed_documents(texts)[0].
        Distance to each text will be that text's index,
        as it was passed to embed_documents."""
        return [float(1.0)] * 9 + [float(0.0)]

    async def aembed_query(self, text: str) -> List[float]:
        return self.embed_query(text)


class ConsistentFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings which remember all the texts seen so far to return consistent
    vectors for the same texts."""

    def __init__(self, dimensionality: int = 10) -> None:
        self.known_texts: List[str] = []
        self.dimensionality = dimensionality

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return consistent embeddings for each text seen so far."""
        out_vectors = []
        for text in texts:
            if text not in self.known_texts:
                self.known_texts.append(text)
            vector = [float(1.0)] * (self.dimensionality - 1) + [
                float(self.known_texts.index(text))
            ]
            out_vectors.append(vector)
        return out_vectors

    def embed_query(self, text: str) -> List[float]:
        """Return consistent embeddings for the text, if seen before, or a constant
        one if the text is unknown."""
        return self.embed_documents([text])[0]


class AngularTwoDimensionalEmbeddings(Embeddings):
    """
    From angles (as strings in units of pi) to unit embedding vectors on a circle.
    """

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Make a list of texts into a list of embedding vectors.
        """
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """
        Convert input text to a 'vector' (list of floats).
        If the text is a number, use it as the angle for the
        unit vector in units of pi.
        Any other input text becomes the singular result [0, 0] !
        """
        try:
            angle = float(text)
            return [math.cos(angle * math.pi), math.sin(angle * math.pi)]
        except ValueError:
            # Assume: just test string, no attention is paid to values.
            return [0.0, 0.0]
@@ -0,0 +1,447 @@
# Authors:
#   Sudhir Kumar (sudhirkk)
#
# -----------------------------------------------------------------------------
# test_oracleds.py
# -----------------------------------------------------------------------------
import sys

from langchain_community.document_loaders.oracleai import (
    OracleDocLoader,
    OracleTextSplitter,
)
from langchain_community.utilities.oracleai import OracleSummary
from langchain_community.vectorstores.oraclevs import (
    _table_exists,
    drop_table_purge,
)

uname = "hr"
passwd = "hr"
# uname = "LANGCHAINUSER"
# passwd = "langchainuser"
v_dsn = "100.70.107.245:1521/cdb1_pdb1.regress.rdbms.dev.us.oracle.com"


### Test loader #####
def test_loader_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        # oracle connection
        connection = oracledb.connect(user=uname, password=passwd, dsn=v_dsn)
        cursor = connection.cursor()

        if _table_exists(connection, "LANGCHAIN_DEMO"):
            drop_table_purge(connection, "LANGCHAIN_DEMO")

        cursor.execute("CREATE TABLE langchain_demo(id number, text varchar2(25))")

        rows = [
            (1, "First"),
            (2, "Second"),
            (3, "Third"),
            (4, "Fourth"),
            (5, "Fifth"),
            (6, "Sixth"),
            (7, "Seventh"),
        ]

        cursor.executemany("insert into LANGCHAIN_DEMO(id, text) values (:1, :2)", rows)

        connection.commit()

        # local file, local directory, database column
        loader_params = {
            "owner": uname,
            "tablename": "LANGCHAIN_DEMO",
            "colname": "TEXT",
        }

        # instantiate
        loader = OracleDocLoader(conn=connection, params=loader_params)

        # load
        docs = loader.load()

        # verify
        if len(docs) == 0:
            sys.exit(1)

        if _table_exists(connection, "LANGCHAIN_DEMO"):
            drop_table_purge(connection, "LANGCHAIN_DEMO")

    except Exception:
        sys.exit(1)

    try:
        # expectation: ORA-00942
        loader_params = {
            "owner": uname,
            "tablename": "COUNTRIES1",
            "colname": "COUNTRY_NAME",
        }

        # instantiate
        loader = OracleDocLoader(conn=connection, params=loader_params)

        # load
        docs = loader.load()
        if len(docs) == 0:
            pass

    except Exception:
        pass

    try:
        # expectation: file "SUDHIR" doesn't exist.
        loader_params = {"file": "SUDHIR"}

        # instantiate
        loader = OracleDocLoader(conn=connection, params=loader_params)

        # load
        docs = loader.load()
        if len(docs) == 0:
            pass

    except Exception:
        pass

    try:
        # expectation: path "SUDHIR" doesn't exist.
        loader_params = {"dir": "SUDHIR"}

        # instantiate
        loader = OracleDocLoader(conn=connection, params=loader_params)

        # load
        docs = loader.load()
        if len(docs) == 0:
            pass

    except Exception:
        pass


### Test splitter ####
def test_splitter_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        # oracle connection
        connection = oracledb.connect(user=uname, password=passwd, dsn=v_dsn)
        doc = """Langchain is a wonderful framework to load, split, chunk
and embed your data!!"""

        # by words, max = 1000
        splitter_params = {
            "by": "words",
            "max": "1000",
            "overlap": "200",
            "split": "custom",
            "custom_list": [","],
            "extended": "true",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            sys.exit(1)

        # by chars, max = 4000
        splitter_params = {
            "by": "chars",
            "max": "4000",
            "overlap": "800",
            "split": "NEWLINE",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            sys.exit(1)

        # by words, max = 10
        splitter_params = {
            "by": "words",
            "max": "10",
            "overlap": "2",
            "split": "SENTENCE",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            sys.exit(1)

        # by chars, max = 50
        splitter_params = {
            "by": "chars",
            "max": "50",
            "overlap": "10",
            "split": "SPACE",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            sys.exit(1)

    except Exception:
        sys.exit(1)

    try:
        # expectation: ORA-20003: invalid value xyz for BY parameter
        splitter_params = {"by": "xyz"}

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            pass

    except Exception:
        pass

    try:
        # Expectation: ORA-30584: invalid text chunking MAXIMUM - '10'
        splitter_params = {
            "by": "chars",
            "max": "10",
            "overlap": "2",
            "split": "SPACE",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            pass

    except Exception:
        pass

    try:
        # Expectation: ORA-30584: invalid text chunking MAXIMUM - '5'
        splitter_params = {
            "by": "words",
            "max": "5",
            "overlap": "2",
            "split": "SPACE",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            pass

    except Exception:
        pass

    try:
        # Expectation: ORA-30586: invalid text chunking SPLIT BY - SENTENCE
        splitter_params = {
            "by": "words",
            "max": "50",
            "overlap": "2",
            "split": "SENTENCE",
            "normalize": "all",
        }

        # instantiate
        splitter = OracleTextSplitter(conn=connection, params=splitter_params)

        # generate chunks
        chunks = splitter.split_text(doc)

        # verify
        if len(chunks) == 0:
            pass

    except Exception:
        pass


#### Test summary ####
def test_summary_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        # oracle connection
        connection = oracledb.connect(user=uname, password=passwd, dsn=v_dsn)

        # provider: Database, glevel: Paragraph
        summary_params = {
            "provider": "database",
            "glevel": "paragraph",
            "numParagraphs": 2,
            "language": "english",
        }

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)

        doc = """It was 7 minutes after midnight. The dog was lying on the grass in
of the lawn in front of Mrs Shears house. Its eyes were closed. It
was running on its side, the way dogs run when they think they are
cat in a dream. But the dog was not running or asleep. The dog was dead.
was a garden fork sticking out of the dog. The points of the fork must
gone all the way through the dog and into the ground because the fork
not fallen over. I decided that the dog was probably killed with the
because I could not see any other wounds in the dog and I do not think
would stick a garden fork into a dog after it had died for some other
like cancer for example, or a road accident. But I could not be certain"""

        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            sys.exit(1)

        # provider: Database, glevel: Sentence
        summary_params = {"provider": "database", "glevel": "Sentence"}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            sys.exit(1)

        # provider: Database, glevel: P
        summary_params = {"provider": "database", "glevel": "P"}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            sys.exit(1)

        # provider: Database, glevel: S
        summary_params = {
            "provider": "database",
            "glevel": "S",
            "numParagraphs": 16,
            "language": "english",
        }

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            sys.exit(1)

        # provider: Database, glevel: S, doc = ' '
        summary_params = {"provider": "database", "glevel": "S", "numParagraphs": 2}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)

        doc = " "
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            sys.exit(1)

    except Exception:
        sys.exit(1)

    try:
        # Expectation: DRG-11002: missing value for PROVIDER
        summary_params = {"provider": "database1", "glevel": "S"}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            pass

    except Exception:
        pass

    try:
        # Expectation: DRG-11425: gist level SUDHIR is invalid,
        # DRG-11427: valid gist level values are S, P
        summary_params = {"provider": "database", "glevel": "SUDHIR"}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            pass

    except Exception:
        pass

    try:
        # Expectation: DRG-11441: gist numParagraphs -2 is invalid
        summary_params = {"provider": "database", "glevel": "S", "numParagraphs": -2}

        # summary
        summary = OracleSummary(conn=connection, params=summary_params)
        summaries = summary.get_summary(doc)

        # verify
        if len(summaries) == 0:
            pass

    except Exception:
        pass
@@ -1,73 +0,0 @@
"""Integration test for embedding-based redundant doc filtering."""

from langchain_core.documents import Document

from langchain_community.document_transformers.embeddings_redundant_filter import (
    EmbeddingsClusteringFilter,
    EmbeddingsRedundantFilter,
    _DocumentWithState,
)
from langchain_community.embeddings import OpenAIEmbeddings


def test_embeddings_redundant_filter() -> None:
    texts = [
        "What happened to all of my cookies?",
        "Where did all of my cookies go?",
        "I wish there were better Italian restaurants in my neighborhood.",
    ]
    docs = [Document(page_content=t) for t in texts]
    embeddings = OpenAIEmbeddings()
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    actual = redundant_filter.transform_documents(docs)
    assert len(actual) == 2
    assert set(texts[:2]).intersection([d.page_content for d in actual])


def test_embeddings_redundant_filter_with_state() -> None:
    texts = ["What happened to all of my cookies?", "foo bar baz"]
    state = {"embedded_doc": [0.5] * 10}
    docs = [_DocumentWithState(page_content=t, state=state) for t in texts]
    embeddings = OpenAIEmbeddings()
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    actual = redundant_filter.transform_documents(docs)
    assert len(actual) == 1


def test_embeddings_clustering_filter() -> None:
    texts = [
        "What happened to all of my cookies?",
        "A cookie is a small, baked sweet treat and you can find it in the cookie",
        "monsters' jar.",
        "Cookies are good.",
        "I have nightmares about the cookie monster.",
        "The most popular pizza styles are: Neapolitan, New York-style and",
        "Chicago-style. You can find them on iconic restaurants in major cities.",
        "Neapolitan pizza: This is the original pizza style,hailing from Naples,",
        "Italy.",
        "I wish there were better Italian Pizza restaurants in my neighborhood.",
        "New York-style pizza: This is characterized by its large, thin crust, and",
        "generous toppings.",
        "The first movie to feature a robot was 'A Trip to the Moon' (1902).",
        "The first movie to feature a robot that could pass for a human was",
        "'Blade Runner' (1982)",
        "The first movie to feature a robot that could fall in love with a human",
        "was 'Her' (2013)",
        "A robot is a machine capable of carrying out complex actions automatically.",
        "There are certainly hundreds, if not thousands movies about robots like:",
        "'Blade Runner', 'Her' and 'A Trip to the Moon'",
    ]

    docs = [Document(page_content=t) for t in texts]
    embeddings = OpenAIEmbeddings()
    redundant_filter = EmbeddingsClusteringFilter(
        embeddings=embeddings,
        num_clusters=3,
        num_closest=1,
        sorted=True,
    )
    actual = redundant_filter.transform_documents(docs)
    assert len(actual) == 3
    assert texts[1] in [d.page_content for d in actual]
    assert texts[4] in [d.page_content for d in actual]
    assert texts[11] in [d.page_content for d in actual]
@@ -0,0 +1,955 @@
"""Test Oracle AI Vector Search functionality."""

# import required modules
import sys
import threading

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.oraclevs import (
    OracleVS,
    _create_table,
    _index_exists,
    _table_exists,
    create_index,
    drop_index_if_exists,
    drop_table_purge,
)
from langchain_community.vectorstores.utils import DistanceStrategy

username = ""
password = ""
dsn = ""


############################
####### table_exists #######
############################
def test_table_exists_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. Existing table (all capital letters)
    # Expectation: True
    _table_exists(connection, "V$TRANSACTION")

    # 2. Existing table (all small letters)
    # Expectation: True
    _table_exists(connection, "v$transaction")

    # 3. Non-existing table
    # Expectation: False
    _table_exists(connection, "Hello")

    # 4. Invalid table name
    # Expectation: ORA-00903: invalid table name
    try:
        _table_exists(connection, "123")
    except Exception:
        pass

    # 5. Empty string
    # Expectation: ORA-00903: invalid table name
    try:
        _table_exists(connection, "")
    except Exception:
        pass

    # 6. Special character
    # Expectation: ORA-00911: #: invalid character after FROM
    try:
        _table_exists(connection, "##4")
    except Exception:
        pass

    # 7. Table name length > 128
    # Expectation: ORA-00972: The identifier XXXXXXXXXX...XXXXXXXXXX...
    # exceeds the maximum length of 128 bytes.
    try:
        _table_exists(connection, "x" * 129)
    except Exception:
        pass

    # 8. <Schema_Name.Table_Name>
    # Expectation: True
    _create_table(connection, "TB1", 65535)

    # 9. Toggle case (like TaBlE)
    # Expectation: True
    _table_exists(connection, "Tb1")
    drop_table_purge(connection, "TB1")

    # 10. Table_Name → "हिन्दी"
    # Expectation: True
    _create_table(connection, '"हिन्दी"', 545)
    _table_exists(connection, '"हिन्दी"')
    drop_table_purge(connection, '"हिन्दी"')


############################
####### create_table #######
############################
def test_create_table_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. New table - HELLO
    # Dimension - 100
    # Expectation: table is created
    _create_table(connection, "HELLO", 100)

    # 2. Existing table name - HELLO
    # Dimension - 110
    # Expectation: nothing happens
    _create_table(connection, "HELLO", 110)
    drop_table_purge(connection, "HELLO")

    # 3. New table - 123
    # Dimension - 100
    # Expectation: ORA-00903: invalid table name
    try:
        _create_table(connection, "123", 100)
        drop_table_purge(connection, "123")
    except Exception:
        pass

    # 4. New table - Hello123
    # Dimension - 65535
    # Expectation: table is created
    _create_table(connection, "Hello123", 65535)
    drop_table_purge(connection, "Hello123")

    # 5. New table - T1
    # Dimension - 65536
    # Expectation: ORA-51801: VECTOR column type specification
    # has an unsupported dimension count ('65536').
    try:
        _create_table(connection, "T1", 65536)
        drop_table_purge(connection, "T1")
    except Exception:
        pass

    # 6. New table - T1
    # Dimension - 0
    # Expectation: ORA-51801: VECTOR column type specification has
    # an unsupported dimension count (0).
    try:
        _create_table(connection, "T1", 0)
        drop_table_purge(connection, "T1")
    except Exception:
        pass

    # 7. New table - T1
    # Dimension - -1
    # Expectation: ORA-51801: VECTOR column type specification has
    # an unsupported dimension count ('-').
    try:
        _create_table(connection, "T1", -1)
        drop_table_purge(connection, "T1")
    except Exception:
        pass

    # 8. New table - T2
    # Dimension - '1000'
    # Expectation: table is created
    _create_table(connection, "T2", int("1000"))
    drop_table_purge(connection, "T2")

    # 9. New table - T3
    # Dimension - 100 passed as a variable
    # Expectation: table is created
    val = 100
    _create_table(connection, "T3", val)
    drop_table_purge(connection, "T3")

    # 10. Table name containing a newline
    # Expectation: ORA-00922: missing or invalid option
    val2 = """H
ello"""
    try:
        _create_table(connection, val2, 545)
        drop_table_purge(connection, val2)
    except Exception:
        pass

    # 11. New table - हिन्दी
    # Dimension - 545
    # Expectation: table is created
    _create_table(connection, '"हिन्दी"', 545)
    drop_table_purge(connection, '"हिन्दी"')

    # 12. <schema_name.table_name>
    # Expectation: failure - user does not exist
    try:
        _create_table(connection, "U1.TB4", 128)
        drop_table_purge(connection, "U1.TB4")
    except Exception:
        pass

    # 13. Quoted table name - "T5"
    # Expectation: table is created
    _create_table(connection, '"T5"', 128)
    drop_table_purge(connection, '"T5"')

    # 14. Toggle case
    # Expectation: table creation fails
    try:
        _create_table(connection, "TaBlE", 128)
        drop_table_purge(connection, "TaBlE")
    except Exception:
        pass

    # 15. table_name as empty string
    # Expectation: ORA-00903: invalid table name
    try:
        _create_table(connection, "", 128)
        drop_table_purge(connection, "")
        _create_table(connection, '""', 128)
        drop_table_purge(connection, '""')
    except Exception:
        pass

    # 16. Arithmetic operations in dimension parameter
    # Expectation: table is created
    n = 1
    _create_table(connection, "T10", n + 500)
    drop_table_purge(connection, "T10")

    # 17. String operations in table_name & dimension parameter
    # Expectation: table is created
    _create_table(connection, "YaSh".replace("aS", "ok"), 500)
    drop_table_purge(connection, "YaSh".replace("aS", "ok"))


##################################
####### create_hnsw_index #######
##################################
def test_create_hnsw_index_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. Table_name - TB1
    # New index
    # distance_strategy - DistanceStrategy.EUCLIDEAN_DISTANCE
    # Expectation: index created
    model1 = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-mpnet-base-v2"
    )
    vs = OracleVS(connection, model1, "TB1", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs)

    # 2. Creating the same index again
    # Table_name - TB1
    # Expectation: nothing happens
    try:
        create_index(connection, vs)
        drop_index_if_exists(connection, "HNSW")
    except Exception:
        pass
    drop_table_purge(connection, "TB1")

    # 3. Create index with the following parameters:
    # idx_name - hnsw_idx2
    # idx_type - HNSW
    # Expectation: index created
    vs = OracleVS(connection, model1, "TB2", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, params={"idx_name": "hnsw_idx2", "idx_type": "HNSW"})
    drop_index_if_exists(connection, "hnsw_idx2")
    drop_table_purge(connection, "TB2")

    # 4. Table name - TB3
    # idx_name - "हिन्दी"
    # idx_type - HNSW
    # Expectation: index created
    try:
        vs = OracleVS(connection, model1, "TB3", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(connection, vs, params={"idx_name": '"हिन्दी"', "idx_type": "HNSW"})
        drop_index_if_exists(connection, '"हिन्दी"')
    except Exception:
        pass
    drop_table_purge(connection, "TB3")

    # 5. idx_name passed empty
    # Expectation: ORA-01741: illegal zero-length identifier
    try:
        vs = OracleVS(connection, model1, "TB4", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(connection, vs, params={"idx_name": '""', "idx_type": "HNSW"})
        drop_index_if_exists(connection, '""')
    except Exception:
        pass
    drop_table_purge(connection, "TB4")

    # 6. idx_type left empty
    # Expectation: index created
    try:
        vs = OracleVS(connection, model1, "TB5", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(connection, vs, params={"idx_name": "Hello", "idx_type": ""})
        drop_index_if_exists(connection, "Hello")
    except Exception:
        pass
    drop_table_purge(connection, "TB5")

    # 7. efConstruction passed as parameter but not neighbors
    # Expectation: index created
    vs = OracleVS(connection, model1, "TB7", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection,
        vs,
        params={"idx_name": "idx11", "efConstruction": 100, "idx_type": "HNSW"},
    )
    drop_index_if_exists(connection, "idx11")
    drop_table_purge(connection, "TB7")

    # 8. efConstruction passed as parameter as well as neighbors
    # (for this the idx_type parameter is also necessary)
    # Expectation: index created
    vs = OracleVS(connection, model1, "TB8", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection,
        vs,
        params={
            "idx_name": "idx11",
            "efConstruction": 100,
            "neighbors": 80,
            "idx_type": "HNSW",
        },
    )
    drop_index_if_exists(connection, "idx11")
    drop_table_purge(connection, "TB8")

    # 9. Limits of values (integer parameters):
    # parallel, efConstruction, neighbors, accuracy
    # 0 < accuracy <= 100
    # 0 < neighbors <= 2048
    # 0 < efConstruction <= 65535
    # 0 < parallel <= 255
    # Expectation: index created
    drop_table_purge(connection, "TB9")
    vs = OracleVS(connection, model1, "TB9", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection,
        vs,
        params={
            "idx_name": "idx11",
            "efConstruction": 65535,
            "neighbors": 2048,
            "idx_type": "HNSW",
            "parallel": 255,
        },
    )
    drop_index_if_exists(connection, "idx11")
    drop_table_purge(connection, "TB9")
    # index not created: efConstruction out of range
    try:
        vs = OracleVS(connection, model1, "TB10", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(
            connection,
            vs,
            params={
                "idx_name": "idx11",
                "efConstruction": 0,
                "neighbors": 2048,
                "idx_type": "HNSW",
                "parallel": 255,
            },
        )
        drop_index_if_exists(connection, "idx11")
    except Exception:
        pass
    # index not created: neighbors out of range
    try:
        vs = OracleVS(connection, model1, "TB11", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(
            connection,
            vs,
            params={
                "idx_name": "idx11",
                "efConstruction": 100,
                "neighbors": 0,
                "idx_type": "HNSW",
                "parallel": 255,
            },
        )
        drop_index_if_exists(connection, "idx11")
    except Exception:
        pass
    # index not created: parallel out of range
    try:
        vs = OracleVS(connection, model1, "TB12", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(
            connection,
            vs,
            params={
                "idx_name": "idx11",
                "efConstruction": 100,
                "neighbors": 100,
                "idx_type": "HNSW",
                "parallel": 0,
            },
        )
        drop_index_if_exists(connection, "idx11")
    except Exception:
        pass
    # index not created: accuracy out of range
    try:
        vs = OracleVS(connection, model1, "TB13", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(
            connection,
            vs,
            params={
                "idx_name": "idx11",
                "efConstruction": 10,
                "neighbors": 100,
                "idx_type": "HNSW",
                "parallel": 10,
                "accuracy": 120,
            },
        )
        drop_index_if_exists(connection, "idx11")
    except Exception:
        pass
    # With negative or otherwise out-of-bound values for all four of them,
    # we get the same errors.
    # Expectation: index not created
    try:
        vs = OracleVS(connection, model1, "TB14", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(
            connection,
            vs,
            params={
                "idx_name": "idx11",
                "efConstruction": 200,
                "neighbors": 100,
                "idx_type": "HNSW",
                "parallel": "hello",
                "accuracy": 10,
            },
        )
        drop_index_if_exists(connection, "idx11")
    except Exception:
        pass
    drop_table_purge(connection, "TB10")
    drop_table_purge(connection, "TB11")
    drop_table_purge(connection, "TB12")
    drop_table_purge(connection, "TB13")
    drop_table_purge(connection, "TB14")

    # 10. All HNSW parameters passed together
    # Expectation: index created
    vs = OracleVS(connection, model1, "TB15", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection,
        vs,
        params={
            "idx_name": "idx11",
            "efConstruction": 200,
            "neighbors": 100,
            "idx_type": "HNSW",
            "parallel": 8,
            "accuracy": 10,
        },
    )
    drop_index_if_exists(connection, "idx11")
    drop_table_purge(connection, "TB15")

    # 11. index_name as <schema_name.index_name>
    # Expectation: U1 not present
    try:
        vs = OracleVS(
            connection, model1, "U1.TB16", DistanceStrategy.EUCLIDEAN_DISTANCE
        )
        create_index(
            connection,
            vs,
            params={
                "idx_name": "U1.idx11",
                "efConstruction": 200,
                "neighbors": 100,
                "idx_type": "HNSW",
                "parallel": 8,
                "accuracy": 10,
            },
        )
        drop_index_if_exists(connection, "U1.idx11")
        drop_table_purge(connection, "TB16")
    except Exception:
        pass

    # 12. Index_name of size 129 (exceeds the 128-byte limit)
    # Expectation: index not created
    try:
        vs = OracleVS(connection, model1, "TB17", DistanceStrategy.EUCLIDEAN_DISTANCE)
        create_index(connection, vs, params={"idx_name": "x" * 129, "idx_type": "HNSW"})
        drop_index_if_exists(connection, "x" * 129)
    except Exception:
        pass
    drop_table_purge(connection, "TB17")

    # 13. Index_name of size 128
    # Expectation: index created
    vs = OracleVS(connection, model1, "TB18", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, params={"idx_name": "x" * 128, "idx_type": "HNSW"})
    drop_index_if_exists(connection, "x" * 128)
    drop_table_purge(connection, "TB18")


##################################
####### index_exists #############
##################################
def test_index_exists_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    model1 = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-mpnet-base-v2"
    )
    # 1. Existing index (all capital letters)
    # Expectation: True
    vs = OracleVS(connection, model1, "TB1", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, params={"idx_name": "idx11", "idx_type": "HNSW"})
    _index_exists(connection, "IDX11")

    # 2. Existing index (all small letters)
    # Expectation: True
    _index_exists(connection, "idx11")

    # 3. Non-existing index
    # Expectation: False
    _index_exists(connection, "Hello")

    # 4. Invalid index name
    # Expectation: error
    try:
        _index_exists(connection, "123")
    except Exception:
        pass

    # 5. Empty string
    # Expectation: error
    try:
        _index_exists(connection, "")
    except Exception:
        pass

    # 6. Special character
    # Expectation: error
    try:
        _index_exists(connection, "##4")
    except Exception:
        pass

    # 7. Index name length > 128
    # Expectation: error
    try:
        _index_exists(connection, "x" * 129)
    except Exception:
        pass

    # 8. <Schema_Name.Index_Name>
    # Expectation: True
    _index_exists(connection, "U1.IDX11")

    # 9. Toggle case (like iDx11)
    # Expectation: True
    _index_exists(connection, "IdX11")

    # 10. Index_Name → "हिन्दी"
    # Expectation: True
    drop_index_if_exists(connection, "idx11")
    try:
        create_index(connection, vs, params={"idx_name": '"हिन्दी"', "idx_type": "HNSW"})
        _index_exists(connection, '"हिन्दी"')
    except Exception:
        pass
    drop_table_purge(connection, "TB1")


##################################
####### add_texts ################
##################################
def test_add_texts_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. Add 2 records to the table
    # Expectation: successful
    texts = ["Rohan", "Shailendra"]
    metadata = [
        {"id": "100", "link": "Document Example Test 1"},
        {"id": "101", "link": "Document Example Test 2"},
    ]
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vs_obj = OracleVS(connection, model, "TB1", DistanceStrategy.EUCLIDEAN_DISTANCE)
    vs_obj.add_texts(texts, metadata)
    drop_table_purge(connection, "TB1")

    # 2. Add records without metadata
    # Expectation: an exception occurs :: Either specify an 'ids' list or
    # 'metadatas' with an 'id' attribute for each element.
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vs_obj = OracleVS(connection, model, "TB2", DistanceStrategy.EUCLIDEAN_DISTANCE)
    texts2 = ["Sri Ram", "Krishna"]
    vs_obj.add_texts(texts2)
    drop_table_purge(connection, "TB2")

    # 3. Add records with the ids option; ids are passed as a plain string,
    # an empty string, a multi-line string, and a quoted "<string>"
    # Expectation: successful in all four cases
    vs_obj = OracleVS(connection, model, "TB4", DistanceStrategy.EUCLIDEAN_DISTANCE)
    ids3 = ["114", "124"]
    vs_obj.add_texts(texts2, ids=ids3)
    drop_table_purge(connection, "TB4")

    vs_obj = OracleVS(connection, model, "TB5", DistanceStrategy.EUCLIDEAN_DISTANCE)
    ids4 = ["", "134"]
    vs_obj.add_texts(texts2, ids=ids4)
    drop_table_purge(connection, "TB5")

    vs_obj = OracleVS(connection, model, "TB6", DistanceStrategy.EUCLIDEAN_DISTANCE)
    ids5 = [
        """Good afternoon
my friends""",
        "India",
    ]
    vs_obj.add_texts(texts2, ids=ids5)
    drop_table_purge(connection, "TB6")

    vs_obj = OracleVS(connection, model, "TB7", DistanceStrategy.EUCLIDEAN_DISTANCE)
    ids6 = ['"Good afternoon"', '"India"']
    vs_obj.add_texts(texts2, ids=ids6)
    drop_table_purge(connection, "TB7")

    # 4. Add records with ids and metadatas
    # Expectation: successful
    vs_obj = OracleVS(connection, model, "TB8", DistanceStrategy.EUCLIDEAN_DISTANCE)
    texts3 = ["Sri Ram 6", "Krishna 6"]
    ids7 = ["1", "2"]
    metadata = [
        {"id": "102", "link": "Document Example", "stream": "Science"},
        {"id": "104", "link": "Document Example 45"},
    ]
    vs_obj.add_texts(texts3, metadata, ids=ids7)
    drop_table_purge(connection, "TB8")

    # 5. Add 10000 records
    # Expectation: successful
    vs_obj = OracleVS(connection, model, "TB9", DistanceStrategy.EUCLIDEAN_DISTANCE)
    texts4 = ["Sri Ram{0}".format(i) for i in range(1, 10000)]
    ids8 = ["Hello{0}".format(i) for i in range(1, 10000)]
    vs_obj.add_texts(texts4, ids=ids8)
    drop_table_purge(connection, "TB9")

    # 6. Add 2 different records concurrently
    # Expectation: successful
    def add(val: str) -> None:
        model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2"
        )
        vs_obj = OracleVS(
            connection, model, "TB10", DistanceStrategy.EUCLIDEAN_DISTANCE
        )
        texts5 = [val]
        ids9 = texts5
        vs_obj.add_texts(texts5, ids=ids9)

    # note: thread args must be tuples, hence the trailing commas
    thread_1 = threading.Thread(target=add, args=("Sri Ram",))
    thread_2 = threading.Thread(target=add, args=("Sri Krishna",))
    thread_1.start()
    thread_2.start()
    thread_1.join()
    thread_2.join()
    drop_table_purge(connection, "TB10")

    # 7. Add 2 identical records concurrently
    # Expectation: successful; one of the inserts gets a primary key violation error
    def add1(val: str) -> None:
        model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2"
        )
        vs_obj = OracleVS(
            connection, model, "TB11", DistanceStrategy.EUCLIDEAN_DISTANCE
        )
        texts = [val]
        ids10 = texts
        vs_obj.add_texts(texts, ids=ids10)

    try:
        thread_1 = threading.Thread(target=add1, args=("Sri Ram",))
        thread_2 = threading.Thread(target=add1, args=("Sri Ram",))
        thread_1.start()
        thread_2.start()
        thread_1.join()
        thread_2.join()
    except Exception:
        pass
    drop_table_purge(connection, "TB11")

    # 8. Create object with table name of type <schema_name.table_name>
    # Expectation: U1 does not exist
    try:
        vs_obj = OracleVS(connection, model, "U1.TB14", DistanceStrategy.DOT_PRODUCT)
        for i in range(1, 10):
            texts7 = ["Yash{0}".format(i)]
            ids13 = ["1234{0}".format(i)]
            vs_obj.add_texts(texts7, ids=ids13)
        drop_table_purge(connection, "TB14")
    except Exception:
        pass


##################################
####### embed_documents(text) ####
##################################
def test_embed_documents_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. String example - 'Sri Ram'
    # Expectation: vector printed
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vs_obj = OracleVS(connection, model, "TB7", DistanceStrategy.EUCLIDEAN_DISTANCE)

    # 4. List
    # Expectation: vectors printed
    vs_obj._embed_documents(["hello", "yash"])
    drop_table_purge(connection, "TB7")


##################################
####### embed_query(text) ########
##################################
def test_embed_query_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. String
    # Expectation: vector printed
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vs_obj = OracleVS(connection, model, "TB8", DistanceStrategy.EUCLIDEAN_DISTANCE)
    vs_obj._embed_query("Sri Ram")
    drop_table_purge(connection, "TB8")

    # 3. Empty string
    # Expectation: []
    vs_obj._embed_query("")


##################################
####### create_index #############
##################################
def test_create_index_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    # 1. No optional parameters passed
    # Expectation: successful
    model1 = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-mpnet-base-v2"
    )
    vs = OracleVS(connection, model1, "TB1", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs)
    drop_index_if_exists(connection, "HNSW")
    drop_table_purge(connection, "TB1")

    # 2. IVF index
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB2", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, {"idx_type": "IVF", "idx_name": "IVF"})
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB2")

    # 3. IVF index with neighbor_part passed as a parameter
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB3", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, {"idx_type": "IVF", "neighbor_part": 10})
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB3")

    # 4. IVF index with neighbor_part and accuracy passed as parameters
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB4", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection, vs, {"idx_type": "IVF", "neighbor_part": 10, "accuracy": 90}
    )
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB4")

    # 5. IVF index with neighbor_part and parallel passed as parameters
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB5", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection, vs, {"idx_type": "IVF", "neighbor_part": 10, "parallel": 90}
    )
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB5")

    # 6. IVF index, then perform DML (insert and delete)
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB6", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(connection, vs, {"idx_type": "IVF", "idx_name": "IVF"})
    texts = ["Sri Ram", "Krishna"]
    vs.add_texts(texts)
    # perform delete
    vs.delete(["hello"])
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB6")

    # 7. IVF index with neighbor_part, parallel and accuracy passed as parameters
    # Expectation: successful
    vs = OracleVS(connection, model1, "TB7", DistanceStrategy.EUCLIDEAN_DISTANCE)
    create_index(
        connection,
        vs,
        {"idx_type": "IVF", "neighbor_part": 10, "parallel": 90, "accuracy": 99},
    )
    drop_index_if_exists(connection, "IVF")
    drop_table_purge(connection, "TB7")


##################################
####### perform_search ###########
##################################
def test_perform_search_test() -> None:
    try:
        import oracledb
    except ImportError:
        return

    try:
        connection = oracledb.connect(user=username, password=password, dsn=dsn)
    except Exception:
        sys.exit(1)

    model1 = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-mpnet-base-v2"
    )
    vs_1 = OracleVS(connection, model1, "TB10", DistanceStrategy.EUCLIDEAN_DISTANCE)
    vs_2 = OracleVS(connection, model1, "TB11", DistanceStrategy.DOT_PRODUCT)
    vs_3 = OracleVS(connection, model1, "TB12", DistanceStrategy.COSINE)
    vs_4 = OracleVS(connection, model1, "TB13", DistanceStrategy.EUCLIDEAN_DISTANCE)
    vs_5 = OracleVS(connection, model1, "TB14", DistanceStrategy.DOT_PRODUCT)
    vs_6 = OracleVS(connection, model1, "TB15", DistanceStrategy.COSINE)

    # vector store list:
    vs_list = [vs_1, vs_2, vs_3, vs_4, vs_5, vs_6]

    for i, vs in enumerate(vs_list, start=1):
        # insert data
        texts = ["Yash", "Varanasi", "Yashaswi", "Mumbai", "BengaluruYash"]
        metadatas = [
            {"id": "hello"},
            {"id": "105"},
            {"id": "106"},
            {"id": "yash"},
            {"id": "108"},
        ]

        vs.add_texts(texts, metadatas)

        # create index
        if i == 1 or i == 2 or i == 3:
            create_index(connection, vs, {"idx_type": "HNSW", "idx_name": f"IDX1{i}"})
        else:
            create_index(connection, vs, {"idx_type": "IVF", "idx_name": f"IDX1{i}"})

        # perform search
        query = "YashB"

        filter = {"id": ["106", "108", "yash"]}

        # similarity_search without filter
        vs.similarity_search(query, 2)

        # similarity_search with filter
        vs.similarity_search(query, 2, filter=filter)

        # similarity search with relevance score
        vs.similarity_search_with_score(query, 2)

        # similarity search with relevance score, with filter
        vs.similarity_search_with_score(query, 2, filter=filter)

        # max marginal relevance search
        vs.max_marginal_relevance_search(query, 2, fetch_k=20, lambda_mult=0.5)

        # max marginal relevance search with filter
        vs.max_marginal_relevance_search(
            query, 2, fetch_k=20, lambda_mult=0.5, filter=filter
        )

    drop_table_purge(connection, "TB10")
    drop_table_purge(connection, "TB11")
    drop_table_purge(connection, "TB12")
    drop_table_purge(connection, "TB13")
    drop_table_purge(connection, "TB14")
    drop_table_purge(connection, "TB15")
@@ -25,9 +25,7 @@ def test_initialization() -> None:
    """Test chat model initialization."""

    for model in [
        ChatBaichuan(
            model="Baichuan2-Turbo-192K", baichuan_api_key="test-api-key", timeout=40
        ),
        ChatBaichuan(model="Baichuan2-Turbo-192K", api_key="test-api-key", timeout=40),
        ChatBaichuan(
            model="Baichuan2-Turbo-192K",
            baichuan_api_key="test-api-key",
@@ -35,7 +33,9 @@ def test_initialization() -> None:
        ),
    ]:
        assert model.model == "Baichuan2-Turbo-192K"
        assert isinstance(model.baichuan_api_key, SecretStr)
        assert model.request_timeout == 40
        assert model.temperature == 0.3


def test__convert_message_to_dict_human() -> None:

@@ -113,6 +113,8 @@ EXPECTED_ALL = [
    "OnlinePDFLoader",
    "OpenCityDataLoader",
    "OracleAutonomousDatabaseLoader",
    "OracleDocLoader",
    "OracleTextSplitter",
    "OutlookMessageLoader",
    "PDFMinerLoader",
    "PDFMinerPDFasHTMLLoader",

@@ -57,6 +57,7 @@ EXPECTED_ALL = [
    "ErnieEmbeddings",
    "JavelinAIGatewayEmbeddings",
    "OllamaEmbeddings",
    "OracleEmbeddings",
    "QianfanEmbeddingsEndpoint",
    "JohnSnowLabsEmbeddings",
    "VoyageEmbeddings",

@@ -39,7 +39,6 @@ EXPECTED_ALL = [
    "VespaRetriever",
    "WeaviateHybridSearchRetriever",
    "WikipediaRetriever",
    "WebResearchRetriever",
    "YouRetriever",
    "ZepRetriever",
    "ZillizRetriever",

@@ -47,7 +47,6 @@ def test_required_dependencies(poetry_conf: Mapping[str, Any]) -> None:
            "python",
            "requests",
            "tenacity",
            "langchain",
        ]
    )

@@ -74,7 +73,6 @@ def test_test_group_dependencies(poetry_conf: Mapping[str, Any]) -> None:
            "duckdb-engine",
            "freezegun",
            "langchain-core",
            "langchain",
            "lark",
            "pandas",
            "pytest",

@@ -34,6 +34,7 @@ EXPECTED_ALL = [
    "NVIDIARivaTTS",
    "NVIDIARivaStream",
    "OpenWeatherMapAPIWrapper",
    "OracleSummary",
    "OutlineAPIWrapper",
    "NutritionAIAPI",
    "Portkey",

@@ -60,6 +60,7 @@ EXPECTED_ALL = [
    "Neo4jVector",
    "NeuralDBVectorStore",
    "OpenSearchVectorSearch",
    "OracleVS",
    "PGEmbedding",
    "PGVector",
    "PathwayVectorClient",

@@ -73,6 +73,7 @@ def test_compatible_vectorstore_documentation() -> None:
        "MomentoVectorIndex",
        "MyScale",
        "OpenSearchVectorSearch",
        "OracleVS",
        "PGVector",
        "Pinecone",
        "Qdrant",

@@ -55,6 +55,7 @@ _EXPECTED = [
    "MyScaleSettings",
    "Neo4jVector",
    "OpenSearchVectorSearch",
    "OracleVS",
    "PGEmbedding",
    "PGVector",
    "PathwayVectorClient",

@@ -64,8 +64,8 @@ def draw_mermaid(
    subgraph = ""
    # Add edges to the graph
    for edge in edges:
        src_prefix = edge.source.split(":")[0]
        tgt_prefix = edge.target.split(":")[0]
        src_prefix = edge.source.split(":")[0] if ":" in edge.source else None
        tgt_prefix = edge.target.split(":")[0] if ":" in edge.target else None
        # exit subgraph if source or target is not in the same subgraph
        if subgraph and (subgraph != src_prefix or subgraph != tgt_prefix):
            mermaid_graph += "\tend\n"

@@ -1,4 +1,5 @@
"""Generic utility functions."""

import contextlib
import datetime
import functools
@@ -88,10 +89,11 @@ def guard_import(
    installed."""
    try:
        module = importlib.import_module(module_name, package)
    except ImportError:
    except (ImportError, ModuleNotFoundError):
        pip_name = pip_name or module_name.split(".")[0].replace("_", "-")
        raise ImportError(
            f"Could not import {module_name} python package. "
            f"Please install it with `pip install {pip_name or module_name}`."
            f"Please install it with `pip install {pip_name}`."
        )
    return module

libs/core/poetry.lock (generated, 331 changes)
@@ -176,13 +176,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p

[[package]]
name = "babel"
version = "2.14.0"
version = "2.15.0"
description = "Internationalization utilities"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
    {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"},
    {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"},
    {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"},
    {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"},
]

[package.dependencies]
@@ -834,13 +834,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]

[[package]]
name = "jinja2"
version = "3.1.3"
version = "3.1.4"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
files = [
    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
    {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
    {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
]

[package.dependencies]
@@ -1217,13 +1217,13 @@ url = "../text-splitters"

[[package]]
name = "langsmith"
version = "0.1.53"
version = "0.1.54"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
    {file = "langsmith-0.1.53-py3-none-any.whl", hash = "sha256:867f9c4176f92e019398dda22a210db68c98a810234a5266cf4609236dcd3043"},
    {file = "langsmith-0.1.53.tar.gz", hash = "sha256:0ac271080fb67806f1b2c5de0e7c698c45a57b18b5d46e984e9b15dd38f0bc42"},
    {file = "langsmith-0.1.54-py3-none-any.whl", hash = "sha256:e8ba2758dbdff0fccb35337c28a5ab641dd980b22e178d390b72a15c9ae9caff"},
    {file = "langsmith-0.1.54.tar.gz", hash = "sha256:86f5a90e48303de897f37a893f8bb635eabdaf23e674099e8bc0f2e9ca2f8faf"},
]

[package.dependencies]
@@ -1554,57 +1554,57 @@ files = [

[[package]]
name = "orjson"
version = "3.10.2"
version = "3.10.3"
description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
optional = false
python-versions = ">=3.8"
files = [
    {file = "orjson-3.10.2-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:87124c1b3471a072fda422e156dd7ef086d854937d68adc266f17f32a1043c95"},
    {file = "orjson-3.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1b79526bd039e775ad0f558800c3cd9f3bde878a1268845f63984d37bcbb5d1"},
    {file = "orjson-3.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f6dc97a6b2833a0d77598e7d016b6d964e4b0bc9576c89aa9a16fcf8ac902d"},
    {file = "orjson-3.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e427ce004fe15e13dcfdbd6c9dc936abf83d85d2164ec415a8bd90954f6f781"},
    {file = "orjson-3.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f3e05f70ab6225ba38504a2be61935d6ebc09de2b1bc484c30cb96ca4fa24b8"},
    {file = "orjson-3.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f4e67821e3c1f0ec5dbef9dbd0bc9cd0fe4f0d8ba5d76a07038ee3843c9ac98a"},
    {file = "orjson-3.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:24877561fe96a3736224243d6e2e026a674a4ddeff2b02fdeac41801bd261c87"},
    {file = "orjson-3.10.2-cp310-none-win32.whl", hash = "sha256:5da4ce52892b00aa51f5c5781414dc2bcdecc8470d2d60eeaeadbc14c5d9540b"},
    {file = "orjson-3.10.2-cp310-none-win_amd64.whl", hash = "sha256:cee3df171d957e84f568c3920f1f077f7f2a69f8ce4303d4c1404b7aab2f365a"},
    {file = "orjson-3.10.2-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a361e7ad84452416a469cdda7a2efeee8ddc9e06e4b95938b072045e205f86dc"},
    {file = "orjson-3.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b064251af6a2b7fb26e51b9abd3c1e615b53d5d5f87972263233d66d9c736a4"},
    {file = "orjson-3.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:464c30c24961cc83b2dc0e5532ed41084624ee1c71d4e7ef1aaec88f7a677393"},
    {file = "orjson-3.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4459005982748fda9871f04bce6a304c515afc46c96bef51e2bc81755c0f4ea0"},
    {file = "orjson-3.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abd0cd3a113a6ea0051c4a50cca65161ee50c014a01363554a1417d9f3c4529f"},
    {file = "orjson-3.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9a658ebc5143fbc0a9e3a10aafce4de50b01b1b0a41942038cb4bc6617f1e1d7"},
    {file = "orjson-3.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2fa4addaf6a6b3eb836cf92c4986d5ef9215fbdc87e4891cf8fd97990972bba0"},
    {file = "orjson-3.10.2-cp311-none-win32.whl", hash = "sha256:faff04363bfcff9cb41ab09c0ce8db84b8d4a09a374305ec5b12210dfa3154ea"},
    {file = "orjson-3.10.2-cp311-none-win_amd64.whl", hash = "sha256:7aee7b31a6acecf65a94beef2191081692891b00e8b7e02fbcc0c85002d62d0b"},
    {file = "orjson-3.10.2-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:38d9e9eab01131fdccbe95bff4f1d8ea197d239b5c73396e2079d07730bfa205"},
    {file = "orjson-3.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfd84ecf5ebe8ec334a95950427e7ade40135032b1f00e2b17f351b0ef6dc72b"},
    {file = "orjson-3.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2ba009d85c3c98006759e62150d018d622aa79012fdeefbb70a42a542582b45"},
    {file = "orjson-3.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eac25b54fab6d9ccbf9dbc57555c2b52bf6d0802ea84bd2bd9670a161bd881dc"},
    {file = "orjson-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8e735d90a90caf746de59becf29642c8358cafcd9b1a906ae3566efcc495324"},
    {file = "orjson-3.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:12feeee9089654904c2c988788eb9d521f5752c83ea410969d1f58d05ea95943"},
    {file = "orjson-3.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:619a7a4df76497afd2e6f1c963cc7e13658b3d58425c3a2ccf0471ad61d71025"},
    {file = "orjson-3.10.2-cp312-none-win32.whl", hash = "sha256:460d221090b451a0e78813196ec9dd28d2e33103048cfd7c1a3312a532fe3b1f"},
    {file = "orjson-3.10.2-cp312-none-win_amd64.whl", hash = "sha256:7efa93a9540e6ac9fe01167389fd7b1f0250cbfe3a8f06fe23e045d2a2d5d6ac"},
    {file = "orjson-3.10.2-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9ceb283b8c048fb20bd1c703b10e710783a4f1ba7d5654358a25db99e9df94d5"},
    {file = "orjson-3.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201bf2b96ba39941254ef6b02e080660861e1444ec50be55778e1c38446c2d39"},
    {file = "orjson-3.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:51a7b67c8cddf1a9de72d534244590103b1f17b2105d3bdcb221981bd97ab427"},
    {file = "orjson-3.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cde123c227e28ef9bba7092dc88abbd1933a0d7c17c58970c8ed8ec804e7add5"},
    {file = "orjson-3.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09b51caf8720b6df448acf764312d4678aeed6852ebfa6f3aa28b6061155ffef"},
    {file = "orjson-3.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f124d7e813e7b3d56bb7841d3d0884fec633f5f889a27a158d004b6b37e5ca98"},
    {file = "orjson-3.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e33ac7a6b081688a2167b501c9813aa6ec1f2cc097c47ab5f33cca3e875da9dc"},
    {file = "orjson-3.10.2-cp38-none-win32.whl", hash = "sha256:8f4a91921270d646f50f90a9903f87baae24c6e376ef3c275fcd0ffc051117bb"},
    {file = "orjson-3.10.2-cp38-none-win_amd64.whl", hash = "sha256:148d266e300257ff6d8e8a5895cc1e12766b8db676510b4f1d79b0d07f666fdd"},
    {file = "orjson-3.10.2-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:27158a75e7239145cf385d2318fdb27fbcd1fc494a470ee68287147c8b214cb1"},
    {file = "orjson-3.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26302b13e3f542b3e1ad1723e3543caf28e2f372391d21e1642de29c06e6209"},
    {file = "orjson-3.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:712cb3aa976311ae53de116a64949392aa5e7dcceda6769d5d7169d303d5ed09"},
    {file = "orjson-3.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9db3e6f23a6c9ce6c883a8e10e0eae0e2895327fb6e2286019b13153e59c672f"},
    {file = "orjson-3.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44787769d93d1ef9f25a80644ef020e0f30f37045d6336133e421a414c8fe51"},
    {file = "orjson-3.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:53a43b18d280c8d18cb18437921a05ec478b908809f9e89ad60eb2fdf0ba96ac"},
    {file = "orjson-3.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:99e270b6a13027ed4c26c2b75b06c2cfb950934c8eb0400d70f4e6919bfe24f4"},
    {file = "orjson-3.10.2-cp39-none-win32.whl", hash = "sha256:d6f71486d211db9a01094cdd619ab594156a43ca04fa24e23ee04dac1509cdca"},
    {file = "orjson-3.10.2-cp39-none-win_amd64.whl", hash = "sha256:161f3b4e6364132562af80967ac3211e6681d320a01954da4915af579caab0b2"},
    {file = "orjson-3.10.2.tar.gz", hash = "sha256:47affe9f704c23e49a0fbb9d441af41f602474721e8639e8814640198f9ae32f"},
    {file = "orjson-3.10.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9fb6c3f9f5490a3eb4ddd46fc1b6eadb0d6fc16fb3f07320149c3286a1409dd8"},
    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:252124b198662eee80428f1af8c63f7ff077c88723fe206a25df8dc57a57b1fa"},
    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f3e87733823089a338ef9bbf363ef4de45e5c599a9bf50a7a9b82e86d0228da"},
    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8334c0d87103bb9fbbe59b78129f1f40d1d1e8355bbed2ca71853af15fa4ed3"},
    {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1952c03439e4dce23482ac846e7961f9d4ec62086eb98ae76d97bd41d72644d7"},
|
||||
{file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0403ed9c706dcd2809f1600ed18f4aae50be263bd7112e54b50e2c2bc3ebd6d"},
|
||||
{file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:382e52aa4270a037d41f325e7d1dfa395b7de0c367800b6f337d8157367bf3a7"},
|
||||
{file = "orjson-3.10.3-cp310-none-win32.whl", hash = "sha256:be2aab54313752c04f2cbaab4515291ef5af8c2256ce22abc007f89f42f49109"},
|
||||
{file = "orjson-3.10.3-cp310-none-win_amd64.whl", hash = "sha256:416b195f78ae461601893f482287cee1e3059ec49b4f99479aedf22a20b1098b"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:73100d9abbbe730331f2242c1fc0bcb46a3ea3b4ae3348847e5a141265479700"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a12eee96e3ab828dbfcb4d5a0023aa971b27143a1d35dc214c176fdfb29b3"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520de5e2ef0b4ae546bea25129d6c7c74edb43fc6cf5213f511a927f2b28148b"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccaa0a401fc02e8828a5bedfd80f8cd389d24f65e5ca3954d72c6582495b4bcf"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7bc9e8bc11bac40f905640acd41cbeaa87209e7e1f57ade386da658092dc16"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3582b34b70543a1ed6944aca75e219e1192661a63da4d039d088a09c67543b08"},
|
||||
{file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c23dfa91481de880890d17aa7b91d586a4746a4c2aa9a145bebdbaf233768d5"},
|
||||
{file = "orjson-3.10.3-cp311-none-win32.whl", hash = "sha256:1770e2a0eae728b050705206d84eda8b074b65ee835e7f85c919f5705b006c9b"},
|
||||
{file = "orjson-3.10.3-cp311-none-win_amd64.whl", hash = "sha256:93433b3c1f852660eb5abdc1f4dd0ced2be031ba30900433223b28ee0140cde5"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a39aa73e53bec8d410875683bfa3a8edf61e5a1c7bb4014f65f81d36467ea098"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0943a96b3fa09bee1afdfccc2cb236c9c64715afa375b2af296c73d91c23eab2"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e852baafceff8da3c9defae29414cc8513a1586ad93e45f27b89a639c68e8176"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18566beb5acd76f3769c1d1a7ec06cdb81edc4d55d2765fb677e3eaa10fa99e0"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd2218d5a3aa43060efe649ec564ebedec8ce6ae0a43654b81376216d5ebd42"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf20465e74c6e17a104ecf01bf8cd3b7b252565b4ccee4548f18b012ff2f8069"},
|
||||
{file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba7f67aa7f983c4345eeda16054a4677289011a478ca947cd69c0a86ea45e534"},
|
||||
{file = "orjson-3.10.3-cp312-none-win32.whl", hash = "sha256:17e0713fc159abc261eea0f4feda611d32eabc35708b74bef6ad44f6c78d5ea0"},
|
||||
{file = "orjson-3.10.3-cp312-none-win_amd64.whl", hash = "sha256:4c895383b1ec42b017dd2c75ae8a5b862fc489006afde06f14afbdd0309b2af0"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:be2719e5041e9fb76c8c2c06b9600fe8e8584e6980061ff88dcbc2691a16d20d"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0175a5798bdc878956099f5c54b9837cb62cfbf5d0b86ba6d77e43861bcec2"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:978be58a68ade24f1af7758626806e13cff7748a677faf95fbb298359aa1e20d"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16bda83b5c61586f6f788333d3cf3ed19015e3b9019188c56983b5a299210eb5"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ad1f26bea425041e0a1adad34630c4825a9e3adec49079b1fb6ac8d36f8b754"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9e253498bee561fe85d6325ba55ff2ff08fb5e7184cd6a4d7754133bd19c9195"},
|
||||
{file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a62f9968bab8a676a164263e485f30a0b748255ee2f4ae49a0224be95f4532b"},
|
||||
{file = "orjson-3.10.3-cp38-none-win32.whl", hash = "sha256:8d0b84403d287d4bfa9bf7d1dc298d5c1c5d9f444f3737929a66f2fe4fb8f134"},
|
||||
{file = "orjson-3.10.3-cp38-none-win_amd64.whl", hash = "sha256:8bc7a4df90da5d535e18157220d7915780d07198b54f4de0110eca6b6c11e290"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9059d15c30e675a58fdcd6f95465c1522b8426e092de9fff20edebfdc15e1cb0"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d40c7f7938c9c2b934b297412c067936d0b54e4b8ab916fd1a9eb8f54c02294"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a654ec1de8fdaae1d80d55cee65893cb06494e124681ab335218be6a0691e7"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:831c6ef73f9aa53c5f40ae8f949ff7681b38eaddb6904aab89dca4d85099cb78"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99b880d7e34542db89f48d14ddecbd26f06838b12427d5a25d71baceb5ba119d"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e5e176c994ce4bd434d7aafb9ecc893c15f347d3d2bbd8e7ce0b63071c52e25"},
|
||||
{file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b69a58a37dab856491bf2d3bbf259775fdce262b727f96aafbda359cb1d114d8"},
|
||||
{file = "orjson-3.10.3-cp39-none-win32.whl", hash = "sha256:b8d4d1a6868cde356f1402c8faeb50d62cee765a1f7ffcfd6de732ab0581e063"},
|
||||
{file = "orjson-3.10.3-cp39-none-win_amd64.whl", hash = "sha256:5102f50c5fc46d94f2033fe00d392588564378260d64377aec702f21a7a22912"},
|
||||
{file = "orjson-3.10.3.tar.gz", hash = "sha256:2b166507acae7ba2f7c315dcf185a9111ad5e992ac81f2d507aac39193c2c818"},
|
||||
]
|
||||
|
||||
[[package]]
@@ -1926,17 +1926,16 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"

[[package]]
name = "pygments"
version = "2.17.2"
version = "2.18.0"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
{file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"},
{file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"},
{file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"},
{file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"},
]

[package.extras]
plugins = ["importlib-metadata"]
windows-terminal = ["colorama (>=0.4.6)"]

[[package]]
@@ -2280,13 +2279,13 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""}

[[package]]
name = "qtconsole"
version = "5.5.1"
version = "5.5.2"
description = "Jupyter Qt console"
optional = false
python-versions = ">= 3.8"
python-versions = ">=3.8"
files = [
{file = "qtconsole-5.5.1-py3-none-any.whl", hash = "sha256:8c75fa3e9b4ed884880ff7cea90a1b67451219279ec33deaee1d59e3df1a5d2b"},
{file = "qtconsole-5.5.1.tar.gz", hash = "sha256:a0e806c6951db9490628e4df80caec9669b65149c7ba40f9bf033c025a5b56bc"},
{file = "qtconsole-5.5.2-py3-none-any.whl", hash = "sha256:42d745f3d05d36240244a04e1e1ec2a86d5d9b6edb16dbdef582ccb629e87e0b"},
{file = "qtconsole-5.5.2.tar.gz", hash = "sha256:6b5fb11274b297463706af84dcbbd5c92273b1f619e6d25d08874b0a88516989"},
]

[package.dependencies]
@@ -2402,110 +2401,110 @@ files = [

[[package]]
name = "rpds-py"
version = "0.18.0"
version = "0.18.1"
description = "Python bindings to Rust's persistent data structures (rpds)"
optional = false
python-versions = ">=3.8"
files = [
{file = "rpds_py-0.18.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e"},
{file = "rpds_py-0.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1"},
{file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434"},
{file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3"},
{file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e"},
{file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88"},
{file = "rpds_py-0.18.0-cp310-none-win32.whl", hash = "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337"},
{file = "rpds_py-0.18.0-cp310-none-win_amd64.whl", hash = "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66"},
{file = "rpds_py-0.18.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4"},
{file = "rpds_py-0.18.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5"},
{file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6"},
{file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688"},
{file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b"},
{file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836"},
{file = "rpds_py-0.18.0-cp311-none-win32.whl", hash = "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1"},
{file = "rpds_py-0.18.0-cp311-none-win_amd64.whl", hash = "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa"},
{file = "rpds_py-0.18.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0"},
{file = "rpds_py-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3"},
{file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157"},
{file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496"},
{file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f"},
{file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7"},
{file = "rpds_py-0.18.0-cp312-none-win32.whl", hash = "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98"},
{file = "rpds_py-0.18.0-cp312-none-win_amd64.whl", hash = "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec"},
{file = "rpds_py-0.18.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e"},
{file = "rpds_py-0.18.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d"},
{file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c"},
{file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f"},
{file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c"},
{file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594"},
{file = "rpds_py-0.18.0-cp38-none-win32.whl", hash = "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e"},
{file = "rpds_py-0.18.0-cp38-none-win_amd64.whl", hash = "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1"},
{file = "rpds_py-0.18.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33"},
{file = "rpds_py-0.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9"},
{file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f"},
{file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e"},
{file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024"},
{file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20"},
{file = "rpds_py-0.18.0-cp39-none-win32.whl", hash = "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7"},
{file = "rpds_py-0.18.0-cp39-none-win_amd64.whl", hash = "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984"},
{file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da"},
{file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432"},
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f"},
{file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"},
{file = "rpds_py-0.18.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:d31dea506d718693b6b2cffc0648a8929bdc51c70a311b2770f09611caa10d53"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:732672fbc449bab754e0b15356c077cc31566df874964d4801ab14f71951ea80"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a98a1f0552b5f227a3d6422dbd61bc6f30db170939bd87ed14f3c339aa6c7c9"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f1944ce16401aad1e3f7d312247b3d5de7981f634dc9dfe90da72b87d37887d"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38e14fb4e370885c4ecd734f093a2225ee52dc384b86fa55fe3f74638b2cfb09"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08d74b184f9ab6289b87b19fe6a6d1a97fbfea84b8a3e745e87a5de3029bf944"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d70129cef4a8d979caa37e7fe957202e7eee8ea02c5e16455bc9808a59c6b2f0"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0bb20e3a11bd04461324a6a798af34d503f8d6f1aa3d2aa8901ceaf039176d"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81c5196a790032e0fc2464c0b4ab95f8610f96f1f2fa3d4deacce6a79852da60"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f3027be483868c99b4985fda802a57a67fdf30c5d9a50338d9db646d590198da"},
|
||||
{file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d44607f98caa2961bab4fa3c4309724b185b464cdc3ba6f3d7340bac3ec97cc1"},
|
||||
{file = "rpds_py-0.18.1-cp310-none-win32.whl", hash = "sha256:c273e795e7a0f1fddd46e1e3cb8be15634c29ae8ff31c196debb620e1edb9333"},
|
||||
{file = "rpds_py-0.18.1-cp310-none-win_amd64.whl", hash = "sha256:8352f48d511de5f973e4f2f9412736d7dea76c69faa6d36bcf885b50c758ab9a"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6b5ff7e1d63a8281654b5e2896d7f08799378e594f09cf3674e832ecaf396ce8"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8927638a4d4137a289e41d0fd631551e89fa346d6dbcfc31ad627557d03ceb6d"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:154bf5c93d79558b44e5b50cc354aa0459e518e83677791e6adb0b039b7aa6a7"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07f2139741e5deb2c5154a7b9629bc5aa48c766b643c1a6750d16f865a82c5fc"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c7672e9fba7425f79019db9945b16e308ed8bc89348c23d955c8c0540da0a07"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:489bdfe1abd0406eba6b3bb4fdc87c7fa40f1031de073d0cfb744634cc8fa261"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c20f05e8e3d4fc76875fc9cb8cf24b90a63f5a1b4c5b9273f0e8225e169b100"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:967342e045564cef76dfcf1edb700b1e20838d83b1aa02ab313e6a497cf923b8"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cc7c1a47f3a63282ab0f422d90ddac4aa3034e39fc66a559ab93041e6505da7"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f7afbfee1157e0f9376c00bb232e80a60e59ed716e3211a80cb8506550671e6e"},
|
||||
{file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e6934d70dc50f9f8ea47081ceafdec09245fd9f6032669c3b45705dea096b88"},
|
||||
{file = "rpds_py-0.18.1-cp311-none-win32.whl", hash = "sha256:c69882964516dc143083d3795cb508e806b09fc3800fd0d4cddc1df6c36e76bb"},
|
||||
{file = "rpds_py-0.18.1-cp311-none-win_amd64.whl", hash = "sha256:70a838f7754483bcdc830444952fd89645569e7452e3226de4a613a4c1793fb2"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3dd3cd86e1db5aadd334e011eba4e29d37a104b403e8ca24dcd6703c68ca55b3"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05f3d615099bd9b13ecf2fc9cf2d839ad3f20239c678f461c753e93755d629ee"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35b2b771b13eee8729a5049c976197ff58a27a3829c018a04341bcf1ae409b2b"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee17cd26b97d537af8f33635ef38be873073d516fd425e80559f4585a7b90c43"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b646bf655b135ccf4522ed43d6902af37d3f5dbcf0da66c769a2b3938b9d8184"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19ba472b9606c36716062c023afa2484d1e4220548751bda14f725a7de17b4f6"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e30ac5e329098903262dc5bdd7e2086e0256aa762cc8b744f9e7bf2a427d3f8"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d58ad6317d188c43750cb76e9deacf6051d0f884d87dc6518e0280438648a9ac"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e1735502458621921cee039c47318cb90b51d532c2766593be6207eec53e5c4c"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f5bab211605d91db0e2995a17b5c6ee5edec1270e46223e513eaa20da20076ac"},
|
||||
{file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2fc24a329a717f9e2448f8cd1f960f9dac4e45b6224d60734edeb67499bab03a"},
|
||||
{file = "rpds_py-0.18.1-cp312-none-win32.whl", hash = "sha256:1805d5901779662d599d0e2e4159d8a82c0b05faa86ef9222bf974572286b2b6"},
|
||||
{file = "rpds_py-0.18.1-cp312-none-win_amd64.whl", hash = "sha256:720edcb916df872d80f80a1cc5ea9058300b97721efda8651efcd938a9c70a72"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:c827576e2fa017a081346dce87d532a5310241648eb3700af9a571a6e9fc7e74"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:aa3679e751408d75a0b4d8d26d6647b6d9326f5e35c00a7ccd82b78ef64f65f8"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0abeee75434e2ee2d142d650d1e54ac1f8b01e6e6abdde8ffd6eeac6e9c38e20"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed402d6153c5d519a0faf1bb69898e97fb31613b49da27a84a13935ea9164dfc"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:338dee44b0cef8b70fd2ef54b4e09bb1b97fc6c3a58fea5db6cc083fd9fc2724"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7750569d9526199c5b97e5a9f8d96a13300950d910cf04a861d96f4273d5b104"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:607345bd5912aacc0c5a63d45a1f73fef29e697884f7e861094e443187c02be5"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:207c82978115baa1fd8d706d720b4a4d2b0913df1c78c85ba73fe6c5804505f0"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6d1e42d2735d437e7e80bab4d78eb2e459af48c0a46e686ea35f690b93db792d"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5463c47c08630007dc0fe99fb480ea4f34a89712410592380425a9b4e1611d8e"},
|
||||
{file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:06d218939e1bf2ca50e6b0ec700ffe755e5216a8230ab3e87c059ebb4ea06afc"},
|
||||
{file = "rpds_py-0.18.1-cp38-none-win32.whl", hash = "sha256:312fe69b4fe1ffbe76520a7676b1e5ac06ddf7826d764cc10265c3b53f96dbe9"},
|
||||
{file = "rpds_py-0.18.1-cp38-none-win_amd64.whl", hash = "sha256:9437ca26784120a279f3137ee080b0e717012c42921eb07861b412340f85bae2"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:19e515b78c3fc1039dd7da0a33c28c3154458f947f4dc198d3c72db2b6b5dc93"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7b28c5b066bca9a4eb4e2f2663012debe680f097979d880657f00e1c30875a0"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:673fdbbf668dd958eff750e500495ef3f611e2ecc209464f661bc82e9838991e"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d960de62227635d2e61068f42a6cb6aae91a7fe00fca0e3aeed17667c8a34611"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:352a88dc7892f1da66b6027af06a2e7e5d53fe05924cc2cfc56495b586a10b72"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e0ee01ad8260184db21468a6e1c37afa0529acc12c3a697ee498d3c2c4dcaf3"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aaa71ee43a703c321906813bb252f69524f02aa05bf4eec85f0c41d5d62d0f4c"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6cd8098517c64a85e790657e7b1e509b9fe07487fd358e19431cb120f7d96338"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4adec039b8e2928983f885c53b7cc4cda8965b62b6596501a0308d2703f8af1b"},
|
||||
{file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:32b7daaa3e9389db3695964ce8e566e3413b0c43e3394c05e4b243a4cd7bef26"},
|
||||
{file = "rpds_py-0.18.1-cp39-none-win32.whl", hash = "sha256:2625f03b105328729f9450c8badda34d5243231eef6535f80064d57035738360"},
|
||||
{file = "rpds_py-0.18.1-cp39-none-win_amd64.whl", hash = "sha256:bf18932d0003c8c4d51a39f244231986ab23ee057d235a12b2684ea26a353590"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cbfbea39ba64f5e53ae2915de36f130588bba71245b418060ec3330ebf85678e"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3d456ff2a6a4d2adcdf3c1c960a36f4fd2fec6e3b4902a42a384d17cf4e7a65"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7700936ef9d006b7ef605dc53aa364da2de5a3aa65516a1f3ce73bf82ecfc7ae"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:51584acc5916212e1bf45edd17f3a6b05fe0cbb40482d25e619f824dccb679de"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:942695a206a58d2575033ff1e42b12b2aece98d6003c6bc739fbf33d1773b12f"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b906b5f58892813e5ba5c6056d6a5ad08f358ba49f046d910ad992196ea61397"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f8e3fecca256fefc91bb6765a693d96692459d7d4c644660a9fff32e517843"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7732770412bab81c5a9f6d20aeb60ae943a9b36dcd990d876a773526468e7163"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:bd1105b50ede37461c1d51b9698c4f4be6e13e69a908ab7751e3807985fc0346"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:618916f5535784960f3ecf8111581f4ad31d347c3de66d02e728de460a46303c"},
|
||||
{file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:17c6d2155e2423f7e79e3bb18151c686d40db42d8645e7977442170c360194d4"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6c4c4c3f878df21faf5fac86eda32671c27889e13570645a9eea0a1abdd50922"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:fab6ce90574645a0d6c58890e9bcaac8d94dff54fb51c69e5522a7358b80ab64"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:531796fb842b53f2695e94dc338929e9f9dbf473b64710c28af5a160b2a8927d"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:740884bc62a5e2bbb31e584f5d23b32320fd75d79f916f15a788d527a5e83644"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:998125738de0158f088aef3cb264a34251908dd2e5d9966774fdab7402edfab7"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2be6e9dd4111d5b31ba3b74d17da54a8319d8168890fbaea4b9e5c3de630ae5"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0cee71bc618cd93716f3c1bf56653740d2d13ddbd47673efa8bf41435a60daa"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c3caec4ec5cd1d18e5dd6ae5194d24ed12785212a90b37f5f7f06b8bedd7139"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:27bba383e8c5231cd559affe169ca0b96ec78d39909ffd817f28b166d7ddd4d8"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:a888e8bdb45916234b99da2d859566f1e8a1d2275a801bb8e4a9644e3c7e7909"},
|
||||
{file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6031b25fb1b06327b43d841f33842b383beba399884f8228a6bb3df3088485ff"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48c2faaa8adfacefcbfdb5f2e2e7bdad081e5ace8d182e5f4ade971f128e6bb3"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d85164315bd68c0806768dc6bb0429c6f95c354f87485ee3593c4f6b14def2bd"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6afd80f6c79893cfc0574956f78a0add8c76e3696f2d6a15bca2c66c415cf2d4"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa242ac1ff583e4ec7771141606aafc92b361cd90a05c30d93e343a0c2d82a89"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21be4770ff4e08698e1e8e0bce06edb6ea0626e7c8f560bc08222880aca6a6f"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c45a639e93a0c5d4b788b2613bd637468edd62f8f95ebc6fcc303d58ab3f0a8"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:910e71711d1055b2768181efa0a17537b2622afeb0424116619817007f8a2b10"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b9bb1f182a97880f6078283b3505a707057c42bf55d8fca604f70dedfdc0772a"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1d54f74f40b1f7aaa595a02ff42ef38ca654b1469bef7d52867da474243cc633"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:8d2e182c9ee01135e11e9676e9a62dfad791a7a467738f06726872374a83db49"},
|
||||
{file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:636a15acc588f70fda1661234761f9ed9ad79ebed3f2125d44be0862708b666e"},
|
||||
{file = "rpds_py-0.18.1.tar.gz", hash = "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f"},
|
||||
]
|
||||
|
||||
[[package]]

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-core"
version = "0.1.50"
version = "0.1.51"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
