mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 23:29:21 +00:00
community[minor]: Improvements for NeptuneRdfGraph, Improve discovery of graph schema using database statistics (#19546)
Fixes linting for PR [19244](https://github.com/langchain-ai/langchain/pull/19244) --------- Co-authored-by: mhavey <mchavey@gmail.com>
This commit is contained in:
parent
fc6b92bb9a
commit
72ba738bf5
@ -6,7 +6,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Neptune SPARQL QA Chain\n",
|
"# Neptune SPARQL QA Chain\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook shows use of LLM to query RDF graph in Amazon Neptune. This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads it's schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n",
|
"This QA chain queries Resource Description Framework (RDF) data in an Amazon Neptune graph database using the SPARQL query language and returns a human readable response.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads its schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates an example using organizational data.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Requirements for running this notebook:\n",
|
"Requirements for running this notebook:\n",
|
||||||
"- Neptune 1.2.x cluster accessible from this notebook\n",
|
"- Neptune 1.2.x cluster accessible from this notebook\n",
|
||||||
@ -98,6 +103,40 @@
|
|||||||
"## Setup Chain"
|
"## Setup Chain"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install --upgrade --force-reinstall langchain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install --upgrade --force-reinstall langchain-core"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install --upgrade --force-reinstall langchain-community"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"** Restart kernel **"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -209,24 +248,23 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import boto3\n",
|
"import boto3\n",
|
||||||
"from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
"from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||||
"from langchain_community.chat_models import BedrockChat\n",
|
"from langchain.chat_models import BedrockChat\n",
|
||||||
|
"from langchain.llms import Bedrock\n",
|
||||||
"from langchain_community.graphs import NeptuneRdfGraph\n",
|
"from langchain_community.graphs import NeptuneRdfGraph\n",
|
||||||
"\n",
|
"\n",
|
||||||
"host = \"<neptune-host>\"\n",
|
"host = \"<your host>\"\n",
|
||||||
"port = \"<neptune-port>\"\n",
|
"port = 8182 # change if different\n",
|
||||||
"region = \"us-east-1\" # specify region\n",
|
"region = \"us-east-1\" # change if different\n",
|
||||||
|
"graph = NeptuneRdfGraph(host=host, port=port, use_iam_auth=True, region_name=region)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"graph = NeptuneRdfGraph(\n",
|
"# Optionally change the schema\n",
|
||||||
" host=host, port=port, use_iam_auth=True, region_name=region, hide_comments=True\n",
|
"# elems = graph.get_schema_elements\n",
|
||||||
")\n",
|
"# change elems ...\n",
|
||||||
"\n",
|
"# graph.load_schema(elems)\n",
|
||||||
"schema_elements = graph.get_schema_elements\n",
|
|
||||||
"# Optionally, you can update the schema_elements, and\n",
|
|
||||||
"# load the schema from the pruned elements.\n",
|
|
||||||
"graph.load_from_schema_elements(schema_elements)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
|
"MODEL_ID = \"anthropic.claude-v2\"\n",
|
||||||
"bedrock_client = boto3.client(\"bedrock-runtime\")\n",
|
"bedrock_client = boto3.client(\"bedrock-runtime\")\n",
|
||||||
"llm = BedrockChat(model_id=\"anthropic.claude-v2\", client=bedrock_client)\n",
|
"llm = BedrockChat(model_id=MODEL_ID, client=bedrock_client)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"chain = NeptuneSparqlQAChain.from_llm(\n",
|
"chain = NeptuneSparqlQAChain.from_llm(\n",
|
||||||
" llm=llm,\n",
|
" llm=llm,\n",
|
||||||
|
@ -4,55 +4,25 @@ from typing import Any, Dict, Optional, Sequence
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
CLASS_QUERY = """
|
# Query to find OWL datatype properties
|
||||||
SELECT DISTINCT ?elem ?com
|
|
||||||
WHERE {
|
|
||||||
?instance a ?elem .
|
|
||||||
OPTIONAL { ?instance rdf:type/rdfs:subClassOf* ?elem } .
|
|
||||||
#FILTER (isIRI(?elem)) .
|
|
||||||
OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
REL_QUERY = """
|
|
||||||
SELECT DISTINCT ?elem ?com
|
|
||||||
WHERE {
|
|
||||||
?subj ?elem ?obj .
|
|
||||||
OPTIONAL {
|
|
||||||
?elem rdf:type/rdfs:subPropertyOf* ?proptype .
|
|
||||||
VALUES ?proptype { rdf:Property owl:DatatypeProperty owl:ObjectProperty } .
|
|
||||||
} .
|
|
||||||
OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
DTPROP_QUERY = """
|
DTPROP_QUERY = """
|
||||||
SELECT DISTINCT ?elem ?com
|
SELECT DISTINCT ?elem
|
||||||
WHERE {
|
WHERE {
|
||||||
?subj ?elem ?obj .
|
?elem a owl:DatatypeProperty .
|
||||||
OPTIONAL {
|
|
||||||
?elem rdf:type/rdfs:subPropertyOf* ?proptype .
|
|
||||||
?proptype a owl:DatatypeProperty .
|
|
||||||
} .
|
|
||||||
OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")}
|
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Query to find OWL object properties
|
||||||
OPROP_QUERY = """
|
OPROP_QUERY = """
|
||||||
SELECT DISTINCT ?elem ?com
|
SELECT DISTINCT ?elem
|
||||||
WHERE {
|
WHERE {
|
||||||
?subj ?elem ?obj .
|
?elem a owl:ObjectProperty .
|
||||||
OPTIONAL {
|
|
||||||
?elem rdf:type/rdfs:subPropertyOf* ?proptype .
|
|
||||||
?proptype a owl:ObjectProperty .
|
|
||||||
} .
|
|
||||||
OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")}
|
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ELEM_TYPES = {
|
ELEM_TYPES = {
|
||||||
"classes": CLASS_QUERY,
|
"classes": None,
|
||||||
"rels": REL_QUERY,
|
"rels": None,
|
||||||
"dtprops": DTPROP_QUERY,
|
"dtprops": DTPROP_QUERY,
|
||||||
"oprops": OPROP_QUERY,
|
"oprops": OPROP_QUERY,
|
||||||
}
|
}
|
||||||
@ -62,32 +32,33 @@ class NeptuneRdfGraph:
|
|||||||
"""Neptune wrapper for RDF graph operations.
|
"""Neptune wrapper for RDF graph operations.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
host: SPARQL endpoint host for Neptune
|
host: endpoint for the database instance
|
||||||
port: SPARQL endpoint port for Neptune. Defaults 8182.
|
port: port number for the database instance, default is 8182
|
||||||
use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster
|
use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster
|
||||||
region_name: AWS region required if use_iam_auth is True, e.g., us-west-2
|
use_https: whether to use secure connection, default is True
|
||||||
hide_comments: whether to include ontology comments in schema for prompt
|
client: optional boto3 Neptune client
|
||||||
|
credentials_profile_name: optional AWS profile name
|
||||||
|
region_name: optional AWS region, e.g., us-west-2
|
||||||
|
service: optional service name, default is neptunedata
|
||||||
|
sign: optional, whether to sign the request payload, default is True
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
graph = NeptuneRdfGraph(
|
graph = NeptuneRdfGraph(
|
||||||
host='<SPARQL host>',
|
host='<SPARQL host>',
|
||||||
port=<SPARQL port>,
|
port=<SPARQL port>
|
||||||
use_iam_auth=False
|
|
||||||
)
|
)
|
||||||
schema = graph.get_schema()
|
schema = graph.get_schema()
|
||||||
|
|
||||||
OR
|
OR
|
||||||
graph = NeptuneRdfGraph(
|
graph = NeptuneRdfGraph(
|
||||||
host='<SPARQL host>',
|
host='<SPARQL host>',
|
||||||
port=<SPARQL port>,
|
port=<SPARQL port>
|
||||||
use_iam_auth=False
|
|
||||||
)
|
)
|
||||||
schema_elem = graph.get_schema_elements()
|
schema_elem = graph.get_schema_elements()
|
||||||
... change schema_elements ...
|
#... change schema_elements ...
|
||||||
graph.load_schema(schema_elem)
|
graph.load_schema(schema_elem)
|
||||||
schema = graph.get_schema()
|
|
||||||
|
|
||||||
*Security note*: Make sure that the database connection uses credentials
|
*Security note*: Make sure that the database connection uses credentials
|
||||||
that are narrowly-scoped to only include necessary permissions.
|
that are narrowly-scoped to only include necessary permissions.
|
||||||
@ -105,27 +76,67 @@ class NeptuneRdfGraph:
|
|||||||
self,
|
self,
|
||||||
host: str,
|
host: str,
|
||||||
port: int = 8182,
|
port: int = 8182,
|
||||||
|
use_https: bool = True,
|
||||||
use_iam_auth: bool = False,
|
use_iam_auth: bool = False,
|
||||||
|
client: Any = None,
|
||||||
|
credentials_profile_name: Optional[str] = None,
|
||||||
region_name: Optional[str] = None,
|
region_name: Optional[str] = None,
|
||||||
hide_comments: bool = False,
|
service: str = "neptunedata",
|
||||||
|
sign: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.use_iam_auth = use_iam_auth
|
self.use_iam_auth = use_iam_auth
|
||||||
self.region_name = region_name
|
self.region_name = region_name
|
||||||
self.hide_comments = hide_comments
|
|
||||||
self.query_endpoint = f"https://{host}:{port}/sparql"
|
self.query_endpoint = f"https://{host}:{port}/sparql"
|
||||||
|
|
||||||
if self.use_iam_auth:
|
try:
|
||||||
try:
|
if client is not None:
|
||||||
|
self.client = client
|
||||||
|
else:
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
self.session = boto3.Session()
|
if credentials_profile_name is not None:
|
||||||
except ImportError:
|
self.session = boto3.Session(profile_name=credentials_profile_name)
|
||||||
raise ImportError(
|
else:
|
||||||
"Could not import boto3 python package. "
|
# use default credentials
|
||||||
"Please install it with `pip install boto3`."
|
self.session = boto3.Session()
|
||||||
)
|
|
||||||
else:
|
client_params = {}
|
||||||
self.session = None
|
if region_name:
|
||||||
|
client_params["region_name"] = region_name
|
||||||
|
|
||||||
|
protocol = "https" if use_https else "http"
|
||||||
|
|
||||||
|
client_params["endpoint_url"] = f"{protocol}://{host}:{port}"
|
||||||
|
|
||||||
|
if sign:
|
||||||
|
self.client = self.session.client(service, **client_params)
|
||||||
|
else:
|
||||||
|
from botocore import UNSIGNED
|
||||||
|
from botocore.config import Config
|
||||||
|
|
||||||
|
self.client = self.session.client(
|
||||||
|
service,
|
||||||
|
**client_params,
|
||||||
|
config=Config(signature_version=UNSIGNED),
|
||||||
|
)
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
raise ModuleNotFoundError(
|
||||||
|
"Could not import boto3 python package. "
|
||||||
|
"Please install it with `pip install boto3`."
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
if type(e).__name__ == "UnknownServiceError":
|
||||||
|
raise ModuleNotFoundError(
|
||||||
|
"NeptuneGraph requires a boto3 version 1.28.38 or greater."
|
||||||
|
"Please install it with `pip install -U boto3`."
|
||||||
|
) from e
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Could not load credentials to authenticate with AWS client. "
|
||||||
|
"Please check that credentials in the specified "
|
||||||
|
"profile name are valid."
|
||||||
|
) from e
|
||||||
|
|
||||||
# Set schema
|
# Set schema
|
||||||
self.schema = ""
|
self.schema = ""
|
||||||
@ -143,6 +154,12 @@ class NeptuneRdfGraph:
|
|||||||
def get_schema_elements(self) -> Dict[str, Any]:
|
def get_schema_elements(self) -> Dict[str, Any]:
|
||||||
return self.schema_elements
|
return self.schema_elements
|
||||||
|
|
||||||
|
def get_summary(self) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Obtain Neptune statistical summary of classes and predicates in the graph.
|
||||||
|
"""
|
||||||
|
return self.client.get_rdf_graph_summary(mode="detailed")
|
||||||
|
|
||||||
def query(
|
def query(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -197,12 +214,10 @@ class NeptuneRdfGraph:
|
|||||||
elem_str = {}
|
elem_str = {}
|
||||||
for elem in ELEM_TYPES:
|
for elem in ELEM_TYPES:
|
||||||
res_list = []
|
res_list = []
|
||||||
for elem_rec in self.schema_elements[elem]:
|
for elem_rec in schema_elements[elem]:
|
||||||
uri = elem_rec["uri"]
|
uri = elem_rec["uri"]
|
||||||
local = elem_rec["local"]
|
local = elem_rec["local"]
|
||||||
res_str = f"<{uri}> ({local})"
|
res_str = f"<{uri}> ({local})"
|
||||||
if self.hide_comments is False:
|
|
||||||
res_str = res_str + f", {elem_rec['comment']}"
|
|
||||||
res_list.append(res_str)
|
res_list.append(res_str)
|
||||||
elem_str[elem] = ", ".join(res_list)
|
elem_str[elem] = ", ".join(res_list)
|
||||||
|
|
||||||
@ -210,12 +225,12 @@ class NeptuneRdfGraph:
|
|||||||
"In the following, each IRI is followed by the local name and "
|
"In the following, each IRI is followed by the local name and "
|
||||||
"optionally its description in parentheses. \n"
|
"optionally its description in parentheses. \n"
|
||||||
"The graph supports the following node types:\n"
|
"The graph supports the following node types:\n"
|
||||||
f"{elem_str['classes']}"
|
f"{elem_str['classes']}\n"
|
||||||
"The graph supports the following relationships:\n"
|
"The graph supports the following relationships:\n"
|
||||||
f"{elem_str['rels']}"
|
f"{elem_str['rels']}\n"
|
||||||
"The graph supports the following OWL object properties, "
|
"The graph supports the following OWL object properties:\n"
|
||||||
f"{elem_str['dtprops']}"
|
f"{elem_str['dtprops']}\n"
|
||||||
"The graph supports the following OWL data properties, "
|
"The graph supports the following OWL data properties:\n"
|
||||||
f"{elem_str['oprops']}"
|
f"{elem_str['oprops']}"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -238,15 +253,40 @@ class NeptuneRdfGraph:
|
|||||||
"""
|
"""
|
||||||
self.schema_elements["distinct_prefixes"] = {}
|
self.schema_elements["distinct_prefixes"] = {}
|
||||||
|
|
||||||
|
# get summary and build list of classes and rels
|
||||||
|
summary = self.get_summary()
|
||||||
|
reslist = []
|
||||||
|
for c in summary["payload"]["graphSummary"]["classes"]:
|
||||||
|
uri = c
|
||||||
|
tokens = self._get_local_name(uri)
|
||||||
|
elem_record = {"uri": uri, "local": tokens[1]}
|
||||||
|
reslist.append(elem_record)
|
||||||
|
if tokens[0] not in self.schema_elements["distinct_prefixes"]:
|
||||||
|
self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
|
||||||
|
self.schema_elements["classes"] = reslist
|
||||||
|
|
||||||
|
reslist = []
|
||||||
|
for r in summary["payload"]["graphSummary"]["predicates"]:
|
||||||
|
for p in r:
|
||||||
|
uri = p
|
||||||
|
tokens = self._get_local_name(uri)
|
||||||
|
elem_record = {"uri": uri, "local": tokens[1]}
|
||||||
|
reslist.append(elem_record)
|
||||||
|
if tokens[0] not in self.schema_elements["distinct_prefixes"]:
|
||||||
|
self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
|
||||||
|
self.schema_elements["rels"] = reslist
|
||||||
|
|
||||||
|
# get dtprops and oprops too
|
||||||
for elem in ELEM_TYPES:
|
for elem in ELEM_TYPES:
|
||||||
items = self.query(ELEM_TYPES[elem])
|
q = ELEM_TYPES.get(elem)
|
||||||
|
if not q:
|
||||||
|
continue
|
||||||
|
items = self.query(q)
|
||||||
reslist = []
|
reslist = []
|
||||||
for r in items["results"]["bindings"]:
|
for r in items["results"]["bindings"]:
|
||||||
uri = r["elem"]["value"]
|
uri = r["elem"]["value"]
|
||||||
tokens = self._get_local_name(uri)
|
tokens = self._get_local_name(uri)
|
||||||
elem_record = {"uri": uri, "local": tokens[1]}
|
elem_record = {"uri": uri, "local": tokens[1]}
|
||||||
if not self.hide_comments:
|
|
||||||
elem_record["comment"] = r["com"]["value"] if "com" in r else ""
|
|
||||||
reslist.append(elem_record)
|
reslist.append(elem_record)
|
||||||
if tokens[0] not in self.schema_elements["distinct_prefixes"]:
|
if tokens[0] not in self.schema_elements["distinct_prefixes"]:
|
||||||
self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
|
self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
|
||||||
|
Loading…
Reference in New Issue
Block a user