diff --git a/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb b/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb index 3cba274628e..adad232ad6c 100644 --- a/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb @@ -6,7 +6,12 @@ "source": [ "# Neptune SPARQL QA Chain\n", "\n", - "This notebook shows use of LLM to query RDF graph in Amazon Neptune. This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads it's schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n", + "This QA chain queries Resource Description Framework (RDF) data in an Amazon Neptune graph database using the SPARQL query language and returns a human readable response.\n", + "\n", + "\n", + "This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads its schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n", + "\n", + "This notebook demonstrates an example using organizational data.\n", "\n", "Requirements for running this notebook:\n", "- Neptune 1.2.x cluster accessible from this notebook\n", @@ -98,6 +103,40 @@ "## Setup Chain" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade --force-reinstall langchain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade --force-reinstall langchain-core" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade --force-reinstall langchain-community" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Restart kernel **" + ] + }, { "cell_type": "code", "execution_count": null, @@ -209,24 +248,23 @@ "source": [ "import boto3\n", "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n", - "from langchain_community.chat_models import BedrockChat\n", + "from langchain.chat_models import BedrockChat\n", + "from langchain.llms import Bedrock\n", "from langchain_community.graphs import NeptuneRdfGraph\n", "\n", - "host = \"\"\n", - "port = \"\"\n", - "region = \"us-east-1\" # specify region\n", + "host = \"\"\n", + "port = 8182 # change if different\n", + "region = \"us-east-1\" # change if different\n", + "graph = NeptuneRdfGraph(host=host, port=port, use_iam_auth=True, region_name=region)\n", "\n", - "graph = NeptuneRdfGraph(\n", - " host=host, port=port, use_iam_auth=True, region_name=region, hide_comments=True\n", - ")\n", - "\n", - "schema_elements = graph.get_schema_elements\n", - "# Optionally, you can update the schema_elements, and\n", - "# load the schema from the pruned elements.\n", - "graph.load_from_schema_elements(schema_elements)\n", + "# Optionally change the schema\n", + "# elems = graph.get_schema_elements\n", + "# change elems ...\n", + "# graph.load_schema(elems)\n", "\n", + "MODEL_ID = \"anthropic.claude-v2\"\n", "bedrock_client = boto3.client(\"bedrock-runtime\")\n", - "llm = BedrockChat(model_id=\"anthropic.claude-v2\", client=bedrock_client)\n", + "llm = BedrockChat(model_id=MODEL_ID, client=bedrock_client)\n", "\n", "chain = NeptuneSparqlQAChain.from_llm(\n", " llm=llm,\n", diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index b9e0074a366..92a0093989b 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -4,55 +4,25 @@ from typing import Any, Dict, Optional, Sequence import requests -CLASS_QUERY = """ -SELECT DISTINCT ?elem ?com -WHERE { - ?instance a ?elem . - OPTIONAL { ?instance rdf:type/rdfs:subClassOf* ?elem } . - #FILTER (isIRI(?elem)) . - OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} -} -""" - -REL_QUERY = """ -SELECT DISTINCT ?elem ?com -WHERE { - ?subj ?elem ?obj . - OPTIONAL { - ?elem rdf:type/rdfs:subPropertyOf* ?proptype . - VALUES ?proptype { rdf:Property owl:DatatypeProperty owl:ObjectProperty } . - } . - OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} -} -""" - +# Query to find OWL datatype properties DTPROP_QUERY = """ -SELECT DISTINCT ?elem ?com +SELECT DISTINCT ?elem WHERE { - ?subj ?elem ?obj . - OPTIONAL { - ?elem rdf:type/rdfs:subPropertyOf* ?proptype . - ?proptype a owl:DatatypeProperty . - } . - OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} + ?elem a owl:DatatypeProperty . } """ +# Query to find OWL object properties OPROP_QUERY = """ -SELECT DISTINCT ?elem ?com +SELECT DISTINCT ?elem WHERE { - ?subj ?elem ?obj . - OPTIONAL { - ?elem rdf:type/rdfs:subPropertyOf* ?proptype . - ?proptype a owl:ObjectProperty . - } . - OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} + ?elem a owl:ObjectProperty . } """ ELEM_TYPES = { - "classes": CLASS_QUERY, - "rels": REL_QUERY, + "classes": None, + "rels": None, "dtprops": DTPROP_QUERY, "oprops": OPROP_QUERY, } @@ -62,32 +32,33 @@ class NeptuneRdfGraph: """Neptune wrapper for RDF graph operations. Args: - host: SPARQL endpoint host for Neptune - port: SPARQL endpoint port for Neptune. Defaults 8182. + host: endpoint for the database instance + port: port number for the database instance, default is 8182 use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster - region_name: AWS region required if use_iam_auth is True, e.g., us-west-2 - hide_comments: whether to include ontology comments in schema for prompt + use_https: whether to use secure connection, default is True + client: optional boto3 Neptune client + credentials_profile_name: optional AWS profile name + region_name: optional AWS region, e.g., us-west-2 + service: optional service name, default is neptunedata + sign: optional, whether to sign the request payload, default is True Example: .. code-block:: python graph = NeptuneRdfGraph( host=', - port=, - use_iam_auth=False + port= ) schema = graph.get_schema() OR graph = NeptuneRdfGraph( host=', - port=, - use_iam_auth=False + port= ) schema_elem = graph.get_schema_elements() - ... change schema_elements ... + #... change schema_elements ... graph.load_schema(schema_elem) - schema = graph.get_schema() *Security note*: Make sure that the database connection uses credentials that are narrowly-scoped to only include necessary permissions. @@ -105,27 +76,67 @@ class NeptuneRdfGraph: self, host: str, port: int = 8182, + use_https: bool = True, use_iam_auth: bool = False, + client: Any = None, + credentials_profile_name: Optional[str] = None, region_name: Optional[str] = None, - hide_comments: bool = False, + service: str = "neptunedata", + sign: bool = True, ) -> None: self.use_iam_auth = use_iam_auth self.region_name = region_name - self.hide_comments = hide_comments self.query_endpoint = f"https://{host}:{port}/sparql" - if self.use_iam_auth: - try: + try: + if client is not None: + self.client = client + else: import boto3 - self.session = boto3.Session() - except ImportError: - raise ImportError( - "Could not import boto3 python package. " - "Please install it with `pip install boto3`." - ) - else: - self.session = None + if credentials_profile_name is not None: + self.session = boto3.Session(profile_name=credentials_profile_name) + else: + # use default credentials + self.session = boto3.Session() + + client_params = {} + if region_name: + client_params["region_name"] = region_name + + protocol = "https" if use_https else "http" + + client_params["endpoint_url"] = f"{protocol}://{host}:{port}" + + if sign: + self.client = self.session.client(service, **client_params) + else: + from botocore import UNSIGNED + from botocore.config import Config + + self.client = self.session.client( + service, + **client_params, + config=Config(signature_version=UNSIGNED), + ) + + except ImportError: + raise ModuleNotFoundError( + "Could not import boto3 python package. " + "Please install it with `pip install boto3`." + ) + except Exception as e: + if type(e).__name__ == "UnknownServiceError": + raise ModuleNotFoundError( + "NeptuneGraph requires a boto3 version 1.28.38 or greater." + "Please install it with `pip install -U boto3`." + ) from e + else: + raise ValueError( + "Could not load credentials to authenticate with AWS client. " + "Please check that credentials in the specified " + "profile name are valid." + ) from e # Set schema self.schema = "" @@ -143,6 +154,12 @@ class NeptuneRdfGraph: def get_schema_elements(self) -> Dict[str, Any]: return self.schema_elements + def get_summary(self) -> Dict[str, Any]: + """ + Obtain Neptune statistical summary of classes and predicates in the graph. + """ + return self.client.get_rdf_graph_summary(mode="detailed") + def query( self, query: str, @@ -197,12 +214,10 @@ class NeptuneRdfGraph: elem_str = {} for elem in ELEM_TYPES: res_list = [] - for elem_rec in self.schema_elements[elem]: + for elem_rec in schema_elements[elem]: uri = elem_rec["uri"] local = elem_rec["local"] res_str = f"<{uri}> ({local})" - if self.hide_comments is False: - res_str = res_str + f", {elem_rec['comment']}" res_list.append(res_str) elem_str[elem] = ", ".join(res_list) @@ -210,12 +225,12 @@ class NeptuneRdfGraph: "In the following, each IRI is followed by the local name and " "optionally its description in parentheses. \n" "The graph supports the following node types:\n" - f"{elem_str['classes']}" + f"{elem_str['classes']}\n" "The graph supports the following relationships:\n" - f"{elem_str['rels']}" - "The graph supports the following OWL object properties, " - f"{elem_str['dtprops']}" - "The graph supports the following OWL data properties, " + f"{elem_str['rels']}\n" + "The graph supports the following OWL object properties:\n" + f"{elem_str['dtprops']}\n" + "The graph supports the following OWL data properties:\n" f"{elem_str['oprops']}" ) @@ -238,15 +253,40 @@ class NeptuneRdfGraph: """ self.schema_elements["distinct_prefixes"] = {} + # get summary and build list of classes and rels + summary = self.get_summary() + reslist = [] + for c in summary["payload"]["graphSummary"]["classes"]: + uri = c + tokens = self._get_local_name(uri) + elem_record = {"uri": uri, "local": tokens[1]} + reslist.append(elem_record) + if tokens[0] not in self.schema_elements["distinct_prefixes"]: + self.schema_elements["distinct_prefixes"][tokens[0]] = "y" + self.schema_elements["classes"] = reslist + + reslist = [] + for r in summary["payload"]["graphSummary"]["predicates"]: + for p in r: + uri = p + tokens = self._get_local_name(uri) + elem_record = {"uri": uri, "local": tokens[1]} + reslist.append(elem_record) + if tokens[0] not in self.schema_elements["distinct_prefixes"]: + self.schema_elements["distinct_prefixes"][tokens[0]] = "y" + self.schema_elements["rels"] = reslist + + # get dtprops and oprops too for elem in ELEM_TYPES: - items = self.query(ELEM_TYPES[elem]) + q = ELEM_TYPES.get(elem) + if not q: + continue + items = self.query(q) reslist = [] for r in items["results"]["bindings"]: uri = r["elem"]["value"] tokens = self._get_local_name(uri) elem_record = {"uri": uri, "local": tokens[1]} - if not self.hide_comments: - elem_record["comment"] = r["com"]["value"] if "com" in r else "" reslist.append(elem_record) if tokens[0] not in self.schema_elements["distinct_prefixes"]: self.schema_elements["distinct_prefixes"][tokens[0]] = "y"