community[minor]: Improvements for NeptuneRdfGraph, Improve discovery of graph schema using database statistics (#19546)

Fixes linting for PR [19244](https://github.com/langchain-ai/langchain/pull/19244) --------- Co-authored-by: mhavey <mchavey@gmail.com>
2025-08-13 14:50:00 +00:00 · 2024-03-26 07:36:51 -07:00 · 2024-03-26 07:36:51 -07:00 · 72ba738bf5
commit 72ba738bf5
parent fc6b92bb9a
2 changed files with 163 additions and 85 deletions
--- a/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb
+++ b/docs/docs/use_cases/graph/integrations/neptune_sparql_qa.ipynb
@ -6,7 +6,12 @@
   "source": [
    "# Neptune SPARQL QA Chain\n",
    "\n",
-    "This notebook shows use of LLM to query RDF graph in Amazon Neptune. This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads it's schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n",
+    "This QA chain queries Resource Description Framework (RDF) data in an Amazon Neptune graph database using the SPARQL query language and returns a human-readable response.\n",
    "\n",
    "\n",
    "This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads its schema. The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n",
    "\n",
    "This notebook demonstrates an example using organizational data.\n",
    "\n",
    "Requirements for running this notebook:\n",
    "- Neptune 1.2.x cluster accessible from this notebook\n",
@ -98,6 +103,40 @@
    "## Setup Chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install --upgrade --force-reinstall langchain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install --upgrade --force-reinstall langchain-core"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install --upgrade --force-reinstall langchain-community"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Restart kernel**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -209,24 +248,23 @@
   "source": [
    "import boto3\n",
    "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
-    "from langchain_community.chat_models import BedrockChat\n",
+    "from langchain.chat_models import BedrockChat\n",
    "from langchain.llms import Bedrock\n",
    "from langchain_community.graphs import NeptuneRdfGraph\n",
    "\n",
-    "host = \"<neptune-host>\"\n",
+    "host = \"<your host>\"\n",
-    "port = \"<neptune-port>\"\n",
+    "port = 8182  # change if different\n",
-    "region = \"us-east-1\"  # specify region\n",
+    "region = \"us-east-1\"  # change if different\n",
    "graph = NeptuneRdfGraph(host=host, port=port, use_iam_auth=True, region_name=region)\n",
    "\n",
-    "graph = NeptuneRdfGraph(\n",
+    "# Optionally change the schema\n",
-    "    host=host, port=port, use_iam_auth=True, region_name=region, hide_comments=True\n",
+    "# elems = graph.get_schema_elements\n",
-    ")\n",
+    "# change elems ...\n",
-    "\n",
+    "# graph.load_schema(elems)\n",
    "schema_elements = graph.get_schema_elements\n",
    "# Optionally, you can update the schema_elements, and\n",
    "# load the schema from the pruned elements.\n",
    "graph.load_from_schema_elements(schema_elements)\n",
    "\n",
    "MODEL_ID = \"anthropic.claude-v2\"\n",
    "bedrock_client = boto3.client(\"bedrock-runtime\")\n",
-    "llm = BedrockChat(model_id=\"anthropic.claude-v2\", client=bedrock_client)\n",
+    "llm = BedrockChat(model_id=MODEL_ID, client=bedrock_client)\n",
    "\n",
    "chain = NeptuneSparqlQAChain.from_llm(\n",
    "    llm=llm,\n",
--- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py
+++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py
@ -4,55 +4,25 @@ from typing import Any, Dict, Optional, Sequence
 import requests
-CLASS_QUERY = """
+# Query to find OWL datatype properties
 SELECT DISTINCT ?elem ?com
 WHERE { 
 ?instance a ?elem .
 OPTIONAL { ?instance rdf:type/rdfs:subClassOf* ?elem } .
 #FILTER (isIRI(?elem)) .
 OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")}
 }
 """
 REL_QUERY = """
 SELECT DISTINCT ?elem ?com
 WHERE { 
 ?subj ?elem ?obj . 
 OPTIONAL { 
     ?elem rdf:type/rdfs:subPropertyOf* ?proptype .
     VALUES  ?proptype  { rdf:Property owl:DatatypeProperty owl:ObjectProperty } .
 } . 
 OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} 
 }
 """
 DTPROP_QUERY = """
-SELECT DISTINCT ?elem ?com
+SELECT DISTINCT ?elem 
 WHERE { 
- ?subj ?elem ?obj . 
+ ?elem a owl:DatatypeProperty . 
 OPTIONAL { 
     ?elem rdf:type/rdfs:subPropertyOf* ?proptype .
     ?proptype  a owl:DatatypeProperty .
 } . 
 OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} 
 }
 """
 # Query to find OWL object properties
 OPROP_QUERY = """
-SELECT DISTINCT ?elem ?com
+SELECT DISTINCT ?elem 
 WHERE { 
- ?subj ?elem ?obj . 
+ ?elem a owl:ObjectProperty . 
 OPTIONAL { 
     ?elem rdf:type/rdfs:subPropertyOf* ?proptype .
     ?proptype  a owl:ObjectProperty .
 } . 
 OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} 
 }
 """
 ELEM_TYPES = {
-    "classes": CLASS_QUERY,
+    "classes": None,
-    "rels": REL_QUERY,
+    "rels": None,
    "dtprops": DTPROP_QUERY,
    "oprops": OPROP_QUERY,
 }
@ -62,32 +32,33 @@ class NeptuneRdfGraph:
    """Neptune wrapper for RDF graph operations.
    Args:
-        host: SPARQL endpoint host for Neptune
+        host: endpoint for the database instance
-        port: SPARQL endpoint port for Neptune. Defaults 8182.
+        port: port number for the database instance, default is 8182
        use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster
-        region_name: AWS region required if use_iam_auth is True, e.g., us-west-2
+        use_https: whether to use secure connection, default is True
-        hide_comments: whether to include ontology comments in schema for prompt
+        client: optional boto3 Neptune client
        credentials_profile_name: optional AWS profile name
        region_name: optional AWS region, e.g., us-west-2
        service: optional service name, default is neptunedata
        sign: optional, whether to sign the request payload, default is True
    Example:
        .. code-block:: python
        graph = NeptuneRdfGraph(
            host='<SPARQL host>',
-            port=<SPARQL port>,
+            port=<SPARQL port>
            use_iam_auth=False
        )
        schema = graph.get_schema()
        OR
        graph = NeptuneRdfGraph(
            host='<SPARQL host>',
-            port=<SPARQL port>,
+            port=<SPARQL port>
            use_iam_auth=False
        )
        schema_elem = graph.get_schema_elements()
-        ... change schema_elements ...
+        #... change schema_elements ...
        graph.load_schema(schema_elem)
        schema = graph.get_schema()
    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
@ -105,27 +76,67 @@ class NeptuneRdfGraph:
        self,
        host: str,
        port: int = 8182,
        use_https: bool = True,
        use_iam_auth: bool = False,
        client: Any = None,
        credentials_profile_name: Optional[str] = None,
        region_name: Optional[str] = None,
-        hide_comments: bool = False,
+        service: str = "neptunedata",
        sign: bool = True,
    ) -> None:
        self.use_iam_auth = use_iam_auth
        self.region_name = region_name
        self.hide_comments = hide_comments
        self.query_endpoint = f"https://{host}:{port}/sparql"
-        if self.use_iam_auth:
+        try:
-            try:
+            if client is not None:
                self.client = client
            else:
                import boto3
-                self.session = boto3.Session()
+                if credentials_profile_name is not None:
-            except ImportError:
+                    self.session = boto3.Session(profile_name=credentials_profile_name)
-                raise ImportError(
+                else:
-                    "Could not import boto3 python package. "
+                    # use default credentials
-                    "Please install it with `pip install boto3`."
+                    self.session = boto3.Session()
-                )
+
-        else:
+                client_params = {}
-            self.session = None
+                if region_name:
                    client_params["region_name"] = region_name
                protocol = "https" if use_https else "http"
                client_params["endpoint_url"] = f"{protocol}://{host}:{port}"
                if sign:
                    self.client = self.session.client(service, **client_params)
                else:
                    from botocore import UNSIGNED
                    from botocore.config import Config
                    self.client = self.session.client(
                        service,
                        **client_params,
                        config=Config(signature_version=UNSIGNED),
                    )
        except ImportError:
            raise ModuleNotFoundError(
                "Could not import boto3 python package. "
                "Please install it with `pip install boto3`."
            )
        except Exception as e:
            if type(e).__name__ == "UnknownServiceError":
                raise ModuleNotFoundError(
                    "NeptuneGraph requires a boto3 version 1.28.38 or greater."
                    "Please install it with `pip install -U boto3`."
                ) from e
            else:
                raise ValueError(
                    "Could not load credentials to authenticate with AWS client. "
                    "Please check that credentials in the specified "
                    "profile name are valid."
                ) from e
        # Set schema
        self.schema = ""
@ -143,6 +154,12 @@ class NeptuneRdfGraph:
    def get_schema_elements(self) -> Dict[str, Any]:
        return self.schema_elements
    def get_summary(self) -> Dict[str, Any]:
        """
        Obtain Neptune statistical summary of classes and predicates in the graph.

        Calls ``get_rdf_graph_summary`` on ``self.client`` with
        ``mode="detailed"`` and returns the response unchanged.

        Returns:
            The summary response. The schema-loading code in this class reads
            ``["payload"]["graphSummary"]["classes"]`` and
            ``["payload"]["graphSummary"]["predicates"]`` from it.
        """
        return self.client.get_rdf_graph_summary(mode="detailed")
    def query(
        self,
        query: str,
@ -197,12 +214,10 @@ class NeptuneRdfGraph:
        elem_str = {}
        for elem in ELEM_TYPES:
            res_list = []
-            for elem_rec in self.schema_elements[elem]:
+            for elem_rec in schema_elements[elem]:
                uri = elem_rec["uri"]
                local = elem_rec["local"]
                res_str = f"<{uri}> ({local})"
                if self.hide_comments is False:
                    res_str = res_str + f", {elem_rec['comment']}"
                res_list.append(res_str)
            elem_str[elem] = ", ".join(res_list)
@ -210,12 +225,12 @@ class NeptuneRdfGraph:
            "In the following, each IRI is followed by the local name and "
            "optionally its description in parentheses. \n"
            "The graph supports the following node types:\n"
-            f"{elem_str['classes']}"
+            f"{elem_str['classes']}\n"
            "The graph supports the following relationships:\n"
-            f"{elem_str['rels']}"
+            f"{elem_str['rels']}\n"
-            "The graph supports the following OWL object properties, "
+            "The graph supports the following OWL object properties:\n"
-            f"{elem_str['dtprops']}"
+            f"{elem_str['dtprops']}\n"
-            "The graph supports the following OWL data properties, "
+            "The graph supports the following OWL data properties:\n"
            f"{elem_str['oprops']}"
        )
@ -238,15 +253,40 @@ class NeptuneRdfGraph:
        """
        self.schema_elements["distinct_prefixes"] = {}
        # get summary and build list of classes and rels
        summary = self.get_summary()
        reslist = []
        for c in summary["payload"]["graphSummary"]["classes"]:
            uri = c
            tokens = self._get_local_name(uri)
            elem_record = {"uri": uri, "local": tokens[1]}
            reslist.append(elem_record)
            if tokens[0] not in self.schema_elements["distinct_prefixes"]:
                self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
        self.schema_elements["classes"] = reslist
        reslist = []
        for r in summary["payload"]["graphSummary"]["predicates"]:
            for p in r:
                uri = p
                tokens = self._get_local_name(uri)
                elem_record = {"uri": uri, "local": tokens[1]}
                reslist.append(elem_record)
                if tokens[0] not in self.schema_elements["distinct_prefixes"]:
                    self.schema_elements["distinct_prefixes"][tokens[0]] = "y"
        self.schema_elements["rels"] = reslist
        # get dtprops and oprops too
        for elem in ELEM_TYPES:
-            items = self.query(ELEM_TYPES[elem])
+            q = ELEM_TYPES.get(elem)
            if not q:
                continue
            items = self.query(q)
            reslist = []
            for r in items["results"]["bindings"]:
                uri = r["elem"]["value"]
                tokens = self._get_local_name(uri)
                elem_record = {"uri": uri, "local": tokens[1]}
                if not self.hide_comments:
                    elem_record["comment"] = r["com"]["value"] if "com" in r else ""
                reslist.append(elem_record)
                if tokens[0] not in self.schema_elements["distinct_prefixes"]:
                    self.schema_elements["distinct_prefixes"][tokens[0]] = "y"