langchain[minor], community[minor]: Implement Ontotext GraphDB QA Chain (#16019)

- **Description:** Implement Ontotext GraphDB QA Chain
- **Issue:** N/A
- **Dependencies:** N/A
- **Twitter handle:** @OntotextGraphDB
Neli Hateva
2024-01-29 22:25:53 +02:00
committed by GitHub
parent a08f9a7ff9
commit c95facc293
31 changed files with 2170 additions and 14 deletions

View File

@@ -9,6 +9,7 @@ from langchain_community.graphs.nebula_graph import NebulaGraph
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_community.graphs.neptune_graph import NeptuneGraph
from langchain_community.graphs.networkx_graph import NetworkxEntityGraph
from langchain_community.graphs.ontotext_graphdb_graph import OntotextGraphDBGraph
from langchain_community.graphs.rdf_graph import RdfGraph
from langchain_community.graphs.tigergraph_graph import TigerGraph
@@ -24,4 +25,5 @@ __all__ = [
"ArangoGraph",
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
]

View File

@@ -0,0 +1,213 @@
from __future__ import annotations

import os
from typing import (
    TYPE_CHECKING,
    List,
    Optional,
    Union,
)

if TYPE_CHECKING:
    import rdflib


class OntotextGraphDBGraph:
    """Ontotext GraphDB https://graphdb.ontotext.com/ wrapper for graph operations.

    *Security note*: Make sure that the database connection uses credentials
    that are narrowly-scoped to only include necessary permissions.
    Failure to do so may result in data corruption or loss, since the calling
    code may attempt commands that would result in deletion, mutation
    of data if appropriately prompted or reading sensitive data if such
    data is present in the database.

    The best way to guard against such negative outcomes is to (as appropriate)
    limit the permissions granted to the credentials used with this tool.

    See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        query_endpoint: str,
        query_ontology: Optional[str] = None,
        local_file: Optional[str] = None,
        local_file_format: Optional[str] = None,
    ) -> None:
        """
        Set up the GraphDB wrapper

        :param query_endpoint: SPARQL endpoint for queries, read access.
            If GraphDB is secured, set the environment variables
            'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD'.

        :param query_ontology: a `CONSTRUCT` query that is executed
            on the SPARQL endpoint and returns the KG schema statements.
            Example:
            'CONSTRUCT {?s ?p ?o} FROM <https://example.com/ontology/> WHERE {?s ?p ?o}'
            Currently, DESCRIBE queries like
            'PREFIX onto: <https://example.com/ontology/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            DESCRIBE ?term WHERE {
                ?term rdfs:isDefinedBy onto:
            }'
            are not supported, because DESCRIBE returns
            the Symmetric Concise Bounded Description (SCBD),
            i.e. also the incoming class links.
            For large graphs with millions of instances, this is not efficient.
            Check https://github.com/eclipse-rdf4j/rdf4j/issues/4857

        :param local_file: a local RDF ontology file.
            Supported RDF formats:
            Turtle, RDF/XML, JSON-LD, N-Triples, Notation-3, TriG, TriX, N-Quads.
            If the RDF format can't be determined from the file extension,
            pass it explicitly via the `local_file_format` param.

        :param local_file_format: Used if the RDF format can't be determined
            from the local file extension.
            One of "json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"

        Either `query_ontology` or `local_file` should be passed.
        """
        if query_ontology and local_file:
            raise ValueError("Both file and query provided. Only one is allowed.")
        if not query_ontology and not local_file:
            raise ValueError("Neither file nor query provided. One is required.")

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError:
            raise ValueError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            )

        auth = self._get_auth()
        store = sparqlstore.SPARQLStore(auth=auth)
        store.open(query_endpoint)

        self.graph = rdflib.Graph(store, identifier=None, bind_namespaces="none")
        self._check_connectivity()

        if local_file:
            ontology_schema_graph = self._load_ontology_schema_from_file(
                local_file, local_file_format
            )
        else:
            self._validate_user_query(query_ontology)
            ontology_schema_graph = self._load_ontology_schema_with_query(
                query_ontology
            )

        self.schema = ontology_schema_graph.serialize(format="turtle")

    @staticmethod
    def _get_auth() -> Union[tuple, None]:
        """
        Returns the basic authentication configuration
        """
        username = os.environ.get("GRAPHDB_USERNAME", None)
        password = os.environ.get("GRAPHDB_PASSWORD", None)

        if username:
            if not password:
                raise ValueError(
                    "Environment variable 'GRAPHDB_USERNAME' is set, "
                    "but 'GRAPHDB_PASSWORD' is not set."
                )
            else:
                return username, password
        return None

    def _check_connectivity(self) -> None:
        """
        Executes a simple `ASK` query to check connectivity
        """
        try:
            self.graph.query("ASK { ?s ?p ?o }")
        except ValueError:
            raise ValueError(
                "Could not query the provided endpoint. "
                "Please, check, if the value of the provided "
                "query_endpoint points to the right repository. "
                "If GraphDB is secured, please, "
                "make sure that the environment variables "
                "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
            )

    @staticmethod
    def _load_ontology_schema_from_file(
        local_file: str, local_file_format: Optional[str] = None
    ):
        """
        Parse the ontology schema statements from the provided file
        """
        import rdflib

        if not os.path.exists(local_file):
            raise FileNotFoundError(f"File {local_file} does not exist.")
        if not os.access(local_file, os.R_OK):
            raise PermissionError(f"Read permission for {local_file} is restricted")

        graph = rdflib.ConjunctiveGraph()
        try:
            graph.parse(local_file, format=local_file_format)
        except Exception as e:
            raise ValueError(f"Invalid file format for {local_file} : ", e)
        return graph

    @staticmethod
    def _validate_user_query(query_ontology: str) -> None:
        """
        Validate the query is a valid SPARQL CONSTRUCT query
        """
        from pyparsing import ParseException
        from rdflib.plugins.sparql import prepareQuery

        if not isinstance(query_ontology, str):
            raise TypeError("Ontology query must be provided as string.")

        try:
            parsed_query = prepareQuery(query_ontology)
        except ParseException as e:
            raise ValueError("Ontology query is not a valid SPARQL query.", e)

        if parsed_query.algebra.name != "ConstructQuery":
            raise ValueError(
                "Invalid query type. Only CONSTRUCT queries are supported."
            )

    def _load_ontology_schema_with_query(self, query: str):
        """
        Execute the query for collecting the ontology schema statements
        """
        from rdflib.exceptions import ParserError

        try:
            results = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}")

        return results.graph

    @property
    def get_schema(self) -> str:
        """
        Returns the schema of the graph database in turtle format
        """
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """
        Query the graph.
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}")

        return [r for r in res if isinstance(r, ResultRow)]
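
A minimal usage sketch of the wrapper (not part of this commit): it assumes a GraphDB instance at http://localhost:7200 with a repository named "langchain", as used in the integration tests further below; the queries are illustrative.

# Hypothetical example; endpoint, repository, and queries are assumptions for illustration.
from langchain_community.graphs import OntotextGraphDBGraph

graph = OntotextGraphDBGraph(
    query_endpoint="http://localhost:7200/repositories/langchain",
    query_ontology="CONSTRUCT {?s ?p ?o} "
    "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
)

# Ontology statements returned by the CONSTRUCT query, serialized as Turtle
print(graph.get_schema)

# Arbitrary SELECT query against the repository; only result rows are returned
rows = graph.query(
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
    "SELECT ?label WHERE { ?s rdfs:label ?label } LIMIT 5"
)
for row in rows:
    print(row.label)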

View File

@@ -3433,7 +3433,6 @@ files = [
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"},
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"},
{file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"},
{file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"},
@@ -6223,6 +6222,7 @@ files = [
{file = "pymongo-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8729dbf25eb32ad0dc0b9bd5e6a0d0b7e5c2dc8ec06ad171088e1896b522a74"},
{file = "pymongo-4.6.1-cp312-cp312-win32.whl", hash = "sha256:3177f783ae7e08aaf7b2802e0df4e4b13903520e8380915e6337cdc7a6ff01d8"},
{file = "pymongo-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:00c199e1c593e2c8b033136d7a08f0c376452bac8a896c923fcd6f419e07bdd2"},
{file = "pymongo-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6dcc95f4bb9ed793714b43f4f23a7b0c57e4ef47414162297d6f650213512c19"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:13552ca505366df74e3e2f0a4f27c363928f3dff0eef9f281eb81af7f29bc3c5"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:77e0df59b1a4994ad30c6d746992ae887f9756a43fc25dec2db515d94cf0222d"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3a7f02a58a0c2912734105e05dedbee4f7507e6f1bd132ebad520be0b11d46fd"},
@@ -7093,6 +7093,27 @@ PyYAML = "*"
Shapely = ">=1.7.1"
six = ">=1.15.0"
[[package]]
name = "rdflib"
version = "7.0.0"
description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
optional = true
python-versions = ">=3.8.1,<4.0.0"
files = [
{file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"},
{file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"},
]
[package.dependencies]
isodate = ">=0.6.0,<0.7.0"
pyparsing = ">=2.1.0,<4"
[package.extras]
berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"]
html = ["html5lib (>=1.0,<2.0)"]
lxml = ["lxml (>=4.3.0,<5.0.0)"]
networkx = ["networkx (>=2.0.0,<3.0.0)"]
[[package]]
name = "referencing"
version = "0.31.1"
@@ -9226,9 +9247,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras]
cli = ["typer"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "064816bab088c1f6ff9902cb998291581b66a6d7762f965ff805b4e0b9b2e7e9"
content-hash = "42d012441d7b42d273e11708b7e12308fc56b169d4d56c4c2511e7469743a983"

View File

@@ -90,6 +90,7 @@ zhipuai = {version = "^1.0.7", optional = true}
elasticsearch = {version = "^8.12.0", optional = true}
hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
[tool.poetry.group.test]
optional = true
@@ -254,7 +255,8 @@ extended_testing = [
"zhipuai",
"elasticsearch",
"hdbcli",
"oci"
"oci",
"rdflib",
]
[tool.ruff]
@@ -303,7 +305,7 @@ markers = [
asyncio_mode = "auto"
[tool.codespell]
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples'
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples,*.trig'
# Ignore latin etc
ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
# whats is a typo but used frequently in queries so kept as is

View File

@@ -0,0 +1,6 @@
FROM ontotext/graphdb:10.5.1
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config.ttl /opt/graphdb/dist/data/repositories/langchain/
COPY starwars-data.trig /
COPY graphdb_create.sh /run.sh
ENTRYPOINT bash /run.sh

View File

@@ -0,0 +1,46 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.

[] a rep:Repository ;
    rep:repositoryID "langchain" ;
    rdfs:label "" ;
    rep:repositoryImpl [
        rep:repositoryType "graphdb:SailRepository" ;
        sr:sailImpl [
            sail:sailType "graphdb:Sail" ;

            graphdb:read-only "false" ;

            # Inference and Validation
            graphdb:ruleset "empty" ;
            graphdb:disable-sameAs "true" ;
            graphdb:check-for-inconsistencies "false" ;

            # Indexing
            graphdb:entity-id-size "32" ;
            graphdb:enable-context-index "false" ;
            graphdb:enablePredicateList "true" ;
            graphdb:enable-fts-index "false" ;
            graphdb:fts-indexes ("default" "iri") ;
            graphdb:fts-string-literals-index "default" ;
            graphdb:fts-iris-index "none" ;

            # Queries and Updates
            graphdb:query-timeout "0" ;
            graphdb:throw-QueryEvaluationException-on-timeout "false" ;
            graphdb:query-limit-results "0" ;

            # Settable in the file but otherwise hidden in the UI and in the RDF4J console
            graphdb:base-URL "http://example.org/owlim#" ;
            graphdb:defaultNS "" ;
            graphdb:imports "" ;
            graphdb:repository-type "file-repository" ;
            graphdb:storage-folder "storage" ;
            graphdb:entity-index-size "10000000" ;
            graphdb:in-memory-literal-properties "true" ;
            graphdb:enable-literal-index "true" ;
        ]
    ].

View File

@@ -0,0 +1,9 @@
version: '3.7'

services:

  graphdb:
    image: graphdb
    container_name: graphdb
    ports:
      - "7200:7200"

View File

@@ -0,0 +1,33 @@
#! /bin/bash
REPOSITORY_ID="langchain"
GRAPHDB_URI="http://localhost:7200/"

echo -e "\nUsing GraphDB: ${GRAPHDB_URI}"

function startGraphDB {
    echo -e "\nStarting GraphDB..."
    exec /opt/graphdb/dist/bin/graphdb
}

function waitGraphDBStart {
    echo -e "\nWaiting for GraphDB to start..."
    for _ in $(seq 1 5); do
        CHECK_RES=$(curl --silent --write-out '%{http_code}' --output /dev/null ${GRAPHDB_URI}/rest/repositories)
        if [ "${CHECK_RES}" = '200' ]; then
            echo -e "\nUp and running"
            break
        fi
        sleep 30s
        echo "CHECK_RES: ${CHECK_RES}"
    done
}

function loadData {
    echo -e "\nImporting starwars-data.trig"
    curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements
}

startGraphDB &
waitGraphDBStart
loadData
wait
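
For reference, the readiness check and data import above can be sketched in Python as well (a hypothetical helper using the `requests` package; the endpoint, repository name, and file path mirror the script and are assumptions, not part of this commit):

# Hypothetical Python equivalent of waitGraphDBStart + loadData, assuming GraphDB on localhost:7200.
import time
import requests

GRAPHDB_URI = "http://localhost:7200"
REPOSITORY_ID = "langchain"

# Poll the REST API until GraphDB responds (what waitGraphDBStart does)
for _ in range(5):
    try:
        if requests.get(f"{GRAPHDB_URI}/rest/repositories").status_code == 200:
            break
    except requests.ConnectionError:
        pass
    time.sleep(30)

# Upload the TriG data to the repository's statements endpoint (what loadData does)
with open("starwars-data.trig", "rb") as f:
    requests.post(
        f"{GRAPHDB_URI}/repositories/{REPOSITORY_ID}/statements",
        data=f,
        headers={"Content-Type": "application/x-trig"},
    )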

View File

@@ -0,0 +1,5 @@
set -ex
docker compose down -v --remove-orphans
docker build --tag graphdb .
docker compose up -d graphdb

View File

@@ -0,0 +1,43 @@
@base <https://swapi.co/resource/>.
@prefix voc: <https://swapi.co/vocabulary/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

{
    <besalisk/71>
        a voc:Besalisk , voc:Character ;
        rdfs:label "Dexter Jettster" ;
        voc:eyeColor "yellow" ;
        voc:gender "male" ;
        voc:height 198.0 ;
        voc:mass 102.0 ;
        voc:skinColor "brown" .
}

<https://swapi.co/ontology/> {
    voc:Character a owl:Class .
    voc:Species a owl:Class .

    voc:Besalisk a voc:Species;
        rdfs:label "Besalisk";
        voc:averageHeight 178.0;
        voc:averageLifespan "75";
        voc:character <https://swapi.co/resource/besalisk/71>;
        voc:language "besalisk";
        voc:skinColor "brown";
        voc:eyeColor "yellow" .

    voc:averageHeight a owl:DatatypeProperty .
    voc:averageLifespan a owl:DatatypeProperty .
    voc:character a owl:ObjectProperty .
    voc:language a owl:DatatypeProperty .
    voc:skinColor a owl:DatatypeProperty .
    voc:eyeColor a owl:DatatypeProperty .
    voc:gender a owl:DatatypeProperty .
    voc:height a owl:DatatypeProperty .
    voc:mass a owl:DatatypeProperty .
}
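
The named graph <https://swapi.co/ontology/> in this dataset holds the 19 schema statements that the example CONSTRUCT ... FROM <https://swapi.co/ontology/> query extracts, which is the count asserted in the tests below. A small rdflib sketch to verify this locally (the file path is an assumption):

# Hypothetical check of the dataset with rdflib; run next to starwars-data.trig.
from rdflib import ConjunctiveGraph, URIRef

dataset = ConjunctiveGraph()
dataset.parse("starwars-data.trig", format="trig")

# The ontology lives in the named graph <https://swapi.co/ontology/>
ontology = dataset.get_context(URIRef("https://swapi.co/ontology/"))
print(len(ontology))  # 19 schema statements
print(len(dataset))   # schema statements plus the instance data in the default graph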

View File

@@ -0,0 +1,181 @@
from pathlib import Path

import pytest

from langchain_community.graphs import OntotextGraphDBGraph

"""
cd libs/community/tests/integration_tests/graphs/docker-compose-ontotext-graphdb
./start.sh
"""


def test_query() -> None:
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    query_results = graph.query(
        "PREFIX voc: <https://swapi.co/vocabulary/> "
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        "SELECT ?eyeColor "
        "WHERE {"
        ' ?besalisk rdfs:label "Dexter Jettster" ; '
        " voc:eyeColor ?eyeColor ."
        "}"
    )
    assert len(query_results) == 1
    assert len(query_results[0]) == 1
    assert str(query_results[0][0]) == "yellow"


def test_get_schema_with_query() -> None:
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import Graph

    assert len(Graph().parse(data=graph.get_schema, format="turtle")) == 19


@pytest.mark.parametrize(
    "rdf_format, file_extension",
    [
        ("json-ld", "json"),
        ("json-ld", "jsonld"),
        ("json-ld", "json-ld"),
        ("xml", "rdf"),
        ("xml", "xml"),
        ("xml", "owl"),
        ("pretty-xml", "xml"),
        ("n3", "n3"),
        ("turtle", "ttl"),
        ("nt", "nt"),
        ("trig", "trig"),
        ("nquads", "nq"),
        ("nquads", "nquads"),
        ("trix", "trix"),
    ],
)
def test_get_schema_from_file(
    tmp_path: Path, rdf_format: str, file_extension: str
) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    # serialize the ontology schema loaded with the query in a local file
    # in various rdf formats and check that this results
    # in the same number of statements
    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / ("starwars-ontology." + file_extension)
    conjunctive_graph.serialize(local_file, format=rdf_format)

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        local_file=str(local_file),
    )
    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )


@pytest.mark.parametrize(
    "rdf_format", ["json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"]
)
def test_get_schema_from_file_with_explicit_rdf_format(
    tmp_path: Path, rdf_format: str
) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    # serialize the ontology schema loaded with the query in a local file
    # in various rdf formats and check that this results
    # in the same number of statements
    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.txt"
    conjunctive_graph.serialize(local_file, format=rdf_format)

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        local_file=str(local_file),
        local_file_format=rdf_format,
    )
    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )


def test_get_schema_from_file_with_wrong_extension(tmp_path: Path) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.trig"
    conjunctive_graph.serialize(local_file, format="nquads")

    with pytest.raises(ValueError):
        OntotextGraphDBGraph(
            query_endpoint="http://localhost:7200/repositories/langchain",
            local_file=str(local_file),
        )

View File

@@ -12,6 +12,7 @@ EXPECTED_ALL = [
"ArangoGraph",
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
]

View File

@@ -0,0 +1,176 @@
import os
import tempfile
import unittest

import pytest


class TestOntotextGraphDBGraph(unittest.TestCase):
    def test_import(self) -> None:
        from langchain_community.graphs import OntotextGraphDBGraph  # noqa: F401
@pytest.mark.requires("rdflib")
def test_validate_user_query_wrong_type(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(TypeError) as e:
OntotextGraphDBGraph._validate_user_query(
[
"PREFIX starwars: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE starwars: ?term "
"WHERE {?term rdfs:isDefinedBy starwars: }"
]
)
self.assertEqual("Ontology query must be provided as string.", str(e.exception))
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_sparql_syntax(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query(
"CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o"
)
self.assertEqual(
"('Ontology query is not a valid SPARQL query.', "
"Expected ConstructQuery, "
"found end of text (at char 70), (line:1, col:71))",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_select(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query("SELECT * { ?s ?p ?o }")
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_ask(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query("ASK { ?s ?p ?o }")
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_describe(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query(
"PREFIX swapi: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE ?term WHERE { ?term rdfs:isDefinedBy swapi: }"
)
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_construct(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
OntotextGraphDBGraph._validate_user_query(
"CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
)
@pytest.mark.requires("rdflib")
def test_check_connectivity(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
query_ontology="PREFIX swapi: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE ?term WHERE {?term rdfs:isDefinedBy swapi: }",
)
self.assertEqual(
"Could not query the provided endpoint. "
"Please, check, if the value of the provided "
"query_endpoint points to the right repository. "
"If GraphDB is secured, please, make sure that the environment variables "
"'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_local_file_does_not_exist(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
non_existing_file = os.path.join("non", "existing", "path", "to", "file.ttl")
with self.assertRaises(FileNotFoundError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(non_existing_file)
self.assertEqual(f"File {non_existing_file} does not exist.", str(e.exception))
@pytest.mark.requires("rdflib")
def test_local_file_no_access(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with tempfile.NamedTemporaryFile() as tmp_file:
tmp_file_name = tmp_file.name
# Set file permissions to write and execute only
os.chmod(tmp_file_name, 0o300)
with self.assertRaises(PermissionError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_name)
self.assertEqual(
f"Read permission for {tmp_file_name} is restricted", str(e.exception)
)
@pytest.mark.requires("rdflib")
def test_local_file_bad_syntax(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with tempfile.TemporaryDirectory() as tempdir:
tmp_file_path = os.path.join(tempdir, "starwars-ontology.trig")
with open(tmp_file_path, "w") as tmp_file:
tmp_file.write("invalid trig")
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_path)
self.assertEqual(
f"('Invalid file format for {tmp_file_path} : '"
", BadSyntax('', 0, 'invalid trig', 0, "
"'expected directive or statement'))",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_both_query_and_local_file_provided(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
query_ontology="CONSTRUCT {?s ?p ?o}"
"FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
local_file="starwars-ontology-wrong.trig",
)
self.assertEqual(
"Both file and query provided. Only one is allowed.", str(e.exception)
)
@pytest.mark.requires("rdflib")
def test_nor_query_nor_local_file_provided(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
)
self.assertEqual(
"Neither file nor query provided. One is required.", str(e.exception)
)