langchain[minor], community[minor]: Implement Ontotext GraphDB QA Chain (#16019)

- **Description:** Implement Ontotext GraphDB QA Chain
- **Issue:** N/A
- **Dependencies:** N/A
- **Twitter handle:** @OntotextGraphDB
Neli Hateva
2024-01-29 22:25:53 +02:00
committed by GitHub
parent a08f9a7ff9
commit c95facc293
31 changed files with 2170 additions and 14 deletions

View File

@@ -9,6 +9,7 @@ from langchain_community.graphs.nebula_graph import NebulaGraph
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_community.graphs.neptune_graph import NeptuneGraph
from langchain_community.graphs.networkx_graph import NetworkxEntityGraph
from langchain_community.graphs.ontotext_graphdb_graph import OntotextGraphDBGraph
from langchain_community.graphs.rdf_graph import RdfGraph
from langchain_community.graphs.tigergraph_graph import TigerGraph
@@ -24,4 +25,5 @@ __all__ = [
"ArangoGraph",
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
]

View File

@@ -0,0 +1,213 @@
from __future__ import annotations

import os
from typing import (
    TYPE_CHECKING,
    List,
    Optional,
    Union,
)

if TYPE_CHECKING:
    import rdflib


class OntotextGraphDBGraph:
    """Ontotext GraphDB https://graphdb.ontotext.com/ wrapper for graph operations.

    *Security note*: Make sure that the database connection uses credentials
    that are narrowly-scoped to only include necessary permissions.
    Failure to do so may result in data corruption or loss, since the calling
    code may attempt commands that would result in deletion, mutation
    of data if appropriately prompted or reading sensitive data if such
    data is present in the database.

    The best way to guard against such negative outcomes is to (as appropriate)
    limit the permissions granted to the credentials used with this tool.

    See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        query_endpoint: str,
        query_ontology: Optional[str] = None,
        local_file: Optional[str] = None,
        local_file_format: Optional[str] = None,
    ) -> None:
        """
        Set up the GraphDB wrapper

        :param query_endpoint: SPARQL endpoint for queries, read access.
            If GraphDB is secured, set the environment variables
            'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD'.

        :param query_ontology: a `CONSTRUCT` query that is executed
            on the SPARQL endpoint and returns the KG schema statements.
            Example:
            'CONSTRUCT {?s ?p ?o} FROM <https://example.com/ontology/> WHERE {?s ?p ?o}'
            Currently, DESCRIBE queries like
            'PREFIX onto: <https://example.com/ontology/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            DESCRIBE ?term WHERE {
                ?term rdfs:isDefinedBy onto:
            }'
            are not supported, because DESCRIBE returns
            the Symmetric Concise Bounded Description (SCBD),
            i.e. also the incoming class links.
            For large graphs with millions of instances, this is not efficient.
            Check https://github.com/eclipse-rdf4j/rdf4j/issues/4857

        :param local_file: a local RDF ontology file.
            Supported RDF formats:
            Turtle, RDF/XML, JSON-LD, N-Triples, Notation-3, TriG, TriX, N-Quads.
            If the RDF format can't be determined from the file extension,
            pass it explicitly via the `local_file_format` param.

        :param local_file_format: Used if the RDF format can't be determined
            from the local file extension.
            One of "json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"

        Either `query_ontology` or `local_file` should be passed.
        """
        if query_ontology and local_file:
            raise ValueError("Both file and query provided. Only one is allowed.")
        if not query_ontology and not local_file:
            raise ValueError("Neither file nor query provided. One is required.")

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError:
            raise ValueError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            )

        auth = self._get_auth()
        store = sparqlstore.SPARQLStore(auth=auth)
        store.open(query_endpoint)

        self.graph = rdflib.Graph(store, identifier=None, bind_namespaces="none")
        self._check_connectivity()

        if local_file:
            ontology_schema_graph = self._load_ontology_schema_from_file(
                local_file, local_file_format
            )
        else:
            self._validate_user_query(query_ontology)
            ontology_schema_graph = self._load_ontology_schema_with_query(
                query_ontology
            )

        self.schema = ontology_schema_graph.serialize(format="turtle")

    @staticmethod
    def _get_auth() -> Union[tuple, None]:
        """
        Returns the basic authentication configuration
        """
        username = os.environ.get("GRAPHDB_USERNAME", None)
        password = os.environ.get("GRAPHDB_PASSWORD", None)

        if username:
            if not password:
                raise ValueError(
                    "Environment variable 'GRAPHDB_USERNAME' is set, "
                    "but 'GRAPHDB_PASSWORD' is not set."
                )
            else:
                return username, password
        return None

    def _check_connectivity(self) -> None:
        """
        Executes a simple `ASK` query to check connectivity
        """
        try:
            self.graph.query("ASK { ?s ?p ?o }")
        except ValueError:
            raise ValueError(
                "Could not query the provided endpoint. "
                "Please, check, if the value of the provided "
                "query_endpoint points to the right repository. "
                "If GraphDB is secured, please, "
                "make sure that the environment variables "
                "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
            )

    @staticmethod
    def _load_ontology_schema_from_file(
        local_file: str, local_file_format: Optional[str] = None
    ):
        """
        Parse the ontology schema statements from the provided file
        """
        import rdflib

        if not os.path.exists(local_file):
            raise FileNotFoundError(f"File {local_file} does not exist.")
        if not os.access(local_file, os.R_OK):
            raise PermissionError(f"Read permission for {local_file} is restricted")

        graph = rdflib.ConjunctiveGraph()
        try:
            graph.parse(local_file, format=local_file_format)
        except Exception as e:
            raise ValueError(f"Invalid file format for {local_file} : ", e)
        return graph

    @staticmethod
    def _validate_user_query(query_ontology: str) -> None:
        """
        Validate the query is a valid SPARQL CONSTRUCT query
        """
        from pyparsing import ParseException
        from rdflib.plugins.sparql import prepareQuery

        if not isinstance(query_ontology, str):
            raise TypeError("Ontology query must be provided as string.")

        try:
            parsed_query = prepareQuery(query_ontology)
        except ParseException as e:
            raise ValueError("Ontology query is not a valid SPARQL query.", e)

        if parsed_query.algebra.name != "ConstructQuery":
            raise ValueError(
                "Invalid query type. Only CONSTRUCT queries are supported."
            )

    def _load_ontology_schema_with_query(self, query: str):
        """
        Execute the query for collecting the ontology schema statements
        """
        from rdflib.exceptions import ParserError

        try:
            results = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}")

        return results.graph

    @property
    def get_schema(self) -> str:
        """
        Returns the schema of the graph database in turtle format
        """
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """
        Query the graph.
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}")

        return [r for r in res if isinstance(r, ResultRow)]
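
A minimal usage sketch of the wrapper (not part of this commit): it assumes a GraphDB instance at http://localhost:7200 with a repository named "langchain", as used in the integration tests further below; the queries are illustrative.

# Hypothetical example; endpoint, repository, and queries are assumptions for illustration.
from langchain_community.graphs import OntotextGraphDBGraph

graph = OntotextGraphDBGraph(
    query_endpoint="http://localhost:7200/repositories/langchain",
    query_ontology="CONSTRUCT {?s ?p ?o} "
    "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
)

# Ontology statements returned by the CONSTRUCT query, serialized as Turtle
print(graph.get_schema)

# Arbitrary SELECT query against the repository; only result rows are returned
rows = graph.query(
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
    "SELECT ?label WHERE { ?s rdfs:label ?label } LIMIT 5"
)
for row in rows:
    print(row.label)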

View File

@@ -3433,7 +3433,6 @@ files = [
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"},
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"},
{file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"},
{file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"},
@@ -6223,6 +6222,7 @@ files = [
{file = "pymongo-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8729dbf25eb32ad0dc0b9bd5e6a0d0b7e5c2dc8ec06ad171088e1896b522a74"},
{file = "pymongo-4.6.1-cp312-cp312-win32.whl", hash = "sha256:3177f783ae7e08aaf7b2802e0df4e4b13903520e8380915e6337cdc7a6ff01d8"},
{file = "pymongo-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:00c199e1c593e2c8b033136d7a08f0c376452bac8a896c923fcd6f419e07bdd2"},
{file = "pymongo-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6dcc95f4bb9ed793714b43f4f23a7b0c57e4ef47414162297d6f650213512c19"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:13552ca505366df74e3e2f0a4f27c363928f3dff0eef9f281eb81af7f29bc3c5"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:77e0df59b1a4994ad30c6d746992ae887f9756a43fc25dec2db515d94cf0222d"},
{file = "pymongo-4.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3a7f02a58a0c2912734105e05dedbee4f7507e6f1bd132ebad520be0b11d46fd"},
@@ -7093,6 +7093,27 @@ PyYAML = "*"
Shapely = ">=1.7.1"
six = ">=1.15.0"
[[package]]
name = "rdflib"
version = "7.0.0"
description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
optional = true
python-versions = ">=3.8.1,<4.0.0"
files = [
{file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"},
{file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"},
]
[package.dependencies]
isodate = ">=0.6.0,<0.7.0"
pyparsing = ">=2.1.0,<4"
[package.extras]
berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"]
html = ["html5lib (>=1.0,<2.0)"]
lxml = ["lxml (>=4.3.0,<5.0.0)"]
networkx = ["networkx (>=2.0.0,<3.0.0)"]
[[package]]
name = "referencing"
version = "0.31.1"
@@ -9226,9 +9247,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras]
cli = ["typer"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "064816bab088c1f6ff9902cb998291581b66a6d7762f965ff805b4e0b9b2e7e9"
content-hash = "42d012441d7b42d273e11708b7e12308fc56b169d4d56c4c2511e7469743a983"

View File

@@ -90,6 +90,7 @@ zhipuai = {version = "^1.0.7", optional = true}
elasticsearch = {version = "^8.12.0", optional = true}
hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
[tool.poetry.group.test]
optional = true
@@ -254,7 +255,8 @@ extended_testing = [
"zhipuai",
"elasticsearch",
"hdbcli",
"oci"
"oci",
"rdflib",
]
[tool.ruff]
@@ -303,7 +305,7 @@ markers = [
asyncio_mode = "auto"
[tool.codespell]
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples'
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples,*.trig'
# Ignore latin etc
ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
# whats is a typo but used frequently in queries so kept as is

View File

@@ -0,0 +1,6 @@
FROM ontotext/graphdb:10.5.1
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config.ttl /opt/graphdb/dist/data/repositories/langchain/
COPY starwars-data.trig /
COPY graphdb_create.sh /run.sh
ENTRYPOINT bash /run.sh

View File

@@ -0,0 +1,46 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.

[] a rep:Repository ;
    rep:repositoryID "langchain" ;
    rdfs:label "" ;
    rep:repositoryImpl [
        rep:repositoryType "graphdb:SailRepository" ;
        sr:sailImpl [
            sail:sailType "graphdb:Sail" ;

            graphdb:read-only "false" ;

            # Inference and Validation
            graphdb:ruleset "empty" ;
            graphdb:disable-sameAs "true" ;
            graphdb:check-for-inconsistencies "false" ;

            # Indexing
            graphdb:entity-id-size "32" ;
            graphdb:enable-context-index "false" ;
            graphdb:enablePredicateList "true" ;
            graphdb:enable-fts-index "false" ;
            graphdb:fts-indexes ("default" "iri") ;
            graphdb:fts-string-literals-index "default" ;
            graphdb:fts-iris-index "none" ;

            # Queries and Updates
            graphdb:query-timeout "0" ;
            graphdb:throw-QueryEvaluationException-on-timeout "false" ;
            graphdb:query-limit-results "0" ;

            # Settable in the file but otherwise hidden in the UI and in the RDF4J console
            graphdb:base-URL "http://example.org/owlim#" ;
            graphdb:defaultNS "" ;
            graphdb:imports "" ;
            graphdb:repository-type "file-repository" ;
            graphdb:storage-folder "storage" ;
            graphdb:entity-index-size "10000000" ;
            graphdb:in-memory-literal-properties "true" ;
            graphdb:enable-literal-index "true" ;
        ]
    ].

View File

@@ -0,0 +1,9 @@
version: '3.7'

services:

  graphdb:
    image: graphdb
    container_name: graphdb
    ports:
      - "7200:7200"

View File

@@ -0,0 +1,33 @@
#! /bin/bash
REPOSITORY_ID="langchain"
GRAPHDB_URI="http://localhost:7200/"

echo -e "\nUsing GraphDB: ${GRAPHDB_URI}"

function startGraphDB {
    echo -e "\nStarting GraphDB..."
    exec /opt/graphdb/dist/bin/graphdb
}

function waitGraphDBStart {
    echo -e "\nWaiting for GraphDB to start..."
    for _ in $(seq 1 5); do
        CHECK_RES=$(curl --silent --write-out '%{http_code}' --output /dev/null ${GRAPHDB_URI}/rest/repositories)
        if [ "${CHECK_RES}" = '200' ]; then
            echo -e "\nUp and running"
            break
        fi
        sleep 30s
        echo "CHECK_RES: ${CHECK_RES}"
    done
}

function loadData {
    echo -e "\nImporting starwars-data.trig"
    curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements
}

startGraphDB &
waitGraphDBStart
loadData
wait
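
For reference, the readiness check and data import above can be sketched in Python as well (a hypothetical helper using the `requests` package; the endpoint, repository name, and file path mirror the script and are assumptions, not part of this commit):

# Hypothetical Python equivalent of waitGraphDBStart + loadData, assuming GraphDB on localhost:7200.
import time
import requests

GRAPHDB_URI = "http://localhost:7200"
REPOSITORY_ID = "langchain"

# Poll the REST API until GraphDB responds (what waitGraphDBStart does)
for _ in range(5):
    try:
        if requests.get(f"{GRAPHDB_URI}/rest/repositories").status_code == 200:
            break
    except requests.ConnectionError:
        pass
    time.sleep(30)

# Upload the TriG data to the repository's statements endpoint (what loadData does)
with open("starwars-data.trig", "rb") as f:
    requests.post(
        f"{GRAPHDB_URI}/repositories/{REPOSITORY_ID}/statements",
        data=f,
        headers={"Content-Type": "application/x-trig"},
    )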

View File

@@ -0,0 +1,5 @@
set -ex
docker compose down -v --remove-orphans
docker build --tag graphdb .
docker compose up -d graphdb

View File

@@ -0,0 +1,43 @@
@base <https://swapi.co/resource/>.
@prefix voc: <https://swapi.co/vocabulary/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

{
    <besalisk/71>
        a voc:Besalisk , voc:Character ;
        rdfs:label "Dexter Jettster" ;
        voc:eyeColor "yellow" ;
        voc:gender "male" ;
        voc:height 198.0 ;
        voc:mass 102.0 ;
        voc:skinColor "brown" .
}

<https://swapi.co/ontology/> {
    voc:Character a owl:Class .
    voc:Species a owl:Class .

    voc:Besalisk a voc:Species;
        rdfs:label "Besalisk";
        voc:averageHeight 178.0;
        voc:averageLifespan "75";
        voc:character <https://swapi.co/resource/besalisk/71>;
        voc:language "besalisk";
        voc:skinColor "brown";
        voc:eyeColor "yellow" .

    voc:averageHeight a owl:DatatypeProperty .
    voc:averageLifespan a owl:DatatypeProperty .
    voc:character a owl:ObjectProperty .
    voc:language a owl:DatatypeProperty .
    voc:skinColor a owl:DatatypeProperty .
    voc:eyeColor a owl:DatatypeProperty .
    voc:gender a owl:DatatypeProperty .
    voc:height a owl:DatatypeProperty .
    voc:mass a owl:DatatypeProperty .
}
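
The named graph <https://swapi.co/ontology/> in this dataset holds the 19 schema statements that the example CONSTRUCT ... FROM <https://swapi.co/ontology/> query extracts, which is the count asserted in the tests below. A small rdflib sketch to verify this locally (the file path is an assumption):

# Hypothetical check of the dataset with rdflib; run next to starwars-data.trig.
from rdflib import ConjunctiveGraph, URIRef

dataset = ConjunctiveGraph()
dataset.parse("starwars-data.trig", format="trig")

# The ontology lives in the named graph <https://swapi.co/ontology/>
ontology = dataset.get_context(URIRef("https://swapi.co/ontology/"))
print(len(ontology))  # 19 schema statements
print(len(dataset))   # schema statements plus the instance data in the default graph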

View File

@@ -0,0 +1,181 @@
from pathlib import Path

import pytest

from langchain_community.graphs import OntotextGraphDBGraph

"""
cd libs/community/tests/integration_tests/graphs/docker-compose-ontotext-graphdb
./start.sh
"""


def test_query() -> None:
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    query_results = graph.query(
        "PREFIX voc: <https://swapi.co/vocabulary/> "
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        "SELECT ?eyeColor "
        "WHERE {"
        ' ?besalisk rdfs:label "Dexter Jettster" ; '
        " voc:eyeColor ?eyeColor ."
        "}"
    )
    assert len(query_results) == 1
    assert len(query_results[0]) == 1
    assert str(query_results[0][0]) == "yellow"


def test_get_schema_with_query() -> None:
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import Graph

    assert len(Graph().parse(data=graph.get_schema, format="turtle")) == 19


@pytest.mark.parametrize(
    "rdf_format, file_extension",
    [
        ("json-ld", "json"),
        ("json-ld", "jsonld"),
        ("json-ld", "json-ld"),
        ("xml", "rdf"),
        ("xml", "xml"),
        ("xml", "owl"),
        ("pretty-xml", "xml"),
        ("n3", "n3"),
        ("turtle", "ttl"),
        ("nt", "nt"),
        ("trig", "trig"),
        ("nquads", "nq"),
        ("nquads", "nquads"),
        ("trix", "trix"),
    ],
)
def test_get_schema_from_file(
    tmp_path: Path, rdf_format: str, file_extension: str
) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    # serialize the ontology schema loaded with the query in a local file
    # in various rdf formats and check that this results
    # in the same number of statements
    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / ("starwars-ontology." + file_extension)
    conjunctive_graph.serialize(local_file, format=rdf_format)

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        local_file=str(local_file),
    )
    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )


@pytest.mark.parametrize(
    "rdf_format", ["json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"]
)
def test_get_schema_from_file_with_explicit_rdf_format(
    tmp_path: Path, rdf_format: str
) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    # serialize the ontology schema loaded with the query in a local file
    # in various rdf formats and check that this results
    # in the same number of statements
    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.txt"
    conjunctive_graph.serialize(local_file, format=rdf_format)

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        local_file=str(local_file),
        local_file_format=rdf_format,
    )
    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )


def test_get_schema_from_file_with_wrong_extension(tmp_path: Path) -> None:
    expected_number_of_ontology_statements = 19

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    assert (
        len(Graph().parse(data=graph.get_schema, format="turtle"))
        == expected_number_of_ontology_statements
    )

    conjunctive_graph = ConjunctiveGraph()
    ontology_context = conjunctive_graph.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(conjunctive_graph) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.trig"
    conjunctive_graph.serialize(local_file, format="nquads")

    with pytest.raises(ValueError):
        OntotextGraphDBGraph(
            query_endpoint="http://localhost:7200/repositories/langchain",
            local_file=str(local_file),
        )

View File

@@ -12,6 +12,7 @@ EXPECTED_ALL = [
"ArangoGraph",
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
]

View File

@@ -0,0 +1,176 @@
import os
import tempfile
import unittest

import pytest


class TestOntotextGraphDBGraph(unittest.TestCase):
    def test_import(self) -> None:
        from langchain_community.graphs import OntotextGraphDBGraph  # noqa: F401
@pytest.mark.requires("rdflib")
def test_validate_user_query_wrong_type(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(TypeError) as e:
OntotextGraphDBGraph._validate_user_query(
[
"PREFIX starwars: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE starwars: ?term "
"WHERE {?term rdfs:isDefinedBy starwars: }"
]
)
self.assertEqual("Ontology query must be provided as string.", str(e.exception))
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_sparql_syntax(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query(
"CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o"
)
self.assertEqual(
"('Ontology query is not a valid SPARQL query.', "
"Expected ConstructQuery, "
"found end of text (at char 70), (line:1, col:71))",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_select(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query("SELECT * { ?s ?p ?o }")
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_ask(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query("ASK { ?s ?p ?o }")
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_invalid_query_type_describe(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._validate_user_query(
"PREFIX swapi: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE ?term WHERE { ?term rdfs:isDefinedBy swapi: }"
)
self.assertEqual(
"Invalid query type. Only CONSTRUCT queries are supported.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_validate_user_query_construct(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
OntotextGraphDBGraph._validate_user_query(
"CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
)
@pytest.mark.requires("rdflib")
def test_check_connectivity(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
query_ontology="PREFIX swapi: <https://swapi.co/ontology/> "
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
"DESCRIBE ?term WHERE {?term rdfs:isDefinedBy swapi: }",
)
self.assertEqual(
"Could not query the provided endpoint. "
"Please, check, if the value of the provided "
"query_endpoint points to the right repository. "
"If GraphDB is secured, please, make sure that the environment variables "
"'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set.",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_local_file_does_not_exist(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
non_existing_file = os.path.join("non", "existing", "path", "to", "file.ttl")
with self.assertRaises(FileNotFoundError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(non_existing_file)
self.assertEqual(f"File {non_existing_file} does not exist.", str(e.exception))
@pytest.mark.requires("rdflib")
def test_local_file_no_access(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with tempfile.NamedTemporaryFile() as tmp_file:
tmp_file_name = tmp_file.name
# Set file permissions to write and execute only
os.chmod(tmp_file_name, 0o300)
with self.assertRaises(PermissionError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_name)
self.assertEqual(
f"Read permission for {tmp_file_name} is restricted", str(e.exception)
)
@pytest.mark.requires("rdflib")
def test_local_file_bad_syntax(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with tempfile.TemporaryDirectory() as tempdir:
tmp_file_path = os.path.join(tempdir, "starwars-ontology.trig")
with open(tmp_file_path, "w") as tmp_file:
tmp_file.write("invalid trig")
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_path)
self.assertEqual(
f"('Invalid file format for {tmp_file_path} : '"
", BadSyntax('', 0, 'invalid trig', 0, "
"'expected directive or statement'))",
str(e.exception),
)
@pytest.mark.requires("rdflib")
def test_both_query_and_local_file_provided(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
query_ontology="CONSTRUCT {?s ?p ?o}"
"FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
local_file="starwars-ontology-wrong.trig",
)
self.assertEqual(
"Both file and query provided. Only one is allowed.", str(e.exception)
)
@pytest.mark.requires("rdflib")
def test_nor_query_nor_local_file_provided(self) -> None:
from langchain_community.graphs import OntotextGraphDBGraph
with self.assertRaises(ValueError) as e:
OntotextGraphDBGraph(
query_endpoint="http://localhost:7200/repositories/non-existing-repository",
)
self.assertEqual(
"Neither file nor query provided. One is required.", str(e.exception)
)