mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 23:13:31 +00:00
langchain[minor], community[minor]: Implement Ontotext GraphDB QA Chain (#16019)
- **Description:** Implement Ontotext GraphDB QA Chain - **Issue:** N/A - **Dependencies:** N/A - **Twitter handle:** @OntotextGraphDB
This commit is contained in:
@@ -9,6 +9,7 @@ from langchain_community.graphs.nebula_graph import NebulaGraph
|
||||
from langchain_community.graphs.neo4j_graph import Neo4jGraph
|
||||
from langchain_community.graphs.neptune_graph import NeptuneGraph
|
||||
from langchain_community.graphs.networkx_graph import NetworkxEntityGraph
|
||||
from langchain_community.graphs.ontotext_graphdb_graph import OntotextGraphDBGraph
|
||||
from langchain_community.graphs.rdf_graph import RdfGraph
|
||||
from langchain_community.graphs.tigergraph_graph import TigerGraph
|
||||
|
||||
@@ -24,4 +25,5 @@ __all__ = [
|
||||
"ArangoGraph",
|
||||
"FalkorDBGraph",
|
||||
"TigerGraph",
|
||||
"OntotextGraphDBGraph",
|
||||
]
|
||||
|
@@ -0,0 +1,213 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
List,
|
||||
Optional,
|
||||
Union,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import rdflib
|
||||
|
||||
|
||||
class OntotextGraphDBGraph:
    """Ontotext GraphDB https://graphdb.ontotext.com/ wrapper for graph operations.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        query_endpoint: str,
        query_ontology: Optional[str] = None,
        local_file: Optional[str] = None,
        local_file_format: Optional[str] = None,
    ) -> None:
        """
        Set up the GraphDB wrapper

        :param query_endpoint: SPARQL endpoint for queries, read access

        If GraphDB is secured,
        set the environment variables 'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD'.

        :param query_ontology: a `CONSTRUCT` query that is executed
        on the SPARQL endpoint and returns the KG schema statements
        Example:
        'CONSTRUCT {?s ?p ?o} FROM <https://example.com/ontology/> WHERE {?s ?p ?o}'
        Currently, DESCRIBE queries like
        'PREFIX onto: <https://example.com/ontology/>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        DESCRIBE ?term WHERE {
            ?term rdfs:isDefinedBy onto:
        }'
        are not supported, because DESCRIBE returns
        the Symmetric Concise Bounded Description (SCBD),
        i.e. also the incoming class links.
        For large graphs with millions of instances, this is not efficient.
        Check https://github.com/eclipse-rdf4j/rdf4j/issues/4857

        :param local_file: a local RDF ontology file.
        Supported RDF formats:
        Turtle, RDF/XML, JSON-LD, N-Triples, Notation-3, Trig, Trix, N-Quads.
        If the rdf format can't be determined from the file extension,
        pass explicitly the rdf format in `local_file_format` param.

        :param local_file_format: Used if the rdf format can't be determined
        from the local file extension.
        One of "json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"

        Either `query_ontology` or `local_file` should be passed.

        :raises ValueError: if both or neither of `query_ontology` and
        `local_file` are given, if rdflib cannot be imported, or if one of
        the helper steps below rejects the endpoint, the query or the file.
        """
        # Argument validation runs first so misuse is reported without
        # importing rdflib or touching the network.
        if query_ontology and local_file:
            raise ValueError("Both file and query provided. Only one is allowed.")

        if not query_ontology and not local_file:
            raise ValueError("Neither file nor query provided. One is required.")

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            # ValueError is kept for backward compatibility with callers;
            # the original ImportError stays reachable through __cause__.
            raise ValueError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e

        auth = self._get_auth()
        store = sparqlstore.SPARQLStore(auth=auth)
        store.open(query_endpoint)

        self.graph = rdflib.Graph(store, identifier=None, bind_namespaces="none")
        # Connectivity is checked before the ontology is loaded, so an
        # unreachable endpoint is reported ahead of any query/file problem.
        self._check_connectivity()

        if local_file:
            ontology_schema_graph = self._load_ontology_schema_from_file(
                local_file, local_file_format
            )
        else:
            self._validate_user_query(query_ontology)
            ontology_schema_graph = self._load_ontology_schema_with_query(
                query_ontology
            )
        # The schema is kept as a Turtle string.
        self.schema = ontology_schema_graph.serialize(format="turtle")

    @staticmethod
    def _get_auth() -> Union[tuple, None]:
        """
        Returns the basic authentication configuration

        Reads 'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' from the environment.

        :return: a `(username, password)` tuple, or None when no username is set
        :raises ValueError: if a username is set without a password
        """
        username = os.environ.get("GRAPHDB_USERNAME", None)
        password = os.environ.get("GRAPHDB_PASSWORD", None)

        if username:
            if not password:
                raise ValueError(
                    "Environment variable 'GRAPHDB_USERNAME' is set, "
                    "but 'GRAPHDB_PASSWORD' is not set."
                )
            else:
                return username, password
        return None

    def _check_connectivity(self) -> None:
        """
        Executes a simple `ASK` query to check connectivity

        :raises ValueError: if the `ASK` query raises a ValueError
        """
        try:
            self.graph.query("ASK { ?s ?p ?o }")
        except ValueError as e:
            raise ValueError(
                "Could not query the provided endpoint. "
                "Please, check, if the value of the provided "
                "query_endpoint points to the right repository. "
                "If GraphDB is secured, please, "
                "make sure that the environment variables "
                "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
            ) from e

    @staticmethod
    def _load_ontology_schema_from_file(
        local_file: str, local_file_format: Optional[str] = None
    ) -> rdflib.ConjunctiveGraph:
        """
        Parse the ontology schema statements from the provided file

        :param local_file: path of the RDF file to parse
        :param local_file_format: explicit rdflib format name; when None it is
        passed through to rdflib as-is
        :return: a ConjunctiveGraph holding the parsed statements
        :raises FileNotFoundError: if the path does not exist
        :raises PermissionError: if the file is not readable
        :raises ValueError: if rdflib fails to parse the file
        """
        import rdflib

        if not os.path.exists(local_file):
            raise FileNotFoundError(f"File {local_file} does not exist.")
        if not os.access(local_file, os.R_OK):
            raise PermissionError(f"Read permission for {local_file} is restricted")
        graph = rdflib.ConjunctiveGraph()
        try:
            graph.parse(local_file, format=local_file_format)
        except Exception as e:
            # The two-argument form is intentional: str() of this error is
            # compared verbatim by existing tests, so the args stay as they are.
            raise ValueError(f"Invalid file format for {local_file} : ", e) from e
        return graph

    @staticmethod
    def _validate_user_query(query_ontology: str) -> None:
        """
        Validate the query is a valid SPARQL CONSTRUCT query

        :param query_ontology: the SPARQL text to check
        :raises TypeError: if the query is not a string
        :raises ValueError: if the query does not parse, or parses to anything
        other than a CONSTRUCT query
        """
        from pyparsing import ParseException
        from rdflib.plugins.sparql import prepareQuery

        if not isinstance(query_ontology, str):
            raise TypeError("Ontology query must be provided as string.")
        try:
            parsed_query = prepareQuery(query_ontology)
        except ParseException as e:
            # Two-argument form kept: existing tests compare str() of this error.
            raise ValueError("Ontology query is not a valid SPARQL query.", e) from e

        if parsed_query.algebra.name != "ConstructQuery":
            raise ValueError(
                "Invalid query type. Only CONSTRUCT queries are supported."
            )

    def _load_ontology_schema_with_query(self, query: str):
        """
        Execute the query for collecting the ontology schema statements

        :param query: the CONSTRUCT query to run against `self.graph`
        :return: the `graph` attribute of the query result
        :raises ValueError: if rdflib raises a ParserError for the query
        """
        from rdflib.exceptions import ParserError

        try:
            results = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}") from e

        return results.graph

    @property
    def get_schema(self) -> str:
        """
        Returns the schema of the graph database in turtle format
        """
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """
        Query the graph.

        :param query: SPARQL text to execute against `self.graph`
        :return: the result items that are `ResultRow` instances; any other
        item yielded by the result is dropped
        :raises ValueError: if rdflib raises a ParserError for the query
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}") from e
        return [r for r in res if isinstance(r, ResultRow)]
|
27
libs/community/poetry.lock
generated
27
libs/community/poetry.lock
generated
@@ -3433,7 +3433,6 @@ files = [
|
||||
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"},
|
||||
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"},
|
||||
{file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"},
|
||||
{file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"},
|
||||
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"},
|
||||
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"},
|
||||
{file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"},
|
||||
@@ -6223,6 +6222,7 @@ files = [
|
||||
{file = "pymongo-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8729dbf25eb32ad0dc0b9bd5e6a0d0b7e5c2dc8ec06ad171088e1896b522a74"},
|
||||
{file = "pymongo-4.6.1-cp312-cp312-win32.whl", hash = "sha256:3177f783ae7e08aaf7b2802e0df4e4b13903520e8380915e6337cdc7a6ff01d8"},
|
||||
{file = "pymongo-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:00c199e1c593e2c8b033136d7a08f0c376452bac8a896c923fcd6f419e07bdd2"},
|
||||
{file = "pymongo-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6dcc95f4bb9ed793714b43f4f23a7b0c57e4ef47414162297d6f650213512c19"},
|
||||
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:13552ca505366df74e3e2f0a4f27c363928f3dff0eef9f281eb81af7f29bc3c5"},
|
||||
{file = "pymongo-4.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:77e0df59b1a4994ad30c6d746992ae887f9756a43fc25dec2db515d94cf0222d"},
|
||||
{file = "pymongo-4.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3a7f02a58a0c2912734105e05dedbee4f7507e6f1bd132ebad520be0b11d46fd"},
|
||||
@@ -7093,6 +7093,27 @@ PyYAML = "*"
|
||||
Shapely = ">=1.7.1"
|
||||
six = ">=1.15.0"
|
||||
|
||||
[[package]]
|
||||
name = "rdflib"
|
||||
version = "7.0.0"
|
||||
description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
|
||||
optional = true
|
||||
python-versions = ">=3.8.1,<4.0.0"
|
||||
files = [
|
||||
{file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"},
|
||||
{file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
isodate = ">=0.6.0,<0.7.0"
|
||||
pyparsing = ">=2.1.0,<4"
|
||||
|
||||
[package.extras]
|
||||
berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"]
|
||||
html = ["html5lib (>=1.0,<2.0)"]
|
||||
lxml = ["lxml (>=4.3.0,<5.0.0)"]
|
||||
networkx = ["networkx (>=2.0.0,<3.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.31.1"
|
||||
@@ -9226,9 +9247,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
||||
|
||||
[extras]
|
||||
cli = ["typer"]
|
||||
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
|
||||
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "064816bab088c1f6ff9902cb998291581b66a6d7762f965ff805b4e0b9b2e7e9"
|
||||
content-hash = "42d012441d7b42d273e11708b7e12308fc56b169d4d56c4c2511e7469743a983"
|
||||
|
@@ -90,6 +90,7 @@ zhipuai = {version = "^1.0.7", optional = true}
|
||||
elasticsearch = {version = "^8.12.0", optional = true}
|
||||
hdbcli = {version = "^2.19.21", optional = true}
|
||||
oci = {version = "^2.119.1", optional = true}
|
||||
rdflib = {version = "7.0.0", optional = true}
|
||||
|
||||
[tool.poetry.group.test]
|
||||
optional = true
|
||||
@@ -254,7 +255,8 @@ extended_testing = [
|
||||
"zhipuai",
|
||||
"elasticsearch",
|
||||
"hdbcli",
|
||||
"oci"
|
||||
"oci",
|
||||
"rdflib",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
@@ -303,7 +305,7 @@ markers = [
|
||||
asyncio_mode = "auto"
|
||||
|
||||
[tool.codespell]
|
||||
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples'
|
||||
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples,*.trig'
|
||||
# Ignore latin etc
|
||||
ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
|
||||
# whats is a typo but used frequently in queries so kept as is
|
||||
|
@@ -0,0 +1,6 @@
|
||||
# GraphDB image that ships the "langchain" repository config, the Star Wars
# test data and the startup script used by the integration tests.
FROM ontotext/graphdb:10.5.1
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config.ttl /opt/graphdb/dist/data/repositories/langchain/
COPY starwars-data.trig /
COPY graphdb_create.sh /run.sh
# Exec form: the script is started directly instead of through `/bin/sh -c`.
ENTRYPOINT ["bash", "/run.sh"]
|
@@ -0,0 +1,46 @@
|
||||
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
|
||||
@prefix rep: <http://www.openrdf.org/config/repository#>.
|
||||
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
|
||||
@prefix sail: <http://www.openrdf.org/config/sail#>.
|
||||
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.
|
||||
|
||||
[] a rep:Repository ;
|
||||
rep:repositoryID "langchain" ;
|
||||
rdfs:label "" ;
|
||||
rep:repositoryImpl [
|
||||
rep:repositoryType "graphdb:SailRepository" ;
|
||||
sr:sailImpl [
|
||||
sail:sailType "graphdb:Sail" ;
|
||||
|
||||
graphdb:read-only "false" ;
|
||||
|
||||
# Inference and Validation
|
||||
graphdb:ruleset "empty" ;
|
||||
graphdb:disable-sameAs "true" ;
|
||||
graphdb:check-for-inconsistencies "false" ;
|
||||
|
||||
# Indexing
|
||||
graphdb:entity-id-size "32" ;
|
||||
graphdb:enable-context-index "false" ;
|
||||
graphdb:enablePredicateList "true" ;
|
||||
graphdb:enable-fts-index "false" ;
|
||||
graphdb:fts-indexes ("default" "iri") ;
|
||||
graphdb:fts-string-literals-index "default" ;
|
||||
graphdb:fts-iris-index "none" ;
|
||||
|
||||
# Queries and Updates
|
||||
graphdb:query-timeout "0" ;
|
||||
graphdb:throw-QueryEvaluationException-on-timeout "false" ;
|
||||
graphdb:query-limit-results "0" ;
|
||||
|
||||
# Settable in the file but otherwise hidden in the UI and in the RDF4J console
|
||||
graphdb:base-URL "http://example.org/owlim#" ;
|
||||
graphdb:defaultNS "" ;
|
||||
graphdb:imports "" ;
|
||||
graphdb:repository-type "file-repository" ;
|
||||
graphdb:storage-folder "storage" ;
|
||||
graphdb:entity-index-size "10000000" ;
|
||||
graphdb:in-memory-literal-properties "true" ;
|
||||
graphdb:enable-literal-index "true" ;
|
||||
]
|
||||
].
|
@@ -0,0 +1,9 @@
|
||||
version: '3.7'
|
||||
|
||||
services:
|
||||
|
||||
graphdb:
|
||||
image: graphdb
|
||||
container_name: graphdb
|
||||
ports:
|
||||
- "7200:7200"
|
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# Start GraphDB in the background, wait for its REST API to answer, then
# import the Star Wars test data into the "langchain" repository.

REPOSITORY_ID="langchain"
# No trailing slash: every endpoint path appended below starts with "/".
GRAPHDB_URI="http://localhost:7200"

echo -e "\nUsing GraphDB: ${GRAPHDB_URI}"

# Replace the current (background) shell with the GraphDB server process.
function startGraphDB {
  echo -e "\nStarting GraphDB..."
  exec /opt/graphdb/dist/bin/graphdb
}

# Poll the repositories endpoint up to 5 times, sleeping 30s after each miss.
# Returns 0 as soon as it answers HTTP 200, 1 if it never does.
function waitGraphDBStart {
  echo -e "\nWaiting GraphDB to start..."
  for _ in $(seq 1 5); do
    CHECK_RES=$(curl --silent --write-out '%{http_code}' --output /dev/null "${GRAPHDB_URI}/rest/repositories")
    if [ "${CHECK_RES}" = '200' ]; then
      echo -e "\nUp and running"
      return 0
    fi
    sleep 30s
    echo "CHECK_RES: ${CHECK_RES}"
  done
  return 1
}

# Upload the TriG file to the repository's statements endpoint.
function loadData {
  echo -e "\nImporting starwars-data.trig"
  curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig "${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements"
}

startGraphDB &
if ! waitGraphDBStart; then
  # Keep the best-effort import, but make the timeout visible in the logs.
  echo "GraphDB did not report ready in time; attempting the import anyway" >&2
fi
loadData
# Keep the script alive for as long as the background GraphDB process runs.
wait
|
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
# Rebuild the local "graphdb" image and (re)start it through docker compose.
# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

# Tear down any previous run, including its volumes and orphaned containers.
docker compose down -v --remove-orphans
docker build --tag graphdb .
docker compose up -d graphdb
|
@@ -0,0 +1,43 @@
|
||||
@base <https://swapi.co/resource/>.
|
||||
@prefix voc: <https://swapi.co/vocabulary/> .
|
||||
@prefix owl: <http://www.w3.org/2002/07/owl#> .
|
||||
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
||||
|
||||
{
|
||||
|
||||
<besalisk/71>
|
||||
a voc:Besalisk , voc:Character ;
|
||||
rdfs:label "Dexter Jettster" ;
|
||||
voc:eyeColor "yellow" ;
|
||||
voc:gender "male" ;
|
||||
voc:height 198.0 ;
|
||||
voc:mass 102.0 ;
|
||||
voc:skinColor "brown" .
|
||||
|
||||
}
|
||||
|
||||
<https://swapi.co/ontology/> {
|
||||
|
||||
voc:Character a owl:Class .
|
||||
voc:Species a owl:Class .
|
||||
|
||||
voc:Besalisk a voc:Species;
|
||||
rdfs:label "Besalisk";
|
||||
voc:averageHeight 178.0;
|
||||
voc:averageLifespan "75";
|
||||
voc:character <https://swapi.co/resource/besalisk/71>;
|
||||
voc:language "besalisk";
|
||||
voc:skinColor "brown";
|
||||
voc:eyeColor "yellow" .
|
||||
|
||||
voc:averageHeight a owl:DatatypeProperty .
|
||||
voc:averageLifespan a owl:DatatypeProperty .
|
||||
voc:character a owl:ObjectProperty .
|
||||
voc:language a owl:DatatypeProperty .
|
||||
voc:skinColor a owl:DatatypeProperty .
|
||||
voc:eyeColor a owl:DatatypeProperty .
|
||||
voc:gender a owl:DatatypeProperty .
|
||||
voc:height a owl:DatatypeProperty .
|
||||
voc:mass a owl:DatatypeProperty .
|
||||
|
||||
}
|
@@ -0,0 +1,181 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_community.graphs import OntotextGraphDBGraph
|
||||
|
||||
"""
|
||||
cd libs/community/tests/integration_tests/graphs/docker-compose-ontotext-graphdb
|
||||
./start.sh
|
||||
"""
|
||||
|
||||
|
||||
def test_query() -> None:
    """A SELECT on Dexter Jettster's eye colour returns the single value "yellow"."""
    ontology_query = (
        "CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
    )
    select_query = (
        "PREFIX voc: <https://swapi.co/vocabulary/> "
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        "SELECT ?eyeColor "
        "WHERE {"
        ' ?besalisk rdfs:label "Dexter Jettster" ; '
        " voc:eyeColor ?eyeColor ."
        "}"
    )

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology=ontology_query,
    )
    rows = graph.query(select_query)

    # Exactly one row with exactly one binding is expected.
    assert len(rows) == 1
    first_row = rows[0]
    assert len(first_row) == 1
    assert str(first_row[0]) == "yellow"
|
||||
|
||||
|
||||
def test_get_schema_with_query() -> None:
    """The schema loaded through a CONSTRUCT query parses to 19 statements."""
    ontology_query = (
        "CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
    )
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology=ontology_query,
    )

    from rdflib import Graph

    # Round-trip the Turtle schema through rdflib to count its statements.
    parsed_schema = Graph().parse(data=graph.get_schema, format="turtle")
    assert len(parsed_schema) == 19
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "rdf_format, file_extension",
    [
        ("json-ld", "json"),
        ("json-ld", "jsonld"),
        ("json-ld", "json-ld"),
        ("xml", "rdf"),
        ("xml", "xml"),
        ("xml", "owl"),
        ("pretty-xml", "xml"),
        ("n3", "n3"),
        ("turtle", "ttl"),
        ("nt", "nt"),
        ("trig", "trig"),
        ("nquads", "nq"),
        ("nquads", "nquads"),
        ("trix", "trix"),
    ],
)
def test_get_schema_from_file(
    tmp_path: Path, rdf_format: str, file_extension: str
) -> None:
    """Loading the schema from a file whose extension identifies its format
    gives the same number of statements as loading it with a query."""
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    graph_from_query = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    schema_from_query = Graph().parse(data=graph_from_query.get_schema, format="turtle")
    assert len(schema_from_query) == expected_number_of_ontology_statements

    # Write the query-loaded schema to a local file in the parametrized RDF
    # format; the statement count must survive the round trip.
    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph_from_query.get_schema, format="turtle")

    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    local_file = tmp_path / f"starwars-ontology.{file_extension}"
    dataset.serialize(local_file, format=rdf_format)

    graph_from_file = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        local_file=str(local_file),
    )
    schema_from_file = Graph().parse(data=graph_from_file.get_schema, format="turtle")
    assert len(schema_from_file) == expected_number_of_ontology_statements
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "rdf_format", ["json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"]
)
def test_get_schema_from_file_with_explicit_rdf_format(
    tmp_path: Path, rdf_format: str
) -> None:
    """With a `.txt` file name the format is passed explicitly through
    `local_file_format`, and the statement count still matches."""
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    graph_from_query = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    schema_from_query = Graph().parse(data=graph_from_query.get_schema, format="turtle")
    assert len(schema_from_query) == expected_number_of_ontology_statements

    # Write the query-loaded schema to a local file in the parametrized RDF
    # format; the statement count must survive the round trip.
    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph_from_query.get_schema, format="turtle")

    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.txt"
    dataset.serialize(local_file, format=rdf_format)

    graph_from_file = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        local_file=str(local_file),
        local_file_format=rdf_format,
    )
    schema_from_file = Graph().parse(data=graph_from_file.get_schema, format="turtle")
    assert len(schema_from_file) == expected_number_of_ontology_statements
|
||||
|
||||
|
||||
def test_get_schema_from_file_with_wrong_extension(tmp_path: Path) -> None:
    """N-Quads content stored under a `.trig` file name is rejected with ValueError."""
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    graph_from_query = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    schema_from_query = Graph().parse(data=graph_from_query.get_schema, format="turtle")
    assert len(schema_from_query) == expected_number_of_ontology_statements

    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=graph_from_query.get_schema, format="turtle")

    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    # The extension says TriG but the content is serialized as N-Quads.
    local_file = tmp_path / "starwars-ontology.trig"
    dataset.serialize(local_file, format="nquads")

    with pytest.raises(ValueError):
        OntotextGraphDBGraph(
            query_endpoint=endpoint,
            local_file=str(local_file),
        )
|
@@ -12,6 +12,7 @@ EXPECTED_ALL = [
|
||||
"ArangoGraph",
|
||||
"FalkorDBGraph",
|
||||
"TigerGraph",
|
||||
"OntotextGraphDBGraph",
|
||||
]
|
||||
|
||||
|
||||
|
@@ -0,0 +1,176 @@
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestOntotextGraphDBGraph(unittest.TestCase):
    """Unit tests for the argument, query and file validation of
    `OntotextGraphDBGraph`."""

    def test_import(self) -> None:
        """The class is importable from the public `graphs` package."""
        from langchain_community.graphs import OntotextGraphDBGraph  # noqa: F401

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_wrong_type(self) -> None:
        """A query passed as a list instead of a string raises TypeError."""
        from langchain_community.graphs import OntotextGraphDBGraph

        query_as_list = [
            "PREFIX starwars: <https://swapi.co/ontology/> "
            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
            "DESCRIBE starwars: ?term "
            "WHERE {?term rdfs:isDefinedBy starwars: }"
        ]
        with self.assertRaises(TypeError) as ctx:
            OntotextGraphDBGraph._validate_user_query(query_as_list)
        self.assertEqual(
            "Ontology query must be provided as string.", str(ctx.exception)
        )

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_invalid_sparql_syntax(self) -> None:
        """A CONSTRUCT query with a missing closing brace raises ValueError."""
        from langchain_community.graphs import OntotextGraphDBGraph

        broken_query = (
            "CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o"
        )
        expected = (
            "('Ontology query is not a valid SPARQL query.', "
            "Expected ConstructQuery, "
            "found end of text (at char 70), (line:1, col:71))"
        )
        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph._validate_user_query(broken_query)
        self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_invalid_query_type_select(self) -> None:
        """SELECT queries are rejected."""
        from langchain_community.graphs import OntotextGraphDBGraph

        expected = "Invalid query type. Only CONSTRUCT queries are supported."
        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph._validate_user_query("SELECT * { ?s ?p ?o }")
        self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_invalid_query_type_ask(self) -> None:
        """ASK queries are rejected."""
        from langchain_community.graphs import OntotextGraphDBGraph

        expected = "Invalid query type. Only CONSTRUCT queries are supported."
        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph._validate_user_query("ASK { ?s ?p ?o }")
        self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_invalid_query_type_describe(self) -> None:
        """DESCRIBE queries are rejected."""
        from langchain_community.graphs import OntotextGraphDBGraph

        describe_query = (
            "PREFIX swapi: <https://swapi.co/ontology/> "
            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
            "DESCRIBE ?term WHERE { ?term rdfs:isDefinedBy swapi: }"
        )
        expected = "Invalid query type. Only CONSTRUCT queries are supported."
        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph._validate_user_query(describe_query)
        self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_validate_user_query_construct(self) -> None:
        """A well-formed CONSTRUCT query passes validation without raising."""
        from langchain_community.graphs import OntotextGraphDBGraph

        valid_query = (
            "CONSTRUCT {?s ?p ?o} FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
        )
        OntotextGraphDBGraph._validate_user_query(valid_query)

    @pytest.mark.requires("rdflib")
    def test_check_connectivity(self) -> None:
        """An endpoint that cannot be queried yields the connectivity error,
        even though the ontology query itself is an unsupported DESCRIBE."""
        from langchain_community.graphs import OntotextGraphDBGraph

        expected = (
            "Could not query the provided endpoint. "
            "Please, check, if the value of the provided "
            "query_endpoint points to the right repository. "
            "If GraphDB is secured, please, make sure that the environment variables "
            "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
        )
        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph(
                query_endpoint="http://localhost:7200/repositories/non-existing-repository",
                query_ontology="PREFIX swapi: <https://swapi.co/ontology/> "
                "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
                "DESCRIBE ?term WHERE {?term rdfs:isDefinedBy swapi: }",
            )
        self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_local_file_does_not_exist(self) -> None:
        """A path that does not exist raises FileNotFoundError."""
        from langchain_community.graphs import OntotextGraphDBGraph

        non_existing_file = os.path.join("non", "existing", "path", "to", "file.ttl")
        with self.assertRaises(FileNotFoundError) as ctx:
            OntotextGraphDBGraph._load_ontology_schema_from_file(non_existing_file)
        self.assertEqual(
            f"File {non_existing_file} does not exist.", str(ctx.exception)
        )

    @pytest.mark.requires("rdflib")
    def test_local_file_no_access(self) -> None:
        """A file without read permission raises PermissionError."""
        from langchain_community.graphs import OntotextGraphDBGraph

        with tempfile.NamedTemporaryFile() as tmp_file:
            tmp_file_name = tmp_file.name

            # Set file permissions to write and execute only
            os.chmod(tmp_file_name, 0o300)

            with self.assertRaises(PermissionError) as ctx:
                OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_name)

            self.assertEqual(
                f"Read permission for {tmp_file_name} is restricted",
                str(ctx.exception),
            )

    @pytest.mark.requires("rdflib")
    def test_local_file_bad_syntax(self) -> None:
        """A `.trig` file with unparsable content raises ValueError."""
        from langchain_community.graphs import OntotextGraphDBGraph

        with tempfile.TemporaryDirectory() as tempdir:
            tmp_file_path = os.path.join(tempdir, "starwars-ontology.trig")
            with open(tmp_file_path, "w") as tmp_file:
                tmp_file.write("invalid trig")

            with self.assertRaises(ValueError) as ctx:
                OntotextGraphDBGraph._load_ontology_schema_from_file(tmp_file_path)
            expected = (
                f"('Invalid file format for {tmp_file_path} : '"
                ", BadSyntax('', 0, 'invalid trig', 0, "
                "'expected directive or statement'))"
            )
            self.assertEqual(expected, str(ctx.exception))

    @pytest.mark.requires("rdflib")
    def test_both_query_and_local_file_provided(self) -> None:
        """Passing both an ontology query and a local file is rejected."""
        from langchain_community.graphs import OntotextGraphDBGraph

        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph(
                query_endpoint="http://localhost:7200/repositories/non-existing-repository",
                query_ontology="CONSTRUCT {?s ?p ?o}"
                "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
                local_file="starwars-ontology-wrong.trig",
            )
        self.assertEqual(
            "Both file and query provided. Only one is allowed.", str(ctx.exception)
        )

    @pytest.mark.requires("rdflib")
    def test_nor_query_nor_local_file_provided(self) -> None:
        """Passing neither an ontology query nor a local file is rejected."""
        from langchain_community.graphs import OntotextGraphDBGraph

        with self.assertRaises(ValueError) as ctx:
            OntotextGraphDBGraph(
                query_endpoint="http://localhost:7200/repositories/non-existing-repository",
            )
        self.assertEqual(
            "Neither file nor query provided. One is required.", str(ctx.exception)
        )
|
Reference in New Issue
Block a user