langchain[minor], community[minor]: Implement Ontotext GraphDB QA Chain (#16019)

- **Description:** Implement Ontotext GraphDB QA Chain
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** @OntotextGraphDB
This commit is contained in:
Neli Hateva
2024-01-29 22:25:53 +02:00
committed by GitHub
parent a08f9a7ff9
commit c95facc293
31 changed files with 2170 additions and 14 deletions

View File

@@ -0,0 +1,6 @@
# GraphDB image for the integration tests: ships the "langchain" repository
# configuration and the sample Star Wars data set.
FROM ontotext/graphdb:10.5.1
# Directory for the repository whose ID is "langchain" (see config.ttl).
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config.ttl /opt/graphdb/dist/data/repositories/langchain/
# Data file that the entrypoint script uploads once the server answers.
COPY starwars-data.trig /
COPY graphdb_create.sh /run.sh
# Exec form: runs bash directly instead of wrapping it in `/bin/sh -c`, so the
# script is the container's main process and receives signals itself.
ENTRYPOINT ["bash", "/run.sh"]

View File

@@ -0,0 +1,46 @@
# Repository configuration for the GraphDB repository with ID "langchain".
# The Dockerfile copies this file into the repository's data directory.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.
[] a rep:Repository ;
# Must match the repository name used in the endpoint URLs of the tests
# (http://localhost:7200/repositories/langchain).
rep:repositoryID "langchain" ;
rdfs:label "" ;
rep:repositoryImpl [
rep:repositoryType "graphdb:SailRepository" ;
sr:sailImpl [
sail:sailType "graphdb:Sail" ;
graphdb:read-only "false" ;
# Inference and Validation
graphdb:ruleset "empty" ;
graphdb:disable-sameAs "true" ;
graphdb:check-for-inconsistencies "false" ;
# Indexing
graphdb:entity-id-size "32" ;
graphdb:enable-context-index "false" ;
graphdb:enablePredicateList "true" ;
graphdb:enable-fts-index "false" ;
graphdb:fts-indexes ("default" "iri") ;
graphdb:fts-string-literals-index "default" ;
graphdb:fts-iris-index "none" ;
# Queries and Updates
# NOTE(review): "0" presumably means "no timeout" / "no result limit" - confirm
# against the GraphDB configuration reference.
graphdb:query-timeout "0" ;
graphdb:throw-QueryEvaluationException-on-timeout "false" ;
graphdb:query-limit-results "0" ;
# Settable in the file but otherwise hidden in the UI and in the RDF4J console
graphdb:base-URL "http://example.org/owlim#" ;
graphdb:defaultNS "" ;
graphdb:imports "" ;
graphdb:repository-type "file-repository" ;
graphdb:storage-folder "storage" ;
graphdb:entity-index-size "10000000" ;
graphdb:in-memory-literal-properties "true" ;
graphdb:enable-literal-index "true" ;
]
].

View File

@@ -0,0 +1,9 @@
# Compose definition for the GraphDB instance used by the integration tests.
version: '3.7'
services:
  graphdb:
    # Locally built image; start.sh tags it as "graphdb" before bringing it up.
    image: graphdb
    container_name: graphdb
    ports:
      # Publish port 7200, the one the test endpoints use, on the host.
      - "7200:7200"

View File

@@ -0,0 +1,33 @@
#! /bin/bash
# Settings shared by the functions below: base URL of the GraphDB server and
# the ID of the repository that receives the sample data.
GRAPHDB_URI="http://localhost:7200/"
REPOSITORY_ID="langchain"
printf '\nUsing GraphDB: %s\n' "${GRAPHDB_URI}"
# Announce the start-up, then replace the current shell process with the
# GraphDB launcher via `exec`.
startGraphDB() {
    printf '\n%s\n' "Starting GraphDB..."
    exec /opt/graphdb/dist/bin/graphdb
}
# Poll the repositories REST endpoint until it answers with HTTP 200.
# Makes at most 5 attempts and sleeps 30 seconds after each failed one.
# Sets the global CHECK_RES to the last HTTP status code seen.
# Returns 0 as soon as the server is up, 1 if it never answered with 200.
function waitGraphDBStart {
    echo -e "\nWaiting GraphDB to start..."
    for _ in $(seq 1 5); do
        # ${GRAPHDB_URI%/} drops a trailing slash so the URL has no "//",
        # and the quotes keep the URL a single word.
        CHECK_RES=$(curl --silent --write-out '%{http_code}' --output /dev/null "${GRAPHDB_URI%/}/rest/repositories")
        if [ "${CHECK_RES}" = '200' ]; then
            echo -e "\nUp and running"
            return 0
        fi
        # Report the failed attempt right away instead of after the sleep.
        echo "CHECK_RES: ${CHECK_RES}"
        sleep 30s
    done
    # Make the timeout visible instead of falling through silently.
    echo "GraphDB did not respond with HTTP 200 after 5 attempts" >&2
    return 1
}
# Upload /starwars-data.trig to the statements endpoint of the repository
# named by REPOSITORY_ID, sent as application/x-trig via HTTP POST.
function loadData {
    echo -e "\nImporting starwars-data.trig"
    # ${GRAPHDB_URI%/} drops a trailing slash so the URL has no "//";
    # the quotes keep the expanded URL a single argument.
    curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig \
        "${GRAPHDB_URI%/}/repositories/${REPOSITORY_ID}/statements"
}
# Launch the server as a background job so this script can go on to poll it.
startGraphDB &
# Poll until the REST API answers (or the attempts run out), then import the data.
waitGraphDBStart
loadData
# Block until the background job exits, so the script keeps running for as
# long as the server does.
wait

View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Rebuild the local "graphdb" image and (re)start it through Docker Compose.
# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex
# Run from the script's own directory so that `docker build .` and
# `docker compose` find the Dockerfile and compose file regardless of the
# caller's working directory.
cd "$(dirname "$0")"
# Tear down any previous instance, including its volumes and orphan containers.
docker compose down -v --remove-orphans
docker build --tag graphdb .
docker compose up -d graphdb

View File

@@ -0,0 +1,43 @@
# Sample Star Wars data set loaded into the "langchain" repository.
# Relative IRIs such as <besalisk/71> resolve against the @base below.
@base <https://swapi.co/resource/>.
@prefix voc: <https://swapi.co/vocabulary/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
# Default graph: instance data (one character).
{
<besalisk/71>
a voc:Besalisk , voc:Character ;
rdfs:label "Dexter Jettster" ;
voc:eyeColor "yellow" ;
voc:gender "male" ;
voc:height 198.0 ;
voc:mass 102.0 ;
voc:skinColor "brown" .
}
# Named graph holding the ontology. The integration tests read it with
# `FROM <https://swapi.co/ontology/>` and expect exactly 19 statements here,
# so adding or removing triples in this graph requires updating the tests.
<https://swapi.co/ontology/> {
voc:Character a owl:Class .
voc:Species a owl:Class .
voc:Besalisk a voc:Species;
rdfs:label "Besalisk";
voc:averageHeight 178.0;
voc:averageLifespan "75";
voc:character <https://swapi.co/resource/besalisk/71>;
voc:language "besalisk";
voc:skinColor "brown";
voc:eyeColor "yellow" .
# Property declarations.
voc:averageHeight a owl:DatatypeProperty .
voc:averageLifespan a owl:DatatypeProperty .
voc:character a owl:ObjectProperty .
voc:language a owl:DatatypeProperty .
voc:skinColor a owl:DatatypeProperty .
voc:eyeColor a owl:DatatypeProperty .
voc:gender a owl:DatatypeProperty .
voc:height a owl:DatatypeProperty .
voc:mass a owl:DatatypeProperty .
}

View File

@@ -0,0 +1,181 @@
from pathlib import Path
import pytest
from langchain_community.graphs import OntotextGraphDBGraph
"""
cd libs/community/tests/integration_tests/graphs/docker-compose-ontotext-graphdb
./start.sh
"""
def test_query() -> None:
    """Run a SELECT query against the live repository and check its one binding.

    Requires the GraphDB container to be reachable on localhost:7200.
    """
    ontology_query = (
        "CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
    )
    eye_color_query = (
        "PREFIX voc: <https://swapi.co/vocabulary/> "
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        "SELECT ?eyeColor "
        "WHERE {"
        ' ?besalisk rdfs:label "Dexter Jettster" ; '
        " voc:eyeColor ?eyeColor ."
        "}"
    )

    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology=ontology_query,
    )
    rows = graph.query(eye_color_query)

    # Exactly one result row with exactly one bound variable is expected.
    assert len(rows) == 1
    only_row = rows[0]
    assert len(only_row) == 1
    assert str(only_row[0]) == "yellow"
def test_get_schema_with_query() -> None:
    """The schema loaded through a CONSTRUCT query parses to 19 statements."""
    ontology_query = (
        "CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}"
    )
    graph = OntotextGraphDBGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        query_ontology=ontology_query,
    )

    from rdflib import Graph

    # The schema is parsed back as Turtle to count its statements.
    parsed_schema = Graph().parse(data=graph.get_schema, format="turtle")
    assert len(parsed_schema) == 19
@pytest.mark.parametrize(
    "rdf_format, file_extension",
    [
        ("json-ld", "json"),
        ("json-ld", "jsonld"),
        ("json-ld", "json-ld"),
        ("xml", "rdf"),
        ("xml", "xml"),
        ("xml", "owl"),
        ("pretty-xml", "xml"),
        ("n3", "n3"),
        ("turtle", "ttl"),
        ("nt", "nt"),
        ("trig", "trig"),
        ("nquads", "nq"),
        ("nquads", "nquads"),
        ("trix", "trix"),
    ],
)
def test_get_schema_from_file(
    tmp_path: Path, rdf_format: str, file_extension: str
) -> None:
    """Round-trip the ontology through a local file in each RDF format.

    The schema is first loaded with a CONSTRUCT query, then serialized to
    ``starwars-ontology.<file_extension>`` in ``rdf_format`` and loaded again
    from that file without an explicit format. Both schemas must contain the
    same number of statements.
    """
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    remote_graph = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    remote_schema = Graph().parse(data=remote_graph.get_schema, format="turtle")
    assert len(remote_schema) == expected_number_of_ontology_statements

    # Put the schema into a named context so quad-based formats keep it,
    # then write it out in the format under test.
    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=remote_graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    local_file = tmp_path / ("starwars-ontology." + file_extension)
    dataset.serialize(local_file, format=rdf_format)

    file_graph = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        local_file=str(local_file),
    )
    file_schema = Graph().parse(data=file_graph.get_schema, format="turtle")
    assert len(file_schema) == expected_number_of_ontology_statements
@pytest.mark.parametrize(
    "rdf_format", ["json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads", "trix"]
)
def test_get_schema_from_file_with_explicit_rdf_format(
    tmp_path: Path, rdf_format: str
) -> None:
    """Load the ontology from a ``.txt`` file with the format given explicitly.

    The schema fetched with a CONSTRUCT query is serialized in ``rdf_format``
    to ``starwars-ontology.txt``; loading it back with
    ``local_file_format=rdf_format`` must give the same number of statements.
    """
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    remote_graph = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    remote_schema = Graph().parse(data=remote_graph.get_schema, format="turtle")
    assert len(remote_schema) == expected_number_of_ontology_statements

    # Put the schema into a named context so quad-based formats keep it,
    # then write it out in the format under test.
    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=remote_graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    local_file = tmp_path / "starwars-ontology.txt"
    dataset.serialize(local_file, format=rdf_format)

    file_graph = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        local_file=str(local_file),
        local_file_format=rdf_format,
    )
    file_schema = Graph().parse(data=file_graph.get_schema, format="turtle")
    assert len(file_schema) == expected_number_of_ontology_statements
def test_get_schema_from_file_with_wrong_extension(tmp_path: Path) -> None:
    """A file whose extension does not match its content raises ValueError.

    The ontology is serialized as N-Quads but saved as ``.trig``; constructing
    the graph from that file without an explicit format is expected to fail.
    """
    endpoint = "http://localhost:7200/repositories/langchain"
    expected_number_of_ontology_statements = 19

    remote_graph = OntotextGraphDBGraph(
        query_endpoint=endpoint,
        query_ontology="CONSTRUCT {?s ?p ?o}"
        "FROM <https://swapi.co/ontology/> WHERE {?s ?p ?o}",
    )

    from rdflib import ConjunctiveGraph, Graph

    remote_schema = Graph().parse(data=remote_graph.get_schema, format="turtle")
    assert len(remote_schema) == expected_number_of_ontology_statements

    dataset = ConjunctiveGraph()
    ontology_context = dataset.get_context("https://swapi.co/ontology/")
    ontology_context.parse(data=remote_graph.get_schema, format="turtle")
    assert len(ontology_context) == expected_number_of_ontology_statements
    assert len(dataset) == expected_number_of_ontology_statements

    # Deliberate mismatch: N-Quads content behind a ".trig" file name.
    local_file = tmp_path / "starwars-ontology.trig"
    dataset.serialize(local_file, format="nquads")

    with pytest.raises(ValueError):
        OntotextGraphDBGraph(
            query_endpoint=endpoint,
            local_file=str(local_file),
        )