feat: add document structure into GraphRAG (#2033)

Co-authored-by: Appointat <kuda.czk@antgroup.com>
Co-authored-by: tpoisonooo <khj.application@aliyun.com>
Co-authored-by: vritser <vritser@163.com>
This commit is contained in:
lipengfei
2024-10-18 22:03:08 +08:00
committed by GitHub
parent 811ce63493
commit 88e3d12bd3
29 changed files with 1909 additions and 935 deletions

View File

@@ -1,4 +1,5 @@
import pytest
from dbgpt.datasource.conn_tugraph import TuGraphConnector
# Set database connection parameters.
@@ -21,10 +22,10 @@ def connector():
def test_get_table_names(connector):
"""Test retrieving table names from the graph database."""
table_names = connector.get_table_names()
vertex_tables, edge_tables = connector.get_table_names()
# Verify the quantity of vertex and edge tables.
assert len(table_names["vertex_tables"]) == 5
assert len(table_names["edge_tables"]) == 8
assert len(vertex_tables) == 5
assert len(edge_tables) == 8
def test_get_columns(connector):

View File

@@ -4,6 +4,9 @@ from dbgpt.storage.graph_store.memgraph_store import (
MemoryGraphStore,
MemoryGraphStoreConfig,
)
from dbgpt.storage.knowledge_graph.community.memgraph_store_adapter import (
MemGraphStoreAdapter,
)
@pytest.fixture
@@ -11,31 +14,37 @@ def graph_store():
yield MemoryGraphStore(MemoryGraphStoreConfig())
def test_graph_store(graph_store):
graph_store.insert_triplet("A", "0", "A")
graph_store.insert_triplet("A", "1", "A")
graph_store.insert_triplet("A", "2", "B")
graph_store.insert_triplet("B", "3", "C")
graph_store.insert_triplet("B", "4", "D")
graph_store.insert_triplet("C", "5", "D")
graph_store.insert_triplet("B", "6", "E")
graph_store.insert_triplet("F", "7", "E")
graph_store.insert_triplet("E", "8", "F")
@pytest.fixture
def graph_store_adapter(graph_store: MemoryGraphStore):
    """Yield a MemGraphStoreAdapter wrapping the ``graph_store`` fixture."""
    adapter = MemGraphStoreAdapter(graph_store)
    yield adapter
subgraph = graph_store.explore(["A"])
def test_graph_store(graph_store_adapter: MemGraphStoreAdapter):
graph_store_adapter.insert_triplet("A", "0", "A")
graph_store_adapter.insert_triplet("A", "1", "A")
graph_store_adapter.insert_triplet("A", "2", "B")
graph_store_adapter.insert_triplet("B", "3", "C")
graph_store_adapter.insert_triplet("B", "4", "D")
graph_store_adapter.insert_triplet("C", "5", "D")
graph_store_adapter.insert_triplet("B", "6", "E")
graph_store_adapter.insert_triplet("F", "7", "E")
graph_store_adapter.insert_triplet("E", "8", "F")
subgraph = graph_store_adapter.explore(["A"])
print(f"\n{subgraph.format()}")
assert subgraph.edge_count == 9
graph_store.delete_triplet("A", "0", "A")
graph_store.delete_triplet("B", "4", "D")
subgraph = graph_store.explore(["A"])
graph_store_adapter.delete_triplet("A", "0", "A")
graph_store_adapter.delete_triplet("B", "4", "D")
subgraph = graph_store_adapter.explore(["A"])
print(f"\n{subgraph.format()}")
assert subgraph.edge_count == 7
triplets = graph_store.get_triplets("B")
triplets = graph_store_adapter.get_triplets("B")
print(f"\nTriplets of B: {triplets}")
assert len(triplets) == 2
schema = graph_store.get_schema()
schema = graph_store_adapter.get_schema()
print(f"\nSchema: {schema}")
assert len(schema) == 86

View File

@@ -1,43 +1,52 @@
# test_tugraph_store.py
# test_tugraph_tugraph_store_adapter.py
import pytest
from dbgpt.storage.graph_store.tugraph_store import TuGraphStore, TuGraphStoreConfig
from dbgpt.storage.knowledge_graph.community.tugraph_store_adapter import (
TuGraphStoreAdapter,
)
@pytest.fixture(scope="module")
def store():
config = TuGraphStoreConfig(name="TestGraph", summary_enabled=False)
config = TuGraphStoreConfig(name="TestGraph", enable_summary=False)
store = TuGraphStore(config=config)
yield store
store.conn.close()
def test_insert_and_get_triplets(store):
store.insert_triplet("A", "0", "A")
store.insert_triplet("A", "1", "A")
store.insert_triplet("A", "2", "B")
store.insert_triplet("B", "3", "C")
store.insert_triplet("B", "4", "D")
store.insert_triplet("C", "5", "D")
store.insert_triplet("B", "6", "E")
store.insert_triplet("F", "7", "E")
store.insert_triplet("E", "8", "F")
triplets = store.get_triplets("A")
@pytest.fixture(scope="module")
def tugraph_store_adapter(store: TuGraphStore):
    """Yield a TuGraphStoreAdapter wrapping the module-scoped ``store`` fixture."""
    adapter = TuGraphStoreAdapter(store)
    yield adapter
def test_insert_and_get_triplets(tugraph_store_adapter: TuGraphStoreAdapter):
tugraph_store_adapter.insert_triplet("A", "0", "A")
tugraph_store_adapter.insert_triplet("A", "1", "A")
tugraph_store_adapter.insert_triplet("A", "2", "B")
tugraph_store_adapter.insert_triplet("B", "3", "C")
tugraph_store_adapter.insert_triplet("B", "4", "D")
tugraph_store_adapter.insert_triplet("C", "5", "D")
tugraph_store_adapter.insert_triplet("B", "6", "E")
tugraph_store_adapter.insert_triplet("F", "7", "E")
tugraph_store_adapter.insert_triplet("E", "8", "F")
triplets = tugraph_store_adapter.get_triplets("A")
assert len(triplets) == 2
triplets = store.get_triplets("B")
triplets = tugraph_store_adapter.get_triplets("B")
assert len(triplets) == 3
triplets = store.get_triplets("C")
triplets = tugraph_store_adapter.get_triplets("C")
assert len(triplets) == 1
triplets = store.get_triplets("D")
triplets = tugraph_store_adapter.get_triplets("D")
assert len(triplets) == 0
triplets = store.get_triplets("E")
triplets = tugraph_store_adapter.get_triplets("E")
assert len(triplets) == 1
triplets = store.get_triplets("F")
triplets = tugraph_store_adapter.get_triplets("F")
assert len(triplets) == 1
def test_query(store):
def test_query(store: TuGraphStore):
query = "MATCH (n)-[r]->(n1) return n,n1,r limit 3"
result = store.query(query)
v_c = result.vertex_count
@@ -45,18 +54,18 @@ def test_query(store):
assert v_c == 3 and e_c == 3
def test_explore(store):
def test_explore(tugraph_store_adapter: TuGraphStoreAdapter):
subs = ["A", "B"]
result = store.explore(subs, depth=2, fan=None, limit=10)
result = tugraph_store_adapter.explore(subs, depth=2, fan=None, limit=10)
v_c = result.vertex_count
e_c = result.edge_count
assert v_c == 5 and e_c == 5
def test_delete_triplet(store):
def test_delete_triplet(tugraph_store_adapter: TuGraphStoreAdapter):
subj = "A"
rel = "0"
obj = "B"
store.delete_triplet(subj, rel, obj)
triplets = store.get_triplets(subj)
tugraph_store_adapter.delete_triplet(subj, rel, obj)
triplets = tugraph_store_adapter.get_triplets(subj)
assert len(triplets) == 0

View File

@@ -1,18 +1,27 @@
import pytest
from dbgpt.storage.graph_store.graph import Edge, MemoryGraph, Vertex
from dbgpt.storage.graph_store.tugraph_store import TuGraphStore, TuGraphStoreConfig
from dbgpt.storage.graph_store.graph import MemoryGraph, Edge, Vertex
from dbgpt.storage.knowledge_graph.community.tugraph_store_adapter import (
TuGraphStoreAdapter,
)
@pytest.fixture(scope="module")
def store():
config = TuGraphStoreConfig(name="TestSummaryGraph", summary_enabled=True)
config = TuGraphStoreConfig(name="TestSummaryGraph", enable_summary=True)
store_instance = TuGraphStore(config=config)
yield store_instance
store_instance.conn.close()
def test_insert_graph(store):
@pytest.fixture(scope="module")
def tugraph_store_adapter(store: TuGraphStore):
    """Yield a TuGraphStoreAdapter wrapping the module-scoped ``store`` fixture.

    pytest resolves fixtures by parameter name, and ``test_upsert_graph``
    requests ``tugraph_store_adapter``; the fixture must be registered under
    that exact name or the test errors out with "fixture not found".
    """
    yield TuGraphStoreAdapter(store)


@pytest.fixture(scope="module")
def graph_store_adapter(tugraph_store_adapter: TuGraphStoreAdapter):
    """Alias for ``tugraph_store_adapter`` under the previous fixture name.

    Kept so that any test still requesting ``graph_store_adapter`` receives
    the same adapter instance.
    """
    yield tugraph_store_adapter
def test_upsert_graph(tugraph_store_adapter: TuGraphStoreAdapter):
graph = MemoryGraph()
vertex_list = [
Vertex("A", "A", description="Vertex A", _document_id="Test doc"),
@@ -35,22 +44,22 @@ def test_insert_graph(store):
graph.upsert_vertex(vertex)
for edge in edge_list:
graph.append_edge(edge)
store.insert_graph(graph)
tugraph_store_adapter.upsert_graph(graph)
def test_leiden_query(store):
def test_leiden_query(store: TuGraphStore):
query = "CALL db.plugin.callPlugin('CPP','leiden','{\"leiden_val\":\"_community_id\"}',60.00,false)"
result = store.query(query)
assert result.vertex_count == 1
def test_query_node_and_edge(store):
def test_query_node_and_edge(store: TuGraphStore):
query = 'MATCH (n)-[r]->(m) WHERE n._community_id = "0" RETURN n,r,m'
result = store.query(query)
assert result.vertex_count == 7 and result.edge_count == 6
def test_stream_query_path(store):
def test_stream_query_path(store: TuGraphStore):
query = 'MATCH p=(n)-[r:relation*2]->(m) WHERE n._community_id = "0" RETURN p'
result = store.query(query)
for v in result.vertices():