feat(GraphRAG): enhance GraphRAG by graph community summary (#1801)

Co-authored-by: Florian <fanzhidongyzby@163.com>
Co-authored-by: KingSkyLi <15566300566@163.com>
Co-authored-by: aries_ckt <916701291@qq.com>
Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>
Co-authored-by: yvonneyx <zhuyuxin0627@gmail.com>
This commit is contained in:
M1n9X
2024-08-30 21:59:44 +08:00
committed by GitHub
parent 471689ba20
commit 759f7d99cc
59 changed files with 29316 additions and 411 deletions

View File

@@ -40,3 +40,15 @@ def test_get_indexes(connector):
# Get the index information of the vertex table named 'person'.
indexes = connector.get_indexes("person", "vertex")
assert len(indexes) > 0
def test_run_without_stream(connector):
    """Run a ``limit 10`` match through ``connector.run`` and expect ten results."""
    cypher = "MATCH (n) RETURN n limit 10"
    rows = connector.run(cypher)
    assert len(rows) == 10
def test_run_with_stream(connector):
    """Drain ``connector.run_stream`` for a ``limit 10`` match and expect ten items."""
    cypher = "MATCH (n) RETURN n limit 10"
    # Materialise the whole stream so the items can be counted.
    streamed = [*connector.run_stream(cypher)]
    assert len(streamed) == 10

View File

@@ -23,13 +23,13 @@ def test_graph_store(graph_store):
graph_store.insert_triplet("E", "8", "F")
subgraph = graph_store.explore(["A"])
print(f"\n{subgraph.graphviz()}")
print(f"\n{subgraph.format()}")
assert subgraph.edge_count == 9
graph_store.delete_triplet("A", "0", "A")
graph_store.delete_triplet("B", "4", "D")
subgraph = graph_store.explore(["A"])
print(f"\n{subgraph.graphviz()}")
print(f"\n{subgraph.format()}")
assert subgraph.edge_count == 7
triplets = graph_store.get_triplets("B")
@@ -38,4 +38,4 @@ def test_graph_store(graph_store):
schema = graph_store.get_schema()
print(f"\nSchema: {schema}")
assert len(schema) == 138
assert len(schema) == 86

View File

@@ -2,17 +2,12 @@
import pytest
from dbgpt.storage.graph_store.tugraph_store import TuGraphStore
class TuGraphStoreConfig:
def __init__(self, name):
self.name = name
from dbgpt.storage.graph_store.tugraph_store import TuGraphStore, TuGraphStoreConfig
@pytest.fixture(scope="module")
def store():
config = TuGraphStoreConfig(name="TestGraph")
config = TuGraphStoreConfig(name="TestGraph", summary_enabled=False)
store = TuGraphStore(config=config)
yield store
store.conn.close()
@@ -29,7 +24,7 @@ def test_insert_and_get_triplets(store):
store.insert_triplet("F", "7", "E")
store.insert_triplet("E", "8", "F")
triplets = store.get_triplets("A")
assert len(triplets) == 3
assert len(triplets) == 2
triplets = store.get_triplets("B")
assert len(triplets) == 3
triplets = store.get_triplets("C")
@@ -47,7 +42,7 @@ def test_query(store):
result = store.query(query)
v_c = result.vertex_count
e_c = result.edge_count
assert v_c == 2 and e_c == 3
assert v_c == 3 and e_c == 3
def test_explore(store):
@@ -55,13 +50,13 @@ def test_explore(store):
result = store.explore(subs, depth=2, fan=None, limit=10)
v_c = result.vertex_count
e_c = result.edge_count
assert v_c == 2 and e_c == 3
assert v_c == 5 and e_c == 5
# def test_delete_triplet(store):
# subj = "A"
# rel = "0"
# obj = "B"
# store.delete_triplet(subj, rel, obj)
# triplets = store.get_triplets(subj)
# assert len(triplets) == 0
def test_delete_triplet(store):
    """Delete the triplet ("A", "0", "B") and expect no triplets left for "A".

    NOTE(review): an earlier test in this file expects two triplets for "A"
    on the same module-scoped store; confirm that removing a single triplet
    is really expected to leave none.
    """
    subject, relation, target = "A", "0", "B"
    store.delete_triplet(subject, relation, target)
    remaining = store.get_triplets(subject)
    assert len(remaining) == 0

View File

@@ -0,0 +1,58 @@
import pytest
from dbgpt.storage.graph_store.tugraph_store import TuGraphStore, TuGraphStoreConfig
from dbgpt.storage.graph_store.graph import MemoryGraph, Edge, Vertex
@pytest.fixture(scope="module")
def store():
    """Yield a module-scoped ``TuGraphStore`` configured with summaries enabled.

    The store is created for the graph named "TestSummaryGraph"; after the
    fixture is torn down its connection is closed.
    """
    tugraph = TuGraphStore(
        config=TuGraphStoreConfig(name="TestSummaryGraph", summary_enabled=True)
    )
    yield tugraph
    tugraph.conn.close()
def test_insert_graph(store):
    """Insert a seven-vertex chain (A -> B -> ... -> G) via ``store.insert_graph``.

    The graph is assembled in a ``MemoryGraph`` first. The test makes no
    assertion of its own; it passes as long as nothing raises.
    """
    labels = "ABCDEFG"
    graph = MemoryGraph()

    # One vertex per label; id and name are both the label itself.
    vertices = [
        Vertex(label, label, description=f"Vertex {label}", _document_id="Test doc")
        for label in labels
    ]
    # One edge between each pair of consecutive labels, named "<src>-<dst>".
    edges = [
        Edge(src, dst, name=f"{src}-{dst}", description="description of edge")
        for src, dst in zip(labels, labels[1:])
    ]

    for vertex in vertices:
        graph.upsert_vertex(vertex)
    for edge in edges:
        graph.append_edge(edge)

    store.insert_graph(graph)
def test_leiden_query(store):
    """Invoke the 'leiden' CPP plugin through ``store.query``; expect one vertex."""
    leiden_call = "CALL db.plugin.callPlugin('CPP','leiden','{\"leiden_val\":\"_community_id\"}',60.00,false)"
    outcome = store.query(leiden_call)
    assert outcome.vertex_count == 1
def test_query_node_and_edge(store):
    """Match every edge whose source has ``_community_id`` "0".

    The result is expected to hold 7 vertices and 6 edges.
    """
    matched = store.query('MATCH (n)-[r]->(m) WHERE n._community_id = "0" RETURN n,r,m')
    assert matched.vertex_count == 7
    assert matched.edge_count == 6
def test_stream_query_path(store):
    """Query two-hop ``relation`` paths starting in community "0".

    Prints each returned vertex's ``_community_id`` and expects 7 vertices
    and 6 edges in the result.

    NOTE(review): despite "stream" in the name, this goes through
    ``store.query``; confirm whether a streaming call was intended.
    """
    path_query = 'MATCH p=(n)-[r:relation*2]->(m) WHERE n._community_id = "0" RETURN p'
    paths = store.query(path_query)
    for vertex in paths.vertices():
        print(vertex.get_prop("_community_id"))
    assert paths.vertex_count == 7
    assert paths.edge_count == 6