feat: add document structure into GraphRAG (#2033)

Co-authored-by: Appointat <kuda.czk@antgroup.com>
Co-authored-by: tpoisonooo <khj.application@aliyun.com>
Co-authored-by: vritser <vritser@163.com>
This commit is contained in:
lipengfei
2024-10-18 22:03:08 +08:00
committed by GitHub
parent 811ce63493
commit 88e3d12bd3
29 changed files with 1909 additions and 935 deletions

View File

@@ -1,7 +1,7 @@
"""TuGraph Connector."""
import json
from typing import Dict, Generator, List, cast
from typing import Dict, Generator, List, Tuple, cast
from .base import BaseConnector
@@ -21,8 +21,7 @@ class TuGraphConnector(BaseConnector):
self._session = None
def create_graph(self, graph_name: str) -> None:
"""Create a new graph."""
# run the query to get vertex labels
"""Create a new graph in the database if it doesn't already exist."""
try:
with self._driver.session(database="default") as session:
graph_list = session.run("CALL dbms.graph.listGraphs()").data()
@@ -32,10 +31,10 @@ class TuGraphConnector(BaseConnector):
f"CALL dbms.graph.createGraph('{graph_name}', '', 2048)"
)
except Exception as e:
raise Exception(f"Failed to create graph '{graph_name}': {str(e)}")
raise Exception(f"Failed to create graph '{graph_name}': {str(e)}") from e
def delete_graph(self, graph_name: str) -> None:
"""Delete a graph."""
"""Delete a graph in the database if it exists."""
with self._driver.session(database="default") as session:
graph_list = session.run("CALL dbms.graph.listGraphs()").data()
exists = any(item["graph_name"] == graph_name for item in graph_list)
@@ -61,17 +60,20 @@ class TuGraphConnector(BaseConnector):
"`pip install neo4j`"
) from err
def get_table_names(self) -> Dict[str, List[str]]:
def get_table_names(self) -> Tuple[List[str], List[str]]:
"""Get all table names from the TuGraph by Neo4j driver."""
# run the query to get vertex labels
with self._driver.session(database=self._graph) as session:
v_result = session.run("CALL db.vertexLabels()").data()
v_data = [table_name["label"] for table_name in v_result]
# Run the query to get vertex labels
raw_vertex_labels: Dict[str, str] = session.run(
"CALL db.vertexLabels()"
).data()
vertex_labels = [table_name["label"] for table_name in raw_vertex_labels]
# run the query to get edge labels
e_result = session.run("CALL db.edgeLabels()").data()
e_data = [table_name["label"] for table_name in e_result]
return {"vertex_tables": v_data, "edge_tables": e_data}
# Run the query to get edge labels
raw_edge_labels: Dict[str, str] = session.run("CALL db.edgeLabels()").data()
edge_labels = [table_name["label"] for table_name in raw_edge_labels]
return vertex_labels, edge_labels
def get_grants(self):
"""Get grants."""
@@ -100,7 +102,7 @@ class TuGraphConnector(BaseConnector):
result = session.run(query)
return list(result)
except Exception as e:
raise Exception(f"Query execution failed: {e}")
raise Exception(f"Query execution failed: {e}\nQuery: {query}") from e
def run_stream(self, query: str) -> Generator:
"""Run GQL."""
@@ -109,11 +111,15 @@ class TuGraphConnector(BaseConnector):
yield from result
def get_columns(self, table_name: str, table_type: str = "vertex") -> List[Dict]:
"""Get fields about specified graph.
"""Retrieve the column for a specified vertex or edge table in the graph db.
This function queries the schema of a given table (vertex or edge) and returns
detailed information about its columns (properties).
Args:
table_name (str): table name (graph name)
table_type (str): table type (vertex or edge)
Returns:
columns: List[Dict], which contains name: str, type: str,
default_expression: str, is_in_primary_key: bool, comment: str
@@ -146,8 +152,8 @@ class TuGraphConnector(BaseConnector):
"""Get table indexes about specified table.
Args:
table_name:(str) table name
table_type:(str'vertex' | 'edge'
table_name (str): table name
table_type (str): 'vertex' | 'edge'
Returns:
List[Dict]:eg:[{'name': 'idx_key', 'column_names': ['id']}]
"""