mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-27 04:24:16 +00:00
✨ feat(GraphRAG): enhance GraphRAG by graph community summary (#1801)
Co-authored-by: Florian <fanzhidongyzby@163.com> Co-authored-by: KingSkyLi <15566300566@163.com> Co-authored-by: aries_ckt <916701291@qq.com> Co-authored-by: Fangyin Cheng <staneyffer@gmail.com> Co-authored-by: yvonneyx <zhuyuxin0627@gmail.com>
This commit is contained in:
@@ -36,8 +36,9 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
|
||||
def __init__(self, config: BuiltinKnowledgeGraphConfig):
|
||||
"""Create builtin knowledge graph instance."""
|
||||
self._config = config
|
||||
super().__init__()
|
||||
self._config = config
|
||||
|
||||
self._llm_client = config.llm_client
|
||||
if not self._llm_client:
|
||||
raise ValueError("No llm client provided.")
|
||||
@@ -45,17 +46,19 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
self._model_name = config.model_name
|
||||
self._triplet_extractor = TripletExtractor(self._llm_client, self._model_name)
|
||||
self._keyword_extractor = KeywordExtractor(self._llm_client, self._model_name)
|
||||
self._graph_store_type = (
|
||||
os.getenv("GRAPH_STORE_TYPE", "TuGraph") or config.graph_store_type
|
||||
)
|
||||
self._graph_store = self.__init_graph_store(config)
|
||||
|
||||
def __init_graph_store(self, config) -> GraphStoreBase:
|
||||
def configure(cfg: GraphStoreConfig):
|
||||
cfg.name = self._config.name
|
||||
cfg.embedding_fn = self._config.embedding_fn
|
||||
cfg.name = config.name
|
||||
cfg.embedding_fn = config.embedding_fn
|
||||
|
||||
self._graph_store: GraphStoreBase = GraphStoreFactory.create(
|
||||
self._graph_store_type, configure
|
||||
)
|
||||
graph_store_type = os.getenv("GRAPH_STORE_TYPE") or config.graph_store_type
|
||||
return GraphStoreFactory.create(graph_store_type, configure)
|
||||
|
||||
def get_config(self) -> BuiltinKnowledgeGraphConfig:
|
||||
"""Get the knowledge graph config."""
|
||||
return self._config
|
||||
|
||||
def load_document(self, chunks: List[Chunk]) -> List[str]:
|
||||
"""Extract and persist triplets to graph store."""
|
||||
@@ -113,35 +116,59 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
|
||||
# extract keywords and explore graph store
|
||||
keywords = await self._keyword_extractor.extract(text)
|
||||
subgraph = self._graph_store.explore(keywords, limit=topk)
|
||||
subgraph = self._graph_store.explore(keywords, limit=topk).format()
|
||||
logger.info(f"Search subgraph from {len(keywords)} keywords")
|
||||
|
||||
if not subgraph:
|
||||
return []
|
||||
|
||||
content = (
|
||||
"The following vertices and edges data after [Subgraph Data] "
|
||||
"are retrieved from the knowledge graph based on the keywords:\n"
|
||||
f"Keywords:\n{','.join(keywords)}\n"
|
||||
"The following entities and relationships provided after "
|
||||
"[Subgraph] are retrieved from the knowledge graph "
|
||||
"based on the keywords:\n"
|
||||
f"\"{','.join(keywords)}\".\n"
|
||||
"---------------------\n"
|
||||
"You can refer to the sample vertices and edges to understand "
|
||||
"the real knowledge graph data provided by [Subgraph Data].\n"
|
||||
"Sample vertices:\n"
|
||||
"The following examples after [Entities] and [Relationships] that "
|
||||
"can help you understand the data format of the knowledge graph, "
|
||||
"but do not use them in the answer.\n"
|
||||
"[Entities]:\n"
|
||||
"(alice)\n"
|
||||
"(bob:{age:28})\n"
|
||||
'(carry:{age:18;role:"teacher"})\n\n'
|
||||
"Sample edges:\n"
|
||||
"[Relationships]:\n"
|
||||
"(alice)-[reward]->(alice)\n"
|
||||
'(alice)-[notify:{method:"email"}]->'
|
||||
'(carry:{age:18;role:"teacher"})\n'
|
||||
'(bob:{age:28})-[teach:{course:"math";hour:180}]->(alice)\n'
|
||||
"---------------------\n"
|
||||
f"Subgraph Data:\n{subgraph.format()}\n"
|
||||
f"[Subgraph]:\n{subgraph}\n"
|
||||
)
|
||||
return [Chunk(content=content, metadata=subgraph.schema())]
|
||||
return [Chunk(content=content)]
|
||||
|
||||
def query_graph(self, limit: Optional[int] = None) -> Graph:
|
||||
"""Query graph."""
|
||||
return self._graph_store.get_full_graph(limit)
|
||||
|
||||
def truncate(self) -> List[str]:
|
||||
"""Truncate knowledge graph."""
|
||||
logger.info(f"Truncate graph {self._config.name}")
|
||||
self._graph_store.truncate()
|
||||
|
||||
logger.info("Truncate keyword extractor")
|
||||
self._keyword_extractor.truncate()
|
||||
|
||||
logger.info("Truncate triplet extractor")
|
||||
self._triplet_extractor.truncate()
|
||||
|
||||
return [self._config.name]
|
||||
|
||||
def delete_vector_name(self, index_name: str):
|
||||
"""Delete vector name."""
|
||||
logger.info(f"Remove graph index {index_name}")
|
||||
logger.info(f"Drop graph {index_name}")
|
||||
self._graph_store.drop()
|
||||
|
||||
logger.info("Drop keyword extractor")
|
||||
self._keyword_extractor.drop()
|
||||
|
||||
logger.info("Drop triplet extractor")
|
||||
self._triplet_extractor.drop()
|
||||
|
Reference in New Issue
Block a user