Support text2gql search for GraphRAG (#2227)

Co-authored-by: aries_ckt <916701291@qq.com>
This commit is contained in:
SonglinLyu
2025-01-22 16:30:17 +08:00
committed by GitHub
parent 9336e80f1d
commit e0081e6b0a
23 changed files with 791 additions and 165 deletions

View File

@@ -160,19 +160,20 @@ EXECUTE_LOCAL_COMMANDS=False
#*******************************************************************# #*******************************************************************#
VECTOR_STORE_TYPE=Chroma VECTOR_STORE_TYPE=Chroma
GRAPH_STORE_TYPE=TuGraph GRAPH_STORE_TYPE=TuGraph
KNOWLEDGE_GRAPH_EXTRACT_SEARCH_TOP_SIZE=5 KNOWLEDGE_GRAPH_EXTRACT_SEARCH_TOP_SIZE=5 # the top size of knowledge graph search for triplets
KNOWLEDGE_GRAPH_EXTRACT_SEARCH_RECALL_SCORE=0.3 KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_TOP_SIZE=5 # the top size of knowledge graph similarity search for triplets
KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_TOP_SIZE=5 KNOWLEDGE_GRAPH_CHUNK_SEARCH_TOP_SIZE=5 # the top size of knowledge graph search for chunks
KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_RECALL_SCORE=0.7 KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_TOP_SIZE=20 # the top size of knowledge graph search for community
KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_TOP_SIZE=20
KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_RECALL_SCORE=0.0 KNOWLEDGE_GRAPH_EXTRACT_SEARCH_RECALL_SCORE=0.3 # the recall score of knowledge graph search for triplets
KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_RECALL_SCORE=0.0 # the recall score of knowledge graph search for community
GRAPH_COMMUNITY_SUMMARY_ENABLED=True # enable the graph community summary GRAPH_COMMUNITY_SUMMARY_ENABLED=True # enable the graph community summary
TRIPLET_GRAPH_ENABLED=True # enable the graph search for triplets TRIPLET_GRAPH_ENABLED=True # enable the graph search for triplets
DOCUMENT_GRAPH_ENABLED=True # enable the graph search for documents and chunks DOCUMENT_GRAPH_ENABLED=True # enable the graph search for documents and chunks
SIMILARITY_SEARCH_ENABLED=True # enable the similarity search for entities and chunks SIMILARITY_SEARCH_ENABLED=True # enable the knowledge graph similarity search for entities and chunks
TEXT_SEARCH_ENABLED=False # enable the text search for entities and relations.
KNOWLEDGE_GRAPH_CHUNK_SEARCH_TOP_SIZE=5 # the top size of knowledge graph search for chunks
KNOWLEDGE_GRAPH_EXTRACTION_BATCH_SIZE=20 # the batch size of triplet extraction from the text KNOWLEDGE_GRAPH_EXTRACTION_BATCH_SIZE=20 # the batch size of triplet extraction from the text
COMMUNITY_SUMMARY_BATCH_SIZE=20 # the batch size of parallel community summary process COMMUNITY_SUMMARY_BATCH_SIZE=20 # the batch size of parallel community summary process
KNOWLEDGE_GRAPH_EMBEDDING_BATCH_SIZE=20 # the batch size of embedding from the text KNOWLEDGE_GRAPH_EMBEDDING_BATCH_SIZE=20 # the batch size of embedding from the text

View File

@@ -0,0 +1,10 @@
"""AgenticIntentTranslator class."""
import logging
from dbgpt.rag.transformer.base import TranslatorBase
logger = logging.getLogger(__name__)
class AgenticIntentTranslator(TranslatorBase):
    """AgenticIntentTranslator class.

    Stub subclass of ``TranslatorBase``: it currently defines no attributes
    or methods of its own.
    """

View File

@@ -0,0 +1,10 @@
"""AwelIntentTranslator class."""
import logging
from dbgpt.rag.transformer.base import TranslatorBase
logger = logging.getLogger(__name__)
class AwelIntentTranslator(TranslatorBase):
    """AwelIntentTranslator class.

    Stub subclass of ``TranslatorBase``: it currently defines no attributes
    or methods of its own.
    """

View File

@@ -2,7 +2,7 @@
import logging import logging
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List, Optional from typing import Dict, List, Optional
from tenacity import retry, stop_after_attempt, wait_fixed from tenacity import retry, stop_after_attempt, wait_fixed
@@ -73,3 +73,7 @@ class ExtractorBase(TransformerBase, ABC):
class TranslatorBase(TransformerBase, ABC): class TranslatorBase(TransformerBase, ABC):
"""Translator base class.""" """Translator base class."""
@abstractmethod
async def translate(self, text: str) -> Dict:
    """Translate results from text.

    Args:
        text: The input text to translate.

    Returns:
        A dictionary holding the translation result; which keys it
        contains is decided by the concrete subclass.
    """

View File

@@ -54,6 +54,10 @@ class LLMExtractor(ExtractorBase, ABC):
self, text: str, history: str = None, limit: Optional[int] = None self, text: str, history: str = None, limit: Optional[int] = None
) -> List: ) -> List:
"""Inner extract by LLM.""" """Inner extract by LLM."""
# Validate the optional limit before spending an LLM request on it.
# The exception has to be raised: merely constructing ValueError(...) is a
# no-op and lets an invalid limit flow through to response parsing.
if limit and limit < 1:
    raise ValueError("optional argument limit >= 1")
template = HumanPromptTemplate.from_template(self._prompt_template) template = HumanPromptTemplate.from_template(self._prompt_template)
messages = ( messages = (
@@ -80,8 +84,6 @@ class LLMExtractor(ExtractorBase, ABC):
logger.error(f"request llm failed ({code}) {reason}") logger.error(f"request llm failed ({code}) {reason}")
return [] return []
if limit and limit < 1:
ValueError("optional argument limit >= 1")
return self._parse_response(response.text, limit) return self._parse_response(response.text, limit)
def truncate(self): def truncate(self):

View File

@@ -0,0 +1,61 @@
"""LLMTranslator class."""
import logging
from abc import ABC, abstractmethod
from typing import Dict, List
from dbgpt.core import BaseMessage, LLMClient, ModelMessage, ModelRequest
from dbgpt.rag.transformer.base import TranslatorBase
logger = logging.getLogger(__name__)
class LLMTranslator(TranslatorBase, ABC):
    """LLMTranslator class.

    Shared plumbing for translators that delegate to an LLM. Subclasses turn
    the input text into chat messages (``_format_messages``) and turn the raw
    model output into a dictionary (``_parse_response``); this class sends
    the request in between.
    """

    def __init__(self, llm_client: LLMClient, model_name: str, prompt_template: str):
        """Initialize the LLMTranslator.

        Args:
            llm_client: Client used to list models and send generation
                requests.
            model_name: Name of the model to use. When empty, the first model
                reported by the client is selected on the first request.
            prompt_template: Prompt template kept for subclasses to render.
        """
        self._llm_client = llm_client
        self._model_name = model_name
        self._prompt_template = prompt_template

    async def translate(self, text: str) -> Dict:
        """Translate by LLM.

        Args:
            text: The input text handed to ``_format_messages``.

        Returns:
            The dictionary produced by ``_parse_response``, or an empty
            dictionary when the LLM request fails.
        """
        messages = self._format_messages(text)
        return await self._translate(messages)

    async def _translate(self, messages: List[BaseMessage]) -> Dict:
        """Inner translate by LLM.

        Args:
            messages: Prompt messages to send to the model.

        Returns:
            The parsed response, or ``{}`` if the request was unsuccessful.

        Raises:
            Exception: If no model name was configured and the client
                reports no available models.
        """
        # use default model if needed
        if not self._model_name:
            models = await self._llm_client.models()
            if not models:
                raise Exception("No models available")
            # Remember the choice so later calls skip the model listing.
            self._model_name = models[0].model
            logger.info(f"Using model {self._model_name} to translate")

        model_messages = ModelMessage.from_base_messages(messages)
        request = ModelRequest(model=self._model_name, messages=model_messages)
        response = await self._llm_client.generate(request=request)

        if not response.success:
            code = str(response.error_code)
            reason = response.text
            logger.error(f"request llm failed ({code}) {reason}")
            return {}

        return self._parse_response(response.text)

    def truncate(self):
        """Do nothing by default."""

    def drop(self):
        """Do nothing by default."""

    @abstractmethod
    def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
        """Format the prompt messages sent to the LLM.

        Args:
            text: The input text to translate.
            history: Optional history; ``translate`` never passes it.

        Returns:
            The messages that make up the LLM request.
        """

    @abstractmethod
    def _parse_response(self, text: str) -> Dict:
        """Parse llm response.

        Args:
            text: Raw text returned by the model.

        Returns:
            The translation result as a dictionary.
        """

View File

@@ -0,0 +1,108 @@
"""SimpleIntentTranslator class."""
import json
import logging
import re
from typing import Dict, List, Union
from dbgpt.core import BaseMessage, HumanPromptTemplate, LLMClient
from dbgpt.rag.transformer.llm_translator import LLMTranslator
# Few-shot prompt for intent classification and question rewriting.
# Literal JSON braces are doubled so they survive template formatting; every
# example below must render to valid JSON, because the model's answer is
# parsed as JSON and it tends to imitate the examples verbatim.
INTENT_INTERPRET_PT = """
A question is provided below. Given the question, analyze and classify it into one of the following categories:
1. Single Entity Search: search for the detail of the given entity.
2. One Hop Entity Search: given one entity and one relation, search for all entities that have the relation with the given entity.
3. One Hop Relation Search: given two entities, search for the relation between them.
4. Two Hop Entity Search: given one entity and one relation, break that relation into two consecutive relations, then search all entities that have the two hop relation with the given entity.
5. Freestyle Question: questions that are not in above four categories. Search all related entities and two-hop subgraphs centered on them.
After classifying the given question, rewrite the question in a graph query language style, then return the category of the given question and the rewritten question in json format.
Also return entities and relations that might be used for query generation in json format. Here are some examples to guide your classification:
---------------------
Example:
Question: Introduce TuGraph.
Return:
{{"category": "Single Entity Search", "rewritten_question": "Query the entity named TuGraph then return the entity.", "entities": ["TuGraph"], "relations": []}}
Question: Who commits code to TuGraph.
Return:
{{"category": "One Hop Entity Search", "rewritten_question": "Query all one hop paths that have an entity named TuGraph and a relation named commit, then return them.", "entities": ["TuGraph"], "relations": ["commit"]}}
Question: What is the relation between Alex and TuGraph?
Return:
{{"category": "One Hop Relation Search", "rewritten_question": "Query all one hop paths between the entity named Alex and the entity named TuGraph, then return them.", "entities": ["Alex", "TuGraph"], "relations": []}}
Question: Who is the colleague of Bob?
Return:
{{"category": "Two Hop Entity Search", "rewritten_question": "Query all entities that have a two hop path between them and the entity named Bob, both entities should have a work for relation with the middle entity.", "entities": ["Bob"], "relations": ["work for"]}}
Question: Introduce TuGraph and DBGPT separately.
Return:
{{"category": "Freestyle Question", "rewritten_question": "Query the entity named TuGraph and the entity named DBGPT, then return two-hop subgraphs centered on them.", "entities": ["TuGraph", "DBGPT"], "relations": []}}
---------------------
Question: {text}
Return:
"""  # noqa: E501
logger = logging.getLogger(__name__)
class SimpleIntentTranslator(LLMTranslator):
    """SimpleIntentTranslator class.

    Classifies a user question and rewrites it in a graph-query-friendly
    form using a single prompt (``INTENT_INTERPRET_PT``).
    """

    def __init__(self, llm_client: LLMClient, model_name: str):
        """Initialize the SimpleIntentTranslator.

        Args:
            llm_client: Client used to send the intent prompt.
            model_name: Name of the model to use.
        """
        super().__init__(llm_client, model_name, INTENT_INTERPRET_PT)

    def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
        """Render the intent prompt for the given question.

        Args:
            text: The user question.
            history: Optional history, forwarded to the template only when it
                is not None.

        Returns:
            The messages produced by the prompt template.
        """
        # Interpret the intention with a single prompt only.
        template = HumanPromptTemplate.from_template(self._prompt_template)
        messages: List[BaseMessage] = (
            template.format_messages(text=text, history=history)
            if history is not None
            else template.format_messages(text=text)
        )
        return messages

    def truncate(self):
        """Do nothing by default."""

    def drop(self):
        """Do nothing by default."""

    def _parse_response(self, text: str) -> Dict:
        """Parse llm response.

        The first JSON object found in the response is used; a fenced
        json code block takes precedence when present. Keys missing from
        the response are filled with empty defaults, and a response that
        contains no decodable JSON object yields defaults only instead of
        raising.

        The returned dictionary contains the following content.
        {
            "category": "Type of the given question.",
            "original_question": "The original question provided by user.",
            "rewritten_question": "Rewritten question in graph query language style.",
            "entities": ["entities", "that", "might", "be", "used", "in", "query"],
            "relations": ["relations", "that", "might", "be", "used", "in", "query"]
        }
        """  # noqa: E501
        code_block_pattern = re.compile(r"```json(.*?)```", re.S)
        fenced_blocks = code_block_pattern.findall(text)
        if fenced_blocks:
            text = fenced_blocks[0]

        intention: Dict[str, Union[str, List[str]]] = {}
        # Let the JSON decoder find the end of the object: unlike a
        # non-greedy "{.*?}" regex it copes with "}" inside string values and
        # with nested objects. Try each "{" in turn until one decodes.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                intention, _ = decoder.raw_decode(text, start)
                break
            except json.JSONDecodeError:
                start = text.find("{", start + 1)
        else:
            logger.warning("No JSON object found in the intent response")

        intention.setdefault("category", "")
        intention.setdefault("original_question", "")
        intention.setdefault("rewritten_question", "")
        intention.setdefault("entities", [])
        intention.setdefault("relations", [])

        return intention

View File

@@ -1,10 +0,0 @@
"""Text2Cypher class."""
import logging
from dbgpt.rag.transformer.base import TranslatorBase
logger = logging.getLogger(__name__)
class Text2Cypher(TranslatorBase):
"""Text2Cypher class."""

View File

@@ -1,10 +1,106 @@
"""Text2GQL class.""" """Text2GQL class."""
import json
import logging import logging
import re
from typing import Dict, List, Union
from dbgpt.rag.transformer.base import TranslatorBase from dbgpt.core import BaseMessage, HumanPromptTemplate, LLMClient
from dbgpt.rag.transformer.llm_translator import LLMTranslator
# Few-shot prompt for translating a rewritten question into a cypher query.
# Every example uses the same five-part layout as the final slot (Question,
# Category, entities, relations, Query) so the model sees one consistent shape.
TEXT_TO_GQL_PT = """
A question written in graph query language style is provided below. The category of this question, entities and relations that might be used in the cypher query are also provided.
Given the question, translate the question into a cypher query that can be executed on the given knowledge graph. Make sure the syntax of the translated cypher query is correct.
To help query generation, the schema of the knowledge graph is:
{schema}
---------------------
Example:
Question: Query the entity named TuGraph then return the entity.
Category: Single Entity Search
entities: ["TuGraph"]
relations: []
Query:
MATCH (n) WHERE n.id="TuGraph" RETURN n
Question: Query all one hop paths between the entity named Alex and the entity named TuGraph, then return them.
Category: One Hop Relation Search
entities: ["Alex", "TuGraph"]
relations: []
Query:
MATCH p=(n)-[r]-(m) WHERE n.id="Alex" AND m.id="TuGraph" RETURN p
Question: Query all entities that have a two hop path between them and the entity named Bob, both entities should have a work for relation with the middle entity.
Category: Two Hop Entity Search
entities: ["Bob"]
relations: ["work for"]
Query:
MATCH p=(n)-[r1]-(m)-[r2]-(l) WHERE n.id="Bob" AND r1.id="work for" AND r2.id="work for" RETURN p
Question: Query the entity named TuGraph and the entity named DBGPT, then return two-hop subgraphs centered on them.
Category: Freestyle Question
entities: ["TuGraph", "DBGPT"]
relations: []
Query:
MATCH p=(n)-[r:relation*2]-(m) WHERE n.id IN ["TuGraph", "DBGPT"] RETURN p
---------------------
Question: {question}
Category: {category}
entities: {entities}
relations: {relations}
Query:
"""  # noqa: E501
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Text2GQL(TranslatorBase): class Text2GQL(LLMTranslator):
"""Text2GQL class.""" """Text2GQL class."""
def __init__(self, llm_client: LLMClient, model_name: str):
    """Initialize the Text2GQL.

    Args:
        llm_client: Client used to send the text2gql prompt.
        model_name: Name of the model to use.
    """
    # The prompt template is fixed to TEXT_TO_GQL_PT for this translator.
    super().__init__(llm_client, model_name, TEXT_TO_GQL_PT)
def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
    """Build the text2gql prompt messages from a JSON-encoded intention.

    Args:
        text: JSON object string. The keys ``rewritten_question``,
            ``category``, ``entities``, ``relations`` and ``schema`` are
            read, each falling back to an empty string when absent.
        history: Optional history, forwarded to the template only when it
            is not None.

    Returns:
        The messages produced by the prompt template.
    """
    # Translate the intention to gql with a single prompt only.
    intention: Dict[str, Union[str, List[str]]] = json.loads(text)
    template_args = {
        "schema": intention.get("schema", ""),
        "question": intention.get("rewritten_question", ""),
        "category": intention.get("category", ""),
        "entities": intention.get("entities", ""),
        "relations": intention.get("relations", ""),
    }
    if history is not None:
        template_args["history"] = history

    prompt = HumanPromptTemplate.from_template(self._prompt_template)
    return prompt.format_messages(**template_args)
def _parse_response(self, text: str) -> Dict:
"""Parse llm response."""
translation: Dict[str, str] = {}
query = ""
code_block_pattern = re.compile(r"```cypher(.*?)```", re.S)
result = re.findall(code_block_pattern, text)
if result:
query = result[0]
else:
query = text
translation["query"] = query.strip()
return translation

View File

@@ -294,3 +294,11 @@ class CommunityMetastore(ABC):
@abstractmethod @abstractmethod
def drop(self): def drop(self):
"""Drop community metastore.""" """Drop community metastore."""
class GraphSyntaxValidator(ABC):
    """Graph Syntax Validator.

    Abstract interface for checking the syntax of a graph query string.
    """

    @abstractmethod
    def validate(self, query: str) -> bool:
        """Validate query syntax.

        Args:
            query: The graph query statement to check.

        Returns:
            The validation result as a boolean (presumably True when the
            syntax is valid -- confirm against implementations).
        """

View File

@@ -890,21 +890,19 @@ class TuGraphStoreAdapter(GraphStoreAdapter):
def query(self, query: str, **kwargs) -> MemoryGraph: def query(self, query: str, **kwargs) -> MemoryGraph:
"""Execute a query on graph. """Execute a query on graph.
white_list: List[str] = kwargs.get("white_list", []), which contains the white white_list: List[str] = kwargs.get("white_list", []),
list of properties and filters the properties that are not in the white list. which contains the white list of properties that start with '_',
filters all the properties that start with '_' and are not in the white list.
white_list: List of properties to keep
- If []: Keep default properties (those not starting with '_'
and not in ['id', 'name'])
- If list of strings: Keep default properties
(those not starting with '_' and not in ['id', 'name'])
and properties in white_list
""" """
query_result = self.graph_store.conn.run(query=query) query_result = self.graph_store.conn.run(query=query)
white_list: List[str] = kwargs.get( white_list: List[str] = kwargs.get("white_list", [])
"white_list",
[
"id",
"name",
"description",
"_document_id",
"_chunk_id",
"_community_id",
],
)
vertices, edges = self._get_nodes_edges_from_queried_data( vertices, edges = self._get_nodes_edges_from_queried_data(
query_result, white_list query_result, white_list
) )
@@ -1016,7 +1014,7 @@ class TuGraphStoreAdapter(GraphStoreAdapter):
from neo4j import graph from neo4j import graph
def filter_properties( def filter_properties(
properties: dict[str, Any], white_list: Optional[List[str]] = None properties: dict[str, Any], white_list: List[str]
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Filter the properties. """Filter the properties.
@@ -1028,23 +1026,18 @@ class TuGraphStoreAdapter(GraphStoreAdapter):
Args: Args:
properties: Dictionary of properties to filter properties: Dictionary of properties to filter
white_list: List of properties to keep white_list: List of properties to keep
- If None: Keep default properties (those not starting with '_' - If [""]: Keep default properties (those not starting with '_'
and not in ['id', 'name']) and not in ['id', 'name'])
- If [""]: Remove all properties (return empty dict)
- If list of strings: Keep only properties in white_list - If list of strings: Keep only properties in white_list
""" """
return ( return {
{}
if white_list == [""]
else {
key: value key: value
for key, value in properties.items() for key, value in properties.items()
if ( if (
(not key.startswith("_") and key not in ["id", "name"]) (not key.startswith("_") and key not in ["id", "name"])
or (white_list is not None and key in white_list) or (key in white_list)
) )
} }
)
# Parse the data to nodes and relationships # Parse the data to nodes and relationships
for record in data: for record in data:

View File

@@ -3,7 +3,7 @@
import logging import logging
import os import os
import uuid import uuid
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple
from dbgpt._private.pydantic import ConfigDict, Field from dbgpt._private.pydantic import ConfigDict, Field
from dbgpt.core import Chunk, LLMClient from dbgpt.core import Chunk, LLMClient
@@ -14,6 +14,7 @@ from dbgpt.rag.transformer.graph_extractor import GraphExtractor
from dbgpt.rag.transformer.text_embedder import TextEmbedder from dbgpt.rag.transformer.text_embedder import TextEmbedder
from dbgpt.storage.knowledge_graph.base import ParagraphChunk from dbgpt.storage.knowledge_graph.base import ParagraphChunk
from dbgpt.storage.knowledge_graph.community.community_store import CommunityStore from dbgpt.storage.knowledge_graph.community.community_store import CommunityStore
from dbgpt.storage.knowledge_graph.graph_retriever.graph_retriever import GraphRetriever
from dbgpt.storage.knowledge_graph.knowledge_graph import ( from dbgpt.storage.knowledge_graph.knowledge_graph import (
GRAPH_PARAMETERS, GRAPH_PARAMETERS,
BuiltinKnowledgeGraph, BuiltinKnowledgeGraph,
@@ -205,6 +206,10 @@ class CommunitySummaryKnowledgeGraphConfig(BuiltinKnowledgeGraphConfig):
default=0.7, default=0.7,
description="Recall score of similarity search", description="Recall score of similarity search",
) )
enable_text_search: bool = Field(
default=False,
description="Enable text2gql search or not.",
)
@register_resource( @register_resource(
@@ -264,12 +269,6 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
if "TRIPLET_GRAPH_ENABLED" in os.environ if "TRIPLET_GRAPH_ENABLED" in os.environ
else config.triplet_graph_enabled else config.triplet_graph_enabled
) )
self._knowledge_graph_chunk_search_top_size = int(
os.getenv(
"KNOWLEDGE_GRAPH_CHUNK_SEARCH_TOP_SIZE",
config.knowledge_graph_chunk_search_top_size,
)
)
self._triplet_extraction_batch_size = int( self._triplet_extraction_batch_size = int(
os.getenv( os.getenv(
"KNOWLEDGE_GRAPH_EXTRACTION_BATCH_SIZE", "KNOWLEDGE_GRAPH_EXTRACTION_BATCH_SIZE",
@@ -288,18 +287,6 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
config.community_summary_batch_size, config.community_summary_batch_size,
) )
) )
self._similarity_search_topk = int(
os.getenv(
"KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_TOP_SIZE",
config.similarity_search_topk,
)
)
self._similarity_search_score_threshold = float(
os.getenv(
"KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_RECALL_SCORE",
config.similarity_search_score_threshold,
)
)
def extractor_configure(name: str, cfg: VectorStoreConfig): def extractor_configure(name: str, cfg: VectorStoreConfig):
cfg.name = name cfg.name = name
@@ -335,7 +322,7 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
cfg.score_threshold = self._community_score_threshold cfg.score_threshold = self._community_score_threshold
self._community_store = CommunityStore( self._community_store = CommunityStore(
self._graph_store_apdater, self._graph_store_adapter,
CommunitySummarizer(self._llm_client, self._model_name), CommunitySummarizer(self._llm_client, self._model_name),
VectorStoreFactory.create( VectorStoreFactory.create(
self._vector_store_type, self._vector_store_type,
@@ -344,6 +331,11 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
), ),
) )
self._graph_retriever = GraphRetriever(
config,
self._graph_store_adapter,
)
def get_config(self) -> BuiltinKnowledgeGraphConfig: def get_config(self) -> BuiltinKnowledgeGraphConfig:
"""Get the knowledge graph config.""" """Get the knowledge graph config."""
return self._config return self._config
@@ -351,7 +343,7 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
async def aload_document(self, chunks: List[Chunk]) -> List[str]: async def aload_document(self, chunks: List[Chunk]) -> List[str]:
"""Extract and persist graph from the document file.""" """Extract and persist graph from the document file."""
if not self.vector_name_exists(): if not self.vector_name_exists():
self._graph_store_apdater.create_graph(self.get_config().name) self._graph_store_adapter.create_graph(self.get_config().name)
await self._aload_document_graph(chunks) await self._aload_document_graph(chunks)
await self._aload_triplet_graph(chunks) await self._aload_triplet_graph(chunks)
await self._community_store.build_communities( await self._community_store.build_communities(
@@ -386,20 +378,20 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
chunk.embedding = embeddings[idx] chunk.embedding = embeddings[idx]
# upsert the document and chunks vertices # upsert the document and chunks vertices
self._graph_store_apdater.upsert_documents(iter([documment_chunk])) self._graph_store_adapter.upsert_documents(iter([documment_chunk]))
self._graph_store_apdater.upsert_chunks(iter(paragraph_chunks)) self._graph_store_adapter.upsert_chunks(iter(paragraph_chunks))
# upsert the document structure # upsert the document structure
for chunk_index, chunk in enumerate(paragraph_chunks): for chunk_index, chunk in enumerate(paragraph_chunks):
# document -> include -> chunk # document -> include -> chunk
if chunk.parent_is_document: if chunk.parent_is_document:
self._graph_store_apdater.upsert_doc_include_chunk(chunk=chunk) self._graph_store_adapter.upsert_doc_include_chunk(chunk=chunk)
else: # chunk -> include -> chunk else: # chunk -> include -> chunk
self._graph_store_apdater.upsert_chunk_include_chunk(chunk=chunk) self._graph_store_adapter.upsert_chunk_include_chunk(chunk=chunk)
# chunk -> next -> chunk # chunk -> next -> chunk
if chunk_index >= 1: if chunk_index >= 1:
self._graph_store_apdater.upsert_chunk_next_chunk( self._graph_store_adapter.upsert_chunk_next_chunk(
chunk=paragraph_chunks[chunk_index - 1], next_chunk=chunk chunk=paragraph_chunks[chunk_index - 1], next_chunk=chunk
) )
@@ -441,12 +433,12 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
graph.append_edge(edge=edge) graph.append_edge(edge=edge)
# Upsert the graph # Upsert the graph
self._graph_store_apdater.upsert_graph(graph) self._graph_store_adapter.upsert_graph(graph)
# chunk -> include -> entity # chunk -> include -> entity
if document_graph_enabled: if document_graph_enabled:
for vertex in graph.vertices(): for vertex in graph.vertices():
self._graph_store_apdater.upsert_chunk_include_entity( self._graph_store_adapter.upsert_chunk_include_entity(
chunk=chunks[idx], entity=vertex chunk=chunks[idx], entity=vertex
) )
@@ -520,90 +512,22 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
] ]
context = "\n".join(summaries) if summaries else "" context = "\n".join(summaries) if summaries else ""
enable_similarity_search = self._graph_store.enable_similarity_search subgraph, (
subgraph_for_doc,
subgraph = None text2gql_query,
subgraph_for_doc = None ) = await self._graph_retriever.retrieve(text)
# Local search: extract keywords and explore subgraph
triplet_graph_enabled = self._triplet_graph_enabled
document_graph_enabled = self._document_graph_enabled
# Using subs to transfer keywords or embeddings
# Using subs to transfer keywords
keywords: List[str] = await self._keyword_extractor.extract(text)
# If enable similarity search, using subs to transfer embeddings
subs: Union[List[str], List[List[float]]]
if enable_similarity_search:
# Embedding the question
vector = await self._text_embedder.embed(text)
# Embedding the keywords
vectors = await self._text_embedder.batch_embed(
keywords, batch_size=self._triplet_embedding_batch_size
)
# Using the embeddings of keywords and question
vectors.append(vector)
subs = vectors
else:
subs = keywords
# If enable triplet graph, using subs to search enetities
# subs -> enetities
if triplet_graph_enabled:
subgraph = self._graph_store_apdater.explore_trigraph(
subs=subs,
limit=topk,
topk=self._similarity_search_topk,
score_threshold=self._similarity_search_score_threshold,
)
# If enabled document graph
if document_graph_enabled:
# If not enable triplet graph or subgraph is None
# Using subs to search chunks
# subs -> chunks -> doc
if subgraph is None or subgraph.vertex_count == 0:
subgraph_for_doc = (
self._graph_store_apdater.explore_docgraph_without_entities(
subs=subs,
topk=self._similarity_search_topk,
score_threshold=self._similarity_search_score_threshold,
limit=self._knowledge_graph_chunk_search_top_size,
)
)
else:
# If there are searched entities
# Append the vids of entities
# VID is the KEYWORD which stores in entity
keywords_for_document_graph = keywords
for vertex in subgraph.vertices():
keywords_for_document_graph.append(vertex.name)
# Using the vids to search chunks and doc
# entities -> chunks -> doc
subgraph_for_doc = (
self._graph_store_apdater.explore_docgraph_with_entities(
subs=keywords_for_document_graph,
topk=self._similarity_search_topk,
score_threshold=self._similarity_search_score_threshold,
limit=self._knowledge_graph_chunk_search_top_size,
)
)
knowledge_graph_str = subgraph.format() if subgraph else "" knowledge_graph_str = subgraph.format() if subgraph else ""
knowledge_graph_for_doc_str = ( knowledge_graph_for_doc_str = (
subgraph_for_doc.format() if subgraph_for_doc else "" subgraph_for_doc.format() if subgraph_for_doc else ""
) )
logger.info(f"Search subgraph from the following keywords:\n{len(keywords)}")
if not (summaries or knowledge_graph_str or knowledge_graph_for_doc_str): if not (summaries or knowledge_graph_str or knowledge_graph_for_doc_str):
return [] return []
# merge search results into context # merge search results into context
content = HYBRID_SEARCH_PT.format( content = HYBRID_SEARCH_PT.format(
context=context, context=context,
query=text2gql_query,
knowledge_graph=knowledge_graph_str, knowledge_graph=knowledge_graph_str,
knowledge_graph_for_doc=knowledge_graph_for_doc_str, knowledge_graph_for_doc=knowledge_graph_for_doc_str,
) )
@@ -644,11 +568,14 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
HYBRID_SEARCH_PT = """ HYBRID_SEARCH_PT = """
===== =====
The following information from [Context], [Knowledge Graph], and [Original Text From RAG] can help you answer user questions better. The following information from [Context], [Graph Query Statement], [Knowledge Graph], and [Original Text From RAG] can help you answer user questions better.
[Context]: [Context]:
{context} {context}
[Graph Query Statement]:
{query}
[Knowledge Graph]: [Knowledge Graph]:
{knowledge_graph} {knowledge_graph}
@@ -715,9 +642,15 @@ answering the user's questions accurately and appropriately, and ensuring that n
- Extract supporting evidence and examples - Extract supporting evidence and examples
- Resolve conflicts between sources using this as primary reference - Resolve conflicts between sources using this as primary reference
4. Original Graph Query [Graph Query Statement]
- The graph query statement used if text2gql translation is successful
- Graph query will be empty if the translation failed
- Use the markdown code block format to highlight the graph query statement if the statement is not empty
### Output Format ### Output Format
1. Answer Structure 1. Answer Structure
- Lead with synthesized core information - Lead with a markdown code block to highlight the original cypher query statement from [Graph Query Statement] if it's not empty
- Demonstrate synthesized core information
- Support with specific references to sources - Support with specific references to sources
- Include relevant entity-relationship pairs - Include relevant entity-relationship pairs
- Conclude with confidence assessment - Conclude with confidence assessment

View File

@@ -0,0 +1 @@
"""Module for Graph Retriever."""

View File

@@ -0,0 +1,17 @@
"""Graph retriever base class."""
import logging
from abc import ABC, abstractmethod
from typing import Any, Tuple
from dbgpt.storage.graph_store.graph import Graph
logger = logging.getLogger(__name__)
class GraphRetrieverBase(ABC):
    """Graph retriever base class.

    Abstract interface for components that retrieve a graph for some input.
    """

    @abstractmethod
    async def retrieve(self, input: Any) -> Tuple[Graph, Any]:
        """Retrieve from graph database.

        Args:
            input: The retrieval input; its concrete type is chosen by the
                subclass.

        Returns:
            A tuple of the retrieved graph and subclass-specific extra data.
        """

View File

@@ -0,0 +1,58 @@
"""Document Based Graph Retriever."""
import logging
from typing import List, Tuple, Union
from dbgpt.storage.graph_store.graph import Graph
from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase
logger = logging.getLogger(__name__)
class DocumentGraphRetriever(GraphRetrieverBase):
    """Document Graph retriever class.

    Looks up chunks and documents through the graph store adapter, either
    starting from the entities of an already retrieved triplet subgraph or
    directly from the given search inputs.
    """

    def __init__(
        self,
        graph_store_adapter,
        document_topk,
        similarity_search_topk,
        similarity_search_score_threshold,
    ):
        """Initialize Document Graph Retriever.

        Args:
            graph_store_adapter: Adapter providing the ``explore_docgraph_*``
                operations.
            document_topk: Passed as ``limit`` to the adapter.
            similarity_search_topk: Passed as ``topk`` to the adapter.
            similarity_search_score_threshold: Passed as ``score_threshold``
                to the adapter.
        """
        self._graph_store_adapter = graph_store_adapter
        self._document_topk = document_topk
        self._similarity_search_topk = similarity_search_topk
        self._similarity_search_score_threshold = similarity_search_score_threshold

    async def retrieve(
        self, input: Union[Graph, List[str], List[List[float]]]
    ) -> Tuple[Graph, None]:
        """Retrieve from document graph.

        Args:
            input: Either a triplet subgraph, whose vertex names are used as
                the search keys, or a list of search inputs passed to the
                adapter unchanged.

        Returns:
            The document subgraph returned by the adapter.

        NOTE(review): the annotation promises ``Tuple[Graph, None]`` but the
        graph itself is returned, not a tuple -- confirm with the callers
        before aligning either side.
        """
        adapter = self._graph_store_adapter

        if not isinstance(input, Graph):
            # No triplet subgraph available: search chunks directly.
            # subs -> chunks -> doc
            return adapter.explore_docgraph_without_entities(
                subs=input,
                topk=self._similarity_search_topk,
                score_threshold=self._similarity_search_score_threshold,
                limit=self._document_topk,
            )

        # A triplet subgraph was retrieved: use its vertex names as keys.
        # entities -> chunks -> doc
        entity_names = [vertex.name for vertex in input.vertices()]
        return adapter.explore_docgraph_with_entities(
            subs=entity_names,
            topk=self._similarity_search_topk,
            score_threshold=self._similarity_search_score_threshold,
            limit=self._document_topk,
        )

View File

@@ -0,0 +1,177 @@
"""Graph Retriever."""
import logging
import os
from typing import List, Tuple, Union
from dbgpt.rag.transformer.keyword_extractor import KeywordExtractor
from dbgpt.rag.transformer.text_embedder import TextEmbedder
from dbgpt.storage.graph_store.graph import Graph, MemoryGraph
from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase
from dbgpt.storage.knowledge_graph.graph_retriever.document_graph_retriever import (
DocumentGraphRetriever,
)
from dbgpt.storage.knowledge_graph.graph_retriever.keyword_based_graph_retriever import ( # noqa: E501
KeywordBasedGraphRetriever,
)
from dbgpt.storage.knowledge_graph.graph_retriever.text_based_graph_retriever import (
TextBasedGraphRetriever,
)
from dbgpt.storage.knowledge_graph.graph_retriever.vector_based_graph_retriever import (
VectorBasedGraphRetriever,
)
logger = logging.getLogger(__name__)
class GraphRetriever(GraphRetrieverBase):
    """Graph Retriever class.

    Combines the text based (text2gql), keyword based, vector based and
    document graph retrievers behind a single ``retrieve`` entry point.
    """

    def __init__(
        self,
        config,
        graph_store_adapter,
    ):
        """Initialize Graph Retriever.

        Most settings are read from an environment variable when it is set
        and from ``config`` otherwise.

        Args:
            config: Knowledge graph configuration; supplies the fallback
                values, the LLM client, the model name and the embedding
                function.
            graph_store_adapter: Adapter shared by all the sub-retrievers;
                its graph store decides whether similarity search is used.
        """
        self._triplet_graph_enabled = (
            os.environ["TRIPLET_GRAPH_ENABLED"].lower() == "true"
            if "TRIPLET_GRAPH_ENABLED" in os.environ
            else config.triplet_graph_enabled
        )
        self._document_graph_enabled = (
            os.environ["DOCUMENT_GRAPH_ENABLED"].lower() == "true"
            if "DOCUMENT_GRAPH_ENABLED" in os.environ
            else config.document_graph_enabled
        )
        triplet_topk = int(
            os.getenv("KNOWLEDGE_GRAPH_EXTRACT_SEARCH_TOP_SIZE", config.extract_topk)
        )
        document_topk = int(
            os.getenv(
                "KNOWLEDGE_GRAPH_CHUNK_SEARCH_TOP_SIZE",
                config.knowledge_graph_chunk_search_top_size,
            )
        )
        llm_client = config.llm_client
        model_name = config.model_name
        self._enable_similarity_search = (
            graph_store_adapter.graph_store.enable_similarity_search
        )
        self._embedding_batch_size = int(
            os.getenv(
                "KNOWLEDGE_GRAPH_EMBEDDING_BATCH_SIZE",
                config.knowledge_graph_embedding_batch_size,
            )
        )
        similarity_search_topk = int(
            os.getenv(
                "KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_TOP_SIZE",
                config.similarity_search_topk,
            )
        )
        similarity_search_score_threshold = float(
            os.getenv(
                "KNOWLEDGE_GRAPH_EXTRACT_SEARCH_RECALL_SCORE",
                config.extract_score_threshold,
            )
        )
        self._enable_text_search = (
            os.environ["TEXT_SEARCH_ENABLED"].lower() == "true"
            if "TEXT_SEARCH_ENABLED" in os.environ
            else config.enable_text_search
        )

        self._keyword_extractor = KeywordExtractor(llm_client, model_name)
        self._text_embedder = TextEmbedder(config.embedding_fn)

        self._keyword_based_graph_retriever = KeywordBasedGraphRetriever(
            graph_store_adapter, triplet_topk
        )
        self._vector_based_graph_retriever = VectorBasedGraphRetriever(
            graph_store_adapter,
            triplet_topk,
            similarity_search_topk,
            similarity_search_score_threshold,
        )
        self._text_based_graph_retriever = TextBasedGraphRetriever(
            graph_store_adapter, triplet_topk, llm_client, model_name
        )
        self._document_graph_retriever = DocumentGraphRetriever(
            graph_store_adapter,
            document_topk,
            similarity_search_topk,
            similarity_search_score_threshold,
        )

    @staticmethod
    def _is_empty(graph: Graph) -> bool:
        """Return True when the graph has neither vertices nor edges."""
        return graph.vertex_count == 0 and graph.edge_count == 0

    async def _build_search_subs(
        self, text: str
    ) -> Union[List[str], List[List[float]]]:
        """Build the keywords or embedding vectors used to explore the graph."""
        # Extract keywords from original question
        keywords: List[str] = await self._keyword_extractor.extract(text)

        if not self._enable_similarity_search:
            # Using keywords as subs
            logger.info(
                "Search subgraph with the following keywords:\n"
                f"[KEYWORDS]:{keywords}"
            )
            return keywords

        # Embedding the question
        vector = await self._text_embedder.embed(text)
        # Embedding the keywords
        vectors = await self._text_embedder.batch_embed(
            keywords, batch_size=self._embedding_batch_size
        )
        # Using the embeddings of keywords and question
        vectors.append(vector)
        logger.info(
            "Search subgraph with the following keywords and question's "
            f"embedding vector:\n[KEYWORDS]:{keywords}\n[QUESTION]:{text}"
        )
        return vectors

    async def retrieve(self, text: str) -> Tuple[Graph, Tuple[Graph, str]]:
        """Retrieve subgraph from triplet graph and document graph.

        Args:
            text: The user question.

        Returns:
            ``(subgraph, (subgraph_for_doc, text2gql_query))``. The query
            string is whatever the text based retriever returned, or an empty
            string when text search is disabled.
        """
        subgraph = MemoryGraph()
        subgraph_for_doc = MemoryGraph()
        text2gql_query = ""

        if self._enable_text_search:
            # Retrieve from knowledge graph with text.
            subgraph, text2gql_query = await self._text_based_graph_retriever.retrieve(
                text
            )

        if not self._is_empty(subgraph):
            # Text search produced a subgraph: nothing else is searched, so
            # the keyword extraction (an LLM call) is skipped entirely.
            return subgraph, (subgraph_for_doc, text2gql_query)

        # Text search is disabled or failed to retrieve a subgraph: fall back
        # to keyword / vector based retrieval.
        subs = await self._build_search_subs(text)

        # If enable triplet graph
        if self._triplet_graph_enabled:
            if self._enable_similarity_search:
                # Retrieve from triplet graph with vectors
                subgraph = await self._vector_based_graph_retriever.retrieve(subs)
            else:
                # Retrieve from triplet graph with keywords
                subgraph = await self._keyword_based_graph_retriever.retrieve(subs)

        # If enable document graph
        if self._document_graph_enabled:
            if self._is_empty(subgraph):
                # Triplet graph disabled or nothing retrieved from it:
                # using subs to retrieve from document graph
                subgraph_for_doc = await self._document_graph_retriever.retrieve(
                    subs
                )
            else:
                # Using entities in subgraph to search chunks and doc
                subgraph_for_doc = await self._document_graph_retriever.retrieve(
                    subgraph
                )

        return subgraph, (subgraph_for_doc, text2gql_query)

View File

@@ -0,0 +1,27 @@
"""Keyword Based Graph Retriever."""
import logging
from typing import List, Tuple
from dbgpt.storage.graph_store.graph import Graph
from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase
logger = logging.getLogger(__name__)
class KeywordBasedGraphRetriever(GraphRetrieverBase):
    """Keyword Based Graph Retriever class."""

    def __init__(self, graph_store_adapter, triplet_topk):
        """Initialize Keyword Based Graph Retriever.

        Args:
            graph_store_adapter: Adapter used to explore the triplet graph.
            triplet_topk: Forwarded to the adapter as ``limit``.
        """
        self._graph_store_adapter = graph_store_adapter
        self._triplet_topk = triplet_topk

    async def retrieve(self, keywords: List[str]) -> Graph:
        """Retrieve from triplets graph with keywords.

        Args:
            keywords: Keywords used as the starting points of the exploration.

        Returns:
            The result of ``explore_trigraph``, returned unchanged (a single
            graph, not a tuple).
        """
        return self._graph_store_adapter.explore_trigraph(
            subs=keywords,
            limit=self._triplet_topk,
        )

View File

@@ -0,0 +1,48 @@
"""Text Based Graph Retriever."""
import json
import logging
from typing import Dict, List, Tuple, Union
from dbgpt.rag.transformer.simple_intent_translator import SimpleIntentTranslator
from dbgpt.rag.transformer.text2gql import Text2GQL
from dbgpt.storage.graph_store.graph import Graph, MemoryGraph
from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase
logger = logging.getLogger(__name__)
class TextBasedGraphRetriever(GraphRetrieverBase):
    """Text Based Graph Retriever class.

    Translates the user question into a graph query (text2gql) and executes
    it through the graph store adapter.
    """

    def __init__(self, graph_store_adapter, triplet_topk, llm_client, model_name):
        """Initialize Text Based Graph Retriever.

        Args:
            graph_store_adapter: Adapter used to read the schema and run the
                generated query.
            triplet_topk: ``LIMIT`` appended to queries that do not contain one.
            llm_client: LLM client handed to the intent translator and to
                the text2gql translator.
            model_name: Model name handed to both translators.
        """
        self._graph_store_adapter = graph_store_adapter
        self._triplet_topk = triplet_topk
        self._intent_interpreter = SimpleIntentTranslator(llm_client, model_name)
        self._text2gql = Text2GQL(llm_client, model_name)

    async def retrieve(self, text: str) -> Tuple[Graph, str]:
        """Retrieve from triplets graph with text2gql.

        Args:
            text: The user question.

        Returns:
            ``(subgraph, query)``. When no query was generated or its
            execution failed, an empty ``MemoryGraph`` and an empty query
            string are returned instead.
        """
        intention: Dict[
            str, Union[str, List[str]]
        ] = await self._intent_interpreter.translate(text)

        # Re-serialize the schema with indentation before handing it to the
        # translator together with the interpreted intention.
        intention["schema"] = json.dumps(
            json.loads(self._graph_store_adapter.get_schema()), indent=4
        )
        translation: Dict[str, str] = await self._text2gql.translate(
            json.dumps(intention)
        )

        text2gql_query = translation.get("query", "")
        if not text2gql_query or not text2gql_query.strip():
            # Nothing to execute: avoid sending a bare " LIMIT n" statement
            # to the graph store.
            logger.warning("Text2GQL returned an empty query, skip execution.")
            return MemoryGraph(), ""

        if "LIMIT" not in text2gql_query:
            text2gql_query += f" LIMIT {self._triplet_topk}"

        try:
            subgraph = self._graph_store_adapter.query(query=text2gql_query)
        except Exception as e:
            # Log the failing query BEFORE discarding it, otherwise the log
            # message would always show an empty query.
            logger.error(f"Failed to execute query: {text2gql_query}\n{e}")
            return MemoryGraph(), ""

        logger.info(f"Query executed successfully: {text2gql_query}")
        return subgraph, text2gql_query

View File

@@ -0,0 +1,37 @@
"""Vector Based Graph Retriever."""
import logging
from typing import List, Tuple
from dbgpt.storage.graph_store.graph import Graph
from dbgpt.storage.knowledge_graph.graph_retriever.base import GraphRetrieverBase
logger = logging.getLogger(__name__)
class VectorBasedGraphRetriever(GraphRetrieverBase):
    """Vector Based Graph Retriever class."""

    def __init__(
        self,
        graph_store_adapter,
        triplet_topk,
        similarity_search_topk,
        similarity_search_score_threshold,
    ):
        """Initialize Vector Based Graph Retriever.

        Args:
            graph_store_adapter: Adapter used to explore the triplet graph.
            triplet_topk: Forwarded to the adapter as ``limit``.
            similarity_search_topk: Forwarded to the adapter as ``topk``.
            similarity_search_score_threshold: Forwarded to the adapter as
                ``score_threshold``.
        """
        self._graph_store_adapter = graph_store_adapter
        self._triplet_topk = triplet_topk
        self._similarity_search_topk = similarity_search_topk
        self._similarity_search_score_threshold = similarity_search_score_threshold

    async def retrieve(self, vectors: List[List[float]]) -> Graph:
        """Retrieve from triplet graph with vectors.

        Args:
            vectors: Embedding vectors used as the starting points of the
                exploration.

        Returns:
            The result of ``explore_trigraph``, returned unchanged (a single
            graph, not a tuple).
        """
        return self._graph_store_adapter.explore_trigraph(
            subs=vectors,
            topk=self._similarity_search_topk,
            limit=self._triplet_topk,
            score_threshold=self._similarity_search_score_threshold,
        )

View File

@@ -144,7 +144,7 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
self._triplet_extractor = TripletExtractor(self._llm_client, self._model_name) self._triplet_extractor = TripletExtractor(self._llm_client, self._model_name)
self._keyword_extractor = KeywordExtractor(self._llm_client, self._model_name) self._keyword_extractor = KeywordExtractor(self._llm_client, self._model_name)
self._graph_store: GraphStoreBase = self.__init_graph_store(config) self._graph_store: GraphStoreBase = self.__init_graph_store(config)
self._graph_store_apdater: GraphStoreAdapter = self.__init_graph_store_adapter() self._graph_store_adapter: GraphStoreAdapter = self.__init_graph_store_adapter()
def __init_graph_store(self, config: BuiltinKnowledgeGraphConfig) -> GraphStoreBase: def __init_graph_store(self, config: BuiltinKnowledgeGraphConfig) -> GraphStoreBase:
def configure(cfg: GraphStoreConfig): def configure(cfg: GraphStoreConfig):
@@ -167,13 +167,13 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
async def process_chunk(chunk: Chunk): async def process_chunk(chunk: Chunk):
triplets = await self._triplet_extractor.extract(chunk.content) triplets = await self._triplet_extractor.extract(chunk.content)
for triplet in triplets: for triplet in triplets:
self._graph_store_apdater.insert_triplet(*triplet) self._graph_store_adapter.insert_triplet(*triplet)
logger.info(f"load {len(triplets)} triplets from chunk {chunk.chunk_id}") logger.info(f"load {len(triplets)} triplets from chunk {chunk.chunk_id}")
return chunk.chunk_id return chunk.chunk_id
# wait async tasks completed # wait async tasks completed
if not self.vector_name_exists(): if not self.vector_name_exists():
self._graph_store_apdater.create_graph(self.get_config().name) self._graph_store_adapter.create_graph(self.get_config().name)
tasks = [process_chunk(chunk) for chunk in chunks] tasks = [process_chunk(chunk) for chunk in chunks]
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop) asyncio.set_event_loop(loop)
@@ -190,11 +190,11 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
List[str]: chunk ids. List[str]: chunk ids.
""" """
if not self.vector_name_exists(): if not self.vector_name_exists():
self._graph_store_apdater.create_graph(self.get_config().name) self._graph_store_adapter.create_graph(self.get_config().name)
for chunk in chunks: for chunk in chunks:
triplets = await self._triplet_extractor.extract(chunk.content) triplets = await self._triplet_extractor.extract(chunk.content)
for triplet in triplets: for triplet in triplets:
self._graph_store_apdater.insert_triplet(*triplet) self._graph_store_adapter.insert_triplet(*triplet)
logger.info(f"load {len(triplets)} triplets from chunk {chunk.chunk_id}") logger.info(f"load {len(triplets)} triplets from chunk {chunk.chunk_id}")
return [chunk.chunk_id for chunk in chunks] return [chunk.chunk_id for chunk in chunks]
@@ -221,7 +221,7 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
# extract keywords and explore graph store # extract keywords and explore graph store
keywords = await self._keyword_extractor.extract(text) keywords = await self._keyword_extractor.extract(text)
subgraph = self._graph_store_apdater.explore_trigraph( subgraph = self._graph_store_adapter.explore_trigraph(
keywords, limit=topk keywords, limit=topk
).format() ).format()
@@ -255,12 +255,12 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
def query_graph(self, limit: Optional[int] = None) -> Graph: def query_graph(self, limit: Optional[int] = None) -> Graph:
"""Query graph.""" """Query graph."""
return self._graph_store_apdater.get_full_graph(limit) return self._graph_store_adapter.get_full_graph(limit)
def truncate(self) -> List[str]: def truncate(self) -> List[str]:
"""Truncate knowledge graph.""" """Truncate knowledge graph."""
logger.info(f"Truncate graph {self._config.name}") logger.info(f"Truncate graph {self._config.name}")
self._graph_store_apdater.truncate() self._graph_store_adapter.truncate()
logger.info("Truncate keyword extractor") logger.info("Truncate keyword extractor")
self._keyword_extractor.truncate() self._keyword_extractor.truncate()
@@ -273,7 +273,7 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
def delete_vector_name(self, index_name: str): def delete_vector_name(self, index_name: str):
"""Delete vector name.""" """Delete vector name."""
logger.info(f"Drop graph {index_name}") logger.info(f"Drop graph {index_name}")
self._graph_store_apdater.drop() self._graph_store_adapter.drop()
logger.info("Drop keyword extractor") logger.info("Drop keyword extractor")
self._keyword_extractor.drop() self._keyword_extractor.drop()
@@ -283,9 +283,9 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
def delete_by_ids(self, ids: str) -> List[str]: def delete_by_ids(self, ids: str) -> List[str]:
"""Delete by ids.""" """Delete by ids."""
self._graph_store_apdater.delete_document(chunk_id=ids) self._graph_store_adapter.delete_document(chunk_id=ids)
return [] return []
def vector_name_exists(self) -> bool: def vector_name_exists(self) -> bool:
"""Whether name exists.""" """Whether name exists."""
return self._graph_store_apdater.graph_store.is_exist(self._config.name) return self._graph_store_adapter.graph_store.is_exist(self._config.name)

View File

@@ -473,3 +473,48 @@ Furthermore, as shown in the following figure, compared to RAG, GraphRAG with si
</p> </p>
In conclusion, enabling similarity search in GraphRAG significantly expands the scope and relevance of its responses. In conclusion, enabling similarity search in GraphRAG significantly expands the scope and relevance of its responses.
### Text2GQL Search in GraphRAG:
In the latest version of DB-GPT, we have implemented a new feature:
- **Text2GQL search** for GraphRAG retrieval
#### How to use?
Set the variables below in the `.env` file to enable text2gql search in DB-GPT.
```
TEXT_SEARCH_ENABLED=True # enable the text2gql search for entities and relations.
```
#### Why to use?
Keyword-based or vector-based retrieval generates a large multi-hop subgraph for the LLM to summarize, which is costly when the user's question can be expressed as a single graph query. In that situation, Text2GQL search can effectively reduce the cost of graph search and increase the accuracy of the retrieved subgraph.
In the future, we hope to further improve Text2GQL translation, using both prompt-based and fine-tuning-based methods, so that it can compete with keyword-based or vector-based retrieval on complicated questions.
#### Comparison of Text2GQL Search Results
Given identical documents and questions in the same environment, the results of the keyword mode are as follows:
<p align="left">
<img src={'/img/chat_knowledge/graph_rag/comparison_result_for_keywords_search.png'} width="1000px"/>
</p>
The results of the text2gql search mode are as follows:
<p align="left">
<img src={'/img/chat_knowledge/graph_rag/comparison_result_for_text2gql_search.png'} width="1000px"/>
</p>
Compared to the keyword search method, the text2gql search method can generate an accurate graph query language statement to query the DB-GPT entity in the knowledge graph, which is
```cypher
MATCH (n) WHERE n.id = 'DB-GPT' RETURN n LIMIT 10
```
This implies that in scenarios where questions can be expressed by a single graph query, the text2gql search approach can retrieve more accurate information with lower cost.
In conclusion, enabling text2gql search in GraphRAG significantly increases accuracy and lowers cost when questions are concise and clear.

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 333 KiB