diff --git a/docs/docs/cookbook/rag/graph_rag_app_develop.md b/docs/docs/cookbook/rag/graph_rag_app_develop.md
index d0a690beb..722fdd3f8 100644
--- a/docs/docs/cookbook/rag/graph_rag_app_develop.md
+++ b/docs/docs/cookbook/rag/graph_rag_app_develop.md
@@ -10,28 +10,23 @@ You can refer to the python example file `DB-GPT/examples/rag/graph_rag_example.
 First, you need to install the `dbgpt` library.
 
 ```bash
-pip install "dbgpt[rag]>=0.5.6"
+pip install "dbgpt[rag]>=0.6.0"
 ```
 
 ### Prepare Graph Database
 
 To store the knowledge in a graph, we need a graph database; [TuGraph](https://github.com/TuGraph-family/tugraph-db) is the first graph database supported by DB-GPT.
 
-Visit github repository of TuGraph to view [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, follow the instructions to pull the TuGraph database docker image (latest / version >= 4.3.0) and launch it.
+Visit the GitHub repository of TuGraph to view the [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, then follow the instructions to pull the TuGraph database docker image (latest / version >= 4.3.2) and launch it.
 
 ```
 docker pull tugraph/tugraph-runtime-centos7:latest
-docker run -it -d -p 7001:7001 -p 7070:7070 -p 7687:7687 -p 8000:8000 -p 8888:8888 -p 8889:8889 -p 9090:9090 \
-    -v /root/tugraph/data:/var/lib/lgraph/data -v /root/tugraph/log:/var/log/lgraph_log \
-    --name tugraph_demo tugraph/tugraph-runtime-centos7:latest /bin/bash
-docker exec -d tugraph_demo bash /setup.sh
+docker run -d -p 7070:7070 -p 7687:7687 -p 9090:9090 --name tugraph_demo tugraph/tugraph-runtime-centos7:latest lgraph_server -d run --enable_plugin true
 ```
 
-The default port for the bolt protocol is `7687`, and DB-GPT accesses TuGraph through this port via `neo4j` python client.
+The default port for the bolt protocol is `7687`.
+
 
-```
-pip install "neo4j>=5.20.0"
-```
 
 ### Prepare LLM
 
@@ -117,10 +112,10 @@ TUGRAPH_HOST=127.0.0.1
 TUGRAPH_PORT=7687
 TUGRAPH_USERNAME=admin
 TUGRAPH_PASSWORD=73@TuGraph
+GRAPH_COMMUNITY_SUMMARY_ENABLED=True
 ```
-
 ### Load into Knowledge Graph
 
 When using a graph database as the underlying knowledge storage platform, it is necessary to build a knowledge graph to facilitate the archiving and retrieval of documents. DB-GPT leverages the capabilities of large language models to implement an integrated knowledge graph, while still maintaining the flexibility to freely connect to other knowledge graph systems and graph database systems.
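+
+As an optional sanity check before building the graph, you can verify that TuGraph is reachable over the bolt port. This is a minimal sketch, not part of the DB-GPT API: it assumes the host, port, and credentials from the connection settings above, and it uses the `neo4j` Python client (`pip install "neo4j>=5.20.0"`), which speaks the bolt protocol that TuGraph serves on port `7687`.
+
+```python
+from neo4j import GraphDatabase
+
+# Connection settings from the .env example above; adjust to your deployment.
+URI = "bolt://127.0.0.1:7687"
+AUTH = ("admin", "73@TuGraph")
+
+# verify_connectivity() raises an exception if the server is unreachable
+# or the credentials are rejected.
+with GraphDatabase.driver(URI, auth=AUTH) as driver:
+    driver.verify_connectivity()
+    print("TuGraph is reachable over bolt.")
+```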
@@ -129,19 +124,23 @@ To maintain compatibility with existing conventional RAG frameworks, we continue
 
 ```python
 from dbgpt.model.proxy.llms.chatgpt import OpenAILLMClient
-from dbgpt.storage.knowledge_graph.knowledge_graph import (
-    BuiltinKnowledgeGraph,
-    BuiltinKnowledgeGraphConfig,
+from dbgpt.storage.knowledge_graph.community_summary import (
+    CommunitySummaryKnowledgeGraph,
+    CommunitySummaryKnowledgeGraphConfig,
 )
 
-def _create_kg_connector():
-    """Create knowledge graph connector."""
-    return BuiltinKnowledgeGraph(
-        config=BuiltinKnowledgeGraphConfig(
-            name="graph_rag_test",
-            embedding_fn=None,
-            llm_client=OpenAILLMClient(),
-            model_name="gpt-4",
+llm_client = OpenAILLMClient()
+model_name = "gpt-4o-mini"
+
+def __create_community_kg_connector():
+    """Create community knowledge graph connector."""
+    return CommunitySummaryKnowledgeGraph(
+        config=CommunitySummaryKnowledgeGraphConfig(
+            name="community_graph_rag_test",
+            embedding_fn=DefaultEmbeddingFactory.openai(),
+            llm_client=llm_client,
+            model_name=model_name,
+            graph_store_type="TuGraphGraph",
         ),
     )
 ```
@@ -155,31 +154,67 @@ Then you can retrieve the knowledge from the knowledge graph, which is the same
 
 ```python
 import os
+import pytest
+
 from dbgpt.configs.model_config import ROOT_PATH
+from dbgpt.core import Chunk, HumanPromptTemplate, ModelMessage, ModelRequest
+from dbgpt.model.proxy.llms.chatgpt import OpenAILLMClient
 from dbgpt.rag import ChunkParameters
 from dbgpt.rag.assembler import EmbeddingAssembler
+from dbgpt.rag.embedding import DefaultEmbeddingFactory
 from dbgpt.rag.knowledge import KnowledgeFactory
+from dbgpt.rag.retriever import RetrieverStrategy
 
-async def main():
-    file_path = os.path.join(ROOT_PATH, "examples/test_files/tranformers_story.md")
+async def test_community_graph_rag():
+    await __run_graph_rag(
+        knowledge_file="examples/test_files/graphrag-mini.md",
+        chunk_strategy="CHUNK_BY_MARKDOWN_HEADER",
+        knowledge_graph=__create_community_kg_connector(),
+        question="What's the relationship between TuGraph and DB-GPT ?",
+    )
+
+async def __run_graph_rag(knowledge_file, chunk_strategy, knowledge_graph, question):
+    file_path = os.path.join(ROOT_PATH, knowledge_file).format()
     knowledge = KnowledgeFactory.from_file_path(file_path)
-    graph_store = _create_kg_connector()
-    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
-    # get embedding assembler
-    assembler = EmbeddingAssembler.load_from_knowledge(
-        knowledge=knowledge,
-        chunk_parameters=chunk_parameters,
-        index_store=graph_store,
+    try:
+        chunk_parameters = ChunkParameters(chunk_strategy=chunk_strategy)
+
+        # get embedding assembler
+        assembler = await EmbeddingAssembler.aload_from_knowledge(
+            knowledge=knowledge,
+            chunk_parameters=chunk_parameters,
+            index_store=knowledge_graph,
+            retrieve_strategy=RetrieverStrategy.GRAPH,
+        )
+        await assembler.apersist()
+
+        # get embeddings retriever
+        retriever = assembler.as_retriever(1)
+        chunks = await retriever.aretrieve_with_scores(question, score_threshold=0.3)
+
+        # chat
+        print(f"{await ask_chunk(chunks[0], question)}")
+
+    finally:
+        knowledge_graph.delete_vector_name(knowledge_graph.get_config().name)
+
+async def ask_chunk(chunk: Chunk, question) -> str:
+    rag_template = (
+        "Based on the following [Context] {context}, "
+        "answer [Question] {question}."
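+        # {context} and {question} are template placeholders; they are
+        # filled in by HumanPromptTemplate.format_messages() below.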
     )
-    assembler.persist()
-    # get embeddings retriever
-    retriever = assembler.as_retriever(3)
-    chunks = await retriever.aretrieve_with_scores(
-        "What actions has Megatron taken?",
-        score_threshold=0.3
-    )
-    print(f"embedding rag example results:{chunks}")
-    graph_store.delete_vector_name("graph_rag_test")
+    template = HumanPromptTemplate.from_template(rag_template)
+    messages = template.format_messages(context=chunk.content, question=question)
+    model_messages = ModelMessage.from_base_messages(messages)
+    request = ModelRequest(model=model_name, messages=model_messages)
+    response = await llm_client.generate(request=request)
+
+    if not response.success:
+        code = str(response.error_code)
+        reason = response.text
+        raise Exception(f"request llm failed ({code}) {reason}")
+
+    return response.text
 ```
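+
+The functions above follow the pytest naming convention used in `DB-GPT/examples/rag/graph_rag_example.py` (note the `pytest` import and the `test_` prefix). If you prefer to run the flow as a plain script, a hypothetical asyncio entry point (not part of the example file) could look like this:
+
+```python
+import asyncio
+
+if __name__ == "__main__":
+    # Builds the community graph, retrieves a chunk, and asks the LLM.
+    asyncio.run(test_community_graph_rag())
+```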
-
+
-
+
-
+
-
+