Mirror of https://github.com/csunny/DB-GPT.git
refactor: RAG Refactor (#985)
Co-authored-by: Aralhi <xiaoping0501@gmail.com>
Co-authored-by: csunny <cfqsunny@163.com>
examples/rag/db_struct_rag_example.py (new file, 68 lines)
@@ -0,0 +1,68 @@
from dbgpt.datasource.rdbms.conn_sqlite import SQLiteTempConnect
from dbgpt.rag.embedding.embedding_factory import DefaultEmbeddingFactory
from dbgpt.serve.rag.assembler.db_struct import DBStructAssembler
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector

"""DB struct rag example.

pre-requirements:
set your embedding model path in your example code.
```
embedding_model_path = "{your_embedding_model_path}"
```

Examples:
    .. code-block:: shell

        python examples/rag/db_struct_rag_example.py
"""


def _create_temporary_connection():
    """Create a temporary database connection for testing."""
    connect = SQLiteTempConnect.create_temporary_db()
    connect.create_temp_tables(
        {
            "user": {
                "columns": {
                    "id": "INTEGER PRIMARY KEY",
                    "name": "TEXT",
                    "age": "INTEGER",
                },
                "data": [
                    (1, "Tom", 10),
                    (2, "Jerry", 16),
                    (3, "Jack", 18),
                    (4, "Alice", 20),
                    (5, "Bob", 22),
                ],
            }
        }
    )
    return connect


if __name__ == "__main__":
    connection = _create_temporary_connection()

    embedding_model_path = "{your_embedding_model_path}"
    vector_persist_path = "{your_persist_path}"
    embedding_fn = DefaultEmbeddingFactory(
        default_model_name=embedding_model_path
    ).create()
    vector_connector = VectorStoreConnector.from_default(
        "Chroma",
        vector_store_config=ChromaVectorConfig(
            name="vector_name",
            persist_path=vector_persist_path,
        ),
        embedding_fn=embedding_fn,
    )
    assembler = DBStructAssembler.load_from_connection(
        connection=connection,
        vector_store_connector=vector_connector,
    )
    assembler.persist()
    # get db struct retriever
    retriever = assembler.as_retriever(top_k=1)
    chunks = retriever.retrieve("show columns from user")
    print(f"db struct rag example results: {[chunk.content for chunk in chunks]}")
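The helper above takes an arbitrary table spec, so the example is easy to exercise against a richer schema. A minimal sketch, not part of this commit, reusing only the SQLiteTempConnect API shown in this file (the "orders" table and its rows are hypothetical test data):

from dbgpt.datasource.rdbms.conn_sqlite import SQLiteTempConnect

# Hedged sketch: register a second, hypothetical table so the
# retriever has more than one schema to match against.
connect = SQLiteTempConnect.create_temporary_db()
connect.create_temp_tables(
    {
        "user": {
            "columns": {"id": "INTEGER PRIMARY KEY", "name": "TEXT"},
            "data": [(1, "Tom"), (2, "Alice")],
        },
        "orders": {
            "columns": {
                "id": "INTEGER PRIMARY KEY",
                "user_id": "INTEGER",
                "amount": "REAL",
            },
            "data": [(1, 1, 9.9), (2, 2, 19.9)],
        },
    }
)

With two tables persisted, raising as_retriever(top_k=2) lets a join-style question such as "show orders per user" surface both table structures.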
examples/rag/embedding_rag_example.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import asyncio

from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import DefaultEmbeddingFactory
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.assembler.embedding import EmbeddingAssembler
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector

"""Embedding rag example.

pre-requirements:
set your embedding model path in your example code.
```
embedding_model_path = "{your_embedding_model_path}"
```

Examples:
    .. code-block:: shell

        python examples/rag/embedding_rag_example.py
"""


async def main():
    file_path = "./docs/docs/awel.md"
    vector_persist_path = "{your_persist_path}"
    embedding_model_path = "{your_embedding_model_path}"
    knowledge = KnowledgeFactory.from_file_path(file_path)
    vector_connector = VectorStoreConnector.from_default(
        "Chroma",
        vector_store_config=ChromaVectorConfig(
            name="vector_name",
            persist_path=vector_persist_path,
        ),
        embedding_fn=DefaultEmbeddingFactory(
            default_model_name=embedding_model_path
        ).create(),
    )
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
    # get embedding assembler
    assembler = EmbeddingAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        vector_store_connector=vector_connector,
    )
    assembler.persist()
    # get embeddings retriever
    retriever = assembler.as_retriever(3)
    chunks = await retriever.aretrieve_with_scores("RAG", 0.3)
    print(f"embedding rag example results: {chunks}")


if __name__ == "__main__":
    asyncio.run(main())
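ChunkParameters above only picks the strategy. Below is a hedged sketch that also passes explicit sizing; chunk_size and chunk_overlap are assumed fields on ChunkParameters and are not confirmed by this diff, so drop them if the dataclass differs:

from dbgpt.rag.chunk_manager import ChunkParameters

chunk_parameters = ChunkParameters(
    chunk_strategy="CHUNK_BY_SIZE",
    chunk_size=512,  # assumed field: target characters per chunk
    chunk_overlap=50,  # assumed field: overlap between adjacent chunks
)

Smaller chunks generally return tighter matches, which interacts with the 0.3 score threshold passed to aretrieve_with_scores above.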
examples/rag/rewrite_rag_example.py (new file, 42 lines)
@@ -0,0 +1,42 @@
import asyncio

from dbgpt.model import OpenAILLMClient
from dbgpt.rag.retriever.rewrite import QueryRewrite

"""Query rewrite example.

pre-requirements:
1. install openai python sdk
```
pip install openai
```
2. set openai key and base
```
export OPENAI_API_KEY={your_openai_key}
export OPENAI_API_BASE={your_openai_base}
```
or
```
import os
os.environ["OPENAI_API_KEY"] = "{your_openai_key}"
os.environ["OPENAI_API_BASE"] = "{your_openai_base}"
```
Examples:
    .. code-block:: shell

        python examples/rag/rewrite_rag_example.py
"""


async def main():
    query = "compare steve curry and lebron james"
    llm_client = OpenAILLMClient()
    reinforce = QueryRewrite(
        llm_client=llm_client,
        model_name="gpt-3.5-turbo",
    )
    return await reinforce.rewrite(origin_query=query, nums=1)


if __name__ == "__main__":
    output = asyncio.run(main())
    print(f"output: \n\n{output}")
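Since rewrite takes a nums argument, fanning one query out into several candidates is a natural extension. A short sketch, not part of this commit, under the assumption that rewrite returns a list of rewritten queries when nums > 1:

import asyncio

from dbgpt.model import OpenAILLMClient
from dbgpt.rag.retriever.rewrite import QueryRewrite


async def multi_rewrite():
    rewrite = QueryRewrite(
        llm_client=OpenAILLMClient(),
        model_name="gpt-3.5-turbo",
    )
    # Ask for three candidates instead of one; each could then be sent
    # to a retriever and the result sets merged and deduplicated.
    candidates = await rewrite.rewrite(
        origin_query="compare steve curry and lebron james", nums=3
    )
    for i, candidate in enumerate(candidates, 1):
        print(f"candidate {i}: {candidate}")


if __name__ == "__main__":
    asyncio.run(multi_rewrite())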
examples/rag/summary_extractor_example.py (new file, 47 lines)
@@ -0,0 +1,47 @@
import asyncio

from dbgpt.model import OpenAILLMClient
from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.assembler.summary import SummaryAssembler

"""Summary extractor example.

pre-requirements:
1. install openai python sdk
```
pip install openai
```
2. set openai key and base
```
export OPENAI_API_KEY={your_openai_key}
export OPENAI_API_BASE={your_openai_base}
```
or
```
import os
os.environ["OPENAI_API_KEY"] = "{your_openai_key}"
os.environ["OPENAI_API_BASE"] = "{your_openai_base}"
```
Examples:
    .. code-block:: shell

        python examples/rag/summary_extractor_example.py
"""


async def main():
    file_path = "./docs/docs/awel.md"
    llm_client = OpenAILLMClient()
    knowledge = KnowledgeFactory.from_file_path(file_path)
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
    assembler = SummaryAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        llm_client=llm_client,
        model_name="gpt-3.5-turbo",
    )
    return await assembler.generate_summary()


if __name__ == "__main__":
    output = asyncio.run(main())
    print(f"output: \n\n{output}")
|