refactor: RAG Refactor (#985)

Co-authored-by: Aralhi <xiaoping0501@gmail.com>
Co-authored-by: csunny <cfqsunny@163.com>
Author: Aries-ckt
Committed by: GitHub
Date: 2024-01-03 09:45:26 +08:00
Parent: 90775aad50
Commit: 9ad70a2961
206 changed files with 5766 additions and 2419 deletions

examples/rag/db_struct_rag_example.py

@@ -0,0 +1,68 @@
"""DB struct RAG example.

Prerequisites:
    Set your embedding model path in the example code:
    ```
    embedding_model_path = "{your_embedding_model_path}"
    ```

Examples:
    .. code-block:: shell

        python examples/rag/db_struct_rag_example.py
"""

from dbgpt.datasource.rdbms.conn_sqlite import SQLiteTempConnect
from dbgpt.rag.embedding.embedding_factory import DefaultEmbeddingFactory
from dbgpt.serve.rag.assembler.db_struct import DBStructAssembler
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector


def _create_temporary_connection():
    """Create a temporary database connection for testing."""
    connect = SQLiteTempConnect.create_temporary_db()
    connect.create_temp_tables(
        {
            "user": {
                "columns": {
                    "id": "INTEGER PRIMARY KEY",
                    "name": "TEXT",
                    "age": "INTEGER",
                },
                "data": [
                    (1, "Tom", 10),
                    (2, "Jerry", 16),
                    (3, "Jack", 18),
                    (4, "Alice", 20),
                    (5, "Bob", 22),
                ],
            }
        }
    )
    return connect


if __name__ == "__main__":
    connection = _create_temporary_connection()
    embedding_model_path = "{your_embedding_model_path}"
    vector_persist_path = "{your_persist_path}"
    embedding_fn = DefaultEmbeddingFactory(
        default_model_name=embedding_model_path
    ).create()
    vector_connector = VectorStoreConnector.from_default(
        "Chroma",
        vector_store_config=ChromaVectorConfig(
            name="vector_name",
            persist_path=vector_persist_path,
        ),
        embedding_fn=embedding_fn,
    )
    # Load the table schemas into the vector store
    assembler = DBStructAssembler.load_from_connection(
        connection=connection,
        vector_store_connector=vector_connector,
    )
    assembler.persist()
    # Get a DB struct retriever and fetch the most relevant table schema
    retriever = assembler.as_retriever(top_k=1)
    chunks = retriever.retrieve("show columns from user")
    print(f"db struct rag example results: {[chunk.content for chunk in chunks]}")

examples/rag/embedding_rag_example.py

@@ -0,0 +1,53 @@
"""Embedding RAG example.

Prerequisites:
    Set your embedding model path in the example code:
    ```
    embedding_model_path = "{your_embedding_model_path}"
    ```

Examples:
    .. code-block:: shell

        python examples/rag/embedding_rag_example.py
"""

import asyncio

from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import DefaultEmbeddingFactory
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.assembler.embedding import EmbeddingAssembler
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector


async def main():
    file_path = "./docs/docs/awel.md"
    vector_persist_path = "{your_persist_path}"
    embedding_model_path = "{your_embedding_model_path}"
    # Load the markdown file as a knowledge source
    knowledge = KnowledgeFactory.from_file_path(file_path)
    vector_connector = VectorStoreConnector.from_default(
        "Chroma",
        vector_store_config=ChromaVectorConfig(
            name="vector_name",
            persist_path=vector_persist_path,
        ),
        embedding_fn=DefaultEmbeddingFactory(
            default_model_name=embedding_model_path
        ).create(),
    )
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
    # Get an embedding assembler and persist the chunks to the vector store
    assembler = EmbeddingAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        vector_store_connector=vector_connector,
    )
    assembler.persist()
    # Get an embeddings retriever; 0.3 is the minimum similarity score
    retriever = assembler.as_retriever(top_k=3)
    chunks = await retriever.aretrieve_with_scores("RAG", 0.3)
    print(f"embedding rag example results: {chunks}")


if __name__ == "__main__":
    asyncio.run(main())
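
`ChunkParameters` also accepts knobs for the split itself. A hedged sketch, assuming `chunk_size` and `chunk_overlap` fields exist for the `CHUNK_BY_SIZE` strategy; verify against your DB-GPT version:
```
# Assumed fields: chunk_size / chunk_overlap tune the CHUNK_BY_SIZE split.
chunk_parameters = ChunkParameters(
    chunk_strategy="CHUNK_BY_SIZE",
    chunk_size=512,
    chunk_overlap=50,
)
```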

examples/rag/rewrite_rag_example.py

@@ -0,0 +1,42 @@
"""Query rewrite example.

Prerequisites:
    1. Install the OpenAI Python SDK:
    ```
    pip install openai
    ```
    2. Set your OpenAI API key and API base:
    ```
    export OPENAI_API_KEY={your_openai_key}
    export OPENAI_API_BASE={your_openai_base}
    ```
    or
    ```
    import os

    os.environ["OPENAI_API_KEY"] = "{your_openai_key}"
    os.environ["OPENAI_API_BASE"] = "{your_openai_base}"
    ```

Examples:
    .. code-block:: shell

        python examples/rag/rewrite_rag_example.py
"""

import asyncio
import os

from dbgpt.model import OpenAILLMClient
from dbgpt.rag.retriever.rewrite import QueryRewrite


async def main():
    query = "compare steve curry and lebron james"
    llm_client = OpenAILLMClient()
    # Rewrite the origin query into `nums` refined queries
    rewriter = QueryRewrite(
        llm_client=llm_client,
        model_name="gpt-3.5-turbo",
    )
    return await rewriter.rewrite(origin_query=query, nums=1)


if __name__ == "__main__":
    output = asyncio.run(main())
    print(f"output: \n\n{output}")

examples/rag/summary_extractor_example.py

@@ -0,0 +1,47 @@
"""Summary extractor example.

Prerequisites:
    1. Install the OpenAI Python SDK:
    ```
    pip install openai
    ```
    2. Set your OpenAI API key and API base:
    ```
    export OPENAI_API_KEY={your_openai_key}
    export OPENAI_API_BASE={your_openai_base}
    ```
    or
    ```
    import os

    os.environ["OPENAI_API_KEY"] = "{your_openai_key}"
    os.environ["OPENAI_API_BASE"] = "{your_openai_base}"
    ```

Examples:
    .. code-block:: shell

        python examples/rag/summary_extractor_example.py
"""

import asyncio

from dbgpt.model import OpenAILLMClient
from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.assembler.summary import SummaryAssembler


async def main():
    file_path = "./docs/docs/awel.md"
    llm_client = OpenAILLMClient()
    # Load the document, chunk it, and summarize it with the LLM
    knowledge = KnowledgeFactory.from_file_path(file_path)
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
    assembler = SummaryAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        llm_client=llm_client,
        model_name="gpt-3.5-turbo",
    )
    return await assembler.generate_summary()


if __name__ == "__main__":
    output = asyncio.run(main())
    print(f"output: \n\n{output}")