refactor: rag storage refactor (#2434)

This commit is contained in:
Aries-ckt
2025-03-17 14:15:21 +08:00
committed by GitHub
parent 34d86d693c
commit fc3fe6b725
52 changed files with 1134 additions and 797 deletions

View File

@@ -5,7 +5,7 @@ from dbgpt.configs.model_config import ROOT_PATH
from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler.bm25 import BM25Assembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.vector_store.elastic_store import ElasticsearchVectorConfig
from dbgpt_ext.storage.vector_store.elastic_store import ElasticsearchStoreConfig
"""Embedding rag example.
pre-requirements:
@@ -19,8 +19,7 @@ from dbgpt_ext.storage.vector_store.elastic_store import ElasticsearchVectorConf
def _create_es_config():
"""Create vector connector."""
return ElasticsearchVectorConfig(
name="bm25_es_dbgpt",
return ElasticsearchStoreConfig(
uri="localhost",
port="9200",
user="elastic",

View File

@@ -25,14 +25,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")

View File

@@ -4,8 +4,7 @@ from dbgpt.configs.model_config import MODEL_PATH, PILOT_PATH
from dbgpt.rag.embedding import DefaultEmbeddingFactory
from dbgpt_ext.datasource.rdbms.conn_sqlite import SQLiteTempConnector
from dbgpt_ext.rag.assembler import DBSchemaAssembler
from dbgpt_ext.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt_serve.rag.connector import VectorStoreConnector
from dbgpt_ext.storage.vector_store.chroma_store import ChromaStore, ChromaVectorConfig
"""DB struct rag example.
pre-requirements:
@@ -46,12 +45,12 @@ def _create_temporary_connection():
def _create_vector_connector():
"""Create vector connector."""
return VectorStoreConnector.from_default(
"Chroma",
vector_store_config=ChromaVectorConfig(
name="db_schema_vector_store_name",
persist_path=os.path.join(PILOT_PATH, "data"),
),
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),

View File

@@ -25,14 +25,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")

View File

@@ -10,13 +10,12 @@ from dbgpt.rag.retriever import RetrieverStrategy
from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler import EmbeddingAssembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.graph_store.tugraph_store import TuGraphStoreConfig
from dbgpt_ext.storage.knowledge_graph.community_summary import (
CommunitySummaryKnowledgeGraph,
CommunitySummaryKnowledgeGraphConfig,
)
from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
)
"""GraphRAG example.
@@ -61,26 +60,22 @@ async def test_community_graph_rag():
def __create_naive_kg_connector():
"""Create knowledge graph connector."""
return BuiltinKnowledgeGraph(
config=BuiltinKnowledgeGraphConfig(
name="naive_graph_rag_test",
embedding_fn=None,
llm_client=llm_client,
model_name=model_name,
graph_store_type="MemoryGraph",
),
config=TuGraphStoreConfig(),
name="naive_graph_rag_test",
embedding_fn=None,
llm_client=llm_client,
llm_model=model_name,
)
def __create_community_kg_connector():
"""Create community knowledge graph connector."""
return CommunitySummaryKnowledgeGraph(
config=CommunitySummaryKnowledgeGraphConfig(
name="community_graph_rag_test",
embedding_fn=DefaultEmbeddingFactory.openai(),
llm_client=llm_client,
model_name=model_name,
graph_store_type="TuGraphGraph",
),
config=TuGraphStoreConfig(),
name="community_graph_rag_test",
embedding_fn=DefaultEmbeddingFactory.openai(),
llm_client=llm_client,
llm_model=model_name,
)

View File

@@ -6,8 +6,8 @@ from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler import EmbeddingAssembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.full_text.elasticsearch import (
ElasticDocumentConfig,
ElasticDocumentStore,
ElasticsearchStoreConfig,
)
"""Keyword rag example.
@@ -22,15 +22,14 @@ from dbgpt_ext.storage.full_text.elasticsearch import (
def _create_es_connector():
"""Create es connector."""
config = ElasticDocumentConfig(
name="keyword_rag_test",
config = ElasticsearchStoreConfig(
uri="localhost",
port="9200",
user="elastic",
password="dbgpt",
)
return ElasticDocumentStore(config)
return ElasticDocumentStore(config, name="keyword_rag_test")
async def main():

View File

@@ -23,14 +23,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="metadata_rag_test",
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")

View File

@@ -56,11 +56,13 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="embedding_api_rag_test",
embedding_fn=_create_embeddings(),
)
return ChromaStore(config)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=_create_embeddings(),
)
async def main():

View File

@@ -27,15 +27,17 @@ def _create_embeddings(
).create()
def _create_vector_connector(embeddings: Embeddings):
def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="embedding_rag_test",
embedding_fn=embeddings,
)
return ChromaStore(config)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=_create_embeddings(),
)
async def main():

View File

@@ -39,15 +39,17 @@ from dbgpt_ext.storage.vector_store.chroma_store import ChromaStore, ChromaVecto
def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=os.path.join(PILOT_PATH, "data"),
name="vector_name",
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
def _create_temporary_connection():
"""Create a temporary database connection for testing."""

View File

@@ -27,14 +27,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
class TriggerReqBody(BaseModel):
url: str = Field(..., description="url")

View File

@@ -76,14 +76,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)
return ChromaStore(config)
with DAG("simple_sdk_rag_retriever_example") as dag:
vector_store = _create_vector_connector()