"""Embedding rag example.

pre-requirements:
    set your embedding model path in your example code.
    ```
    embedding_model_path = "{your_embedding_model_path}"
    ```
    As written, this example loads the embedding model from
    ``os.path.join(MODEL_PATH, "text2vec-large-chinese")``; edit
    ``_create_vector_connector`` if your model lives somewhere else.

Examples:
    .. code-block:: shell

        python examples/rag/embedding_rag_example.py
"""

import asyncio
import os

from dbgpt.configs.model_config import MODEL_PATH, PILOT_PATH, ROOT_PATH
from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import DefaultEmbeddingFactory
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.assembler.embedding import EmbeddingAssembler
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector


def _create_vector_connector() -> VectorStoreConnector:
    """Create the Chroma-backed vector store connector used by the example.

    The store is named ``db_schema_vector_store_name`` and persisted under
    ``PILOT_PATH/data``. Embeddings are produced by the model located at
    ``MODEL_PATH/text2vec-large-chinese``.

    Returns:
        The connector built by ``VectorStoreConnector.from_default``.
    """
    return VectorStoreConnector.from_default(
        "Chroma",
        vector_store_config=ChromaVectorConfig(
            name="db_schema_vector_store_name",
            persist_path=os.path.join(PILOT_PATH, "data"),
        ),
        embedding_fn=DefaultEmbeddingFactory(
            default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
        ).create(),
    )


async def main() -> None:
    """Embed the AWEL markdown document, persist it, and run one query.

    Loads ``docs/docs/awel/awel.md`` (relative to ``ROOT_PATH``) as knowledge,
    chunks it with the ``CHUNK_BY_SIZE`` strategy, persists the result through
    the vector store connector, then prints the chunks retrieved for a sample
    question.
    """
    file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")
    knowledge = KnowledgeFactory.from_file_path(file_path)
    vector_connector = _create_vector_connector()
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")

    # get embedding assembler
    assembler = EmbeddingAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        vector_store_connector=vector_connector,
    )
    # Persist before retrieving so the query below has stored data to hit.
    assembler.persist()

    # get embeddings retriever
    # NOTE(review): 3 is presumably the top-k result count -- confirm against
    # EmbeddingAssembler.as_retriever.
    retriever = assembler.as_retriever(3)
    # NOTE(review): 0.3 is presumably the minimum score threshold -- confirm
    # against the retriever's aretrieve_with_scores signature.
    chunks = await retriever.aretrieve_with_scores("what is awel talk about", 0.3)
    print(f"embedding rag example results:{chunks}")


if __name__ == "__main__":
    asyncio.run(main())