"""Embedding RAG example.

Loads a markdown document, splits it into chunks, embeds the chunks into a
Chroma vector store and then runs a scored similarity query against it.

Pre-requirements:
    Set your embedding model path in your example code::

        embedding_model_path = "{your_embedding_model_path}"

    As written, the script builds the model path as
    ``os.path.join(MODEL_PATH, "text2vec-large-chinese")``.

Examples:
    .. code-block:: shell

        python examples/rag/embedding_rag_example.py
"""

import asyncio
import os

from dbgpt.configs.model_config import MODEL_PATH, PILOT_PATH, ROOT_PATH
from dbgpt.rag.embedding import DefaultEmbeddingFactory
from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler import EmbeddingAssembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.vector_store.chroma_store import ChromaStore, ChromaVectorConfig


def _create_vector_connector() -> ChromaStore:
    """Create the Chroma vector store used by this example.

    Returns:
        A ``ChromaStore`` named ``"embedding_rag_test"`` whose config uses
        ``PILOT_PATH`` as ``persist_path`` and whose embedding function is
        created by ``DefaultEmbeddingFactory`` from the
        ``text2vec-large-chinese`` directory under ``MODEL_PATH``.
    """
    config = ChromaVectorConfig(
        persist_path=PILOT_PATH,
    )

    return ChromaStore(
        config,
        name="embedding_rag_test",
        embedding_fn=DefaultEmbeddingFactory(
            default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
        ).create(),
    )


async def main() -> None:
    """Run the example: index ``awel.md`` and print the retrieved chunks."""
    # The document is resolved relative to the repository root.
    file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")
    knowledge = KnowledgeFactory.from_file_path(file_path)
    vector_store = _create_vector_connector()
    chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")

    # get embedding assembler
    assembler = EmbeddingAssembler.load_from_knowledge(
        knowledge=knowledge,
        chunk_parameters=chunk_parameters,
        index_store=vector_store,
    )
    # Persist before building the retriever so the query has data to search.
    assembler.persist()

    # get embeddings retriever
    # NOTE(review): 3 is presumably the top-k and 0.3 the minimum score
    # threshold -- confirm against the assembler/retriever API.
    retriever = assembler.as_retriever(3)
    chunks = await retriever.aretrieve_with_scores("what is awel talk about", 0.3)
    print(f"embedding rag example results:{chunks}")


if __name__ == "__main__":
    asyncio.run(main())