refactor: RAG Refactor (#985)

Co-authored-by: Aralhi <xiaoping0501@gmail.com>
Co-authored-by: csunny <cfqsunny@163.com>
This commit is contained in:
Aries-ckt
2024-01-03 09:45:26 +08:00
committed by GitHub
parent 90775aad50
commit 9ad70a2961
206 changed files with 5766 additions and 2419 deletions

View File

@@ -16,7 +16,7 @@ from dbgpt._private.pydantic import Field, PrivateAttr, BaseModel
from dbgpt.util.global_helper import globals_helper
from dbgpt._private.llm_metadata import LLMMetadata
from dbgpt.rag.embedding_engine.loader.token_splitter import TokenTextSplitter
from dbgpt.rag.text_splitter.token_splitter import TokenTextSplitter
# Module-level defaults; where they are consumed is not visible in this excerpt.
# NOTE(review): presumably a token-count safety margin — confirm against usage.
DEFAULT_PADDING = 5
# NOTE(review): presumably the fraction of a chunk shared with its neighbor — confirm.
DEFAULT_CHUNK_OVERLAP_RATIO = 0.1
@@ -93,6 +93,11 @@ class PromptHelper(BaseModel):
separator=separator,
)
def token_count(self, prompt_template: str) -> int:
    """Return how many tokens the emptied prompt template occupies.

    The template is first passed through ``get_empty_prompt_txt`` and the
    resulting text is then fed to this helper's tokenizer; the length of
    the tokenizer's output is the count.

    Args:
        prompt_template: The prompt template text to measure.

    Returns:
        The number of items the tokenizer yields for the emptied template.
    """
    # Tokenize the text produced by get_empty_prompt_txt, not the raw template.
    tokens = self._tokenizer(get_empty_prompt_txt(prompt_template))
    return len(tokens)
@classmethod
def from_llm_metadata(
cls,