mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-01 08:11:45 +00:00
feat: define framework and split api
Add a cache for the LLM, add the openapi module, and add the proxy module.
This commit is contained in:
parent
b834bed65b
commit
50719c14b9
@ -1,4 +1,4 @@
|
||||
from pilot.source_embedding.csv_embedding import CSVEmbedding
|
||||
from pilot.embedding_engine.csv_embedding import CSVEmbedding
|
||||
|
||||
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
|
||||
path = "xx.csv"
|
||||
|
@ -1,4 +1,4 @@
|
||||
from pilot.source_embedding.pdf_embedding import PDFEmbedding
|
||||
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
|
||||
|
||||
path = "xxx.pdf"
|
||||
path = "your_path/OceanBase-数据库-V4.1.0-应用开发.pdf"
|
||||
|
@ -1,4 +1,4 @@
|
||||
from pilot.source_embedding.url_embedding import URLEmbedding
|
||||
from pilot.embedding_engine.url_embedding import URLEmbedding
|
||||
|
||||
path = "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023"
|
||||
model_name = "your_path/all-MiniLM-L6-v2"
|
||||
|
@ -1,3 +1,3 @@
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
__all__ = ["SourceEmbedding", "register"]
|
||||
|
3
pilot/embedding_engine/__init__.py
Normal file
3
pilot/embedding_engine/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from pilot.embedding_engine.source_embedding import SourceEmbedding, register
|
||||
|
||||
__all__ = ["SourceEmbedding", "register"]
|
@ -3,7 +3,7 @@ from typing import Dict, List, Optional
|
||||
from langchain.document_loaders import CSVLoader
|
||||
from langchain.schema import Document
|
||||
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
|
||||
class CSVEmbedding(SourceEmbedding):
|
@ -4,12 +4,12 @@ from chromadb.errors import NotEnoughElementsException
|
||||
from langchain.embeddings import HuggingFaceEmbeddings
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding.csv_embedding import CSVEmbedding
|
||||
from pilot.source_embedding.markdown_embedding import MarkdownEmbedding
|
||||
from pilot.source_embedding.pdf_embedding import PDFEmbedding
|
||||
from pilot.source_embedding.ppt_embedding import PPTEmbedding
|
||||
from pilot.source_embedding.url_embedding import URLEmbedding
|
||||
from pilot.source_embedding.word_embedding import WordEmbedding
|
||||
from pilot.embedding_engine.csv_embedding import CSVEmbedding
|
||||
from pilot.embedding_engine.markdown_embedding import MarkdownEmbedding
|
||||
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
|
||||
from pilot.embedding_engine.ppt_embedding import PPTEmbedding
|
||||
from pilot.embedding_engine.url_embedding import URLEmbedding
|
||||
from pilot.embedding_engine.word_embedding import WordEmbedding
|
||||
from pilot.vector_store.connector import VectorStoreConnector
|
||||
|
||||
CFG = Config()
|
@ -9,9 +9,9 @@ from langchain.schema import Document
|
||||
from langchain.text_splitter import SpacyTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.source_embedding.EncodeTextLoader import EncodeTextLoader
|
||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
from pilot.embedding_engine.EncodeTextLoader import EncodeTextLoader
|
||||
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||
|
||||
CFG = Config()
|
||||
|
@ -7,7 +7,7 @@ from langchain.schema import Document
|
||||
from langchain.text_splitter import SpacyTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
CFG = Config()
|
||||
|
@ -7,7 +7,7 @@ from langchain.schema import Document
|
||||
from langchain.text_splitter import SpacyTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
CFG = Config()
|
||||
|
@ -7,8 +7,8 @@ from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||
|
||||
CFG = Config()
|
||||
|
@ -6,8 +6,8 @@ from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoad
|
||||
from langchain.schema import Document
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||
|
||||
CFG = Config()
|
||||
|
4
pilot/model/cache/__init__.py
vendored
Normal file
4
pilot/model/cache/__init__.py
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
from .base import Cache
|
||||
from .disk_cache import DiskCache
|
||||
from .memory_cache import InMemoryCache
|
||||
from .gpt_cache import GPTCache
|
27
pilot/model/cache/base.py
vendored
Normal file
27
pilot/model/cache/base.py
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
import json
|
||||
import hashlib
|
||||
from typing import Any, Dict
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
class Cache(ABC):
    """Abstract interface for a string-keyed cache.

    Subclasses must provide item lookup, item assignment and membership
    testing. ``create`` and ``clear`` are optional hooks that do nothing in
    this base class.
    """

    def create(self, key: str) -> bool:
        """Optional hook; the base implementation does nothing."""
        return None

    def clear(self):
        """Optional hook; the base implementation does nothing."""
        return None

    @abstractmethod
    def __getitem__(self, key: str) -> str:
        """Return the cached value for ``key`` or raise ``KeyError``."""

    @abstractmethod
    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` in the cache under ``key``."""

    @abstractmethod
    def __contains__(self, key: str) -> bool:
        """Report whether a cached value can be returned for ``key``."""
|
27
pilot/model/cache/disk_cache.py
vendored
Normal file
27
pilot/model/cache/disk_cache.py
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
import os
|
||||
import diskcache
|
||||
import platformdirs
|
||||
from pilot.model.cache import Cache
|
||||
|
||||
class DiskCache(Cache):
    """Cache implementation that delegates storage to the diskcache library.

    https://github.com/grantjenks/python-diskcache
    """

    def __init__(self, llm_name: str):
        """Open the on-disk store for ``llm_name``.

        Args:
            llm_name: Name embedded in the store's directory name, which is
                created under the "dbgpt" user cache directory.
        """
        base_dir = platformdirs.user_cache_dir("dbgpt")
        store_path = os.path.join(base_dir, f"_{llm_name}.diskcache")
        self._diskcache = diskcache.Cache(store_path)

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present in the underlying store."""
        store = self._diskcache
        return key in store

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key`` in the underlying store."""
        store = self._diskcache
        return store[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in the underlying store."""
        store = self._diskcache
        store[key] = value

    def clear(self):
        """Clear the underlying store; its return value is discarded."""
        store = self._diskcache
        store.clear()
|
44
pilot/model/cache/gpt_cache.py
vendored
Normal file
44
pilot/model/cache/gpt_cache.py
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
import platformdirs
|
||||
|
||||
from pilot.model.cache import Cache
|
||||
|
||||
try:
|
||||
from gptcache.adapter.api import get, put, init_similar_cache
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
class GPTCache(Cache):
    """Semantic cache that uses the GPTCache library as its backend."""

    def __init__(self, cache) -> None:
        """Set up the GPTCache backend object used by this cache.

        Args:
            cache: Either a name (``str``), in which case a new similarity
                cache is initialised in a directory under the "dbgpt" user
                cache directory, or an already-configured backend cache
                object, which is used as-is.

        Raises:
            ImportError: If ``cache`` is a string and the ``gptcache``
                package is not installed.
        """
        if isinstance(cache, str):
            # The module-level name ``Cache`` is this project's abstract base
            # class and cannot be instantiated; the backend object has to be
            # GPTCache's own ``Cache`` class, imported here under an alias.
            from gptcache import Cache as _BackendCache

            _cache = _BackendCache()
            init_similar_cache(
                data_dir=os.path.join(
                    platformdirs.user_cache_dir("dbgpt"), f"_{cache}.gptcache"
                ),
                cache_obj=_cache,
            )
        else:
            _cache = cache

        self._cache_obj = _cache

    def _backend_kwargs(self) -> Dict[str, Any]:
        """Return keyword arguments that route adapter calls to our backend.

        When no backend object was supplied, nothing is passed so the
        adapter functions fall back to their own default cache.
        """
        if self._cache_obj is None:
            return {}
        return {"cache_obj": self._cache_obj}

    def __getitem__(self, key: str) -> str:
        """Return the cached value for ``key``.

        Raises:
            KeyError: If the backend has no answer for ``key``.
        """
        value = get(key, **self._backend_kwargs())
        if value is None:
            # The backend signals a miss with ``None``; the mapping protocol
            # used by the other caches expects ``KeyError`` instead.
            raise KeyError(key)
        return value

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in the backend cache."""
        put(key, value, **self._backend_kwargs())

    def __contains__(self, key: str) -> bool:
        """Report whether the backend returns a value for ``key``."""
        return get(key, **self._backend_kwargs()) is not None

    def create(self, llm: str, **kwargs: Dict[str, Any]) -> str:
        """Placeholder hook; currently does nothing."""
        pass
|
24
pilot/model/cache/memory_cache.py
vendored
Normal file
24
pilot/model/cache/memory_cache.py
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
from typing import Dict, Any
|
||||
from pilot.model.cache import Cache
|
||||
|
||||
class InMemoryCache(Cache):
    """Cache that keeps its entries in a plain in-process dictionary."""

    def __init__(self) -> None:
        """Initialize the cache with an empty in-memory store."""
        self._cache: Dict[str, Any] = {}

    def create(self, key: str) -> bool:
        """Placeholder hook; currently does nothing."""
        pass

    def clear(self):
        """Remove every entry from the in-memory store."""
        return self._cache.clear()

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        self._cache[key] = value

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key`` or raise ``KeyError``."""
        return self._cache[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` has an entry in the store.

        Membership is tested on the key itself, so an entry whose stored
        value is ``None`` still counts as present and stays consistent with
        ``__getitem__``.
        """
        return key in self._cache
|
||||
|
0
pilot/model/proxy/__init__.py
Normal file
0
pilot/model/proxy/__init__.py
Normal file
0
pilot/openapi/__init__.py
Normal file
0
pilot/openapi/__init__.py
Normal file
@ -17,7 +17,7 @@ from pilot.configs.model_config import (
|
||||
)
|
||||
|
||||
from pilot.scene.chat_knowledge.custom.prompt import prompt
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
@ -19,7 +19,7 @@ from pilot.configs.model_config import (
|
||||
)
|
||||
|
||||
from pilot.scene.chat_knowledge.default.prompt import prompt
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
@ -17,7 +17,7 @@ from pilot.configs.model_config import (
|
||||
)
|
||||
|
||||
from pilot.scene.chat_knowledge.url.prompt import prompt
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
@ -39,7 +39,7 @@ from pilot.common.plugins import scan_plugins, load_native_plugins
|
||||
|
||||
from pilot.server.gradio_css import code_highlight_css
|
||||
from pilot.server.gradio_patch import Chatbot as grChatbot
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.utils import build_logger
|
||||
from pilot.vector_store.extract_tovec import (
|
||||
get_vector_storelist,
|
||||
|
@ -1,3 +0,0 @@
|
||||
from pilot.source_embedding.source_embedding import SourceEmbedding, register
|
||||
|
||||
__all__ = ["SourceEmbedding", "register"]
|
@ -7,8 +7,8 @@ from pilot.configs.config import Config
|
||||
from pilot.configs.model_config import LLM_MODEL_CONFIG
|
||||
from pilot.scene.base import ChatScene
|
||||
from pilot.scene.base_chat import BaseChat
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.source_embedding.string_embedding import StringEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.string_embedding import StringEmbedding
|
||||
from pilot.summary.mysql_db_summary import MysqlSummary
|
||||
from pilot.scene.chat_factory import ChatFactory
|
||||
|
||||
|
@ -50,6 +50,7 @@ pymysql
|
||||
unstructured==0.6.3
|
||||
grpcio==1.47.5
|
||||
gpt4all==0.3.0
|
||||
diskcache==5.6.1
|
||||
|
||||
auto-gpt-plugin-template
|
||||
pymdown-extensions
|
||||
|
@ -11,7 +11,7 @@ from pilot.configs.model_config import (
|
||||
DATASETS_DIR,
|
||||
LLM_MODEL_CONFIG,
|
||||
)
|
||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
||||
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user