mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-06 10:54:29 +00:00
Merge branch 'llm_framework' into DEV_TY_06
This commit is contained in:
commit
03576cfc5d
@ -1,4 +1,4 @@
|
|||||||
from pilot.source_embedding.csv_embedding import CSVEmbedding
|
from pilot.embedding_engine.csv_embedding import CSVEmbedding
|
||||||
|
|
||||||
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
|
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
|
||||||
path = "xx.csv"
|
path = "xx.csv"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from pilot.source_embedding.pdf_embedding import PDFEmbedding
|
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
|
||||||
|
|
||||||
path = "xxx.pdf"
|
path = "xxx.pdf"
|
||||||
path = "your_path/OceanBase-数据库-V4.1.0-应用开发.pdf"
|
path = "your_path/OceanBase-数据库-V4.1.0-应用开发.pdf"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from pilot.source_embedding.url_embedding import URLEmbedding
|
from pilot.embedding_engine.url_embedding import URLEmbedding
|
||||||
|
|
||||||
path = "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023"
|
path = "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023"
|
||||||
model_name = "your_path/all-MiniLM-L6-v2"
|
model_name = "your_path/all-MiniLM-L6-v2"
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
|
|
||||||
__all__ = ["SourceEmbedding", "register"]
|
__all__ = ["SourceEmbedding", "register"]
|
||||||
|
3
pilot/embedding_engine/__init__.py
Normal file
3
pilot/embedding_engine/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from pilot.embedding_engine.source_embedding import SourceEmbedding, register
|
||||||
|
|
||||||
|
__all__ = ["SourceEmbedding", "register"]
|
@ -3,7 +3,7 @@ from typing import Dict, List, Optional
|
|||||||
from langchain.document_loaders import CSVLoader
|
from langchain.document_loaders import CSVLoader
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
|
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
|
|
||||||
|
|
||||||
class CSVEmbedding(SourceEmbedding):
|
class CSVEmbedding(SourceEmbedding):
|
@ -4,12 +4,12 @@ from chromadb.errors import NotEnoughElementsException
|
|||||||
from langchain.embeddings import HuggingFaceEmbeddings
|
from langchain.embeddings import HuggingFaceEmbeddings
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.source_embedding.csv_embedding import CSVEmbedding
|
from pilot.embedding_engine.csv_embedding import CSVEmbedding
|
||||||
from pilot.source_embedding.markdown_embedding import MarkdownEmbedding
|
from pilot.embedding_engine.markdown_embedding import MarkdownEmbedding
|
||||||
from pilot.source_embedding.pdf_embedding import PDFEmbedding
|
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
|
||||||
from pilot.source_embedding.ppt_embedding import PPTEmbedding
|
from pilot.embedding_engine.ppt_embedding import PPTEmbedding
|
||||||
from pilot.source_embedding.url_embedding import URLEmbedding
|
from pilot.embedding_engine.url_embedding import URLEmbedding
|
||||||
from pilot.source_embedding.word_embedding import WordEmbedding
|
from pilot.embedding_engine.word_embedding import WordEmbedding
|
||||||
from pilot.vector_store.connector import VectorStoreConnector
|
from pilot.vector_store.connector import VectorStoreConnector
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
@ -9,9 +9,9 @@ from langchain.schema import Document
|
|||||||
from langchain.text_splitter import SpacyTextSplitter
|
from langchain.text_splitter import SpacyTextSplitter
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
from pilot.source_embedding.EncodeTextLoader import EncodeTextLoader
|
from pilot.embedding_engine.EncodeTextLoader import EncodeTextLoader
|
||||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
@ -7,7 +7,7 @@ from langchain.schema import Document
|
|||||||
from langchain.text_splitter import SpacyTextSplitter
|
from langchain.text_splitter import SpacyTextSplitter
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
@ -7,7 +7,7 @@ from langchain.schema import Document
|
|||||||
from langchain.text_splitter import SpacyTextSplitter
|
from langchain.text_splitter import SpacyTextSplitter
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
@ -7,8 +7,8 @@ from langchain.text_splitter import CharacterTextSplitter
|
|||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
@ -6,8 +6,8 @@ from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoad
|
|||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.source_embedding import SourceEmbedding, register
|
from pilot.embedding_engine import SourceEmbedding, register
|
||||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
4
pilot/model/cache/__init__.py
vendored
Normal file
4
pilot/model/cache/__init__.py
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from .base import Cache
|
||||||
|
from .disk_cache import DiskCache
|
||||||
|
from .memory_cache import InMemoryCache
|
||||||
|
from .gpt_cache import GPTCache
|
27
pilot/model/cache/base.py
vendored
Normal file
27
pilot/model/cache/base.py
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
from typing import Any, Dict
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
class Cache(ABC):
    """Abstract interface for a string-keyed cache.

    Subclasses must implement ``__getitem__``, ``__setitem__`` and
    ``__contains__``. ``create`` and ``clear`` are optional hooks whose
    base implementations do nothing.
    """

    def create(self, key: str) -> bool:
        """Optional hook; the base implementation does nothing."""

    def clear(self):
        """Optional hook; the base implementation does nothing."""

    @abstractmethod
    def __getitem__(self, key: str) -> str:
        """Return the item stored under ``key`` or raise ``KeyError``."""

    @abstractmethod
    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` in the cache under ``key``."""

    @abstractmethod
    def __contains__(self, key: str) -> bool:
        """Report whether a cached value can be returned for ``key``."""
|
27
pilot/model/cache/disk_cache.py
vendored
Normal file
27
pilot/model/cache/disk_cache.py
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import os
|
||||||
|
import diskcache
|
||||||
|
import platformdirs
|
||||||
|
from pilot.model.cache import Cache
|
||||||
|
|
||||||
|
class DiskCache(Cache):
    """Cache implementation backed by the ``diskcache`` library.

    https://github.com/grantjenks/python-diskcache
    """

    def __init__(self, llm_name: str):
        """Create the underlying ``diskcache.Cache`` for ``llm_name``.

        The store path is ``_<llm_name>.diskcache`` inside the directory
        returned by ``platformdirs.user_cache_dir("dbgpt")``.
        """
        cache_root = platformdirs.user_cache_dir("dbgpt")
        store_path = os.path.join(cache_root, f"_{llm_name}.diskcache")
        self._diskcache = diskcache.Cache(store_path)

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present in the disk store."""
        return key in self._diskcache

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key`` in the disk store."""
        return self._diskcache[key]

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in the disk store."""
        self._diskcache[key] = value

    def clear(self):
        """Clear the underlying disk store."""
        self._diskcache.clear()
|
44
pilot/model/cache/gpt_cache.py
vendored
Normal file
44
pilot/model/cache/gpt_cache.py
vendored
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import os
|
||||||
|
from typing import Dict, Any
|
||||||
|
import platformdirs
|
||||||
|
|
||||||
|
from pilot.model.cache import Cache
|
||||||
|
|
||||||
|
try:
|
||||||
|
from gptcache.adapter.api import get, put, init_similar_cache
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
class GPTCache(Cache):
    """Semantic cache that delegates storage and lookup to the GPTCache library.

    Reads and writes go through ``gptcache.adapter.api.get`` / ``put`` and
    are always directed at the cache object held by this instance.
    """

    def __init__(self, cache) -> None:
        """Bind this wrapper to a GPTCache cache object.

        Args:
            cache: Either a name (``str``), in which case a new GPTCache
                cache object is created and initialised as a similarity
                cache under the ``dbgpt`` user cache directory in
                ``_<cache>.gptcache``, or an already configured GPTCache
                cache object, which is used as-is.
        """
        if isinstance(cache, str):
            # Imported locally under an alias: the module-level name ``Cache``
            # is this project's abstract base class, which cannot be
            # instantiated. ``init_similar_cache`` needs GPTCache's own object.
            from gptcache import Cache as _GPTCacheCore

            _cache = _GPTCacheCore()
            init_similar_cache(
                data_dir=os.path.join(
                    platformdirs.user_cache_dir("dbgpt"), f"_{cache}.gptcache"
                ),
                cache_obj=_cache,
            )
        else:
            _cache = cache

        self._cache_obj = _cache

    def __getitem__(self, key: str) -> str:
        """Return the cached answer for ``key``.

        Raises:
            KeyError: if GPTCache has no hit for ``key`` (the adapter
                returns ``None`` on a miss).
        """
        value = get(key, cache_obj=self._cache_obj)
        if value is None:
            raise KeyError(key)
        return value

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` as the cached answer for ``key``."""
        # Pass the instance's cache explicitly; without ``cache_obj`` the
        # adapter falls back to the library-wide default cache.
        put(key, value, cache_obj=self._cache_obj)

    def __contains__(self, key: str) -> bool:
        """Report whether GPTCache returns a hit for ``key``."""
        return get(key, cache_obj=self._cache_obj) is not None

    def create(self, llm: str, **kwargs: Dict[str, Any]) -> str:
        """Placeholder; currently does nothing and returns ``None``."""
        pass
|
24
pilot/model/cache/memory_cache.py
vendored
Normal file
24
pilot/model/cache/memory_cache.py
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
from pilot.model.cache import Cache
|
||||||
|
|
||||||
|
class InMemoryCache(Cache):
    """Cache that keeps its entries in an in-process dictionary."""

    def __init__(self) -> None:
        """Initialize an empty in-memory store."""
        self._cache: Dict[str, Any] = {}

    def create(self, key: str) -> bool:
        """Placeholder; currently does nothing and returns ``None``."""
        pass

    def clear(self):
        """Remove every entry from the store."""
        return self._cache.clear()

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``, replacing any previous entry."""
        self._cache[key] = value

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` has not been stored.
        """
        return self._cache[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` has been stored."""
        # Test key membership rather than the stored value, so that a key
        # whose value is ``None`` is still reported as present, in agreement
        # with ``__getitem__``.
        return key in self._cache
|
||||||
|
|
0
pilot/model/proxy/__init__.py
Normal file
0
pilot/model/proxy/__init__.py
Normal file
0
pilot/openapi/__init__.py
Normal file
0
pilot/openapi/__init__.py
Normal file
@ -17,7 +17,7 @@ from pilot.configs.model_config import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from pilot.scene.chat_knowledge.custom.prompt import prompt
|
from pilot.scene.chat_knowledge.custom.prompt import prompt
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ from pilot.configs.model_config import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from pilot.scene.chat_knowledge.default.prompt import prompt
|
from pilot.scene.chat_knowledge.default.prompt import prompt
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ from pilot.configs.model_config import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from pilot.scene.chat_knowledge.url.prompt import prompt
|
from pilot.scene.chat_knowledge.url.prompt import prompt
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ from pilot.conversation import (
|
|||||||
|
|
||||||
from pilot.server.gradio_css import code_highlight_css
|
from pilot.server.gradio_css import code_highlight_css
|
||||||
from pilot.server.gradio_patch import Chatbot as grChatbot
|
from pilot.server.gradio_patch import Chatbot as grChatbot
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
from pilot.utils import build_logger
|
from pilot.utils import build_logger
|
||||||
from pilot.vector_store.extract_tovec import (
|
from pilot.vector_store.extract_tovec import (
|
||||||
get_vector_storelist,
|
get_vector_storelist,
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
from pilot.source_embedding.source_embedding import SourceEmbedding, register
|
|
||||||
|
|
||||||
__all__ = ["SourceEmbedding", "register"]
|
|
@ -7,8 +7,8 @@ from pilot.configs.config import Config
|
|||||||
from pilot.configs.model_config import LLM_MODEL_CONFIG
|
from pilot.configs.model_config import LLM_MODEL_CONFIG
|
||||||
from pilot.scene.base import ChatScene
|
from pilot.scene.base import ChatScene
|
||||||
from pilot.scene.base_chat import BaseChat
|
from pilot.scene.base_chat import BaseChat
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
from pilot.source_embedding.string_embedding import StringEmbedding
|
from pilot.embedding_engine.string_embedding import StringEmbedding
|
||||||
from pilot.summary.mysql_db_summary import MysqlSummary
|
from pilot.summary.mysql_db_summary import MysqlSummary
|
||||||
from pilot.scene.chat_factory import ChatFactory
|
from pilot.scene.chat_factory import ChatFactory
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ pymysql
|
|||||||
unstructured==0.6.3
|
unstructured==0.6.3
|
||||||
grpcio==1.47.5
|
grpcio==1.47.5
|
||||||
gpt4all==0.3.0
|
gpt4all==0.3.0
|
||||||
|
diskcache==5.6.1
|
||||||
|
|
||||||
auto-gpt-plugin-template
|
auto-gpt-plugin-template
|
||||||
pymdown-extensions
|
pymdown-extensions
|
||||||
|
@ -11,7 +11,7 @@ from pilot.configs.model_config import (
|
|||||||
DATASETS_DIR,
|
DATASETS_DIR,
|
||||||
LLM_MODEL_CONFIG,
|
LLM_MODEL_CONFIG,
|
||||||
)
|
)
|
||||||
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
|
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
|
||||||
|
|
||||||
CFG = Config()
|
CFG = Config()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user