Merge branch 'llm_framework' into DEV_TY_06

This commit is contained in:
tuyang.yhj 2023-06-25 14:47:30 +08:00
commit 03576cfc5d
34 changed files with 157 additions and 30 deletions

View File

@ -1,4 +1,4 @@
from pilot.source_embedding.csv_embedding import CSVEmbedding
from pilot.embedding_engine.csv_embedding import CSVEmbedding
# path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
path = "xx.csv"

View File

@ -1,4 +1,4 @@
from pilot.source_embedding.pdf_embedding import PDFEmbedding
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
path = "xxx.pdf"
path = "your_path/OceanBase-数据库-V4.1.0-应用开发.pdf"

View File

@ -1,4 +1,4 @@
from pilot.source_embedding.url_embedding import URLEmbedding
from pilot.embedding_engine.url_embedding import URLEmbedding
path = "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023"
model_name = "your_path/all-MiniLM-L6-v2"

View File

@ -1,3 +1,3 @@
from pilot.source_embedding import SourceEmbedding, register
from pilot.embedding_engine import SourceEmbedding, register
__all__ = ["SourceEmbedding", "register"]

View File

@ -0,0 +1,3 @@
from pilot.embedding_engine.source_embedding import SourceEmbedding, register
__all__ = ["SourceEmbedding", "register"]

View File

@ -3,7 +3,7 @@ from typing import Dict, List, Optional
from langchain.document_loaders import CSVLoader
from langchain.schema import Document
from pilot.source_embedding import SourceEmbedding, register
from pilot.embedding_engine import SourceEmbedding, register
class CSVEmbedding(SourceEmbedding):

View File

@ -4,12 +4,12 @@ from chromadb.errors import NotEnoughElementsException
from langchain.embeddings import HuggingFaceEmbeddings
from pilot.configs.config import Config
from pilot.source_embedding.csv_embedding import CSVEmbedding
from pilot.source_embedding.markdown_embedding import MarkdownEmbedding
from pilot.source_embedding.pdf_embedding import PDFEmbedding
from pilot.source_embedding.ppt_embedding import PPTEmbedding
from pilot.source_embedding.url_embedding import URLEmbedding
from pilot.source_embedding.word_embedding import WordEmbedding
from pilot.embedding_engine.csv_embedding import CSVEmbedding
from pilot.embedding_engine.markdown_embedding import MarkdownEmbedding
from pilot.embedding_engine.pdf_embedding import PDFEmbedding
from pilot.embedding_engine.ppt_embedding import PPTEmbedding
from pilot.embedding_engine.url_embedding import URLEmbedding
from pilot.embedding_engine.word_embedding import WordEmbedding
from pilot.vector_store.connector import VectorStoreConnector
CFG = Config()

View File

@ -9,9 +9,9 @@ from langchain.schema import Document
from langchain.text_splitter import SpacyTextSplitter
from pilot.configs.config import Config
from pilot.source_embedding import SourceEmbedding, register
from pilot.source_embedding.EncodeTextLoader import EncodeTextLoader
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
from pilot.embedding_engine import SourceEmbedding, register
from pilot.embedding_engine.EncodeTextLoader import EncodeTextLoader
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
CFG = Config()

View File

@ -7,7 +7,7 @@ from langchain.schema import Document
from langchain.text_splitter import SpacyTextSplitter
from pilot.configs.config import Config
from pilot.source_embedding import SourceEmbedding, register
from pilot.embedding_engine import SourceEmbedding, register
CFG = Config()

View File

@ -7,7 +7,7 @@ from langchain.schema import Document
from langchain.text_splitter import SpacyTextSplitter
from pilot.configs.config import Config
from pilot.source_embedding import SourceEmbedding, register
from pilot.embedding_engine import SourceEmbedding, register
CFG = Config()

View File

@ -7,8 +7,8 @@ from langchain.text_splitter import CharacterTextSplitter
from pilot.configs.config import Config
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
from pilot.source_embedding import SourceEmbedding, register
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
from pilot.embedding_engine import SourceEmbedding, register
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
CFG = Config()

View File

@ -6,8 +6,8 @@ from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoad
from langchain.schema import Document
from pilot.configs.config import Config
from pilot.source_embedding import SourceEmbedding, register
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
from pilot.embedding_engine import SourceEmbedding, register
from pilot.embedding_engine.chn_document_splitter import CHNDocumentSplitter
CFG = Config()

4
pilot/model/cache/__init__.py vendored Normal file
View File

@ -0,0 +1,4 @@
from .base import Cache
from .disk_cache import DiskCache
from .memory_cache import InMemoryCache
from .gpt_cache import GPTCache

27
pilot/model/cache/base.py vendored Normal file
View File

@ -0,0 +1,27 @@
import json
import hashlib
from typing import Any, Dict
from abc import ABC, abstractmethod
class Cache(ABC):
    """Abstract key/value cache interface for model responses.

    Concrete caches must provide item access (``cache[key]``), item
    assignment (``cache[key] = value``) and membership (``key in cache``).
    ``create`` and ``clear`` are optional hooks that do nothing by default.
    """

    def create(self, key: str) -> bool:
        """Optional hook; the base implementation does nothing and returns None."""
        return None

    def clear(self):
        """Optional hook; the base implementation does nothing and returns None."""
        return None

    @abstractmethod
    def __getitem__(self, key: str) -> str:
        """Return the value cached under ``key``; raise ``KeyError`` if missing."""

    @abstractmethod
    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` in the cache under ``key``."""

    @abstractmethod
    def __contains__(self, key: str) -> bool:
        """Report whether a cached value can be returned for ``key``."""

27
pilot/model/cache/disk_cache.py vendored Normal file
View File

@ -0,0 +1,27 @@
import os
import diskcache
import platformdirs
from pilot.model.cache import Cache
class DiskCache(Cache):
    """Persistent cache backed by the ``diskcache`` library.

    See https://github.com/grantjenks/python-diskcache
    """

    def __init__(self, llm_name: str):
        """Open the on-disk cache for ``llm_name``.

        The store lives under the per-user cache directory that
        ``platformdirs`` reports for the application name "dbgpt".
        """
        base_dir = platformdirs.user_cache_dir("dbgpt")
        store_path = os.path.join(base_dir, f"_{llm_name}.diskcache")
        self._diskcache = diskcache.Cache(store_path)

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``."""
        entry = self._diskcache[key]
        return entry

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        self._diskcache[key] = value

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is present in the underlying store."""
        present = key in self._diskcache
        return present

    def clear(self):
        """Remove every entry from the underlying store."""
        # The value returned by the underlying clear() is intentionally
        # discarded, so this method returns None.
        self._diskcache.clear()

44
pilot/model/cache/gpt_cache.py vendored Normal file
View File

@ -0,0 +1,44 @@
import os
from typing import Dict, Any
import platformdirs
from pilot.model.cache import Cache
try:
from gptcache.adapter.api import get, put, init_similar_cache
except ImportError:
pass
class GPTCache(Cache):
    """Semantic cache that uses the GPTCache library.

    Lookups and writes are delegated to ``gptcache.adapter.api.get`` and
    ``gptcache.adapter.api.put`` and are always routed to the cache object
    held by this instance.
    """

    def __init__(self, cache) -> None:
        """Create or wrap a GPTCache cache object.

        Args:
            cache: Either a name (``str``), in which case a new similarity
                cache is initialised under the per-user "dbgpt" cache
                directory, or an already-initialised GPTCache cache object,
                which is used as-is.

        Raises:
            ImportError: If ``cache`` is a name and ``gptcache`` is not
                installed.
        """
        if isinstance(cache, str):
            # The bare name ``Cache`` in this module is the project's abstract
            # base class, which cannot be instantiated. The object that
            # init_similar_cache() configures is gptcache's own Cache, so it
            # is imported locally under a distinct alias.
            try:
                from gptcache import Cache as _GPTCacheCore
            except ImportError as exc:
                raise ImportError(
                    "GPTCache requires the `gptcache` package to be installed."
                ) from exc

            _cache = _GPTCacheCore()
            init_similar_cache(
                data_dir=os.path.join(
                    platformdirs.user_cache_dir("dbgpt"), f"_{cache}.gptcache"
                ),
                cache_obj=_cache,
            )
        else:
            _cache = cache
        self._cache_obj = _cache

    def __getitem__(self, key: str) -> str:
        """Return the cached value for ``key``.

        Raises:
            KeyError: If the lookup yields no value for ``key``.
        """
        # Pass cache_obj explicitly; otherwise the adapter falls back to the
        # library's global default cache instead of this instance's cache.
        value = get(key, cache_obj=self._cache_obj)
        if value is None:
            raise KeyError(key)
        return value

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in this instance's cache object."""
        put(key, value, cache_obj=self._cache_obj)

    def __contains__(self, key: str) -> bool:
        """Report whether a lookup for ``key`` yields a value."""
        return get(key, cache_obj=self._cache_obj) is not None

    def create(self, llm: str, **kwargs: Dict[str, Any]) -> str:
        """Placeholder; not implemented, currently returns None."""
        pass

24
pilot/model/cache/memory_cache.py vendored Normal file
View File

@ -0,0 +1,24 @@
from typing import Dict, Any
from pilot.model.cache import Cache
class InMemoryCache(Cache):
    """Cache that keeps its entries in a plain dictionary in memory."""

    def __init__(self) -> None:
        """Initialize an empty cache that stores entries in memory."""
        self._cache: Dict[str, Any] = {}

    def create(self, key: str) -> bool:
        """Placeholder; not implemented, currently returns None."""
        pass

    def clear(self):
        """Remove every entry from the cache."""
        return self._cache.clear()

    def __setitem__(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``."""
        self._cache[key] = value

    def __getitem__(self, key: str) -> str:
        """Return the value stored under ``key``; raise ``KeyError`` if absent."""
        return self._cache[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` has an entry in the cache."""
        # Test key presence rather than "value is not None", so membership
        # always agrees with __getitem__ even when the stored value is None.
        return key in self._cache

View File

View File

View File

@ -17,7 +17,7 @@ from pilot.configs.model_config import (
)
from pilot.scene.chat_knowledge.custom.prompt import prompt
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
CFG = Config()

View File

@ -19,7 +19,7 @@ from pilot.configs.model_config import (
)
from pilot.scene.chat_knowledge.default.prompt import prompt
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
CFG = Config()

View File

@ -17,7 +17,7 @@ from pilot.configs.model_config import (
)
from pilot.scene.chat_knowledge.url.prompt import prompt
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
CFG = Config()

View File

@ -35,7 +35,7 @@ from pilot.conversation import (
from pilot.server.gradio_css import code_highlight_css
from pilot.server.gradio_patch import Chatbot as grChatbot
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
from pilot.utils import build_logger
from pilot.vector_store.extract_tovec import (
get_vector_storelist,

View File

@ -1,3 +0,0 @@
from pilot.source_embedding.source_embedding import SourceEmbedding, register
__all__ = ["SourceEmbedding", "register"]

View File

@ -7,8 +7,8 @@ from pilot.configs.config import Config
from pilot.configs.model_config import LLM_MODEL_CONFIG
from pilot.scene.base import ChatScene
from pilot.scene.base_chat import BaseChat
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.source_embedding.string_embedding import StringEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.string_embedding import StringEmbedding
from pilot.summary.mysql_db_summary import MysqlSummary
from pilot.scene.chat_factory import ChatFactory

View File

@ -50,6 +50,7 @@ pymysql
unstructured==0.6.3
grpcio==1.47.5
gpt4all==0.3.0
diskcache==5.6.1
auto-gpt-plugin-template
pymdown-extensions

View File

@ -11,7 +11,7 @@ from pilot.configs.model_config import (
DATASETS_DIR,
LLM_MODEL_CONFIG,
)
from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding
CFG = Config()