update:format

This commit is contained in:
aries-ckt 2023-06-05 16:27:52 +08:00
parent be1a792d3c
commit f2f28fee42
11 changed files with 34 additions and 18 deletions

View File

@ -0,0 +1 @@
LlamaIndex是一个数据框架，旨在帮助您构建LLM应用程序。它包括一个向量存储索引和一个简单的目录阅读器，可以帮助您处理和操作数据。此外，LlamaIndex还提供了一个GPT Index，可以用于数据增强和生成更好的LM模型。

View File

@ -97,7 +97,6 @@ class GuanacoAdapter(BaseLLMAdaper):
return model, tokenizer return model, tokenizer
class GuanacoAdapter(BaseLLMAdaper): class GuanacoAdapter(BaseLLMAdaper):
"""TODO Support guanaco""" """TODO Support guanaco"""

View File

@ -3,7 +3,6 @@ from threading import Thread
from transformers import TextIteratorStreamer, StoppingCriteriaList, StoppingCriteria from transformers import TextIteratorStreamer, StoppingCriteriaList, StoppingCriteria
def guanaco_stream_generate_output(model, tokenizer, params, device, context_len=2048): def guanaco_stream_generate_output(model, tokenizer, params, device, context_len=2048):
"""Fork from: https://github.com/KohakuBlueleaf/guanaco-lora/blob/main/generate.py""" """Fork from: https://github.com/KohakuBlueleaf/guanaco-lora/blob/main/generate.py"""
tokenizer.bos_token_id = 1 tokenizer.bos_token_id = 1

View File

@ -28,7 +28,9 @@ _DEFAULT_TEMPLATE_EN = """ Based on the known information below, provide users w
{question} {question}
""" """
_DEFAULT_TEMPLATE = _DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == 'en' else _DEFAULT_TEMPLATE_ZH _DEFAULT_TEMPLATE = (
_DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == "en" else _DEFAULT_TEMPLATE_ZH
)
PROMPT_SEP = SeparatorStyle.SINGLE.value PROMPT_SEP = SeparatorStyle.SINGLE.value

View File

@ -29,7 +29,9 @@ _DEFAULT_TEMPLATE_EN = """ Based on the known information below, provide users w
{question} {question}
""" """
_DEFAULT_TEMPLATE = _DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == 'en' else _DEFAULT_TEMPLATE_ZH _DEFAULT_TEMPLATE = (
_DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == "en" else _DEFAULT_TEMPLATE_ZH
)
PROMPT_SEP = SeparatorStyle.SINGLE.value PROMPT_SEP = SeparatorStyle.SINGLE.value

View File

@ -28,7 +28,9 @@ _DEFAULT_TEMPLATE_EN = """ Based on the known information below, provide users w
{question} {question}
""" """
_DEFAULT_TEMPLATE = _DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == 'en' else _DEFAULT_TEMPLATE_ZH _DEFAULT_TEMPLATE = (
_DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == "en" else _DEFAULT_TEMPLATE_ZH
)
PROMPT_SEP = SeparatorStyle.SINGLE.value PROMPT_SEP = SeparatorStyle.SINGLE.value

View File

@ -59,6 +59,7 @@ class ChatGLMChatAdapter(BaseChatAdpter):
return chatglm_generate_stream return chatglm_generate_stream
class GuanacoChatAdapter(BaseChatAdpter): class GuanacoChatAdapter(BaseChatAdpter):
"""Model chat adapter for Guanaco""" """Model chat adapter for Guanaco"""
@ -66,10 +67,13 @@ class GuanacoChatAdapter(BaseChatAdpter):
return "guanaco" in model_path return "guanaco" in model_path
def get_generate_stream_func(self): def get_generate_stream_func(self):
from pilot.model.llm_out.guanaco_stream_llm import guanaco_stream_generate_output from pilot.model.llm_out.guanaco_stream_llm import (
guanaco_stream_generate_output,
)
return guanaco_generate_output return guanaco_generate_output
class CodeT5ChatAdapter(BaseChatAdpter): class CodeT5ChatAdapter(BaseChatAdpter):
"""Model chat adapter for CodeT5""" """Model chat adapter for CodeT5"""

View File

@ -15,12 +15,12 @@ class EncodeTextLoader(BaseLoader):
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load from file path.""" """Load from file path."""
with open(self.file_path, 'rb') as f: with open(self.file_path, "rb") as f:
raw_text = f.read() raw_text = f.read()
result = chardet.detect(raw_text) result = chardet.detect(raw_text)
if result['encoding'] is None: if result["encoding"] is None:
text = raw_text.decode('utf-8') text = raw_text.decode("utf-8")
else: else:
text = raw_text.decode(result['encoding']) text = raw_text.decode(result["encoding"])
metadata = {"source": self.file_path} metadata = {"source": self.file_path}
return [Document(page_content=text, metadata=metadata)] return [Document(page_content=text, metadata=metadata)]

View File

@ -20,13 +20,14 @@ CFG = Config()
KnowledgeEmbeddingType = { KnowledgeEmbeddingType = {
".txt": (MarkdownEmbedding, {}), ".txt": (MarkdownEmbedding, {}),
".md": (MarkdownEmbedding,{}), ".md": (MarkdownEmbedding, {}),
".pdf": (PDFEmbedding, {}), ".pdf": (PDFEmbedding, {}),
".doc": (WordEmbedding, {}), ".doc": (WordEmbedding, {}),
".docx": (WordEmbedding, {}), ".docx": (WordEmbedding, {}),
".csv": (CSVEmbedding, {}), ".csv": (CSVEmbedding, {}),
} }
class KnowledgeEmbedding: class KnowledgeEmbedding:
def __init__( def __init__(
self, self,
@ -34,7 +35,6 @@ class KnowledgeEmbedding:
vector_store_config, vector_store_config,
file_type: Optional[str] = "default", file_type: Optional[str] = "default",
file_path: Optional[str] = None, file_path: Optional[str] = None,
): ):
"""Initialize with Loader url, model_name, vector_store_config""" """Initialize with Loader url, model_name, vector_store_config"""
self.file_path = file_path self.file_path = file_path
@ -62,13 +62,20 @@ class KnowledgeEmbedding:
extension = "." + self.file_path.rsplit(".", 1)[-1] extension = "." + self.file_path.rsplit(".", 1)[-1]
if extension in KnowledgeEmbeddingType: if extension in KnowledgeEmbeddingType:
knowledge_class, knowledge_args = KnowledgeEmbeddingType[extension] knowledge_class, knowledge_args = KnowledgeEmbeddingType[extension]
embedding = knowledge_class(self.file_path, model_name=self.model_name, vector_store_config=self.vector_store_config, **knowledge_args) embedding = knowledge_class(
self.file_path,
model_name=self.model_name,
vector_store_config=self.vector_store_config,
**knowledge_args,
)
return embedding return embedding
raise ValueError(f"Unsupported knowledge file type '{extension}'") raise ValueError(f"Unsupported knowledge file type '{extension}'")
return embedding return embedding
def similar_search(self, text, topk): def similar_search(self, text, topk):
vector_client = VectorStoreConnector(CFG.VECTOR_STORE_TYPE, self.vector_store_config) vector_client = VectorStoreConnector(
CFG.VECTOR_STORE_TYPE, self.vector_store_config
)
return vector_client.similar_search(text, topk) return vector_client.similar_search(text, topk)
def vector_exist(self): def vector_exist(self):

View File

@ -20,6 +20,7 @@ class PDFEmbedding(SourceEmbedding):
self.model_name = model_name self.model_name = model_name
self.vector_store_config = vector_store_config self.vector_store_config = vector_store_config
self.encoding = encoding self.encoding = encoding
@register @register
def read(self): def read(self):
"""Load from pdf path.""" """Load from pdf path."""

View File

@ -40,7 +40,6 @@ class LocalKnowledgeInit:
client.source_embedding() client.source_embedding()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--vector_name", type=str, default="default") parser.add_argument("--vector_name", type=str, default="default")