From 81fb56a67c5f005d45b941e8b469f3eb116816e6 Mon Sep 17 00:00:00 2001 From: Fangyin Cheng Date: Mon, 13 May 2024 13:50:21 +0800 Subject: [PATCH] feat(model): Support Yi-1.5 models (#1516) --- README.md | 3 +++ README.zh.md | 3 +++ dbgpt/configs/model_config.py | 4 ++++ dbgpt/model/adapter/base.py | 4 ++-- dbgpt/model/adapter/hf_adapter.py | 38 +++++++++++++++++++++++++++++++ 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 96d0e5a08..bc770a568 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,9 @@ At present, we have introduced several key features to showcase our current capa We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more. - News + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat) + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat) + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat) - πŸ”₯πŸ”₯πŸ”₯ [Qwen1.5-110B-Chat](https://huggingface.co/Qwen/Qwen1.5-110B-Chat) - πŸ”₯πŸ”₯πŸ”₯ [Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) - πŸ”₯πŸ”₯πŸ”₯ [Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) diff --git a/README.zh.md b/README.zh.md index 752c169c1..9038170c1 100644 --- a/README.zh.md +++ b/README.zh.md @@ -152,6 +152,9 @@ ζ΅·ι‡ζ¨‘εž‹ζ”―ζŒοΌŒεŒ…ζ‹¬εΌ€ζΊγ€APIδ»£η†η­‰ε‡ εη§ε€§θ―­θ¨€ζ¨‘εž‹γ€‚ε¦‚LLaMA/LLaMA2、Baichuan、ChatGLMγ€ζ–‡εΏƒγ€ι€šδΉ‰γ€ζ™Ίθ°±η­‰γ€‚ε½“ε‰ε·²ζ”―ζŒε¦‚δΈ‹ζ¨‘εž‹: - ζ–°ε’žζ”―ζŒζ¨‘εž‹ + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat) + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat) + - πŸ”₯πŸ”₯πŸ”₯ [Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat) - πŸ”₯πŸ”₯πŸ”₯ [Qwen1.5-110B-Chat](https://huggingface.co/Qwen/Qwen1.5-110B-Chat) 
- 🔥🔥🔥 [Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) - 🔥🔥🔥 [Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) diff --git a/dbgpt/configs/model_config.py b/dbgpt/configs/model_config.py index a36b33a7c..958e9514d 100644 --- a/dbgpt/configs/model_config.py +++ b/dbgpt/configs/model_config.py @@ -174,6 +174,10 @@ LLM_MODEL_CONFIG = { # https://huggingface.co/01-ai/Yi-34B-Chat-4bits "yi-34b-chat-4bits": os.path.join(MODEL_PATH, "Yi-34B-Chat-4bits"), "yi-6b-chat": os.path.join(MODEL_PATH, "Yi-6B-Chat"), + # https://huggingface.co/01-ai/Yi-1.5-6B-Chat + "yi-1.5-6b-chat": os.path.join(MODEL_PATH, "Yi-1.5-6B-Chat"), + "yi-1.5-9b-chat": os.path.join(MODEL_PATH, "Yi-1.5-9B-Chat"), + "yi-1.5-34b-chat": os.path.join(MODEL_PATH, "Yi-1.5-34B-Chat"), # https://huggingface.co/google/gemma-7b-it "gemma-7b-it": os.path.join(MODEL_PATH, "gemma-7b-it"), # https://huggingface.co/google/gemma-2b-it diff --git a/dbgpt/model/adapter/base.py b/dbgpt/model/adapter/base.py index ded5c793b..8d5bb9441 100644 --- a/dbgpt/model/adapter/base.py +++ b/dbgpt/model/adapter/base.py @@ -524,11 +524,11 @@ def get_model_adapter( """ adapter = None # First find adapter by model_name - for adapter_entry in model_adapters: + for adapter_entry in model_adapters[::-1]: if adapter_entry.model_adapter.match(model_type, model_name, None): adapter = adapter_entry.model_adapter break - for adapter_entry in model_adapters: + for adapter_entry in model_adapters[::-1]: if adapter_entry.model_adapter.match(model_type, None, model_path): adapter = adapter_entry.model_adapter break diff --git a/dbgpt/model/adapter/hf_adapter.py b/dbgpt/model/adapter/hf_adapter.py index d93f00cee..8a1f5d764 100644 --- a/dbgpt/model/adapter/hf_adapter.py +++ b/dbgpt/model/adapter/hf_adapter.py @@ -135,6 +135,41 @@ class YiAdapter(NewHFChatModelAdapter): ) +class Yi15Adapter(YiAdapter): + """Yi 1.5 model adapter.""" + + def do_match(self, 
lower_model_name_or_path: Optional[str] = None): + return ( + lower_model_name_or_path + and "yi-" in lower_model_name_or_path + and "1.5" in lower_model_name_or_path + and "chat" in lower_model_name_or_path + ) + + def get_str_prompt( + self, + params: Dict, + messages: List[ModelMessage], + tokenizer: Any, + prompt_template: str = None, + convert_to_compatible_format: bool = False, + ) -> Optional[str]: + str_prompt = super().get_str_prompt( + params, + messages, + tokenizer, + prompt_template, + convert_to_compatible_format, + ) + terminators = [ + tokenizer.eos_token_id, + ] + exist_token_ids = params.get("stop_token_ids", []) + terminators.extend(exist_token_ids) + params["stop_token_ids"] = terminators + return str_prompt + + class Mixtral8x7BAdapter(NewHFChatModelAdapter): """ https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1 @@ -335,7 +370,10 @@ class Llama3Adapter(NewHFChatModelAdapter): return str_prompt +# The following code is used to register the model adapter +# The last registered model adapter is matched first register_model_adapter(YiAdapter) +register_model_adapter(Yi15Adapter) register_model_adapter(Mixtral8x7BAdapter) register_model_adapter(SOLARAdapter) register_model_adapter(GemmaAdapter)