fix: gorilla chat adapter and config

csunny 2023-06-08 17:35:17 +08:00
parent 8b3d7b0ba7
commit 0948bc45bc
4 changed files with 21 additions and 2 deletions


@@ -146,6 +146,9 @@ class Config(metaclass=Singleton):
        self.MILVUS_USERNAME = os.getenv("MILVUS_USERNAME", None)
        self.MILVUS_PASSWORD = os.getenv("MILVUS_PASSWORD", None)
+       # QLoRA
+       self.QLoRA = os.getenv("QUANTIZE_QLORA", "True")
+
        ### EMBEDDING Configuration
        self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text2vec")
        self.KNOWLEDGE_CHUNK_SIZE = int(os.getenv("KNOWLEDGE_CHUNK_SIZE", 500))
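
Worth noting when reading this hunk: os.getenv returns a string, so self.QLoRA holds the text "True" or "False" rather than a boolean, and any non-empty value (including "False") is truthy under a bare if. A minimal sketch of parsing the variable into a real boolean; the env_flag helper is illustrative and not part of this commit:

    import os

    def env_flag(name: str, default: str = "True") -> bool:
        # Accept common affirmative spellings; anything else reads as False.
        return os.getenv(name, default).strip().lower() in ("1", "true", "yes")

    QLORA_ENABLED = env_flag("QUANTIZE_QLORA")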


@@ -6,8 +6,10 @@ from typing import List
from functools import cache
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, BitsAndBytesConfig
from pilot.configs.model_config import DEVICE
+from pilot.configs.config import Config

bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="bfloat16", bnb_4bit_use_double_quant=False)

+CFG = Config()

class BaseLLMAdaper:
    """The Base class for multi model, in our project.

@@ -106,7 +108,8 @@ class FalconAdapater(BaseLLMAdaper):
    def loader(self, model_path: str, from_pretrained_kwagrs: dict):
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
-       if QLORA:
+       if CFG.QLoRA:
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                load_in_4bit=True,  # quantize
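
For context, a rough sketch of what the CFG.QLoRA branch amounts to with the transformers API, reusing the bnb_config defined above; the load_falcon_4bit name and the device_map/trust_remote_code arguments are assumptions for illustration, not code from this commit:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights as 4-bit NF4
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,  # do matmuls in bfloat16
        bnb_4bit_use_double_quant=False,
    )

    def load_falcon_4bit(model_path: str):
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=bnb_config,  # one config object for all quant settings
            device_map="auto",               # spread layers across available devices
            trust_remote_code=True,          # Falcon checkpoints ship custom modeling code
        )
        return model, tokenizer

Passing the whole BitsAndBytesConfig via quantization_config keeps the quantization settings in one place instead of repeating load_in_4bit at each call site.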


@@ -61,6 +61,8 @@ class BaseOutputParser(ABC):
            # stream out output
            output = data["text"][11:].replace("<s>", "").strip()
+           # TODO gorilla and falcon output
        else:
            output = data["text"].strip()
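
The stream branch above slices a fixed 11-character prefix off the generated text before stripping the <s> token. As a small illustrative rewrite (not from this commit), naming that magic number makes the intent visible and gives the TODO'd gorilla/falcon handling an obvious extension point; skip_echo_len is a hypothetical parameter:

    def parse_stream_output(text: str, skip_echo_len: int = 11) -> str:
        # Drop the echoed prompt prefix, then remove special tokens.
        return text[skip_echo_len:].replace("<s>", "").strip()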


@@ -116,10 +116,21 @@ class ProxyllmChatAdapter(BaseChatAdpter):
        return proxyllm_generate_stream

+class GorillaChatAdapter(BaseChatAdpter):
+    def match(self, model_path: str):
+        return "gorilla" in model_path
+
+    def get_generate_stream_func(self):
+        from pilot.model.llm_out.gorilla_llm import generate_stream
+
+        return generate_stream
+
register_llm_model_chat_adapter(VicunaChatAdapter)
register_llm_model_chat_adapter(ChatGLMChatAdapter)
register_llm_model_chat_adapter(GuanacoChatAdapter)
-register_llm_model_adapters(FalconChatAdapter)
+register_llm_model_chat_adapter(FalconChatAdapter)
+register_llm_model_chat_adapter(GorillaChatAdapter)

# Proxy model for test and develop, it's cheap for us now.
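
For readers new to this file, a sketch of the registry pattern those register_llm_model_chat_adapter calls feed into, inferred from the call sites above rather than copied from the repo: each adapter is instantiated once into a list, and lookup walks the list until one adapter's match() accepts the model path:

    llm_model_chat_adapters = []

    def register_llm_model_chat_adapter(cls):
        # Instantiate the adapter once and keep it for later lookup.
        llm_model_chat_adapters.append(cls())

    def get_llm_chat_adapter(model_path: str):
        # Return the first registered adapter that claims this model path.
        for adapter in llm_model_chat_adapters:
            if adapter.match(model_path):
                return adapter
        raise ValueError(f"Invalid model adapter for {model_path}")

Registration order matters with this scheme: a substring test like "gorilla" in model_path means more specific adapters should be registered before catch-all ones.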