fix: lint

csunny 2023-05-30 23:06:20 +08:00
parent b0e22eff05
commit 858cf472e2
4 changed files with 57 additions and 60 deletions

View File

@@ -85,9 +85,10 @@ class ChatGLMAdapater(BaseLLMAdaper):
 class GuanacoAdapter(BaseLLMAdaper):
     """TODO Support guanaco"""
+
     def match(self, model_path: str):
         return "guanaco" in model_path
 
     def loader(self, model_path: str, from_pretrained_kwargs: dict):
         tokenizer = LlamaTokenizer.from_pretrained(model_path)
         model = AutoModelForCausalLM.from_pretrained(
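Note: the hunk above is truncated inside the from_pretrained call, so the remaining arguments are not part of this diff. For orientation only, a loader of this shape typically continues as sketched below; load_guanaco is an illustrative name and the kwargs shown are assumptions, not the repository's code.

    from transformers import AutoModelForCausalLM, LlamaTokenizer

    def load_guanaco(model_path: str, from_pretrained_kwargs: dict):
        # Illustrative sketch: hand the model path plus any caller-supplied
        # kwargs straight through to transformers.
        tokenizer = LlamaTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(model_path, **from_pretrained_kwargs)
        return model, tokenizer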

View File

@@ -6,73 +6,68 @@ import transformers
 from transformers import GenerationConfig
 from llm_utils import Iteratorize, Stream
 
 
 def guanaco_generate_output(model, tokenizer, params, device):
     """Fork from fastchat: https://github.com/KohakuBlueleaf/guanaco-lora/blob/main/generate.py"""
     prompt = params["prompt"]
     inputs = tokenizer(prompt, return_tensors="pt")
     input_ids = inputs["input_ids"].to(device)
-    temperature=0.5,
-    top_p=0.95,
-    top_k=45,
-    max_new_tokens=128,
-    stream_output=True
+    temperature = (0.5,)
+    top_p = (0.95,)
+    top_k = (45,)
+    max_new_tokens = (128,)
+    stream_output = True
 
     generation_config = GenerationConfig(
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
     )
 
     generate_params = {
         "input_ids": input_ids,
         "generation_config": generation_config,
         "return_dict_in_generate": True,
         "output_scores": True,
         "max_new_tokens": max_new_tokens,
     }
 
     if stream_output:
         # Stream the reply 1 token at a time.
         # This is based on the trick of using 'stopping_criteria' to create an iterator,
         # from https://github.com/oobabooga/text-generation-webui/blob/ad37f396fc8bcbab90e11ecf17c56c97bfbd4a9c/modules/text_generation.py#L216-L243.
 
         def generate_with_callback(callback=None, **kwargs):
-            kwargs.setdefault(
-                "stopping_criteria", transformers.StoppingCriteriaList()
-            )
-            kwargs["stopping_criteria"].append(
-                Stream(callback_func=callback)
-            )
+            kwargs.setdefault("stopping_criteria", transformers.StoppingCriteriaList())
+            kwargs["stopping_criteria"].append(Stream(callback_func=callback))
             with torch.no_grad():
                 model.generate(**kwargs)
 
         def generate_with_streaming(**kwargs):
-            return Iteratorize(
-                generate_with_callback, kwargs, callback=None
-            )
+            return Iteratorize(generate_with_callback, kwargs, callback=None)
 
         with generate_with_streaming(**generate_params) as generator:
             for output in generator:
                 # new_tokens = len(output) - len(input_ids[0])
                 decoded_output = tokenizer.decode(output)
                 if output[-1] in [tokenizer.eos_token_id]:
                     break
                 yield decoded_output.split("### Response:")[-1].strip()
         return  # early return for stream_output
 
     with torch.no_grad():
         generation_output = model.generate(
             input_ids=input_ids,
             generation_config=generation_config,
             return_dict_in_generate=True,
             output_scores=True,
             max_new_tokens=max_new_tokens,
         )
 
     s = generation_output.sequences[0]
-    print(f"debug_sequences,{s}",s)
+    print(f"debug_sequences,{s}", s)
     output = tokenizer.decode(s)
-    print(f"debug_output,{output}",output)
+    print(f"debug_output,{output}", output)
     yield output.split("### Response:")[-1].strip()
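Note on the five reformatted assignments near the top of this hunk: a statement like temperature=0.5, already bound a one-element tuple before this commit, because the trailing comma builds a tuple; black only adds the parentheses that make that visible. If GenerationConfig is meant to receive scalars, the fix is to drop the commas, as in this sketch:

    temperature = 0.5,    # trailing comma: binds the tuple (0.5,)
    temperature = (0.5,)  # black's output, semantically identical
    temperature = 0.5     # scalar presumably intended for GenerationConfig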

View File

@@ -53,6 +53,7 @@ def create_chat_completion(
     response = None
 
 # TODO impl this use vicuna server api
+
 class Stream(transformers.StoppingCriteria):
     def __init__(self, callback_func=None):
         self.callback_func = callback_func
@@ -113,4 +114,4 @@ class Iteratorize:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.stop_now = True
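For readers of the two hunks above: Stream is a StoppingCriteria whose callback fires on every generation step, and Iteratorize turns that callback into a generator, which is the trick the comment in guanaco_llm references. A minimal self-contained sketch of the same callback-to-iterator pattern, with illustrative names (iteratorize, fake_generate) rather than the project's API:

    import queue
    import threading

    def iteratorize(func, kwargs):
        # Run a callback-style function on a worker thread and yield each
        # callback invocation as one item of a generator.
        q = queue.Queue()
        sentinel = object()

        def callback(value):
            q.put(value)

        def worker():
            try:
                func(callback=callback, **kwargs)
            finally:
                q.put(sentinel)  # signal exhaustion to the consumer

        threading.Thread(target=worker, daemon=True).start()
        while True:
            item = q.get()
            if item is sentinel:
                break
            yield item

    # Stand-in for model.generate with a Stream stopping criterion that
    # calls back once per new token.
    def fake_generate(callback=None, steps=3):
        for i in range(steps):
            callback(f"token-{i}")

    for tok in iteratorize(fake_generate, {"steps": 3}):
        print(tok)  # token-0, token-1, token-2

Unlike this plain generator, the real Iteratorize is also a context manager (hence the __exit__ and stop_now flag in the hunk above), so the consumer can abandon generation early.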

View File

@@ -85,14 +85,14 @@ class CodeGenChatAdapter(BaseChatAdpter):
 class GuanacoChatAdapter(BaseChatAdpter):
-    """Model chat adapter for Guanaco """
+    """Model chat adapter for Guanaco"""
 
     def match(self, model_path: str):
         return "guanaco" in model_path
 
     def get_generate_stream_func(self):
         from pilot.model.guanaco_llm import guanaco_generate_output
 
         return guanaco_generate_output
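The chat adapter mirrors the match-by-substring pattern of GuanacoAdapter in the first file: each adapter claims a model path and returns its streaming function. Below is a hedged sketch of the dispatch such adapters imply; the registry list and get_chat_adapter helper are assumptions for illustration, not the project's code.

    class BaseChatAdpter:
        def match(self, model_path: str) -> bool:
            return False

        def get_generate_stream_func(self):
            raise NotImplementedError

    class GuanacoChatAdapter(BaseChatAdpter):
        def match(self, model_path: str) -> bool:
            return "guanaco" in model_path

        def get_generate_stream_func(self):
            # The repo imports pilot.model.guanaco_llm here; stubbed for the sketch.
            return lambda model, tokenizer, params, device: iter(())

    adapters = [GuanacoChatAdapter()]

    def get_chat_adapter(model_path: str) -> BaseChatAdpter:
        # First adapter whose match() accepts the path wins.
        for adapter in adapters:
            if adapter.match(model_path):
                return adapter
        raise ValueError(f"No chat adapter for {model_path}")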