mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-06-28 16:27:31 +00:00
added eval for gptj
This commit is contained in:
parent
b8f39c5104
commit
af7a4004c8
15
configs/eval/generate_gptj_0.yaml
Normal file
15
configs/eval/generate_gptj_0.yaml
Normal file
# Eval/generation config for the gpt4all GPT-J epoch-0 checkpoint.
# model/tokenizer
model_name: 'nomic-ai/gpt4all-gptj-epoch_0'
tokenizer_name: 'EleutherAI/gpt-j-6B'
lora: false
lora_path: 'nolora'  # placeholder; unused while lora is false

# generation settings
max_new_tokens: 512
temperature: 0.001
# Block scalar (|) preserves the prompt's newlines, including the blank
# lines before the final question.
prompt: |
  #this code prints a string reversed
  my_string = "hello how are you"
  print(len(my_string))


  My code above does not work. Can you help me?
|
@ -22,24 +22,25 @@ def read_jsonl_file(file_path):
|
||||
return data
|
||||
|
||||
def setup_model(config):
    """Load the causal LM and tokenizer described by *config*.

    Reads config keys: "model_name", "tokenizer_name", and optionally
    "lora" / "lora_path".  Returns a ``(model, tokenizer)`` pair with the
    model in float16.
    """
    model = AutoModelForCausalLM.from_pretrained(
        config["model_name"],
        device_map="auto",
        torch_dtype=torch.float16,
        output_hidden_states=True,
        use_auth_token=True,  # checkpoint may live in a gated/private HF repo
    )
    tokenizer = AutoTokenizer.from_pretrained(config["tokenizer_name"])

    if "gptj" in config["model_name"]:
        # GPT-J ships without a pad token; reuse EOS so padded batches work.
        tokenizer.pad_token = tokenizer.eos_token
    else:
        added_tokens = tokenizer.add_special_tokens(
            {"bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>"}
        )
        # NOTE(review): the diff scrape lost indentation — this check is
        # assumed to belong inside the else branch; confirm against upstream.
        if added_tokens > 0:
            # Grow the embedding table to cover the newly added specials.
            model.resize_token_embeddings(len(tokenizer))

    # Use .get(): configs without a "lora" key must keep working (the
    # pre-diff guard was `'lora' in config and config['lora']`; a plain
    # config["lora"] lookup would raise KeyError on such configs).
    if config.get("lora"):
        model = PeftModelForCausalLM.from_pretrained(
            model,
            config["lora_path"],
            device_map="auto",
            torch_dtype=torch.float16,
        )

    model.to(dtype=torch.float16)

    print(f"Mem needed: {model.get_memory_footprint() / 1024 / 1024 / 1024:.2f} GB")

    return model, tokenizer
|
||||
|
||||
|
||||
|
||||
|
||||
def eval_example(model, tokenizer, example, config):
|
||||
|
||||
prompt = example['instruction'] + ' ' + example['instances'][0]['input']
|
||||
|
Loading…
Reference in New Issue
Block a user