diff --git a/configs/eval/generate_baseline.yaml b/configs/eval/generate_baseline.yaml new file mode 100644 index 00000000..4302d61f --- /dev/null +++ b/configs/eval/generate_baseline.yaml @@ -0,0 +1,17 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "tloen/alpaca-lora-7b" + + + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/configs/eval/generate_full.yaml b/configs/eval/generate_full.yaml new file mode 100644 index 00000000..82f5f96d --- /dev/null +++ b/configs/eval/generate_full.yaml @@ -0,0 +1,14 @@ +# model/tokenizer +model_name: "nomic-ai/vicuna-full-multi-turn_epoch_0" +tokenizer_name: "zpn/llama-7b" +lora_path: "no-lora" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/configs/eval/generate_large_2.yaml b/configs/eval/generate_large_2.yaml new file mode 100644 index 00000000..5cee0d7c --- /dev/null +++ b/configs/eval/generate_large_2.yaml @@ -0,0 +1,15 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_2" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? 
diff --git a/configs/eval/generate_large_3.yaml b/configs/eval/generate_large_3.yaml new file mode 100644 index 00000000..48f4cb06 --- /dev/null +++ b/configs/eval/generate_large_3.yaml @@ -0,0 +1,15 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_3" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/eval_figures.py b/eval_figures.py new file mode 100644 index 00000000..0126bda4 --- /dev/null +++ b/eval_figures.py @@ -0,0 +1,22 @@ +import glob +import pickle +import numpy as np +from matplotlib import pyplot as plt + +plt.figure() +for fpath in glob.glob('./eval_data/*multi*.pkl'): + parts = fpath.split('__') + model_name = parts[1].replace('model-', '').replace('.pkl', '') + lora_name = parts[2].replace('lora-', '').replace('.pkl', '') + with open(fpath, 'rb') as f: + data = pickle.load(f) + perplexities = data['perplexities'] + perplexities = np.nan_to_num(perplexities, nan=100) + perplexities = np.clip(perplexities, 0, 100) + plt.hist(perplexities, label='{}-{}'.format(model_name, lora_name), alpha=.5) + +plt.xlabel('Perplexity') +plt.ylabel('Frequency') +plt.legend() +plt.savefig('figs/perplexity_hist.png') + diff --git a/eval_self_instruct.py b/eval_self_instruct.py index a7243157..e0dbded1 100644 --- a/eval_self_instruct.py +++ b/eval_self_instruct.py @@ -8,6 +8,11 @@ from argparse import ArgumentParser from peft import PeftModelForCausalLM from transformers import AutoModelForCausalLM, AutoTokenizer +''' +Evaluates perplexity on the outputs of: +https://github.com/yizhongw/self-instruct/blob/main/human_eval/user_oriented_instructions.jsonl +''' + def read_jsonl_file(file_path): data = [] with open(file_path, 'r', encoding='utf-8') as file: @@ -47,7 +52,7 @@ def eval_example(model, tokenizer, example, 
config): continuations = [] tokenized_continuations = [] trajectories = [] - for i in range(3): + for i in range(1): with torch.no_grad(): outputs = model.generate(input_ids=input['input_ids'], max_new_tokens=config["max_new_tokens"],