diff --git a/configs/eval/generate_baseline.yaml b/configs/eval/generate_baseline.yaml new file mode 100644 index 00000000..4302d61f --- /dev/null +++ b/configs/eval/generate_baseline.yaml @@ -0,0 +1,17 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "tloen/alpaca-lora-7b" + + + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/configs/eval/generate_full.yaml b/configs/eval/generate_full.yaml new file mode 100644 index 00000000..82f5f96d --- /dev/null +++ b/configs/eval/generate_full.yaml @@ -0,0 +1,14 @@ +# model/tokenizer +model_name: "nomic-ai/vicuna-full-multi-turn_epoch_0" +tokenizer_name: "zpn/llama-7b" +lora_path: "no-lora" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/configs/eval/generate_large_2.yaml b/configs/eval/generate_large_2.yaml new file mode 100644 index 00000000..5cee0d7c --- /dev/null +++ b/configs/eval/generate_large_2.yaml @@ -0,0 +1,15 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_2" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? 
diff --git a/configs/eval/generate_large_3.yaml b/configs/eval/generate_large_3.yaml new file mode 100644 index 00000000..48f4cb06 --- /dev/null +++ b/configs/eval/generate_large_3.yaml @@ -0,0 +1,15 @@ +# model/tokenizer +model_name: "zpn/llama-7b" +tokenizer_name: "zpn/llama-7b" +lora: true +lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_3" + +max_new_tokens: 512 +temperature: 0.001 +prompt: | + #this code prints a string reversed + my_string = "hello how are you" + print(len(my_string)) + + + My code above does not work. Can you help me? diff --git a/eval_figures.py b/eval_figures.py new file mode 100644 index 00000000..0126bda4 --- /dev/null +++ b/eval_figures.py @@ -0,0 +1,22 @@ +import glob +import pickle +import numpy as np +from matplotlib import pyplot as plt + +plt.figure() +for fpath in glob.glob('./eval_data/*multi*.pkl'): + parts = fpath.split('__') + model_name = parts[1].replace('model-', '').replace('.pkl', '') + lora_name = parts[2].replace('lora-', '').replace('.pkl', '') + with open(fpath, 'rb') as f: + data = pickle.load(f) + perplexities = data['perplexities'] + perplexities = np.nan_to_num(perplexities, nan=100) + perplexities = np.clip(perplexities, 0, 100) + plt.hist(perplexities, label='{}-{}'.format(model_name, lora_name), alpha=.5) + +plt.xlabel('Perplexity') +plt.ylabel('Frequency') +plt.legend() +plt.savefig('figs/perplexity_hist.png') + diff --git a/eval_self_instruct.py b/eval_self_instruct.py index a7243157..e0dbded1 100644 --- a/eval_self_instruct.py +++ b/eval_self_instruct.py @@ -8,6 +8,11 @@ from argparse import ArgumentParser from peft import PeftModelForCausalLM from transformers import AutoModelForCausalLM, AutoTokenizer +''' +Evaluates perplexity on the outputs of: +https://github.com/yizhongw/self-instruct/blob/main/human_eval/user_oriented_instructions.jsonl +''' + def read_jsonl_file(file_path): data = [] with open(file_path, 'r', encoding='utf-8') as file: @@ -47,7 +52,7 @@ def eval_example(model, tokenizer, example, 
config): continuations = [] tokenized_continuations = [] trajectories = [] - for i in range(3): + for i in range(1): with torch.no_grad(): outputs = model.generate(input_ids=input['input_ids'], max_new_tokens=config["max_new_tokens"],