From 3736eda56a1594127f8a32931d1ec3052e9b47e3 Mon Sep 17 00:00:00 2001 From: Zach Nussbaum Date: Mon, 1 May 2023 21:39:21 +0000 Subject: [PATCH] feat: eval for retrieval --- configs/eval/evaluate_gpt4all_jr.yaml | 18 +++++++++ gpt4all/eval/eval_squad.py | 54 +++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 configs/eval/evaluate_gpt4all_jr.yaml create mode 100644 gpt4all/eval/eval_squad.py diff --git a/configs/eval/evaluate_gpt4all_jr.yaml b/configs/eval/evaluate_gpt4all_jr.yaml new file mode 100644 index 00000000..989928e8 --- /dev/null +++ b/configs/eval/evaluate_gpt4all_jr.yaml @@ -0,0 +1,18 @@ +# model/tokenizer +model_name: "/home/paperspace/gpt4all/gpt4all/train/ckpts/epoch_2" +tokenizer_name: "EleutherAI/gpt-j-6B" +version: null +gradient_checkpointing: true +save_name: "nomic-ai/gpt-jr" +encoder_dim: 384 + +# dataset +streaming: false +num_proc: 64 +dataset_path: "/home/paperspace/gpt4all/gpt4all/index/squad_supplemented_validation" +max_length: 1024 +batch_size: 32 +pct_test: 0.05 +q_column: "question" +a_column: "answers" +encoder_column: "neighbor_embeddings" \ No newline at end of file diff --git a/gpt4all/eval/eval_squad.py b/gpt4all/eval/eval_squad.py new file mode 100644 index 00000000..d474e4cc --- /dev/null +++ b/gpt4all/eval/eval_squad.py @@ -0,0 +1,54 @@ +import torch +from gpt4all.models import GPTJRForCausalLM +from gpt4all.data.retrieval_dataloader import load_retrieval_augmented_data +from gpt4all.train.metrics import f1_score, exact_match_score +from gpt4all.utils.read import read_config +from transformers import AutoTokenizer +from argparse import ArgumentParser +from tqdm import tqdm + +parser = ArgumentParser() +parser.add_argument("--config", type=str, default="config.yaml") + +args = parser.parse_args() + +config = read_config(args.config) + +tokenizer = AutoTokenizer.from_pretrained(config["tokenizer_name"]) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + +dataloader = load_retrieval_augmented_data(config, tokenizer, split_dataset=False) + + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +model = GPTJRForCausalLM.from_pretrained(config["model_name"], use_cache=False) +model.to(device) +model.eval() + +# Evaluate the model on the SQUAD dataset +f1s = [] +exact_matches = [] +with torch.no_grad(): + for batch in tqdm(dataloader): + outputs = model(input_ids=batch["input_ids"].to(device), + labels=batch["labels"].to(device), + encoder_hidden_states=batch["encoder_hidden_states"].to(device)) + + predicted_tokens = outputs.logits.argmax(dim=-1) + predicted = tokenizer.batch_decode(predicted_tokens, skip_special_tokens=True) + + labels = batch["labels"] + mask = labels == -100 + labels[mask] = tokenizer.pad_token_id + ground_truth = tokenizer.batch_decode(labels, skip_special_tokens=True) + + f1 = f1_score(predicted, ground_truth) + exact_match = exact_match_score(predicted, ground_truth) + + f1s.extend(f1) + exact_matches.extend(exact_match) + + +print(torch.tensor(f1s).mean()) +print(torch.tensor(exact_matches).to(torch.float32).mean()) \ No newline at end of file