[ColossalEval] Support GSM, Data Leakage Evaluation and Tensor Parallel (#5169)

* Support GSM, Data Leakage Evaluation and Tensor Parallel

* remove redundant code and update inference.py in examples/gpt_evaluation

---------

Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com>
This commit is contained in:
Yuanchen
2023-12-12 14:47:35 +08:00
committed by GitHub
parent b07a6f4e27
commit cefdc32615
19 changed files with 578 additions and 100 deletions

View File

@@ -71,8 +71,8 @@ default_inference_kwargs = {
}
def get_few_shot_data(data: List[Dict]):
few_shot_data = []
def get_few_shot_data(data: List[Dict], subject):
few_shot_data = [f"以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。"]
for i in data:
few_shot_data.append(i["input"] + i["target"])
return few_shot_data
@@ -86,7 +86,9 @@ class CEvalDataset(BaseDataset):
"""
@staticmethod
def load(path: str, logger: DistributedLogger, few_shot: bool) -> List[Dict]:
def load(
path: str, logger: DistributedLogger, few_shot: bool, forward_only: bool, load_train: bool, load_reference: bool
) -> List[Dict]:
dataset = {"dev": {}, "test": {}}
for split in ["dev", "test"]:
files = os.listdir(os.path.join(path, split))
@@ -105,7 +107,7 @@ class CEvalDataset(BaseDataset):
if split == "test" and few_shot:
dataset[split][subject]["inference_kwargs"]["few_shot_data"] = get_few_shot_data(
dataset["dev"][subject]["data"]
dataset["dev"][subject]["data"], subject
)
with open(file_dir, encoding="utf-8") as f: