[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent ad56d16c1d
commit d61f4a0a30
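
Most of the changes below are mechanical reflows applied by the pre-commit hooks (presumably Black, judging by the style). A minimal sketch of the rule at work, using a stand-in function rather than the real tokenizer:

    # Stand-in for tokenizer.encode; only the call formatting is the point here.
    def encode(text, padding=None, truncation=False, max_length=None, return_tensors=None):
        return [text, padding, truncation, max_length, return_tensors]

    # Before: a single call over the line-length limit:
    #   ids = encode("x", padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    # After: one argument per line; the trailing comma keeps it exploded.
    ids = encode(
        "x",
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    print(ids)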
@@ -437,7 +437,8 @@ class RawConversationDataset(Dataset):
             tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length)
             self.tokenized_texts[index] = dict(tokens)
         return self.tokenized_texts[index]
 
+
 class AIMEDataset(Dataset):
     """
     AIME dataset.
@@ -454,26 +455,36 @@ class AIMEDataset(Dataset):
 
     def __len__(self) -> int:
         return len(self.raw_texts)
 
     def __getitem__(self, index: int):
         if self.tokenized_texts[index] is None:
             message = self.raw_texts[index]
-            gt_answer = self.tokenizer.encode(message['answer'], padding="max_length", truncation=True, max_length=self.max_length, return_tensors="pt")
+            gt_answer = self.tokenizer.encode(
+                message["answer"],
+                padding="max_length",
+                truncation=True,
+                max_length=self.max_length,
+                return_tensors="pt",
+            )
 
             def make_conv_hf(question):
-                msg = [
-                    {"role": "user", "content": question}
-                ]
+                msg = [{"role": "user", "content": question}]
                 return msg
 
             message = make_conv_hf(message["question"])
             tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length)
             self.tokenized_texts[index] = dict(tokens)
             self.tokenized_texts[index]["gt_answer"] = gt_answer.squeeze(1)
         return self.tokenized_texts[index]
 
 
 if __name__ == "__main__":
     from transformers import AutoTokenizer
 
     tokenizer = AutoTokenizer.from_pretrained("/home/share/data/model/Qwen2.5-3B")
-    dataset = AIMEDataset(tokenizer, "/home/yanglibing/workspace/PRIME/eval/data/AI-MO/aimo-validation-aime/aimo-validation-aime.jsonl", 512)
-    print(dataset[0])
+    dataset = AIMEDataset(
+        tokenizer,
+        "/home/yanglibing/workspace/PRIME/eval/data/AI-MO/aimo-validation-aime/aimo-validation-aime.jsonl",
+        512,
+    )
+    print(dataset[0])
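
For orientation, the hunk above only touches __len__ and __getitem__; the constructor is outside the diff. A hypothetical sketch of the state those methods assume (field names taken from the hunk, everything else guessed):

    import json

    from torch.utils.data import Dataset


    class AIMEDatasetSketch(Dataset):
        """Hypothetical reconstruction; the real __init__ is not shown in this diff."""

        def __init__(self, tokenizer, input_file, max_length):
            self.tokenizer = tokenizer
            self.max_length = max_length
            # Each JSONL record is assumed to carry "question" and "answer" fields,
            # matching message["question"] / message["answer"] in __getitem__.
            with open(input_file) as f:
                self.raw_texts = [json.loads(line) for line in f]
            # Lazy cache: __getitem__ tokenizes on first access and stores the result.
            self.tokenized_texts = [None] * len(self.raw_texts)

        def __len__(self) -> int:
            return len(self.raw_texts)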
@@ -1,5 +1,4 @@
 from collections import defaultdict
-import os
 from typing import Any, Dict, Optional
 
 import numpy as np
@@ -7,15 +6,12 @@ import ray
 import ray.util.collective as cc
 import torch
 from coati.dataset.loader import RawConversationDataset
-import wandb
-from applications.ColossalChat.coati.distributed.reward.reward_fn import math_reward_fn
 from coati.distributed.reward.verifiable_reward import VerifiableReward
-from torch import nn
 from torch.utils.data import DataLoader, DistributedSampler
 from transformers import AutoTokenizer
 
-from applications.ColossalChat.build.lib.coati.models.utils import read_jsonl_file
 from applications.ColossalChat.coati.dataset.loader import AIMEDataset
+from applications.ColossalChat.coati.distributed.reward.reward_fn import math_reward_fn
 from colossalai.utils import get_current_device
 
 from .comm import ray_broadcast_tensor_dict
@@ -106,12 +102,12 @@ class BaseProducer:
             self.dataloader.sampler.set_epoch(episode)
             for i, batch in enumerate(self.dataloader):
                 valid_metrics = self.validate()
 
                 if i >= num_valid_microbatches:
                     break
                 outputs = self.rollout(**batch)
                 outputs.update(valid_metrics)
 
                 print(f"[P{self.producer_idx}] Send data {[(k, v.shape) for k, v in outputs.items()]}")
                 outputs["temperature"] = torch.tensor(
                     [self.model.generate_config.temperature] * outputs["input_ids"].size(0)
@@ -189,11 +185,10 @@ class SimpleProducer(BaseProducer):
         self.model.load_state_dict(state_dict)
 
     def validate(self):
         all_rewards = []
         all_formats = []
         all_accs = []
-        batch_reward_means = []
 
         self.val_dataset = AIMEDataset(
             tokenizer=self.tokenizer,
             input_file="/home/yanglibing/workspace/PRIME/eval/data/AI-MO/aimo-validation-aime/aimo-validation-aime.jsonl",
@@ -209,8 +204,8 @@ class SimpleProducer(BaseProducer):
         self.reward_model = VerifiableReward(
             reward_fns=[math_reward_fn], tokenizer=self.tokenizer, tags=response_format_tags
         )
 
         def collate_fn(data_list: list[dict]) -> dict:
             tensors = defaultdict(list)
             non_tensors = defaultdict(list)
 
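
The body of collate_fn falls between this hunk and the next; only its opening and closing lines are visible. A plausible completion under that caveat (the tensor/non-tensor routing is guessed from the visible defaultdicts and the np.array(..., dtype=object) line):

    from collections import defaultdict

    import numpy as np
    import torch


    def collate_fn(data_list: list[dict]) -> dict:
        tensors = defaultdict(list)
        non_tensors = defaultdict(list)
        for data in data_list:
            for key, val in data.items():
                # Assumed routing: stack tensor fields, keep everything else as objects.
                if isinstance(val, torch.Tensor):
                    tensors[key].append(val)
                else:
                    non_tensors[key].append(val)
        tensors = {key: torch.stack(val, dim=0) for key, val in tensors.items()}
        for key, val in non_tensors.items():
            non_tensors[key] = np.array(val, dtype=object)
        return {**tensors, **non_tensors}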
@@ -228,41 +223,40 @@ class SimpleProducer(BaseProducer):
                     non_tensors[key] = np.array(val, dtype=object)
 
             return {**tensors, **non_tensors}
 
-        self.val_dataloader = DataLoader(dataset=self.val_dataset,
-                                         batch_size=64,
-                                         shuffle=True,
-                                         drop_last=True,
-                                         collate_fn=collate_fn)
+        self.val_dataloader = DataLoader(
+            dataset=self.val_dataset, batch_size=64, shuffle=True, drop_last=True, collate_fn=collate_fn
+        )
 
         all_rewards = torch.tensor([], device=self.device)
         all_formats = torch.tensor([], device=self.device)
         all_accs = torch.tensor([], device=self.device)
 
         for test_batch in self.val_dataloader:
             # test_batch['input_ids'].size() [32, 300]
             # test_batch["gt_answer"] torch.Size([32, 1, 300])
             test_output = self.rollout(**test_batch)
             # test_output["response_idx"] torch.Size([32, 8, 2])
             num_generations = test_output["response_idx"].size(1)
             print("num_generations", num_generations)
             data = {k: v.view(-1, v.size(-1)) for k, v in test_output.items()}
             # data = test_output
             reward_group = self.reward_model(
-                data["input_ids"], gt_answer=data["gt_answer"], response_idx=data["response_idx"])
+                data["input_ids"], gt_answer=data["gt_answer"], response_idx=data["response_idx"]
+            )
 
             rewards = torch.stack([x[0] for x in reward_group])
             format_rewards = torch.stack([x[1] for x in reward_group])
             acc_rewards = torch.stack([x[2] for x in reward_group])
 
             all_rewards = torch.cat([all_rewards, rewards])
             all_formats = torch.cat([all_formats, format_rewards])
             all_accs = torch.cat([all_accs, acc_rewards])
 
         avg_reward = torch.mean(all_rewards)
         avg_format = torch.mean(all_formats)
         avg_acc = torch.mean(all_accs)
 
         valid_metrics = {
             "avg_reward": torch.tensor(avg_reward).unsqueeze(0),
             "avg_format": torch.tensor(avg_format).unsqueeze(0),
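
One detail worth flagging in the metrics block above: avg_reward is already a tensor, so torch.tensor(avg_reward) re-wraps it and triggers PyTorch's copy-construct UserWarning. A small demonstration with made-up values:

    import torch

    # Concatenate per-batch rewards, then average, as validate() does.
    all_rewards = torch.cat([torch.tensor([1.0, 0.0]), torch.tensor([1.0, 1.0])])
    avg_reward = torch.mean(all_rewards)  # tensor(0.7500)

    # Equivalent to torch.tensor(avg_reward).unsqueeze(0), minus the warning:
    metric = avg_reward.unsqueeze(0)  # tensor([0.7500])
    print(metric)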
@@ -275,4 +269,3 @@ class SimpleProducer(BaseProducer):
             f"acc={valid_metrics['avg_acc'].item():.4f}"
         )
         return valid_metrics
-
@@ -1,2 +1,2 @@
 export NCCL_BLOCKING_WAIT=1
 CUDA_VISIBLE_DEVICES=4,5,6,7 python rl_example.py --dataset /home/share/data/dataset/math_competition_train_short.jsonl --model /home/share/data/model/Qwen2.5-3B -t 1 -i 2 -b vllm