mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-06-21 13:11:27 +00:00
* Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com>
76 lines
2.4 KiB
Python
Executable File
76 lines
2.4 KiB
Python
Executable File
from dataclasses import dataclass
|
|
from typing import List, Optional
|
|
|
|
import torch
|
|
import torch.nn.functional as F
|
|
from coati.experience_maker.base import Experience
|
|
|
|
|
|
@dataclass
class BufferItem:
    """A single (un-batched) sample of experience data.

    One ``BufferItem`` holds the per-sample slice of every field listed below.

    Tensor shapes, per field:
        sequences:        (S)
        action_log_probs: (A)
        values:           (1)
        reward:           (1)
        kl:               presumably (1) like the other scalars -- TODO confirm
        advantages:       (1)
        attention_mask:   (S), may be None
        action_mask:      (A), may be None

    "A" is the number of actions.
    """

    # NOTE: field order is part of the positional constructor signature.
    sequences: torch.Tensor
    action_log_probs: torch.Tensor
    values: torch.Tensor
    reward: torch.Tensor
    kl: torch.Tensor
    advantages: torch.Tensor
    attention_mask: Optional[torch.LongTensor]
    action_mask: Optional[torch.BoolTensor]
|
|
|
|
|
|
def split_experience_batch(experience: Experience) -> List[BufferItem]:
    """Break a batched ``Experience`` into one ``BufferItem`` per sample.

    The batch size is taken from the first dimension of ``experience.sequences``.
    Every tensor field is unbound along dim 0; a non-tensor field (i.e. ``None``)
    is handed unchanged to every resulting item.

    Args:
        experience: the batched experience to split.

    Returns:
        A list with one ``BufferItem`` per sample, in batch order.
    """
    field_names = (
        "sequences",
        "action_log_probs",
        "values",
        "reward",
        "kl",
        "advantages",
        "attention_mask",
        "action_mask",
    )
    num_items = experience.sequences.size(0)
    per_item_fields = [{} for _ in range(num_items)]

    for name in field_names:
        field = getattr(experience, name)
        # Tensors are sliced per sample; anything else (None) is replicated as-is.
        pieces = torch.unbind(field) if isinstance(field, torch.Tensor) else [field] * num_items
        assert num_items == len(pieces)
        for idx, piece in enumerate(pieces):
            per_item_fields[idx][name] = piece

    return [BufferItem(**fields) for fields in per_item_fields]
|
|
|
|
|
|
def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = "left") -> torch.Tensor:
|
|
assert side in ("left", "right")
|
|
max_len = max(seq.size(0) for seq in sequences)
|
|
padded_sequences = []
|
|
for seq in sequences:
|
|
pad_len = max_len - seq.size(0)
|
|
padding = (pad_len, 0) if side == "left" else (0, pad_len)
|
|
padded_sequences.append(F.pad(seq, padding))
|
|
return torch.stack(padded_sequences, dim=0)
|
|
|
|
|
|
def make_experience_batch(items: List[BufferItem]) -> Experience:
    """Collate per-sample ``BufferItem`` objects into one batched ``Experience``.

    This is the inverse of ``split_experience_batch``: each field is gathered
    from all items and combined along a new leading (batch) dimension.

    * ``action_log_probs`` and ``action_mask`` are zero-padded to a common
      length (left side, the default of ``_zero_pad_sequences``) and stacked.
    * All other tensor fields are combined with ``torch.stack``.
    * A field that is ``None`` on every item (the optional masks) is forwarded
      as ``None`` instead of being stacked, so a batch that was split with a
      missing mask can be rebuilt.

    Args:
        items: the samples to batch together; must be non-empty.

    Returns:
        An ``Experience`` holding the batched fields.

    Raises:
        RuntimeError: from ``torch.stack`` when ``items`` is empty.
    """
    keys = ("sequences", "action_log_probs", "values", "reward", "kl", "advantages", "attention_mask", "action_mask")
    to_pad_keys = {"action_log_probs", "action_mask"}

    kwargs = {}
    for key in keys:
        vals = [getattr(item, key) for item in items]
        if vals and all(val is None for val in vals):
            # Optional field absent on every sample: nothing to pad or stack.
            # NOTE(review): assumes Experience accepts None for its optional
            # mask fields, mirroring BufferItem's Optional annotations.
            batch_data = None
        elif key in to_pad_keys:
            batch_data = _zero_pad_sequences(vals)
        else:
            batch_data = torch.stack(vals, dim=0)
        kwargs[key] = batch_data
    return Experience(**kwargs)
|