mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-04-28 03:43:01 +00:00
* feat: modify forward fn of critic and reward model * feat: modify calc_action_log_probs * to: add wandb in sft and rm trainer * feat: update train_sft * feat: update train_rm * style: modify type annotation and add warning * feat: pass tokenizer to ppo trainer * to: modify trainer base and maker base * feat: add wandb in ppo trainer * feat: pass tokenizer to generate * test: update generate fn tests * test: update train tests * fix: remove action_mask * feat: remove unused code * fix: fix wrong ignore_index * fix: fix mock tokenizer * chore: update requirements * revert: modify make_experience * fix: fix inference * fix: add padding side * style: modify _on_learn_batch_end * test: use mock tokenizer * fix: use bf16 to avoid overflow * fix: fix workflow * [chat] fix gemini strategy * [chat] fix * sync: update colossalai strategy * fix: fix args and model dtype * fix: fix checkpoint test * fix: fix requirements * fix: fix missing import and wrong arg * fix: temporarily skip gemini test in stage 3 * style: apply pre-commit * fix: temporarily skip gemini test in stage 1&2 --------- Co-authored-by: Mingyan Jiang <1829166702@qq.com>
61 lines
2.1 KiB
Python
61 lines
2.1 KiB
Python
import random
|
|
import warnings
|
|
from typing import List
|
|
|
|
import torch
|
|
from coati.experience_maker.base import Experience
|
|
|
|
from .base import ExperienceBuffer
|
|
from .utils import BufferItem, make_experience_batch, split_experience_batch
|
|
|
|
|
|
class NaiveExperienceBuffer(ExperienceBuffer):
    """List-backed experience buffer.

    Incoming experience batches are split into individual items and kept in a
    plain Python list; batches are rebuilt from randomly drawn items on demand.

    Args:
        sample_batch_size (int): Number of items drawn by each call to ``sample``.
        limit (int, optional): Maximum number of stored items. A value <= 0
            disables the cap. Defaults to 0.
        cpu_offload (bool, optional): When True, experiences are sent to the CPU
            before being stored and sent to the current CUDA device after being
            sampled. Defaults to True.
    """

    def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
        """Create an empty buffer bound to the CUDA device current at construction time."""
        # NOTE(review): the base class presumably stores sample_batch_size and
        # limit as attributes of the same names -- confirm in ExperienceBuffer.
        super().__init__(sample_batch_size, limit)
        self.cpu_offload = cpu_offload
        device_index = torch.cuda.current_device()
        self.target_device = torch.device(f"cuda:{device_index}")
        # TODO(ver217): add prefetch
        self.items: List[BufferItem] = []

    @torch.no_grad()
    def append(self, experience: Experience) -> None:
        """Split ``experience`` into items and add them to the end of the buffer.

        If a positive ``limit`` is set and the buffer grows beyond it, a warning
        is emitted and the entries at the front of the list (the earliest
        appended ones) are dropped until the buffer is back at ``limit``.
        """
        if self.cpu_offload:
            # The return value is ignored, so to_device presumably moves the
            # experience in place -- verify against Experience.
            cpu = torch.device("cpu")
            experience.to_device(cpu)
        self.items.extend(split_experience_batch(experience))

        # A non-positive limit means the buffer is unbounded.
        if self.limit <= 0:
            return

        overflow = len(self.items) - self.limit
        if overflow <= 0:
            return

        warnings.warn(f"Experience buffer is full. Removing {overflow} samples.")
        # Rebind to a new list holding only the most recently appended items.
        self.items = self.items[overflow:]

    def clear(self) -> None:
        """Remove every stored item, emptying the list in place."""
        self.items.clear()

    @torch.no_grad()
    def sample(self) -> Experience:
        """Draw ``sample_batch_size`` distinct items at random and batch them.

        ``random.sample`` raises ``ValueError`` when the buffer holds fewer
        items than ``sample_batch_size``.
        """
        batch = make_experience_batch(random.sample(self.items, self.sample_batch_size))
        if self.cpu_offload:
            # Counterpart of the CPU move performed in append().
            batch.to_device(self.target_device)
        return batch

    def __len__(self) -> int:
        """Return the number of items currently stored."""
        return len(self.items)

    def __getitem__(self, idx: int) -> BufferItem:
        """Return the stored item at position ``idx``."""
        return self.items[idx]

    def collate_fn(self, batch) -> Experience:
        """Combine a sequence of buffer items into a single ``Experience``.

        NOTE(review): looks intended as a DataLoader ``collate_fn`` -- confirm
        against callers.
        """
        return make_experience_batch(batch)
|