callbacks
|
[ColossalChat] Update RLHF V2 (#5286)
|
2024-03-29 14:12:29 +08:00 |
__init__.py
|
Add GRPO and Support RLVR for PPO (#6186)
|
2025-02-18 09:43:36 +08:00 |
base.py
|
Add GRPO and Support RLVR for PPO (#6186)
|
2025-02-18 09:43:36 +08:00 |
grpo.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
kto.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
orpo.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
ppo.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
rm.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
sft.py
|
fix num_train_step update
|
2025-02-20 18:24:04 +08:00 |
utils.py
|
Add GRPO and Support RLVR for PPO (#6186)
|
2025-02-18 09:43:36 +08:00 |