Fix producer falling behind the trainer

This commit is contained in:
Tong Li
2025-06-26 10:27:00 +08:00
parent db8baeeaf2
commit 8abf186ce2
3 changed files with 52 additions and 42 deletions

View File

@@ -263,7 +263,6 @@ if __name__ == "__main__":
grpo_config = {
"lr": args.learning_rate,
"train_microbatch_size": args.train_microbatch_size,
"num_minibatch_during_rollout": 1, # number of mini batches to pop out from buffer and used for training during rollout of the producer after it syncs the model. Hint, set to a proper value close to the number of mini batches for training that takes roughly the same time as the rollout of the producer. A value that is too large or too small will cause bubble time on the trainer or the producer.
"beta": args.kl_coeff, # KL penalty coefficient
"loss_variation": "sample_level",
"reward_fn_type": args.reward_type,