From 729456957a708529d8c09b3697ad85b8824e2b12 Mon Sep 17 00:00:00 2001
From: Tong Li
Date: Mon, 31 Mar 2025 11:35:23 +0800
Subject: [PATCH] update help information

---
Review note: the `--backend` choices below are listed as two separate
strings ("transformers", "vllm"). Written as the single string
"transformers, vllm", argparse would reject both real backend names when
passed explicitly on the command line. The post-image blob hash on the
`index` line predates this correction.

 applications/ColossalChat/rl_example.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py
index 4a4a4c340..5e7af5c19 100644
--- a/applications/ColossalChat/rl_example.py
+++ b/applications/ColossalChat/rl_example.py
@@ -10,13 +10,13 @@ if __name__ == "__main__":
     parser.add_argument("-d", "--dataset", type=str, default="data.jsonl")
     parser.add_argument("-t", "--num-trainers", type=int, default=2)
     parser.add_argument("-i", "--num-inferencer", type=int, default=2)
-    parser.add_argument("-g", "--num-generations", type=int, default=8)
-    parser.add_argument("-ibs", "--inference-batch-size", type=int, default=64)
-    parser.add_argument("-imbs", "--inference-microbatch-size", type=int, default=8)
-    parser.add_argument("-tbs", "--train-batch-size", type=int, default=32)
-    parser.add_argument("-tMbs", "--train-minibatch-size", type=int, default=1)
-    parser.add_argument("-tmbs", "--train-microbatch-size", type=int, default=2)
-    parser.add_argument("-b", "--backend", type=str, default="transformers")
+    parser.add_argument("-g", "--num-generations", type=int, default=8, help="Number of generations per prompt.")
+    parser.add_argument("-ibs", "--inference-batch-size", type=int, default=64, help="Number of prompts to generate per step.")
+    parser.add_argument("-imbs", "--inference-microbatch-size", type=int, default=8, help="Number of prompts to send from the producer to the consumer.")
+    parser.add_argument("-tbs", "--train-batch-size", type=int, default=32, help="Number of prompts to update policy model.")
+    parser.add_argument("-tMbs", "--train-minibatch-size", type=int, default=1, help="Number of prompts per device. Number of samples = tMbs * num of generation per prompt.")
+    parser.add_argument("-tmbs", "--train-microbatch-size", type=int, default=2, help="Number of samples per device.")
+    parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"])
     parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"])
     args = parser.parse_args()
 