From f8bd2db33fa3b42ef8b1ddcf2c96ce6535ab672c Mon Sep 17 00:00:00 2001 From: YeAnbang Date: Tue, 20 May 2025 09:45:56 +0800 Subject: [PATCH] add uuid to rollout log --- applications/ColossalChat/coati/distributed/launch.py | 6 +++++- applications/ColossalChat/rl_example.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/applications/ColossalChat/coati/distributed/launch.py b/applications/ColossalChat/coati/distributed/launch.py index 6eeb5d379..ef81bcbdd 100644 --- a/applications/ColossalChat/coati/distributed/launch.py +++ b/applications/ColossalChat/coati/distributed/launch.py @@ -56,7 +56,7 @@ def launch_distributed( eval_save_dir: Optional[str] = None, eval_generation_config: Optional[Dict[str, Any]] = None, log_rollout_interval: int = 20, - rollout_log_file: str = "./rollout_log.jsonl", + rollout_save_dir: str = "./rollout", ): if core_algo not in ALGO_MAP: raise NotImplementedError(f"{core_algo} is not supported yet.") @@ -74,6 +74,10 @@ def launch_distributed( run_name = f"{inference_backend}_bs_{train_batch_size * train_dp_size}_temp_{generate_config['temperature']:.01f}_top_p_{generate_config['top_p']:.02f}" wandb_group_name = str(uuid.uuid4()) + rollout_log_file = os.path.join( + rollout_save_dir, + f"{project_name.replace(' ','_')}_run_{wandb_group_name}.jsonl", + ) procs = [] for i in range(num_producers): diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py index 98c139f14..bfa0ab7d0 100644 --- a/applications/ColossalChat/rl_example.py +++ b/applications/ColossalChat/rl_example.py @@ -273,5 +273,5 @@ if __name__ == "__main__": eval_save_dir=os.path.join(args.eval_save_dir, args.project.replace(" ", "_")), eval_generation_config=eval_generation_config, log_rollout_interval=20, - rollout_log_file=os.path.join(args.rollout_save_dir, args.project.replace(" ", "_") + ".jsonl"), + rollout_save_dir=args.rollout_save_dir, )