From 03f4b1dde31d80c548e383bc514f10dde07195a1 Mon Sep 17 00:00:00 2001
From: Tong Li
Date: Tue, 22 Apr 2025 10:39:47 +0800
Subject: [PATCH 1/2] add prompt template (#6273)

Co-authored-by: Tong Li
---
 .../ColossalChat/coati/dataset/loader.py |  9 ++++++---
 applications/ColossalChat/rl_example.py  | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/applications/ColossalChat/coati/dataset/loader.py b/applications/ColossalChat/coati/dataset/loader.py
index 4518fd71f..43cf78383 100755
--- a/applications/ColossalChat/coati/dataset/loader.py
+++ b/applications/ColossalChat/coati/dataset/loader.py
@@ -352,12 +352,14 @@ def apply_chat_template_and_mask(
     tokenizer: PreTrainedTokenizer,
     chat: List[Dict[str, str]],
     max_length: Optional[int] = None,
+    system_prompt: str = None,
     padding: bool = True,
     truncation: bool = True,
     ignore_idx: int = -100,
 ) -> Dict[str, torch.Tensor]:
-    system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
+    if system_prompt is None:
+        system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
 
     system_element = {
         "role": "system",
         "content": system_prompt,
@@ -419,7 +421,7 @@ class RawConversationDataset(Dataset):
         Each instance is a dictionary with fields `system`, `roles`, `messages`, `offset`, `sep_style`, `seps`.
     """
 
-    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None:
+    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: str) -> None:
         self.tokenizer = tokenizer
         self.raw_texts = []
         with jsonlines.open(input_file) as f:
@@ -427,6 +429,7 @@ class RawConversationDataset(Dataset):
                 self.raw_texts.append(line)
         self.tokenized_texts = [None] * len(self.raw_texts)
         self.max_length = max_length
+        self.system_prompt = system_prompt
 
     def __len__(self) -> int:
         return len(self.raw_texts)
@@ -434,6 +437,6 @@
     def __getitem__(self, index: int):
         if self.tokenized_texts[index] is None:
             message = self.raw_texts[index]
-            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length)
+            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt)
             self.tokenized_texts[index] = dict(tokens)
         return self.tokenized_texts[index]
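For readers who want to see the new parameter in action before reading on, here is a minimal usage sketch (not part of the patch; the import path, tokenizer name, data file, and sample chat are illustrative assumptions):

    from transformers import AutoTokenizer

    from coati.dataset.loader import RawConversationDataset, apply_chat_template_and_mask  # assumed import path

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # assumed model

    # Direct call: override the built-in <think>/<answer> reasoning prompt.
    chat = [{"role": "user", "content": "What is 17 * 24?"}]
    tokens = apply_chat_template_and_mask(
        tokenizer,
        chat,
        max_length=300,
        system_prompt="You are a concise math tutor. Put the final answer in <answer> tags.\n\n",
    )

    # Dataset path: the prompt is threaded through the constructor; passing
    # None falls back to the default reasoning prompt inside the function.
    dataset = RawConversationDataset(tokenizer, "data/train.jsonl", max_length=300, system_prompt=None)
    sample = dataset[0]  # tokenized on first access, then cached

Note that `system_prompt` defaults to `None` at every layer, so existing callers keep the original behavior unchanged.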
""" - def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None: + def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: str) -> None: self.tokenizer = tokenizer self.raw_texts = [] with jsonlines.open(input_file) as f: @@ -427,6 +429,7 @@ class RawConversationDataset(Dataset): self.raw_texts.append(line) self.tokenized_texts = [None] * len(self.raw_texts) self.max_length = max_length + self.system_prompt = system_prompt def __len__(self) -> int: return len(self.raw_texts) @@ -434,6 +437,6 @@ class RawConversationDataset(Dataset): def __getitem__(self, index: int): if self.tokenized_texts[index] is None: message = self.raw_texts[index] - tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length) + tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt) self.tokenized_texts[index] = dict(tokens) return self.tokenized_texts[index] diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py index 6c43ccd19..317446695 100644 --- a/applications/ColossalChat/rl_example.py +++ b/applications/ColossalChat/rl_example.py @@ -49,6 +49,7 @@ if __name__ == "__main__": ) parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"]) parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"]) + parser.add_argument("-s", "--system-prompt", type=str, default=None, help="System prompt for data construction.") args = parser.parse_args() assert args.train_minibatch_size > 0, "Train mini batch size must be greater than 0" @@ -112,20 +113,20 @@ if __name__ == "__main__": train_batch_size=args.train_batch_size, train_minibatch_size=args.train_minibatch_size, train_microbatch_size=args.train_microbatch_size, - dataset_config={"path": args.dataset, "max_length": 300}, + dataset_config={"path": args.dataset, "max_length": 300, "system_prompt": args.system_prompt}, dataloaders_config={}, inference_model_config=inference_model_config, generate_config=generate_config, num_generations=args.num_generations, train_model_config=train_model_config, - # plugin_config={}, # for zero - plugin_config={ - "pp_size": 2, - "tp_size": 2, - "microbatch_size": args.train_microbatch_size // 2, - "zero_stage": 0, - "max_norm": 1.0, - }, # for pp + plugin_config={}, # Default setting: zero. 
From b823c6eec757a03dbc8b48d8bf25a5748e99d770 Mon Sep 17 00:00:00 2001
From: Tong Li
Date: Wed, 23 Apr 2025 10:03:46 +0800
Subject: [PATCH 2/2] [feat] Add final save at the end (#6274)

* add final save

* default 1 episode
---
 applications/ColossalChat/coati/distributed/consumer.py | 2 +-
 applications/ColossalChat/rl_example.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/applications/ColossalChat/coati/distributed/consumer.py b/applications/ColossalChat/coati/distributed/consumer.py
index 79beb2a2d..b7b865b26 100644
--- a/applications/ColossalChat/coati/distributed/consumer.py
+++ b/applications/ColossalChat/coati/distributed/consumer.py
@@ -119,7 +119,7 @@ class BaseConsumer:
                 assert len(self.buffer) == 0
                 if self.lr_scheduler is not None:
                     self.lr_scheduler.step()
-                if (step + 1) % self.save_interval == 0:
+                if (step + 1) % self.save_interval == 0 or (step + 1) == self.num_update_per_episode:
                     if self.rank == 0:
                         print(f"Start saving policy model at step {step + 1}.")
                     save_path = os.path.join(self.save_dir, f"modeling-step-{step + 1}")
diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py
index 317446695..f42a660b7 100644
--- a/applications/ColossalChat/rl_example.py
+++ b/applications/ColossalChat/rl_example.py
@@ -107,7 +107,7 @@ if __name__ == "__main__":
         num_producers=args.num_inferencer,
         num_proc_per_producer=1,
         num_consumer_procs=args.num_trainers,
-        num_episodes=10,
+        num_episodes=1,
         inference_batch_size=args.inference_batch_size,
         inference_microbatch_size=args.inference_microbatch_size,
         train_batch_size=args.train_batch_size,
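To make the consumer.py change concrete, here is a standalone trace of the new checkpoint condition (the interval and step count below are invented for illustration; the real values come from the trainer configuration):

    save_interval = 20           # hypothetical checkpoint interval
    num_update_per_episode = 50  # hypothetical number of update steps per episode

    # Collect every step at which the new condition triggers a save.
    save_steps = [
        step + 1
        for step in range(num_update_per_episode)
        if (step + 1) % save_interval == 0 or (step + 1) == num_update_per_episode
    ]
    print(save_steps)  # [20, 40, 50] -- step 50 is the new end-of-episode save

Combined with the `num_episodes=1` default, this guarantees the final policy is saved even when the episode length is not a multiple of `save_interval`.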