From 03f4b1dde31d80c548e383bc514f10dde07195a1 Mon Sep 17 00:00:00 2001
From: Tong Li
Date: Tue, 22 Apr 2025 10:39:47 +0800
Subject: [PATCH 1/2] add prompt template (#6273)

Co-authored-by: Tong Li
---
 .../ColossalChat/coati/dataset/loader.py |  9 ++++++---
 applications/ColossalChat/rl_example.py  | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/applications/ColossalChat/coati/dataset/loader.py b/applications/ColossalChat/coati/dataset/loader.py
index 4518fd71f..43cf78383 100755
--- a/applications/ColossalChat/coati/dataset/loader.py
+++ b/applications/ColossalChat/coati/dataset/loader.py
@@ -352,12 +352,14 @@ def apply_chat_template_and_mask(
     tokenizer: PreTrainedTokenizer,
     chat: List[Dict[str, str]],
     max_length: Optional[int] = None,
+    system_prompt: str = None,
     padding: bool = True,
     truncation: bool = True,
     ignore_idx: int = -100,
 ) -> Dict[str, torch.Tensor]:
-    system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
+    if system_prompt is None:
+        system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
 
     system_element = {
         "role": "system",
         "content": system_prompt,
@@ -419,7 +421,7 @@ class RawConversationDataset(Dataset):
         Each instance is a dictionary with fields `system`, `roles`, `messages`, `offset`, `sep_style`, `seps`.
     """
 
-    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None:
+    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: str) -> None:
         self.tokenizer = tokenizer
         self.raw_texts = []
         with jsonlines.open(input_file) as f:
@@ -427,6 +429,7 @@ class RawConversationDataset(Dataset):
                 self.raw_texts.append(line)
         self.tokenized_texts = [None] * len(self.raw_texts)
         self.max_length = max_length
+        self.system_prompt = system_prompt
 
     def __len__(self) -> int:
         return len(self.raw_texts)
@@ -434,6 +437,6 @@
     def __getitem__(self, index: int):
         if self.tokenized_texts[index] is None:
             message = self.raw_texts[index]
-            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length)
+            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt)
             self.tokenized_texts[index] = dict(tokens)
         return self.tokenized_texts[index]
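For readers who want to see the new parameter in action before reading on, here is a minimal usage sketch (not part of the patch; the import path, tokenizer name, data file, and sample chat are illustrative assumptions):

    from transformers import AutoTokenizer

    from coati.dataset.loader import RawConversationDataset, apply_chat_template_and_mask  # assumed import path

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # assumed model

    # Direct call: override the built-in <think>/<answer> reasoning prompt.
    chat = [{"role": "user", "content": "What is 17 * 24?"}]
    tokens = apply_chat_template_and_mask(
        tokenizer,
        chat,
        max_length=300,
        system_prompt="You are a concise math tutor. Put the final answer in <answer> tags.\n\n",
    )

    # Dataset path: the prompt is threaded through the constructor; passing
    # None falls back to the default reasoning prompt inside the function.
    dataset = RawConversationDataset(tokenizer, "data/train.jsonl", max_length=300, system_prompt=None)
    sample = dataset[0]  # tokenized on first access, then cached

Note that `system_prompt` defaults to `None` at every layer, so existing callers keep the original behavior unchanged.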
""" - def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None: + def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: str) -> None: self.tokenizer = tokenizer self.raw_texts = [] with jsonlines.open(input_file) as f: @@ -427,6 +429,7 @@ class RawConversationDataset(Dataset): self.raw_texts.append(line) self.tokenized_texts = [None] * len(self.raw_texts) self.max_length = max_length + self.system_prompt = system_prompt def __len__(self) -> int: return len(self.raw_texts) @@ -434,6 +437,6 @@ class RawConversationDataset(Dataset): def __getitem__(self, index: int): if self.tokenized_texts[index] is None: message = self.raw_texts[index] - tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length) + tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt) self.tokenized_texts[index] = dict(tokens) return self.tokenized_texts[index] diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py index 6c43ccd19..317446695 100644 --- a/applications/ColossalChat/rl_example.py +++ b/applications/ColossalChat/rl_example.py @@ -49,6 +49,7 @@ if __name__ == "__main__": ) parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"]) parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"]) + parser.add_argument("-s", "--system-prompt", type=str, default=None, help="System prompt for data construction.") args = parser.parse_args() assert args.train_minibatch_size > 0, "Train mini batch size must be greater than 0" @@ -112,20 +113,20 @@ if __name__ == "__main__": train_batch_size=args.train_batch_size, train_minibatch_size=args.train_minibatch_size, train_microbatch_size=args.train_microbatch_size, - dataset_config={"path": args.dataset, "max_length": 300}, + dataset_config={"path": args.dataset, "max_length": 300, "system_prompt": args.system_prompt}, dataloaders_config={}, inference_model_config=inference_model_config, generate_config=generate_config, num_generations=args.num_generations, train_model_config=train_model_config, - # plugin_config={}, # for zero - plugin_config={ - "pp_size": 2, - "tp_size": 2, - "microbatch_size": args.train_microbatch_size // 2, - "zero_stage": 0, - "max_norm": 1.0, - }, # for pp + plugin_config={}, # Default setting: zero. 
From b823c6eec757a03dbc8b48d8bf25a5748e99d770 Mon Sep 17 00:00:00 2001
From: Tong Li
Date: Wed, 23 Apr 2025 10:03:46 +0800
Subject: [PATCH 2/2] [feat] Add final save at the end (#6274)

* add final save

* default 1 episode
---
 applications/ColossalChat/coati/distributed/consumer.py | 2 +-
 applications/ColossalChat/rl_example.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/applications/ColossalChat/coati/distributed/consumer.py b/applications/ColossalChat/coati/distributed/consumer.py
index 79beb2a2d..b7b865b26 100644
--- a/applications/ColossalChat/coati/distributed/consumer.py
+++ b/applications/ColossalChat/coati/distributed/consumer.py
@@ -119,7 +119,7 @@ class BaseConsumer:
                 assert len(self.buffer) == 0
                 if self.lr_scheduler is not None:
                     self.lr_scheduler.step()
-                if (step + 1) % self.save_interval == 0:
+                if (step + 1) % self.save_interval == 0 or (step + 1) == self.num_update_per_episode:
                     if self.rank == 0:
                         print(f"Start saving policy model at step {step + 1}.")
                     save_path = os.path.join(self.save_dir, f"modeling-step-{step + 1}")
diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py
index 317446695..f42a660b7 100644
--- a/applications/ColossalChat/rl_example.py
+++ b/applications/ColossalChat/rl_example.py
@@ -107,7 +107,7 @@ if __name__ == "__main__":
         num_producers=args.num_inferencer,
         num_proc_per_producer=1,
         num_consumer_procs=args.num_trainers,
-        num_episodes=10,
+        num_episodes=1,
         inference_batch_size=args.inference_batch_size,
         inference_microbatch_size=args.inference_microbatch_size,
         train_batch_size=args.train_batch_size,
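To make the consumer.py change concrete, here is a standalone trace of the new checkpoint condition (the interval and step count below are invented for illustration; the real values come from the trainer configuration):

    save_interval = 20           # hypothetical checkpoint interval
    num_update_per_episode = 50  # hypothetical number of update steps per episode

    # Collect every step at which the new condition triggers a save.
    save_steps = [
        step + 1
        for step in range(num_update_per_episode)
        if (step + 1) % save_interval == 0 or (step + 1) == num_update_per_episode
    ]
    print(save_steps)  # [20, 40, 50] -- step 50 is the new end-of-episode save

Combined with the `num_episodes=1` default, this guarantees the final policy is saved even when the episode length is not a multiple of `save_interval`.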