diff --git a/applications/ColossalChat/coati/dataset/loader.py b/applications/ColossalChat/coati/dataset/loader.py
index 4518fd71f..43cf78383 100755
--- a/applications/ColossalChat/coati/dataset/loader.py
+++ b/applications/ColossalChat/coati/dataset/loader.py
@@ -352,12 +352,14 @@ def apply_chat_template_and_mask(
     tokenizer: PreTrainedTokenizer,
     chat: List[Dict[str, str]],
     max_length: Optional[int] = None,
+    system_prompt: Optional[str] = None,
     padding: bool = True,
     truncation: bool = True,
     ignore_idx: int = -100,
 ) -> Dict[str, torch.Tensor]:
 
-    system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
+    if system_prompt is None:
+        system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
 
     system_element = {
         "role": "system",
@@ -419,7 +421,7 @@ class RawConversationDataset(Dataset):
     Each instance is a dictionary with fields `system`, `roles`, `messages`, `offset`, `sep_style`, `seps`.
     """
 
-    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None:
+    def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: Optional[str] = None) -> None:
         self.tokenizer = tokenizer
         self.raw_texts = []
         with jsonlines.open(input_file) as f:
@@ -427,6 +429,7 @@
                 self.raw_texts.append(line)
         self.tokenized_texts = [None] * len(self.raw_texts)
         self.max_length = max_length
+        self.system_prompt = system_prompt
 
     def __len__(self) -> int:
         return len(self.raw_texts)
@@ -434,6 +437,6 @@
     def __getitem__(self, index: int):
         if self.tokenized_texts[index] is None:
             message = self.raw_texts[index]
-            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length)
+            tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt)
             self.tokenized_texts[index] = dict(tokens)
         return self.tokenized_texts[index]
diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py
index 6c43ccd19..317446695 100644
--- a/applications/ColossalChat/rl_example.py
+++ b/applications/ColossalChat/rl_example.py
@@ -49,6 +49,7 @@ if __name__ == "__main__":
     )
     parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"])
    parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"])
+    parser.add_argument("-s", "--system-prompt", type=str, default=None, help="System prompt for data construction.")
 
     args = parser.parse_args()
     assert args.train_minibatch_size > 0, "Train mini batch size must be greater than 0"
@@ -112,20 +113,20 @@ if __name__ == "__main__":
         train_batch_size=args.train_batch_size,
         train_minibatch_size=args.train_minibatch_size,
         train_microbatch_size=args.train_microbatch_size,
-        dataset_config={"path": args.dataset, "max_length": 300},
+        dataset_config={"path": args.dataset, "max_length": 300, "system_prompt": args.system_prompt},
         dataloaders_config={},
         inference_model_config=inference_model_config,
         generate_config=generate_config,
         num_generations=args.num_generations,
         train_model_config=train_model_config,
-        # plugin_config={}, # for zero
-        plugin_config={
-            "pp_size": 2,
-            "tp_size": 2,
-            "microbatch_size": args.train_microbatch_size // 2,
-            "zero_stage": 0,
-            "max_norm": 1.0,
-        }, # for pp
+        plugin_config={}, # Default setting: ZeRO.
+        # plugin_config={
+        #     "pp_size": 2,
+        #     "tp_size": 2,
+        #     "microbatch_size": args.train_microbatch_size // 2,
+        #     "zero_stage": 0,
+        #     "max_norm": 1.0,
+        # }, # for pp
         inference_backend=args.backend,
         master_addr="localhost",
         master_port=29506,
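
Usage sketch (illustrative, not part of the diff): after this change a custom system prompt can be supplied on the command line via `-s/--system-prompt`, and it flows through `dataset_config` into `RawConversationDataset` and on to `apply_chat_template_and_mask`; leaving it unset falls back to the built-in math-reasoning prompt. In the snippet below, the model name and JSONL path are placeholders, not values from the diff.

    # Hypothetical example -- model name and data path are stand-ins.
    from transformers import AutoTokenizer

    from coati.dataset.loader import RawConversationDataset

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")  # placeholder model
    dataset = RawConversationDataset(
        tokenizer,
        "data/train.jsonl",  # placeholder path; one chat message list per line
        max_length=300,
        system_prompt="You are a terse math tutor. Put the final answer in <answer> tags.\n\n",
    )
    sample = dataset[0]  # tokenized dict built by apply_chat_template_and_mask

Tokenization is cached per index, so the prompt chosen at construction time is baked into every sample the dataset returns.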