add prompt template (#6273)

Co-authored-by: Tong Li <tong.li35271158@gmail.com>
This commit is contained in:
Tong Li 2025-04-22 10:39:47 +08:00 committed by GitHub
parent 9467c10690
commit 03f4b1dde3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 12 deletions

View File

@@ -352,11 +352,13 @@ def apply_chat_template_and_mask(
tokenizer: PreTrainedTokenizer, tokenizer: PreTrainedTokenizer,
chat: List[Dict[str, str]], chat: List[Dict[str, str]],
max_length: Optional[int] = None, max_length: Optional[int] = None,
system_prompt: str = None,
padding: bool = True, padding: bool = True,
truncation: bool = True, truncation: bool = True,
ignore_idx: int = -100, ignore_idx: int = -100,
) -> Dict[str, torch.Tensor]: ) -> Dict[str, torch.Tensor]:
if system_prompt is None:
system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n" system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
system_element = { system_element = {
@@ -419,7 +421,7 @@ class RawConversationDataset(Dataset):
Each instance is a dictionary with fields `system`, `roles`, `messages`, `offset`, `sep_style`, `seps`. Each instance is a dictionary with fields `system`, `roles`, `messages`, `offset`, `sep_style`, `seps`.
""" """
def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int) -> None: def __init__(self, tokenizer: PreTrainedTokenizer, input_file: str, max_length: int, system_prompt: str) -> None:
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.raw_texts = [] self.raw_texts = []
with jsonlines.open(input_file) as f: with jsonlines.open(input_file) as f:
@@ -427,6 +429,7 @@ class RawConversationDataset(Dataset):
self.raw_texts.append(line) self.raw_texts.append(line)
self.tokenized_texts = [None] * len(self.raw_texts) self.tokenized_texts = [None] * len(self.raw_texts)
self.max_length = max_length self.max_length = max_length
self.system_prompt = system_prompt
def __len__(self) -> int: def __len__(self) -> int:
return len(self.raw_texts) return len(self.raw_texts)
@@ -434,6 +437,6 @@ class RawConversationDataset(Dataset):
def __getitem__(self, index: int): def __getitem__(self, index: int):
if self.tokenized_texts[index] is None: if self.tokenized_texts[index] is None:
message = self.raw_texts[index] message = self.raw_texts[index]
tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length) tokens = apply_chat_template_and_mask(self.tokenizer, message, self.max_length, self.system_prompt)
self.tokenized_texts[index] = dict(tokens) self.tokenized_texts[index] = dict(tokens)
return self.tokenized_texts[index] return self.tokenized_texts[index]

View File

@@ -49,6 +49,7 @@ if __name__ == "__main__":
) )
parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"]) parser.add_argument("-b", "--backend", type=str, default="transformers", choices=["transformers", "vllm"])
parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"]) parser.add_argument("-a", "--algo", type=str, default="GRPO", choices=["Simple", "GRPO", "EvalGRPO"])
parser.add_argument("-s", "--system-prompt", type=str, default=None, help="System prompt for data construction.")
args = parser.parse_args() args = parser.parse_args()
assert args.train_minibatch_size > 0, "Train mini batch size must be greater than 0" assert args.train_minibatch_size > 0, "Train mini batch size must be greater than 0"
@@ -112,20 +113,20 @@ if __name__ == "__main__":
train_batch_size=args.train_batch_size, train_batch_size=args.train_batch_size,
train_minibatch_size=args.train_minibatch_size, train_minibatch_size=args.train_minibatch_size,
train_microbatch_size=args.train_microbatch_size, train_microbatch_size=args.train_microbatch_size,
dataset_config={"path": args.dataset, "max_length": 300}, dataset_config={"path": args.dataset, "max_length": 300, "system_prompt": args.system_prompt},
dataloaders_config={}, dataloaders_config={},
inference_model_config=inference_model_config, inference_model_config=inference_model_config,
generate_config=generate_config, generate_config=generate_config,
num_generations=args.num_generations, num_generations=args.num_generations,
train_model_config=train_model_config, train_model_config=train_model_config,
# plugin_config={}, # for zero plugin_config={}, # Default setting: zero.
plugin_config={ # plugin_config={
"pp_size": 2, # "pp_size": 2,
"tp_size": 2, # "tp_size": 2,
"microbatch_size": args.train_microbatch_size // 2, # "microbatch_size": args.train_microbatch_size // 2,
"zero_stage": 0, # "zero_stage": 0,
"max_norm": 1.0, # "max_norm": 1.0,
}, # for pp # }, # for pp
inference_backend=args.backend, inference_backend=args.backend,
master_addr="localhost", master_addr="localhost",
master_port=29506, master_port=29506,