fix logging rollouts

This commit is contained in:
YeAnbang
2025-05-17 21:12:58 +08:00
parent 03b41d6fb5
commit 107470a360
5 changed files with 56 additions and 24 deletions

View File

@@ -120,12 +120,16 @@ class GRPOConsumer(BaseConsumer):
"either max_tokens (vllm) or max_new_tokens (transformers) must be set in generate_config."
)
# Initialize verifiable reward.
-response_format_tags = {
-"think_start": {"text": "<think>", "num_occur": 1},
-"think_end": {"text": "</think>", "num_occur": 1},
-"answer_start": {"text": "<answer>", "num_occur": 1},
-"answer_end": {"text": "</answer>", "num_occur": 1},
-}
+response_format_tags = (
+{
+"think_start": {"text": "<think>", "num_occur": 1},
+"think_end": {"text": "</think>", "num_occur": 1},
+"answer_start": {"text": "<answer>", "num_occur": 1},
+"answer_end": {"text": "</answer>", "num_occur": 1},
+}
+if grpo_config.get("reward_fn_type") == "think_answer_tags"
+else None
+)
reward_model_kwargs = {
k: v
for k, v in grpo_config.items()