fix logging rollouts

2025-09-06 19:40:28 +00:00 · 2025-05-17 21:12:58 +08:00
parent 03b41d6fb5
commit 107470a360
5 changed files with 56 additions and 24 deletions
--- a/applications/ColossalChat/coati/distributed/grpo_consumer.py
+++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py
@@ -120,12 +120,16 @@ class GRPOConsumer(BaseConsumer):
                    "either max_tokens (vllm) or max_new_tokens (transformers) must be set in generate_config."
                )
        # Initialize verifiable reward.
-        response_format_tags = {
-            "think_start": {"text": "<think>", "num_occur": 1},
-            "think_end": {"text": "</think>", "num_occur": 1},
-            "answer_start": {"text": "<answer>", "num_occur": 1},
-            "answer_end": {"text": "</answer>", "num_occur": 1},
-        }
+        response_format_tags = (
+            {
+                "think_start": {"text": "<think>", "num_occur": 1},
+                "think_end": {"text": "</think>", "num_occur": 1},
+                "answer_start": {"text": "<answer>", "num_occur": 1},
+                "answer_end": {"text": "</answer>", "num_occur": 1},
+            }
+            if grpo_config.get("reward_fn_type") == "think_answer_tags"
+            else None
+        )
        reward_model_kwargs = {
            k: v
            for k, v in grpo_config.items()