[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot] 2025-03-06 08:29:58 +00:00
parent 22cc1558a8
commit eb6337f07f
3 changed files with 4 additions and 5 deletions

View File

@@ -359,8 +359,6 @@ def apply_chat_template_and_mask(
system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
system_element = {
"role": "system",
"content": system_prompt,

View File

@@ -1,6 +1,7 @@
+import os
from contextlib import nullcontext
from typing import Any, Dict, Optional
-import os
import ray
import ray.util.collective as cc
import torch
@@ -34,7 +35,7 @@ class BaseConsumer:
plugin_config: Dict[str, Any],
microbatch_size: int = 1,
save_interval: int = 100,
-save_dir: str = "./model"
+save_dir: str = "./model",
):
self.num_producers = num_producers
self.num_episodes = num_episodes

View File

@@ -79,7 +79,7 @@ class GRPOConsumer(BaseConsumer):
self.policy_loss_fn = PolicyLoss()
self.global_step = 0
-if use_wandb and self.rank == 0:
+if use_wandb and self.rank == 0:
self.wandb_run = wandb.init(project="GRPO-Test", sync_tensorboard=True)
def setup(self):