[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot] 2025-03-06 08:29:58 +00:00
parent 22cc1558a8
commit eb6337f07f
3 changed files with 4 additions and 5 deletions

View File

@@ -359,8 +359,6 @@ def apply_chat_template_and_mask(
system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
system_element = {
"role": "system",
"content": system_prompt,

View File

@@ -1,6 +1,7 @@
import os
from contextlib import nullcontext
from typing import Any, Dict, Optional
import os
import ray
import ray.util.collective as cc
import torch
@@ -34,7 +35,7 @@ class BaseConsumer:
plugin_config: Dict[str, Any],
microbatch_size: int = 1,
save_interval: int = 100,
-save_dir: str = "./model"
+save_dir: str = "./model",
):
self.num_producers = num_producers
self.num_episodes = num_episodes

View File

@@ -79,7 +79,7 @@ class GRPOConsumer(BaseConsumer):
self.policy_loss_fn = PolicyLoss()
self.global_step = 0
if use_wandb and self.rank == 0:
self.wandb_run = wandb.init(project="GRPO-Test", sync_tensorboard=True)
def setup(self):