Mirror of https://github.com/hpcaitech/ColossalAI.git
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
commit eb6337f07f
parent 22cc1558a8
@@ -359,8 +359,6 @@ def apply_chat_template_and_mask(
     system_prompt = "You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a math problem that involves reasoning. After thinking, when you finally reach a conclusion, clearly output the final answer without explanation within the <answer> </answer> tags, i.e., <answer> 123 </answer>.\n\n"
 
     system_element = {
         "role": "system",
         "content": system_prompt,
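For orientation, a minimal sketch of how a system element like the one above is typically prepended to a conversation before the tokenizer's chat template is applied. This is not the repository's code: the model checkpoint, the user message, and the shortened prompt string are placeholders, and the surrounding function's exact control flow is not visible in this hunk.

```python
from transformers import AutoTokenizer

# Placeholder checkpoint; any chat-tuned model with a chat template works.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

system_prompt = "You are a helpful assistant. ..."  # shortened; full text is in the hunk above
system_element = {"role": "system", "content": system_prompt}

chat = [{"role": "user", "content": "What is 17 * 6?"}]

# Prepend the system message, then render the conversation as a single prompt string.
prompt_text = tokenizer.apply_chat_template(
    [system_element] + chat,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt_text)
```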
@@ -1,6 +1,7 @@
+import os
 from contextlib import nullcontext
 from typing import Any, Dict, Optional
-import os
+
 import ray
 import ray.util.collective as cc
 import torch
@@ -34,7 +35,7 @@ class BaseConsumer:
         plugin_config: Dict[str, Any],
         microbatch_size: int = 1,
         save_interval: int = 100,
-        save_dir: str = "./model"
+        save_dir: str = "./model",
     ):
         self.num_producers = num_producers
         self.num_episodes = num_episodes
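The `save_interval` and `save_dir` arguments presumably drive periodic checkpointing inside the consumer. A hypothetical sketch of that pattern is below; the `CheckpointSchedule` class and `maybe_save` name are invented for illustration and do not appear in the repository.

```python
import os
from typing import Callable

class CheckpointSchedule:
    """Hypothetical helper mirroring the save_interval / save_dir arguments above."""

    def __init__(self, save_interval: int = 100, save_dir: str = "./model"):
        self.save_interval = save_interval
        self.save_dir = save_dir

    def maybe_save(self, step: int, save_fn: Callable[[str], None]) -> None:
        # Save every `save_interval` steps into a step-numbered subdirectory.
        if step > 0 and step % self.save_interval == 0:
            path = os.path.join(self.save_dir, f"step_{step}")
            os.makedirs(path, exist_ok=True)
            save_fn(path)

# Usage sketch: in a real trainer, save_fn would wrap the actual model-saving call.
schedule = CheckpointSchedule(save_interval=100, save_dir="./model")
schedule.maybe_save(step=200, save_fn=lambda p: print(f"would save checkpoint to {p}"))
```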
@@ -79,7 +79,7 @@ class GRPOConsumer(BaseConsumer):
 
         self.policy_loss_fn = PolicyLoss()
         self.global_step = 0
-        if use_wandb and self.rank == 0:
+        if use_wandb and self.rank == 0:
             self.wandb_run = wandb.init(project="GRPO-Test", sync_tensorboard=True)
 
     def setup(self):
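A minimal sketch of the rank-0-only Weights & Biases pattern seen in this hunk, assuming `rank` and `use_wandb` come from the trainer's setup. The `init_wandb_run` helper and the logged metric are illustrative; only the project name is taken from the diff.

```python
import wandb

def init_wandb_run(rank: int, use_wandb: bool):
    # Only the rank-0 process creates a run, so metrics are reported once
    # rather than once per distributed worker.
    if use_wandb and rank == 0:
        return wandb.init(project="GRPO-Test", sync_tensorboard=True)
    return None

run = init_wandb_run(rank=0, use_wandb=True)
if run is not None:
    wandb.log({"train/loss": 0.42})  # placeholder metric
    run.finish()
```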