diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py index a2c3e03d6..424d46098 100644 --- a/applications/ColossalChat/coati/distributed/grpo_consumer.py +++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py @@ -530,4 +530,4 @@ class GRPOConsumer(BaseConsumer): model = self.policy_model.unwrap() state_dict = model.state_dict() state_dict["consumer_global_step"] = torch.tensor([self.global_step], device=self.device) - return state_dict \ No newline at end of file + return state_dict diff --git a/colossalai/shardformer/modeling/qwen3.py b/colossalai/shardformer/modeling/qwen3.py index 437693800..5f96f5f49 100644 --- a/colossalai/shardformer/modeling/qwen3.py +++ b/colossalai/shardformer/modeling/qwen3.py @@ -273,7 +273,7 @@ class Qwen3PipelineForwards: hidden_states: Optional[torch.FloatTensor] = None, stage_index: Optional[List[int]] = None, shard_config: ShardConfig = None, - **kwargs + **kwargs, ): r""" Args: