diff --git a/colossalai/booster/plugin/hybrid_parallel_plugin.py b/colossalai/booster/plugin/hybrid_parallel_plugin.py
index 1af20f473..58d055bb0 100644
--- a/colossalai/booster/plugin/hybrid_parallel_plugin.py
+++ b/colossalai/booster/plugin/hybrid_parallel_plugin.py
@@ -78,7 +78,6 @@ class HybridParallelModule(ModelWrapper, AMPModelMixin):
         self.require_grad_sync = True
         self.overlap_allgather = overlap_allgather
         self.use_fp8 = use_fp8
-        self.use_fp8 = use_fp8
 
         shardformer = ShardFormer(shard_config)
         if custom_policy is not None:
@@ -1099,7 +1098,6 @@ class HybridParallelPlugin(PipelinePluginBase):
         self.enable_jit_fused = enable_jit_fused
         self.enable_sequence_parallelism = enable_sequence_parallelism
         self.use_fp8 = use_fp8
-        self.use_fp8 = use_fp8
         if dp_outside:
             self.dp_axis, self.pp_axis, self.tp_axis, self.sp_axis = 0, 1, 2, 3
             self.pg_mesh = ProcessGroupMesh(self.dp_size, self.pp_size, self.tp_size, self.sp_size)
@@ -1325,7 +1323,6 @@ class HybridParallelPlugin(PipelinePluginBase):
                 custom_policy=self.custom_policy,
                 overlap_allgather=(self.zero_stage > 0 and self.zero_config["overlap_allgather"]),
                 use_fp8=self.use_fp8,
-                use_fp8=self.use_fp8,
             )
         if optimizer is not None and not isinstance(optimizer, OptimizerWrapper):
            if zero_stage == 0:
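
All three removed lines are literal duplicates introduced alongside `use_fp8`. The two points differ in severity: repeating the attribute assignment in `__init__` is only redundant, while repeating the `use_fp8=self.use_fp8` keyword in the `HybridParallelModule(...)` call is rejected by the Python parser. A minimal sketch with a hypothetical `Example` class (not part of this patch) that illustrates both cases:

    class Example:
        def __init__(self, use_fp8: bool = False):
            self.use_fp8 = use_fp8
            self.use_fp8 = use_fp8  # redundant: reassigns the same value, no error

    # Duplicating a keyword argument in a call, by contrast, fails at compile time:
    # Example(use_fp8=True, use_fp8=True)
    # -> SyntaxError: keyword argument repeated: use_fp8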