mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-10-25 10:06:27 +00:00
[zero] Update initialize for ZeRO (#458)
* polish code
* shard strategy receives pg in shard() / gather()
* update zero engine
* polish code
This commit is contained in:
@@ -16,7 +16,7 @@ _ZERO_MODEL_CONFIG = dict(reduce_scatter_bucket_size_mb=25,
|
||||
offload_config=None,
|
||||
gradient_predivide_factor=1.0,
|
||||
use_memory_tracer=False,
|
||||
shard_strategy=TensorShardStrategy)
|
||||
shard_strategy=TensorShardStrategy())
|
||||
|
||||
_ZERO_OPTIMIZER_CONFIG = dict(cpu_offload=False,
|
||||
initial_scale=2**5,
|
||||
@@ -25,8 +25,7 @@ _ZERO_OPTIMIZER_CONFIG = dict(cpu_offload=False,
|
||||
backoff_factor=0.5,
|
||||
growth_interval=1000,
|
||||
hysteresis=2,
|
||||
max_scale=2**32,
|
||||
lr=1e-3)
|
||||
max_scale=2**32)
|
||||
|
||||
ZERO_PARALLEL_CONFIG = dict(fp16=dict(mode=None,),
|
||||
zero=dict(
|
||||
|
||||
Reference in New Issue
Block a user