Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-21 09:29:47 +00:00)
[async io] support async io (#6137)
* support async optimizer save/load
* fix
* fix
* support pin mem
* Update low_level_zero_plugin.py
* fix
* fix
* fix
* fix
* fix
Committed by: Hongxin Liu
Parent: b90835bd32
Commit: eb69e640e5
@@ -359,6 +359,7 @@ class Booster:
         gather_dtensor: bool = True,
         prefix: Optional[str] = None,
         size_per_shard: int = 1024,
+        use_async: bool = False,
     ) -> None:
         """
         Save optimizer to checkpoint.
@@ -374,7 +375,9 @@
                 names to compose the keys in state_dict. Defaults to None.
             size_per_shard (int, optional): Maximum size of checkpoint shard file in MB. This is useful only when ``shard=True``. Defaults to 1024.
         """
-        self.checkpoint_io.save_optimizer(optimizer, checkpoint, shard, gather_dtensor, prefix, size_per_shard)
+        self.checkpoint_io.save_optimizer(
+            optimizer, checkpoint, shard, gather_dtensor, prefix, size_per_shard, use_async=use_async
+        )
 
     def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str) -> None:
         """Save lr scheduler to checkpoint.