[checkpointio] fix async io (#6155)

2025-09-20 17:10:03 +00:00 · 2024-12-16 10:36:28 +08:00
parent de3d371f65
commit e994c64568
2 changed files with 2 additions and 3 deletions
--- a/colossalai/checkpoint_io/general_checkpoint_io.py
+++ b/colossalai/checkpoint_io/general_checkpoint_io.py
@@ -8,8 +8,6 @@ from typing import Optional
 import torch.nn as nn
 from torch.optim import Optimizer
-from colossalai.utils.safetensors import move_and_save
 from .checkpoint_io_base import CheckpointIO
 from .index_file import CheckpointIndexFile
 from .utils import (
@@ -54,6 +52,7 @@ class GeneralCheckpointIO(CheckpointIO):
            pass
        if use_async:
+            from colossalai.utils.safetensors import move_and_save
            if id(model) not in self.pinned_state_dicts:
                self.pinned_state_dicts[id(model)] = create_pinned_state_dict(state_dict)
--- a/colossalai/checkpoint_io/utils.py
+++ b/colossalai/checkpoint_io/utils.py
@@ -19,7 +19,6 @@ from colossalai.tensor.d_tensor import (
    to_global,
    to_global_for_customized_distributed_tensor,
 )
-from colossalai.utils.safetensors import move_and_save
 SAFE_WEIGHTS_NAME = "model.safetensors"
 WEIGHTS_NAME = "pytorch_model.bin"
@@ -289,6 +288,7 @@ def async_save_state_dict_shards(
    Returns:
        int: the total size of shards
    """
+    from colossalai.utils.safetensors import move_and_save
    total_size = 0
    shard_filenames = []