[checkpointio] fix zero optimizer async save memory (#6151)

* [checkpointio] fix zero optimizer async save memory

* [checkpointio] fit new tensornvme api

* [checkpointio] fit new tensornvme api
This commit is contained in:
Hongxin Liu
2024-11-25 14:46:31 +08:00
committed by GitHub
parent 8ecff0cb7f
commit ab856fd308
7 changed files with 57 additions and 42 deletions

View File

@@ -311,7 +311,7 @@ def async_save_state_dict_shards(
index_file.append_weight_map(key, shard_file)
checkpoint_file_path = os.path.join(checkpoint, shard_file)
-writer = AsyncFileWriter(open(checkpoint_file_path, "wb", buffering=0), n_write_entries, backend="pthread")
+writer = AsyncFileWriter(checkpoint_file_path, n_write_entries, backend="pthread")
writers.append(writer)
if pinned_state_dict is not None: