mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-03 10:06:44 +00:00
[hotfix] fix memory leak in zero (#781)
This commit is contained in:
@@ -72,23 +72,13 @@ def run_stm():
|
||||
|
||||
# warmup done
|
||||
# only 2 params can be on CUDA
|
||||
limit_cuda_memory(0.26)
|
||||
limit_cuda_memory(0.26 / tensor_placement_policy._steady_cuda_cap_ratio)
|
||||
# use OPT-like eviction strategy
|
||||
apply_adjust(model, model.p0, [model.p0, model.p1], stateful_tensor_mgr)
|
||||
mem_collector.sample_model_data()
|
||||
mem_collector.sample_overall_data()
|
||||
apply_adjust(model, model.p1, [model.p0, model.p1], stateful_tensor_mgr)
|
||||
mem_collector.sample_model_data()
|
||||
mem_collector.sample_overall_data()
|
||||
apply_adjust(model, model.p2, [model.p0, model.p2], stateful_tensor_mgr)
|
||||
mem_collector.sample_model_data()
|
||||
mem_collector.sample_overall_data()
|
||||
apply_adjust(model, model.p0, [model.p0, model.p2], stateful_tensor_mgr)
|
||||
mem_collector.sample_model_data()
|
||||
mem_collector.sample_overall_data()
|
||||
apply_adjust(model, model.p1, [model.p1, model.p2], stateful_tensor_mgr)
|
||||
mem_collector.sample_model_data()
|
||||
mem_collector.finish_collection()
|
||||
|
||||
|
||||
def apply_adjust(model: torch.nn.Module, compute_param: Parameter, cuda_param_after_adjust: List[Parameter],
|
||||
|
Reference in New Issue
Block a user