add interleaved pipeline, fix naive amp and update pipeline model initializer (#80)

This commit is contained in:
ver217
2021-12-20 23:26:19 +08:00
committed by GitHub
parent 91c327cb44
commit 8f02a88db2
17 changed files with 544 additions and 170 deletions

View File

@@ -32,7 +32,7 @@ class DataParallelGradientHandler(BaseGradientHandler):
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(param)
param.main_grad = param.grad
# param.main_grad = param.grad
# For each bucket, all-reduce and copy all-reduced grads.
for tp in buckets: