[pipeline] rewrite t5 tests & support multi-tensor transmitting in pipeline (#4388)

* fix remaining t5 bugs/rewrite t5 tests

* fix multi-tensor communication in pipeline

* rearrange test_config

* fix KeyError in sync_shared_params

* fix get_held_layers & Randomizer, complete t5 tests

* erase printing

* fix get_held_layers by modifying _release_unheld_layers

* fix _get_recursive_held_layers bug
This commit is contained in:
Baizhou Zhang
2023-08-08 17:46:44 +08:00
committed by Hongxin Liu
parent 906426cb44
commit ed4c448488
11 changed files with 196 additions and 246 deletions

View File

@@ -107,8 +107,15 @@ class OneForwardOneBackwardSchedule(PipelineSchedule):
if output_obj_grad is None:
optimizer.backward(output_obj)
else:
for k, grad in output_obj_grad.items():
optimizer.backward_by_grad(output_obj[k], grad)
if "backward_tensor_keys" not in output_obj:
for k, grad in output_obj_grad.items():
optimizer.backward_by_grad(output_obj[k], grad)
else:
for k, grad in output_obj_grad.items():
output_obj[k].grad = grad
for k in output_obj["backward_tensor_keys"]:
tensor_to_backward = output_obj[k]
optimizer.backward_by_grad(tensor_to_backward, tensor_to_backward.grad)
# Collect the grad of the input_obj.
input_obj_grad = None