mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-06 19:40:28 +00:00
[pipeline] rewrite t5 tests & support multi-tensor transmitting in pipeline (#4388)
* fix remaining t5 bugs/rewrite t5 tests
* fix multi-tensor communication in pipeline
* rearrange test_config
* fix KeyError in sync_shared_params
* fix get_held_layers & Randomizer, complete t5 tests
* erase printing
* fix get_held_layers through modifying _release_unheld_layers
* fix _get_recursive_held_layers bug
This commit is contained in:
committed by
Hongxin Liu
parent
906426cb44
commit
ed4c448488
@@ -3,6 +3,7 @@
|
||||
|
||||
import io
|
||||
import pickle
|
||||
import re
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import torch
|
||||
@@ -31,7 +32,10 @@ def _cuda_safe_tensor_to_object(tensor: torch.Tensor, tensor_size: torch.Size) -
|
||||
if b'cuda' in buf:
|
||||
buf_array = bytearray(buf)
|
||||
device_index = torch.cuda.current_device()
|
||||
buf_array[buf_array.find(b'cuda') + 5] = 48 + device_index
|
||||
# There might be more than one output tensor during forward
|
||||
for cuda_str in re.finditer(b'cuda', buf_array):
|
||||
pos = cuda_str.start()
|
||||
buf_array[pos + 5] = 48 + device_index
|
||||
buf = bytes(buf_array)
|
||||
|
||||
io_bytes = io.BytesIO(buf)
|
||||
|
Reference in New Issue
Block a user