Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-07 20:10:17 +00:00
[pipeline] test pure pipeline process using llama (#4218)
* bloom policy
* llama pipeline forward and tests
* fix the output and attention_mask
* fix name
* bind argument to policy
* Revert "bloom policy"
This reverts commit 8dee68a0a2.
This policy should be reverted and copied to feature/bloom
* revert the bloom changes
* cancel unneeded inputs
* gpt
* finish llama
* causal lm and sequence classification
* revision
* add pure pipeline test
* fixed version
* fixed version
* pure pipeline
@@ -9,6 +9,7 @@ import torch
 import torch.distributed as dist
 from torch.distributed import ProcessGroup
 from torch.distributed import distributed_c10d as c10d
+from version_parser.version import Version
 
 from .stage_manager import PipelineStageManager
 
@@ -61,17 +62,6 @@ def _broadcast_object_list(object_list: List[Any],
         c10d._warn_not_in_group("broadcast_object_list")
         return
 
-    my_rank = dist.get_rank()
-    # Serialize object_list elements to tensors on src rank.
-    if my_rank == src:
-        if torch.__version__ >= "1.13.0":
-            tensor_list, size_list = zip(*[c10d._object_to_tensor(obj, device=device) for obj in object_list])
-        else:
-            tensor_list, size_list = zip(*[c10d._object_to_tensor(obj) for obj in object_list])
-        object_sizes_tensor = torch.cat(size_list)
-    else:
-        object_sizes_tensor = torch.empty(len(object_list), dtype=torch.long)
-
     is_nccl_backend = c10d._check_for_nccl_backend(group)
     current_device = None
 
@@ -83,6 +73,18 @@ def _broadcast_object_list(object_list: List[Any],
         current_device = torch.device("cpu")
         if is_nccl_backend:
             current_device = torch.device("cuda", torch.cuda.current_device())
+
+    my_rank = dist.get_rank()
+    # Serialize object_list elements to tensors on src rank.
+    if my_rank == src:
+        if Version(torch.__version__) >= Version("1.13.0"):
+            tensor_list, size_list = zip(*[c10d._object_to_tensor(obj, device=current_device) for obj in object_list])
+        else:
+            tensor_list, size_list = zip(*[c10d._object_to_tensor(obj) for obj in object_list])
+        object_sizes_tensor = torch.cat(size_list)
+    else:
+        object_sizes_tensor = torch.empty(len(object_list), dtype=torch.long)
+
     if is_nccl_backend:
         object_sizes_tensor = object_sizes_tensor.to(current_device)
 
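The first change in the hunks above replaces the lexicographic string comparison torch.__version__ >= "1.13.0" with a parsed-version comparison. A minimal sketch of why that matters, using packaging.version.Version purely for illustration (the patch itself imports Version from version_parser.version):

from packaging.version import Version

# Plain string comparison is lexicographic, so multi-digit components are
# mis-ordered: the character "9" sorts after "1".
print("1.9.0" >= "1.13.0")                    # True  -- wrong
print(Version("1.9.0") >= Version("1.13.0"))  # False -- correct

This is the same guard the patched code relies on when deciding whether to pass a device argument to c10d._object_to_tensor, which it only does for torch >= 1.13.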
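The second change defers serializing object_list until after the backend and device have been determined, so that with an NCCL backend the size and payload tensors are created directly on the current CUDA device. A rough sketch of the device-selection rule the patched code follows; pick_comm_device is a hypothetical helper for illustration, not part of the patch:

import torch
import torch.distributed as dist

def pick_comm_device(group=None):
    # NCCL moves data through GPU buffers, so tensors handed to the
    # collective must live on this rank's current CUDA device;
    # CPU backends such as Gloo expect CPU tensors instead.
    if dist.get_backend(group) == dist.Backend.NCCL:
        return torch.device("cuda", torch.cuda.current_device())
    return torch.device("cpu")

With that device in hand, the patched code passes device=current_device to c10d._object_to_tensor on torch >= 1.13, and in every case moves object_sizes_tensor onto that device when the backend is NCCL.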