Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-26 20:23:26 +00:00)
[pipeline] move bert related pipeline components to shardformer (#4187)

* move bert related pipeline components to shardformer
* fix bugs
* revision
* fix bert model tests
* fix bert_lm_head model tests
* fix tests
* fix tests
* done checks
* skip bloom
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Union

+import numpy as np
 import torch.nn as nn
 from torch import Tensor
 from torch.nn import Module
@@ -176,3 +177,33 @@ class Policy(ABC):
             List[Dict[int, Tensor]]: List of parameters that should be shared across stages. E.g. [{0: module.model.embed_tokens.weight, 3: module.lm_head.weight}]
         """
         return []
+
+    @staticmethod
+    def distribute_layers(num_layers: int, num_stages: int) -> List[int]:
+        """Divide layers into stages
+
+        """
+        quotient = num_layers // num_stages
+        remainder = num_layers % num_stages
+
+        # calculate the num_layers per stage
+        layers_per_stage = [quotient] * num_stages
+
+        # deal with the rest layers
+        if remainder > 0:
+            start_position = num_layers // 2 - remainder // 2
+            for i in range(start_position, start_position + remainder):
+                layers_per_stage[i] += 1
+        return layers_per_stage
+
+    @staticmethod
+    def get_stage_index(layers_per_stage: List[int], stage: int) -> List[int]:
+        """
+        get the start index and end index of layers for each stage.
+        """
+        num_layers_per_stage_accumulated = np.insert(np.cumsum(layers_per_stage), 0, 0)
+
+        start_idx = num_layers_per_stage_accumulated[stage]
+        end_idx = num_layers_per_stage_accumulated[stage + 1]
+
+        return [start_idx, end_idx]