[shardformer] add Dropout layer supporting different dropout patterns (#3856)

* add dropout layer, add dropout test

* modify the seed manager to work as a context manager

* add a copy of col_nn.layer

* add dist_crossentropy loss; separate module test

* polish the code

* fix dist crossentropy loss
This commit is contained in:
FoolPlayer
2023-06-01 16:21:02 +08:00
committed by Frank Lee
parent c594dc2f1c
commit ab8a47f830
14 changed files with 1413 additions and 41 deletions

View File

@@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, List, Tuple, Type
import torch.nn as nn
from transformers.models.bert.modeling_bert import BertEmbeddings, BertLayer, BertLMPredictionHead
import colossalai.nn as col_nn
import colossalai.shardformer.layer.layers as col_nn
from .basepolicy import Argument, Col_Layer, Layer, Policy, Row_Layer
@@ -142,7 +142,7 @@ class BertPolicy(Policy):
weight="decoder.weight",
bias="decoder.bias",
replace_layer=col_nn.Linear1D_Col,
gather_output=True,
# gather_output=True,
)
]