mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-10 21:40:02 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .linear import Linear
|
||||
from colossalai.kernel.jit import bias_gelu_impl
|
||||
|
||||
from .linear import Linear
|
||||
|
||||
|
||||
class TransformerMLP(nn.Module):
|
||||
"""MLP.
|
||||
@@ -18,19 +18,13 @@ class TransformerMLP(nn.Module):
|
||||
super(TransformerMLP, self).__init__()
|
||||
|
||||
# Project to 4h.
|
||||
self.dense_h_to_4h = Linear(
|
||||
hidden_size,
|
||||
int(hidden_size*mlp_ratio),
|
||||
skip_bias_add=True)
|
||||
self.dense_h_to_4h = Linear(hidden_size, int(hidden_size * mlp_ratio), skip_bias_add=True)
|
||||
|
||||
self.bias_gelu_fusion = fuse_gelu
|
||||
self.activation_func = F.gelu
|
||||
|
||||
# Project back to h.
|
||||
self.dense_4h_to_h = Linear(
|
||||
int(hidden_size*mlp_ratio),
|
||||
hidden_size,
|
||||
skip_bias_add=True)
|
||||
self.dense_4h_to_h = Linear(int(hidden_size * mlp_ratio), hidden_size, skip_bias_add=True)
|
||||
|
||||
def forward(self, hidden_states):
|
||||
# hidden states should be in the shape of [s, b, h]
|
||||
@@ -39,11 +33,9 @@ class TransformerMLP(nn.Module):
|
||||
intermediate_parallel, bias_parallel = self.dense_h_to_4h(hidden_states)
|
||||
|
||||
if self.bias_gelu_fusion:
|
||||
intermediate_parallel = \
|
||||
bias_gelu_impl(intermediate_parallel, bias_parallel)
|
||||
intermediate_parallel = bias_gelu_impl(intermediate_parallel, bias_parallel)
|
||||
else:
|
||||
intermediate_parallel = \
|
||||
self.activation_func(intermediate_parallel + bias_parallel)
|
||||
intermediate_parallel = self.activation_func(intermediate_parallel + bias_parallel)
|
||||
|
||||
# [s, b, h]
|
||||
output, output_bias = self.dense_4h_to_h(intermediate_parallel)
|
||||
|
Reference in New Issue
Block a user