mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-15 14:12:02 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
This commit is contained in:
@@ -1,25 +1,23 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers import GPT2Config, GPT2LMHeadModel
|
||||
from transformers import BertConfig, BertLMHeadModel
|
||||
from transformers import BertConfig, BertLMHeadModel, GPT2Config, GPT2LMHeadModel
|
||||
|
||||
from tests.components_to_test.registry import non_distributed_component_funcs
|
||||
|
||||
class GPTLMModel(nn.Module):
|
||||
|
||||
def __init__(self,
|
||||
hidden_size=768,
|
||||
num_layers=12,
|
||||
num_attention_heads=12,
|
||||
max_seq_len=1024,
|
||||
vocab_size=50257):
|
||||
class GPTLMModel(nn.Module):
|
||||
def __init__(self, hidden_size=768, num_layers=12, num_attention_heads=12, max_seq_len=1024, vocab_size=50257):
|
||||
super().__init__()
|
||||
self.model = GPT2LMHeadModel(
|
||||
GPT2Config(n_embd=hidden_size,
|
||||
n_layer=num_layers,
|
||||
n_head=num_attention_heads,
|
||||
n_positions=max_seq_len,
|
||||
n_ctx=max_seq_len,
|
||||
vocab_size=vocab_size))
|
||||
GPT2Config(
|
||||
n_embd=hidden_size,
|
||||
n_layer=num_layers,
|
||||
n_head=num_attention_heads,
|
||||
n_positions=max_seq_len,
|
||||
n_ctx=max_seq_len,
|
||||
vocab_size=vocab_size,
|
||||
)
|
||||
)
|
||||
|
||||
def forward(self, input_ids, attention_mask):
|
||||
# Only return lm_logits
|
||||
@@ -27,7 +25,6 @@ class GPTLMModel(nn.Module):
|
||||
|
||||
|
||||
class LMLoss(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.loss_fn = nn.CrossEntropyLoss()
|
||||
@@ -38,18 +35,27 @@ class LMLoss(nn.Module):
|
||||
# Flatten the tokens
|
||||
return self.loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
|
||||
|
||||
|
||||
class BertLMModel(nn.Module):
|
||||
def __init__(self, hidden_size=768, num_layers=12, num_attention_heads=32, vocab_size=30522):
|
||||
super().__init__()
|
||||
self.model = BertLMHeadModel(BertConfig(n_embd=hidden_size, num_hidden_layers=num_layers, hidden_size=hidden_size,
|
||||
num_attention_heads=num_attention_heads, max_position_embeddings=hidden_size,
|
||||
vocab_size=vocab_size))
|
||||
self.model = BertLMHeadModel(
|
||||
BertConfig(
|
||||
n_embd=hidden_size,
|
||||
num_hidden_layers=num_layers,
|
||||
hidden_size=hidden_size,
|
||||
num_attention_heads=num_attention_heads,
|
||||
max_position_embeddings=hidden_size,
|
||||
vocab_size=vocab_size,
|
||||
)
|
||||
)
|
||||
|
||||
def forward(self, input_ids, attention_mask):
|
||||
# Only return lm_logits
|
||||
return self.model(input_ids=input_ids, attention_mask=attention_mask, use_cache=True)[0]
|
||||
|
||||
@non_distributed_component_funcs.register(name='bert_')
|
||||
|
||||
@non_distributed_component_funcs.register(name="bert_")
|
||||
def get_bert_components():
|
||||
vocab_size = 1024
|
||||
seq_len = 64
|
||||
@@ -67,7 +73,8 @@ def get_bert_components():
|
||||
|
||||
return bert_model_builder, bert_data_gen
|
||||
|
||||
@non_distributed_component_funcs.register(name='gpt2_')
|
||||
|
||||
@non_distributed_component_funcs.register(name="gpt2_")
|
||||
def get_gpt2_components():
|
||||
vocab_size = 1024
|
||||
seq_len = 8
|
||||
@@ -83,4 +90,4 @@ def get_gpt2_components():
|
||||
kwargs = dict(input_ids=input_ids, attention_mask=attention_mask)
|
||||
return kwargs
|
||||
|
||||
return gpt2_model_builder, gpt2_data_gen
|
||||
return gpt2_model_builder, gpt2_data_gen
|
||||
|
Reference in New Issue
Block a user