Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-11-21 11:25:45 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
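Most of the churn in the diff below is mechanical reformatting from the updated pre-commit hooks (the title's "run all files" matches pre-commit run --all-files): black normalizes string quotes to double quotes, pads bare float literals such as 0. to 0.0, and explodes long call sites into one argument per line with a trailing comma. A minimal sketch of reproducing the same rewrites locally, assuming black is installed (format_str and Mode are black's public API; the snippet itself is illustrative, not from the repo):

    import black

    # Two of the patterns this commit touches: quote style and float literals.
    src = "print('building model')\nx = dict(a=0.,\n         b=1)\n"
    print(black.format_str(src, mode=black.Mode()))
    # print("building model")
    # x = dict(a=0.0, b=1)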
@@ -1,13 +1,11 @@
 import torch
-import transformers
-from packaging import version
 from transformers import AlbertConfig, AlbertForSequenceClassification
 
 from .bert import get_bert_data_loader
 from .registry import non_distributed_component_funcs
 
 
-@non_distributed_component_funcs.register(name='albert')
+@non_distributed_component_funcs.register(name="albert")
 def get_training_components():
     hidden_dim = 8
     num_head = 4
@@ -16,20 +14,21 @@ def get_training_components():
     vocab_size = 32
 
     def bert_model_builder(checkpoint: bool = False):
-        config = AlbertConfig(vocab_size=vocab_size,
-                              gradient_checkpointing=checkpoint,
-                              hidden_size=hidden_dim,
-                              intermediate_size=hidden_dim * 4,
-                              num_attention_heads=num_head,
-                              max_position_embeddings=sequence_length,
-                              num_hidden_layers=num_layer,
-                              hidden_dropout_prob=0.,
-                              attention_probs_dropout_prob=0.)
-        print('building AlbertForSequenceClassification model')
+        config = AlbertConfig(
+            vocab_size=vocab_size,
+            gradient_checkpointing=checkpoint,
+            hidden_size=hidden_dim,
+            intermediate_size=hidden_dim * 4,
+            num_attention_heads=num_head,
+            max_position_embeddings=sequence_length,
+            num_hidden_layers=num_layer,
+            hidden_dropout_prob=0.0,
+            attention_probs_dropout_prob=0.0,
+        )
+        print("building AlbertForSequenceClassification model")
 
         # adapting huggingface BertForSequenceClassification for single unittest calling interface
         class ModelAdaptor(AlbertForSequenceClassification):
-
             def forward(self, input_ids, labels):
                 """
                 inputs: data, label
@@ -44,16 +43,20 @@ def get_training_components():
         return model
 
     is_distributed = torch.distributed.is_initialized()
-    trainloader = get_bert_data_loader(n_class=vocab_size,
-                                       batch_size=2,
-                                       total_samples=10000,
-                                       sequence_length=sequence_length,
-                                       is_distributed=is_distributed)
-    testloader = get_bert_data_loader(n_class=vocab_size,
-                                      batch_size=2,
-                                      total_samples=10000,
-                                      sequence_length=sequence_length,
-                                      is_distributed=is_distributed)
+    trainloader = get_bert_data_loader(
+        n_class=vocab_size,
+        batch_size=2,
+        total_samples=10000,
+        sequence_length=sequence_length,
+        is_distributed=is_distributed,
+    )
+    testloader = get_bert_data_loader(
+        n_class=vocab_size,
+        batch_size=2,
+        total_samples=10000,
+        sequence_length=sequence_length,
+        is_distributed=is_distributed,
+    )
 
     criterion = None
     return bert_model_builder, trainloader, testloader, torch.optim.Adam, criterion
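The registered component is a self-contained training fixture: get_training_components() returns a model builder, train/test loaders, an optimizer class (torch.optim.Adam), and a criterion, which is None because the adapted model computes its own loss. A minimal sketch of how a test might drive it, assuming the loaders yield (data, label) pairs as the ModelAdaptor docstring suggests and that forward returns a scalar loss; the loop body and learning rate are illustrative assumptions, not code from the repo:

    import torch

    # get_training_components is the function registered under "albert" above.
    model_builder, trainloader, testloader, optim_cls, criterion = get_training_components()

    model = model_builder(checkpoint=False)             # checkpoint=True would enable gradient checkpointing
    optimizer = optim_cls(model.parameters(), lr=1e-3)  # optim_cls is torch.optim.Adam

    model.train()
    for step, (data, label) in enumerate(trainloader):
        if step == 2:                                   # a short smoke test; the loader holds 10000 samples
            break
        optimizer.zero_grad()
        loss = model(data, label)                       # criterion is None: the model returns its own loss
        loss.backward()
        optimizer.step()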