[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
This commit is contained in:
Hongxin Liu
2023-09-19 14:20:26 +08:00
committed by GitHub
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions

View File

@@ -4,22 +4,25 @@ from transformers import GPT2Config, GPT2LMHeadModel
## Define the Model and Loss Based on Huggingface transformers GPT2LMHeadModel
class GPTLMModel(nn.Module):
def __init__(self,
hidden_size=768,
num_layers=12,
num_attention_heads=12,
max_seq_len=1024,
vocab_size=50257,
checkpoint=False):
def __init__(
self,
hidden_size=768,
num_layers=12,
num_attention_heads=12,
max_seq_len=1024,
vocab_size=50257,
checkpoint=False,
):
super().__init__()
self.checkpoint = checkpoint
self.config = GPT2Config(n_embd=hidden_size,
n_layer=num_layers,
n_head=num_attention_heads,
n_positions=max_seq_len,
n_ctx=max_seq_len,
vocab_size=vocab_size)
self.config = GPT2Config(
n_embd=hidden_size,
n_layer=num_layers,
n_head=num_attention_heads,
n_positions=max_seq_len,
n_ctx=max_seq_len,
vocab_size=vocab_size,
)
self.model = GPT2LMHeadModel(self.config)
if checkpoint:
self.model.gradient_checkpointing_enable()
@@ -82,4 +85,4 @@ def model_builder(model_size: str) -> callable:
raise TypeError(f"model_builder {model_size}")
__all__ = ['model_builder']
__all__ = ["model_builder"]

View File

@@ -6,7 +6,6 @@ from torch.profiler import ProfilerActivity, profile, schedule, tensorboard_trac
class DummyProfiler:
def __init__(self):
self.step_number = 0
@@ -27,11 +26,13 @@ def get_tflops(model_numel, batch_size, seq_len, step_time):
def get_profile_context(enable_flag, warmup_steps, active_steps, save_dir):
if enable_flag:
return profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
schedule=schedule(wait=0, warmup=warmup_steps, active=active_steps),
on_trace_ready=tensorboard_trace_handler(save_dir),
record_shapes=True,
profile_memory=True)
return profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
schedule=schedule(wait=0, warmup=warmup_steps, active=active_steps),
on_trace_ready=tensorboard_trace_handler(save_dir),
record_shapes=True,
profile_memory=True,
)
else:
return nullcontext(DummyProfiler())