[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format
2025-09-15 14:12:02 +00:00 · 2023-09-19 14:20:26 +08:00
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions
--- a/tests/test_auto_parallel/test_offload/model_utils.py
+++ b/tests/test_auto_parallel/test_offload/model_utils.py
@@ -1,25 +1,23 @@
 import torch
 import torch.nn as nn
-from transformers import GPT2Config, GPT2LMHeadModel
-from transformers import BertConfig, BertLMHeadModel
+from transformers import BertConfig, BertLMHeadModel, GPT2Config, GPT2LMHeadModel
+
 from tests.components_to_test.registry import non_distributed_component_funcs

-class GPTLMModel(nn.Module):

-    def __init__(self,
-                 hidden_size=768,
-                 num_layers=12,
-                 num_attention_heads=12,
-                 max_seq_len=1024,
-                 vocab_size=50257):
+class GPTLMModel(nn.Module):
+    def __init__(self, hidden_size=768, num_layers=12, num_attention_heads=12, max_seq_len=1024, vocab_size=50257):
        super().__init__()
        self.model = GPT2LMHeadModel(
-            GPT2Config(n_embd=hidden_size,
-                       n_layer=num_layers,
-                       n_head=num_attention_heads,
-                       n_positions=max_seq_len,
-                       n_ctx=max_seq_len,
-                       vocab_size=vocab_size))
+            GPT2Config(
+                n_embd=hidden_size,
+                n_layer=num_layers,
+                n_head=num_attention_heads,
+                n_positions=max_seq_len,
+                n_ctx=max_seq_len,
+                vocab_size=vocab_size,
+            )
+        )

    def forward(self, input_ids, attention_mask):
        # Only return lm_logits
@@ -27,7 +25,6 @@ class GPTLMModel(nn.Module):


 class LMLoss(nn.Module):
-
    def __init__(self):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss()
@@ -38,18 +35,27 @@ class LMLoss(nn.Module):
        # Flatten the tokens
        return self.loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

+
 class BertLMModel(nn.Module):
    def __init__(self, hidden_size=768, num_layers=12, num_attention_heads=32, vocab_size=30522):
        super().__init__()
-        self.model = BertLMHeadModel(BertConfig(n_embd=hidden_size, num_hidden_layers=num_layers, hidden_size=hidden_size,
-                                                num_attention_heads=num_attention_heads, max_position_embeddings=hidden_size,
-                                                vocab_size=vocab_size))
+        self.model = BertLMHeadModel(
+            BertConfig(
+                n_embd=hidden_size,
+                num_hidden_layers=num_layers,
+                hidden_size=hidden_size,
+                num_attention_heads=num_attention_heads,
+                max_position_embeddings=hidden_size,
+                vocab_size=vocab_size,
+            )
+        )

    def forward(self, input_ids, attention_mask):
        # Only return lm_logits
        return self.model(input_ids=input_ids, attention_mask=attention_mask, use_cache=True)[0]

-@non_distributed_component_funcs.register(name='bert_')
+
+@non_distributed_component_funcs.register(name="bert_")
 def get_bert_components():
    vocab_size = 1024
    seq_len = 64
@@ -67,7 +73,8 @@ def get_bert_components():

    return bert_model_builder, bert_data_gen

-@non_distributed_component_funcs.register(name='gpt2_')
+
+@non_distributed_component_funcs.register(name="gpt2_")
 def get_gpt2_components():
    vocab_size = 1024
    seq_len = 8
@@ -83,4 +90,4 @@ def get_gpt2_components():
        kwargs = dict(input_ids=input_ids, attention_mask=attention_mask)
        return kwargs

-    return gpt2_model_builder, gpt2_data_gen
+    return gpt2_model_builder, gpt2_data_gen