[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Author: Hongxin Liu
Date: 2023-09-19 14:20:26 +08:00 (committed via GitHub)
Parent: 3c6b831c26
Commit: 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions

View File

@@ -1,6 +1,6 @@
 from .wrapper import convert_to_xformer_model, recover_from_xformer_model
 __all__ = [
-    'convert_to_xformer_model',
-    'recover_from_xformer_model',
+    "convert_to_xformer_model",
+    "recover_from_xformer_model",
 ]
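
For context, the two names exported here come from the wrapper module imported above. A minimal usage sketch follows; the import path, the checkpoint name, and the assumption that both helpers return the model they modify are illustrative guesses, not taken from this commit.

```python
# Hypothetical usage of the exported helpers. Assumptions: the package path
# `coati.kernels` and the `facebook/opt-125m` checkpoint are placeholders, and
# both helpers are assumed to return the model they patch/restore.
from transformers import OPTForCausalLM

from coati.kernels import convert_to_xformer_model, recover_from_xformer_model  # placeholder import path

model = OPTForCausalLM.from_pretrained("facebook/opt-125m")
model = convert_to_xformer_model(model)     # train with xFormers memory-efficient attention
# ... training loop ...
model = recover_from_xformer_model(model)   # restore the stock OPT attention, e.g. before saving
```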

View File

@@ -21,11 +21,12 @@ class XOPTAttention(OPTAttention):
         output_attentions: bool = False,
     ) -> Tuple[Tensor, Optional[Tensor], Optional[Tuple[Tensor]]]:
         if not self.training:
-            return super().forward(hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask,
-                                   output_attentions)
+            return super().forward(
+                hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask, output_attentions
+            )
         """Input shape: Batch x Time x Channel"""
-        assert layer_head_mask is None, 'Xformers attention does not support layer_head_mask'
-        assert not output_attentions, 'Xformers attention does not support output_attentions'
+        assert layer_head_mask is None, "Xformers attention does not support layer_head_mask"
+        assert not output_attentions, "Xformers attention does not support output_attentions"
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
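
To make the branch above easier to read in isolation: outside of training the override defers entirely to the stock `OPTAttention.forward`, which still supports `layer_head_mask` and `output_attentions`, while in training mode those features are asserted away because the memory-efficient kernel never materializes the attention-weight matrix. A stripped-down sketch of that dispatch pattern, with made-up class names rather than the project's code, follows.

```python
# Stripped-down sketch of the train/eval dispatch shown above (made-up names):
# keep the reference path for eval, switch to the optimized path for training.
import torch
import torch.nn as nn


class ReferenceAttention(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x  # stand-in for the full-featured stock attention


class PatchedAttention(ReferenceAttention):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not self.training:
            return super().forward(x)  # eval: unchanged parent behaviour
        return x  # training: the memory-efficient kernel would run here


attn = PatchedAttention()
attn.eval()
_ = attn(torch.randn(2, 16, 8))   # parent path
attn.train()
_ = attn(torch.randn(2, 16, 8))   # optimized path
```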
@@ -69,12 +70,14 @@ class XOPTAttention(OPTAttention):
         key_states = key_states.transpose(1, 2)
         value_states = value_states.transpose(1, 2)
-        attn_output = xops.memory_efficient_attention(query_states,
-                                                      key_states,
-                                                      value_states,
-                                                      attn_bias=xops.LowerTriangularMask(),
-                                                      p=self.dropout if self.training else 0.0,
-                                                      scale=self.scaling)
+        attn_output = xops.memory_efficient_attention(
+            query_states,
+            key_states,
+            value_states,
+            attn_bias=xops.LowerTriangularMask(),
+            p=self.dropout if self.training else 0.0,
+            scale=self.scaling,
+        )
         # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
         # partitioned across GPUs when using tensor-parallelism.
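
The reformatted call above is only a line-wrapping change, but for readers who have not used xFormers, here is a self-contained sketch of the same call. The sizes, dtype, and CUDA assumption are illustrative, not taken from this file; `memory_efficient_attention` expects `(batch, seq_len, num_heads, head_dim)` inputs, which is why the surrounding code transposes dimensions 1 and 2 first.

```python
# Illustrative only: shapes, dtype, and the CUDA device are assumptions, not
# values from this file. The call mirrors the one in the diff above.
import torch
import xformers.ops as xops

B, S, H, D = 2, 16, 12, 64                   # batch, seq_len, heads, head_dim
q = torch.randn(B, S, H, D, device="cuda", dtype=torch.float16)
k = torch.randn(B, S, H, D, device="cuda", dtype=torch.float16)
v = torch.randn(B, S, H, D, device="cuda", dtype=torch.float16)

out = xops.memory_efficient_attention(
    q,
    k,
    v,
    attn_bias=xops.LowerTriangularMask(),    # causal (decoder) masking
    p=0.0,                                   # dropout probability, cf. self.dropout above
    scale=1.0 / D**0.5,                      # same role as self.scaling above
)
print(out.shape)  # torch.Size([2, 16, 12, 64]); the attention matrix is never materialized
```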