[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
2025-09-07 12:01:39 +00:00 · 2023-09-19 14:20:26 +08:00
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions
--- a/colossalai/_analyzer/fx/codegen.py
+++ b/colossalai/_analyzer/fx/codegen.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Iterable, List, Tuple
+from typing import Any, Dict, List, Tuple

 import torch

@@ -22,7 +22,7 @@ from torch.fx.node import Argument, Node, _get_qualified_name, _type_repr, map_a
 import colossalai
 from colossalai.fx._compatibility import compatibility

-_register_custom_builtin('colossalai', 'import colossalai', colossalai)
+_register_custom_builtin("colossalai", "import colossalai", colossalai)


 def _gen_ckpt_fn_def(label, free_vars: List[str]) -> str:
@@ -43,17 +43,17 @@ def _gen_ckpt_usage(label, input_vars, output_vars, use_reentrant=True):
    """
    Generate the checkpoint function call code text
    """
-    outputs = ', '.join(output_vars)
-    inputs = ', '.join(input_vars)
-    return f'{outputs} = torch.utils.checkpoint.checkpoint(self.checkpoint_{label}, {inputs}, use_reentrant={use_reentrant})'
+    outputs = ", ".join(output_vars)
+    inputs = ", ".join(input_vars)
+    return f"{outputs} = torch.utils.checkpoint.checkpoint(self.checkpoint_{label}, {inputs}, use_reentrant={use_reentrant})"


 def _end_of_ckpt(node: Node, ckpt_level: int) -> bool:
    """
    Check if the node could end the ckpt region at `ckpt_level`
    """
-    if len(node.meta['info'].activation_checkpoint) > ckpt_level:
-        return node.meta['info'].activation_checkpoint[ckpt_level] is not None
+    if len(node.meta["info"].activation_checkpoint) > ckpt_level:
+        return node.meta["info"].activation_checkpoint[ckpt_level] is not None
    return True


@@ -94,8 +94,8 @@ def _find_nested_ckpt_regions(node_list: List[Node], ckpt_level: int = 0):
    current_region = None

    for idx, node in enumerate(node_list):
-        if len(node.meta['info'].activation_checkpoint) > ckpt_level:
-            act_ckpt_label = node.meta['info'].activation_checkpoint[ckpt_level]
+        if len(node.meta["info"].activation_checkpoint) > ckpt_level:
+            act_ckpt_label = node.meta["info"].activation_checkpoint[ckpt_level]

            # this activation checkpoint label is not set yet
            # meaning this is the first node of the activation ckpt region
@@ -131,13 +131,9 @@ def _find_nested_ckpt_regions(node_list: List[Node], ckpt_level: int = 0):
    return ckpt_regions


-def emit_ckpt_func(body,
-                   ckpt_func,
-                   node_list: List[Node],
-                   emit_node_func,
-                   delete_unused_value_func,
-                   ckpt_level=0,
-                   in_ckpt=False):
+def emit_ckpt_func(
+    body, ckpt_func, node_list: List[Node], emit_node_func, delete_unused_value_func, ckpt_level=0, in_ckpt=False
+):
    """Emit ckpt function in nested way

    Args:
@@ -156,12 +152,12 @@ def emit_ckpt_func(body,

    # label given by each layer, e.g. if you are currently at level (0, 1, 1)
    # the label will be '0_1_1'
-    label = "_".join([str(idx) for idx in node_list[0].meta['info'].activation_checkpoint[:ckpt_level + 1]])
+    label = "_".join([str(idx) for idx in node_list[0].meta["info"].activation_checkpoint[: ckpt_level + 1]])
    ckpt_fn_def = _gen_ckpt_fn_def(label, inputs)
-    ckpt_func.append(f'{ckpt_fn_def}\n')
+    ckpt_func.append(f"{ckpt_fn_def}\n")

    # if there is more level to fetch
-    if ckpt_level + 1 < max(map(lambda node: len(node.meta['info'].activation_checkpoint), node_list)):
+    if ckpt_level + 1 < max(map(lambda node: len(node.meta["info"].activation_checkpoint), node_list)):
        ckpt_regions = _find_nested_ckpt_regions(node_list, ckpt_level + 1)
        start_idx = [item[0] for item in ckpt_regions]
        end_idx = [item[1] for item in ckpt_regions]
@@ -174,33 +170,40 @@ def emit_ckpt_func(body,
                break

            if node_idx in start_idx:
-                ckpt_node_list = node_list[node_idx:end_idx[start_idx.index(node_idx)] + 1]
-                emit_ckpt_func(ckpt_func, ckpt_func_buffer, ckpt_node_list, emit_node_func, delete_unused_value_func,
-                               ckpt_level + 1, True)
+                ckpt_node_list = node_list[node_idx : end_idx[start_idx.index(node_idx)] + 1]
+                emit_ckpt_func(
+                    ckpt_func,
+                    ckpt_func_buffer,
+                    ckpt_node_list,
+                    emit_node_func,
+                    delete_unused_value_func,
+                    ckpt_level + 1,
+                    True,
+                )
                node_idx += len(ckpt_node_list)

            else:
                node = node_list[node_idx]
                emit_node_func(node, ckpt_func)
-                ckpt_func[-1] = '    ' + ckpt_func[-1]
+                ckpt_func[-1] = "    " + ckpt_func[-1]
                delete_unused_value_func(node, ckpt_func)
                node_idx += 1

-        ckpt_func.append('    ' + _gen_ckpt_output(outputs) + '\n\n')
+        ckpt_func.append("    " + _gen_ckpt_output(outputs) + "\n\n")
        ckpt_func += ckpt_func_buffer

    # last level
    else:
        for node in node_list:
            emit_node_func(node, ckpt_func)
-            ckpt_func[-1] = '    ' + ckpt_func[-1]
+            ckpt_func[-1] = "    " + ckpt_func[-1]
            delete_unused_value_func(node, ckpt_func)

-        ckpt_func.append('    ' + _gen_ckpt_output(outputs) + '\n\n')
+        ckpt_func.append("    " + _gen_ckpt_output(outputs) + "\n\n")

-    usage = _gen_ckpt_usage(label, inputs, outputs, False) + '\n'
+    usage = _gen_ckpt_usage(label, inputs, outputs, False) + "\n"
    if in_ckpt:
-        usage = '    ' + usage
+        usage = "    " + usage
    body.append(usage)


@@ -229,7 +232,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,

        # process ckpt_regions
        if node_idx in start_idx:
-            ckpt_node_list = node_list[node_idx:end_idx[start_idx.index(node_idx)] + 1]
+            ckpt_node_list = node_list[node_idx : end_idx[start_idx.index(node_idx)] + 1]
            emit_ckpt_func(body, ckpt_func, ckpt_node_list, emit_node_func, delete_unused_value_func)
            node_idx += len(ckpt_node_list)

@@ -243,7 +246,6 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,

@compatibility(is_backward_compatible=True)
 class ActivationCheckpointCodeGen(CodeGen):
-
    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
        free_vars: List[str] = []
        body: List[str] = []
@@ -251,7 +253,7 @@ class ActivationCheckpointCodeGen(CodeGen):
        wrapped_fns: Dict[str, None] = {}

        # Wrap string in list to pass by reference
-        maybe_return_annotation: List[str] = ['']
+        maybe_return_annotation: List[str] = [""]

        def add_global(name_hint: str, obj: Any):
            """Add an obj to be tracked as a global.
@@ -259,7 +261,7 @@ class ActivationCheckpointCodeGen(CodeGen):
            Graph, like functions or types.
            Returns: the global name that should be used to reference 'obj' in generated source.
            """
-            if _is_from_torch(obj) and obj != torch.device:    # to support registering torch.device
+            if _is_from_torch(obj) and obj != torch.device:  # to support registering torch.device
                # HACK: workaround for how torch custom ops are registered. We
                # can't import them like normal modules so they must retain their
                # fully qualified name.
@@ -281,16 +283,16 @@ class ActivationCheckpointCodeGen(CodeGen):
        def type_repr(o: Any):
            if o == ():
                # Empty tuple is used for empty tuple type annotation Tuple[()]
-                return '()'
+                return "()"

            typename = _type_repr(o)

-            if hasattr(o, '__origin__'):
+            if hasattr(o, "__origin__"):
                # This is a generic type, e.g. typing.List[torch.Tensor]
                origin_type = _origin_type_map.get(o.__origin__, o.__origin__)
                origin_typename = add_global(_type_repr(origin_type), origin_type)

-                if hasattr(o, '__args__'):
+                if hasattr(o, "__args__"):
                    # Assign global names for each of the inner type variables.
                    args = [type_repr(arg) for arg in o.__args__]

@@ -309,19 +311,18 @@ class ActivationCheckpointCodeGen(CodeGen):
            return add_global(typename, o)

        def _format_args(args: Tuple[Argument, ...], kwargs: Dict[str, Argument]) -> str:
-
            def _get_repr(arg):
                # Handle NamedTuples (if it has `_fields`) via add_global.
-                if isinstance(arg, tuple) and hasattr(arg, '_fields'):
+                if isinstance(arg, tuple) and hasattr(arg, "_fields"):
                    qualified_name = _get_qualified_name(type(arg))
                    global_name = add_global(qualified_name, type(arg))
                    return f"{global_name}{repr(tuple(arg))}"
                return repr(arg)

-            args_s = ', '.join(_get_repr(a) for a in args)
-            kwargs_s = ', '.join(f'{k} = {_get_repr(v)}' for k, v in kwargs.items())
+            args_s = ", ".join(_get_repr(a) for a in args)
+            kwargs_s = ", ".join(f"{k} = {_get_repr(v)}" for k, v in kwargs.items())
            if args_s and kwargs_s:
-                return f'{args_s}, {kwargs_s}'
+                return f"{args_s}, {kwargs_s}"
            return args_s or kwargs_s

        # Run through reverse nodes and record the first instance of a use
@@ -347,82 +348,94 @@ class ActivationCheckpointCodeGen(CodeGen):
            not used in the remainder of the code are freed and the memory usage
            of the code is optimal.
            """
-            if user.op == 'placeholder':
+            if user.op == "placeholder":
                return
-            if user.op == 'output':
-                body.append('\n')
+            if user.op == "output":
+                body.append("\n")
                return
            nodes_to_delete = user_to_last_uses.get(user, [])
            if len(nodes_to_delete):
-                to_delete_str = ' = '.join([repr(n) for n in nodes_to_delete] + ['None'])
-                body.append(f';  {to_delete_str}\n')
+                to_delete_str = " = ".join([repr(n) for n in nodes_to_delete] + ["None"])
+                body.append(f";  {to_delete_str}\n")
            else:
-                body.append('\n')
+                body.append("\n")

        # NOTE: we add a variable to distinguish body and ckpt_func
        def emit_node(node: Node, body):
-            maybe_type_annotation = '' if node.type is None else f' : {type_repr(node.type)}'
-            if node.op == 'placeholder':
+            maybe_type_annotation = "" if node.type is None else f" : {type_repr(node.type)}"
+            if node.op == "placeholder":
                assert isinstance(node.target, str)
-                maybe_default_arg = '' if not node.args else f' = {repr(node.args[0])}'
-                free_vars.append(f'{node.target}{maybe_type_annotation}{maybe_default_arg}')
-                raw_name = node.target.replace('*', '')
+                maybe_default_arg = "" if not node.args else f" = {repr(node.args[0])}"
+                free_vars.append(f"{node.target}{maybe_type_annotation}{maybe_default_arg}")
+                raw_name = node.target.replace("*", "")
                if raw_name != repr(node):
-                    body.append(f'{repr(node)} = {raw_name}\n')
+                    body.append(f"{repr(node)} = {raw_name}\n")
                return
-            elif node.op == 'call_method':
+            elif node.op == "call_method":
                assert isinstance(node.target, str)
-                body.append(f'{repr(node)}{maybe_type_annotation} = {_format_target(repr(node.args[0]), node.target)}'
-                            f'({_format_args(node.args[1:], node.kwargs)})')
+                body.append(
+                    f"{repr(node)}{maybe_type_annotation} = {_format_target(repr(node.args[0]), node.target)}"
+                    f"({_format_args(node.args[1:], node.kwargs)})"
+                )
                return
-            elif node.op == 'call_function':
+            elif node.op == "call_function":
                assert callable(node.target)
                # pretty print operators
-                if node.target.__module__ == '_operator' and node.target.__name__ in magic_methods:
+                if node.target.__module__ == "_operator" and node.target.__name__ in magic_methods:
                    assert isinstance(node.args, tuple)
-                    body.append(f'{repr(node)}{maybe_type_annotation} = '
-                                f'{magic_methods[node.target.__name__].format(*(repr(a) for a in node.args))}')
+                    body.append(
+                        f"{repr(node)}{maybe_type_annotation} = "
+                        f"{magic_methods[node.target.__name__].format(*(repr(a) for a in node.args))}"
+                    )
                    return

                # pretty print inplace operators; required for jit.script to work properly
                # not currently supported in normal FX graphs, but generated by torchdynamo
-                if node.target.__module__ == '_operator' and node.target.__name__ in inplace_methods:
-                    body.append(f'{inplace_methods[node.target.__name__].format(*(repr(a) for a in node.args))};  '
-                                f'{repr(node)}{maybe_type_annotation} = {repr(node.args[0])}')
+                if node.target.__module__ == "_operator" and node.target.__name__ in inplace_methods:
+                    body.append(
+                        f"{inplace_methods[node.target.__name__].format(*(repr(a) for a in node.args))};  "
+                        f"{repr(node)}{maybe_type_annotation} = {repr(node.args[0])}"
+                    )
                    return

                qualified_name = _get_qualified_name(node.target)
                global_name = add_global(qualified_name, node.target)
                # special case for getattr: node.args could be 2-argument or 3-argument
                # 2-argument: attribute access; 3-argument: fall through to attrib function call with default value
-                if global_name == 'getattr' and \
-                isinstance(node.args, tuple) and \
-                isinstance(node.args[1], str) and \
-                node.args[1].isidentifier() and \
-                len(node.args) == 2:
+                if (
+                    global_name == "getattr"
+                    and isinstance(node.args, tuple)
+                    and isinstance(node.args[1], str)
+                    and node.args[1].isidentifier()
+                    and len(node.args) == 2
+                ):
                    body.append(
-                        f'{repr(node)}{maybe_type_annotation} = {_format_target(repr(node.args[0]), node.args[1])}')
+                        f"{repr(node)}{maybe_type_annotation} = {_format_target(repr(node.args[0]), node.args[1])}"
+                    )
                    return
                body.append(
-                    f'{repr(node)}{maybe_type_annotation} = {global_name}({_format_args(node.args, node.kwargs)})')
-                if node.meta.get('is_wrapped', False):
+                    f"{repr(node)}{maybe_type_annotation} = {global_name}({_format_args(node.args, node.kwargs)})"
+                )
+                if node.meta.get("is_wrapped", False):
                    wrapped_fns.setdefault(global_name)
                return
-            elif node.op == 'call_module':
+            elif node.op == "call_module":
                assert isinstance(node.target, str)
-                body.append(f'{repr(node)}{maybe_type_annotation} = '
-                            f'{_format_target(root_module, node.target)}({_format_args(node.args, node.kwargs)})')
+                body.append(
+                    f"{repr(node)}{maybe_type_annotation} = "
+                    f"{_format_target(root_module, node.target)}({_format_args(node.args, node.kwargs)})"
+                )
                return
-            elif node.op == 'get_attr':
+            elif node.op == "get_attr":
                assert isinstance(node.target, str)
-                body.append(f'{repr(node)}{maybe_type_annotation} = {_format_target(root_module, node.target)}')
+                body.append(f"{repr(node)}{maybe_type_annotation} = {_format_target(root_module, node.target)}")
                return
-            elif node.op == 'output':
+            elif node.op == "output":
                if node.type is not None:
                    maybe_return_annotation[0] = f" -> {type_repr(node.type)}"
                body.append(self.generate_output(node.args[0]))
                return
-            raise NotImplementedError(f'node: {node.op} {node.target}')
+            raise NotImplementedError(f"node: {node.op} {node.target}")

        # Modified for activation checkpointing
        ckpt_func = []
@@ -432,13 +445,13 @@ class ActivationCheckpointCodeGen(CodeGen):
            # If the Graph has no non-placeholder nodes, no lines for the body
            # have been emitted. To continue to have valid Python code, emit a
            # single pass statement
-            body.append('pass\n')
+            body.append("pass\n")

        if len(wrapped_fns) > 0:
-            wrap_name = add_global('wrap', torch.fx.wrap)
-            wrap_stmts = '\n'.join([f'{wrap_name}("{name}")' for name in wrapped_fns])
+            wrap_name = add_global("wrap", torch.fx.wrap)
+            wrap_stmts = "\n".join([f'{wrap_name}("{name}")' for name in wrapped_fns])
        else:
-            wrap_stmts = ''
+            wrap_stmts = ""

        if self._body_transformer:
            body = self._body_transformer(body)
@@ -447,11 +460,11 @@ class ActivationCheckpointCodeGen(CodeGen):
            add_global(name, value)

        prologue = self.gen_fn_def(free_vars, maybe_return_annotation[0])
-        prologue = ''.join(ckpt_func) + prologue
+        prologue = "".join(ckpt_func) + prologue
        prologue = prologue

-        code = ''.join(body)
-        code = '\n'.join('    ' + line for line in code.split('\n'))
+        code = "".join(body)
+        code = "\n".join("    " + line for line in code.split("\n"))
        fn_code = f"""
 {wrap_stmts}
 {prologue}