Mirror of https://github.com/hpcaitech/ColossalAI.git
[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
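The file below is ColossalAI's adaptation of torch.fx's split_module pass. As a point of reference, here is a minimal usage sketch against the upstream torch.fx.passes.split_module API (the ColossalAI variant in this diff additionally supports a merge_output flag; the module M, the callback, and the partition assignment here are illustrative assumptions, not code from the repo):

import torch
import torch.fx
from torch.fx.passes.split_module import split_module

class M(torch.nn.Module):
    def forward(self, x):
        a = x + 1
        b = a * 2
        return b - 3

m = M()
gm = torch.fx.symbolic_trace(m)

# Assign each visited op to partition 0 or 1; nodes with the same id end up
# in the same submodule (named submod_0, submod_1, ... by the pass).
seen = {"count": 0}
def split_callback(node: torch.fx.Node) -> int:
    seen["count"] += 1
    return 0 if seen["count"] <= 1 else 1

split = split_module(gm, m, split_callback)
print(split.graph)  # base graph calling submod_0 then submod_1 in sequence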
@@ -25,12 +25,14 @@ class Partition:
         self.targets: Dict[str, Any] = {}
 
     def __repr__(self) -> str:
-        return f"name: {self.name},\n" \
-            f" nodes: {self.node_names},\n" \
-            f" inputs: {self.inputs},\n" \
-            f" outputs: {self.outputs},\n" \
-            f" partitions dependent on: {self.partitions_dependent_on},\n" \
-            f" partition dependents: {self.partition_dependents}"
+        return (
+            f"name: {self.name},\n"
+            f" nodes: {self.node_names},\n"
+            f" inputs: {self.inputs},\n"
+            f" outputs: {self.outputs},\n"
+            f" partitions dependent on: {self.partitions_dependent_on},\n"
+            f" partition dependents: {self.partition_dependents}"
+        )
 
 
 # Creates subgraphs out of main graph
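Note: the hunk above changes only formatting, not behavior. Backslash continuations become a parenthesized block of implicitly concatenated f-strings, the style the updated pre-commit hook (black) enforces. A minimal sketch of the equivalence (the strings are made up):

s_old = "name: m,\n" \
        " nodes: [a, b]"
s_new = (
    "name: m,\n"
    " nodes: [a, b]"
)
assert s_old == s_new  # identical result; only the source layout differs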
@@ -117,10 +119,9 @@ def split_module(
     partitions: Dict[str, Partition] = {}
     orig_nodes: Dict[str, torch.fx.node.Node] = {}
 
-    def record_cross_partition_use(def_node: torch.fx.node.Node,
-                                   use_node: Optional[torch.fx.node.Node]):    # noqa: B950
-        def_partition_name = getattr(def_node, '_fx_partition', None)
-        use_partition_name = getattr(use_node, '_fx_partition', None)
+    def record_cross_partition_use(def_node: torch.fx.node.Node, use_node: Optional[torch.fx.node.Node]):  # noqa: B950
+        def_partition_name = getattr(def_node, "_fx_partition", None)
+        use_partition_name = getattr(use_node, "_fx_partition", None)
 
         if def_partition_name != use_partition_name:
             if def_partition_name is not None:
                 def_partition = partitions[def_partition_name]
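record_cross_partition_use is driven by torch.fx.graph.map_arg, which applies a function to every Node found inside a possibly nested args/kwargs structure. A small sketch of that traversal pattern (the module M is a made-up example, not from the repo):

import torch
import torch.fx

class M(torch.nn.Module):
    def forward(self, x):
        a = x + 1
        return a * 2

gm = torch.fx.symbolic_trace(M())
for node in gm.graph.nodes:
    producers = []
    # map_arg calls the lambda once per Node in node.args, however nested;
    # split_module uses the same traversal to record cross-partition uses.
    torch.fx.graph.map_arg(node.args, lambda def_node: producers.append(def_node))
    print(node.name, "<-", [p.name for p in producers])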
@@ -134,7 +135,7 @@ def split_module(
             if def_partition_name is not None:
                 use_partition.partitions_dependent_on.setdefault(def_partition_name)
 
-    def record_output(def_node: torch.fx.node.Node, use_node: Optional[torch.fx.node.Node]):    # noqa: B950
+    def record_output(def_node: torch.fx.node.Node, use_node: Optional[torch.fx.node.Node]):  # noqa: B950
         def_partition_name = getattr(def_node, "_fx_partition", None)
         use_partition_name = getattr(use_node, "_fx_partition", None)
         if def_partition_name != use_partition_name:
@@ -161,7 +162,7 @@ def split_module(
 
         if node.op in ["placeholder"]:
             continue
-        if node.op == 'output':
+        if node.op == "output":
            if merge_output:
                torch.fx.graph.map_arg(node.args[0], lambda n: record_output(n, node.prev))
            else:
@@ -178,7 +179,7 @@ def split_module(
         node._fx_partition = partition_name
 
         torch.fx.graph.map_arg(node.args, lambda def_node: record_cross_partition_use(def_node, node))
-        torch.fx.graph.map_arg(node.kwargs, lambda def_node: record_cross_partition_use(def_node, node))    # noqa: B950
+        torch.fx.graph.map_arg(node.kwargs, lambda def_node: record_cross_partition_use(def_node, node))  # noqa: B950
 
     # find partitions with no dependencies
     root_partitions: List[str] = []
@@ -208,7 +209,7 @@ def split_module(
 
     # Transform nodes and collect targets for partition's submodule
     for node in m.graph.nodes:
-        if hasattr(node, '_fx_partition'):
+        if hasattr(node, "_fx_partition"):
             partition = partitions[node._fx_partition]
 
             # swap out old graph nodes in kw/args with references to new nodes in this submodule
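fx Nodes are plain Python objects, so the pass can tag each one with an ad-hoc _fx_partition attribute and later test for it with hasattr, exactly as this hunk shows. A tiny sketch:

import torch.fx

g = torch.fx.Graph()
n = g.placeholder("x")
n._fx_partition = "0"               # ad-hoc tag, as split_module applies it
print(hasattr(n, "_fx_partition"))  # True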
@@ -216,25 +217,24 @@ def split_module(
             gathered_args = torch.fx.graph.map_arg(node.args, lambda n: environment[n])
             gathered_kwargs = torch.fx.graph.map_arg(node.kwargs, lambda n: environment[n])
 
-            if node.op not in ['call_module', 'get_attr']:
+            if node.op not in ["call_module", "get_attr"]:
                 target = node.target
             else:
-                target_atoms = node.target.split('.')
+                target_atoms = node.target.split(".")
                 target_attr = m
                 for atom in target_atoms:
                     if not hasattr(target_attr, atom):
-                        raise RuntimeError(f'Operator target {node.target} not found!')
+                        raise RuntimeError(f"Operator target {node.target} not found!")
                     target_attr = getattr(target_attr, atom)
                 # target = target_atoms[-1]
-                target = '_'.join(target_atoms)
+                target = "_".join(target_atoms)
                 partition.targets[target] = target_attr
 
             assert isinstance(gathered_args, tuple)
             assert isinstance(gathered_kwargs, dict)
-            new_node = partition.graph.create_node(op=node.op,
-                                                   target=target,
-                                                   args=gathered_args,
-                                                   kwargs=gathered_kwargs)
+            new_node = partition.graph.create_node(
+                op=node.op, target=target, args=gathered_args, kwargs=gathered_kwargs
+            )
             new_node.meta = node.meta.copy()
             partition.environment[node] = new_node
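The else branch above resolves a dotted call_module/get_attr target one attribute at a time and flattens the dots to underscores so the target can live directly on the partition submodule. A standalone sketch of that resolution (the root module and target name are invented for illustration):

import torch

root = torch.nn.Module()
root.block = torch.nn.Module()
root.block.linear = torch.nn.Linear(4, 4)

target = "block.linear"            # hypothetical dotted target
target_atoms = target.split(".")
target_attr = root
for atom in target_atoms:
    if not hasattr(target_attr, atom):
        raise RuntimeError(f"Operator target {target} not found!")
    target_attr = getattr(target_attr, atom)

print("_".join(target_atoms), "->", target_attr)  # block_linear -> Linear(...)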
@@ -243,14 +243,14 @@ def split_module(
     base_mod_graph: torch.fx.graph.Graph = torch.fx.graph.Graph()
     base_mod_attrs: Dict[str, torch.fx.graph_module.GraphModule] = {}
     for node in m.graph.nodes:
-        if node.op == 'placeholder':
-            if version.parse(torch.__version__) < version.parse('1.11.0'):
+        if node.op == "placeholder":
+            if version.parse(torch.__version__) < version.parse("1.11.0"):
                 base_mod_env[node.name] = base_mod_graph.placeholder(node.target, type_expr=node.type)
             else:
                 default_value = node.args[0] if len(node.args) > 0 else inspect.Signature.empty
-                base_mod_env[node.name] = base_mod_graph.placeholder(node.target,
-                                                                     type_expr=node.type,
-                                                                     default_value=default_value)
+                base_mod_env[node.name] = base_mod_graph.placeholder(
+                    node.target, type_expr=node.type, default_value=default_value
+                )
             base_mod_env[node.name].meta = node.meta.copy()
 
     # Do some things iterating over the partitions in topological order again:
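The gate above exists because, per this code's own check, Graph.placeholder only accepts default_value on torch >= 1.11. A minimal sketch of the same packaging.version comparison:

import torch
from packaging import version

# Two-branch dispatch mirroring the hunk above.
if version.parse(torch.__version__) < version.parse("1.11.0"):
    print("old torch: placeholder(target, type_expr=...)")
else:
    print("newer torch: placeholder(target, type_expr=..., default_value=...)")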
@@ -264,13 +264,14 @@ def split_module(
 
         # Set correct output values
         output_vals = tuple(partition.environment[orig_nodes[name]] for name in partition.outputs)
-        output_vals = output_vals[0] if len(output_vals) == 1 else output_vals    # type: ignore[assignment]
+        output_vals = output_vals[0] if len(output_vals) == 1 else output_vals  # type: ignore[assignment]
         partition.graph.output(output_vals)
 
         # Construct GraphModule for this partition
-        submod_name = f'submod_{partition_name}'
-        base_mod_attrs[submod_name] = torch.fx.graph_module.GraphModule(partition.targets,
-                                                                        partition.graph)    # noqa: B950
+        submod_name = f"submod_{partition_name}"
+        base_mod_attrs[submod_name] = torch.fx.graph_module.GraphModule(
+            partition.targets, partition.graph
+        )  # noqa: B950
 
         # Emit call in base graph to this submodule
         output_val = base_mod_graph.call_module(submod_name, tuple(base_mod_env[name] for name in partition.inputs))
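torch.fx.GraphModule accepts either an nn.Module or a plain dict mapping qualified names to attributes as its root, which is why partition.targets plus partition.graph is enough to materialize each submod_<name>. A self-contained sketch (the graph built here is a made-up example):

import torch
import torch.fx

graph = torch.fx.Graph()
x = graph.placeholder("x")
w = graph.get_attr("weight")
graph.output(graph.call_function(torch.add, (x, w)))

# A dict of qualified-name -> attribute stands in for an nn.Module root,
# mirroring GraphModule(partition.targets, partition.graph) above.
gm = torch.fx.GraphModule({"weight": torch.zeros(3)}, graph)
print(gm(torch.ones(3)))  # tensor([1., 1., 1.])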
@@ -278,15 +279,15 @@ def split_module(
             # Unpack multiple return values from submodule
             output_val_proxy = torch.fx.proxy.Proxy(output_val)
             for i, output_name in enumerate(partition.outputs):
-                base_mod_env[output_name] = output_val_proxy[i].node    # type: ignore[index]
+                base_mod_env[output_name] = output_val_proxy[i].node  # type: ignore[index]
         else:
             if not partition.outputs:
                 continue
             base_mod_env[list(partition.outputs)[0]] = output_val
 
     for node in m.graph.nodes:
-        if node.op == 'output':
-            base_mod_graph.output(torch.fx.graph.map_arg(node.args[0], lambda n: base_mod_env[n.name]))    # noqa: B950
+        if node.op == "output":
+            base_mod_graph.output(torch.fx.graph.map_arg(node.args[0], lambda n: base_mod_env[n.name]))  # noqa: B950
 
     for partition_name in sorted_partitions:
         partition = partitions[partition_name]
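The unpacking above leans on torch.fx.proxy.Proxy: indexing a Proxy records a call_function node for operator.getitem in the underlying graph, and .node hands the recorded node back. A minimal sketch (assumes a recent torch where Proxy can be constructed without an explicit tracer):

import torch.fx

graph = torch.fx.Graph()
tup = graph.placeholder("tup")
proxy = torch.fx.Proxy(tup)   # tracer defaults to one appending to tup's graph
first = proxy[0].node         # records call_function(operator.getitem, (tup, 0))
print(first.op, first.target) # call_function <built-in function getitem>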