[graph] improve the graph building. (#1157)

This commit is contained in:
Jiarui Fang
2022-06-22 16:47:20 +08:00
committed by GitHub
parent 22717a856f
commit 07f9c781f9
7 changed files with 79 additions and 103 deletions

View File

@@ -17,6 +17,13 @@ class _DistSpec:
dist_placement_pattern: DistPlacementPattern,
process_group: Optional[ProcessGroup] = None,
**meta_info):
"""_DistSpec, Distributed Specification
Args:
dist_placement_pattern (DistPlacementPattern): the pattern describing how tensors are distributed among processes.
The dist_placement_pattern is picked from a limited set, now including two patterns: replicate and shard.
process_group (Optional[ProcessGroup], optional): the process group contains processes. Defaults to None.
"""
self.placement = dist_placement_pattern
self.process_group = process_group
for k, v in meta_info.items():
@@ -37,6 +44,7 @@ class _DistSpec:
res += f'{attr}: {str(getattr(self, attr))}\n\t'
return res
def replicate(process_group: Optional[ProcessGroup] = None) -> _DistSpec:
    """Build a distribution spec that uses the REPLICATE placement pattern.

    Args:
        process_group (Optional[ProcessGroup], optional): the process group the
            spec refers to. ``None`` means the global process group.

    Returns:
        _DistSpec: a spec constructed with ``DistPlacementPattern.REPLICATE``
            and the given process group.
    """
    pattern = DistPlacementPattern.REPLICATE
    return _DistSpec(pattern, process_group)

View File

@@ -1,3 +0,0 @@
from .graph_node import GraphNode, GraphOpNode, GraphContext, GraphGlobalEnv
# Names imported from .graph_node that this package exposes as its public API.
__all__ = ['GraphNode', 'GraphOpNode', 'GraphContext', 'GraphGlobalEnv']

View File

@@ -1,97 +0,0 @@
from colossalai.tensor import ColoTensor
from colossalai.context.singleton_meta import SingletonMeta
class GraphGlobalEnv(metaclass=SingletonMeta):
    """Shared bookkeeping used while a computing graph is being built.

    Attributes are read and written through ``GraphGlobalEnv()`` calls
    elsewhere in this file; the ``SingletonMeta`` metaclass is presumably what
    makes every such call hand back the same object (confirm against
    ``colossalai.context.singleton_meta``).
    """

    def __init__(self) -> None:
        # Last id handed out; -1 means no id has been issued yet.
        self.node_id = -1
        # Op nodes registered via add_graph_node.
        self.graph_node_list = []
        # Flag consulted by graph nodes before they record any link.
        self.graph_building = False

    def get_node_id(self):
        """Increment the id counter and return the new value.

        Returns:
            int: the freshly issued id (0 on the first call after the counter
                was set to -1).
        """
        issued = self.node_id + 1
        self.node_id = issued
        return issued

    def add_graph_node(self, node):
        """Append ``node`` to the list of registered graph nodes.

        Args:
            node: the node object to record.
        """
        registry = self.graph_node_list
        registry.append(node)
class GraphContext():
    """
    Building the computing graph under the context

    Entering the context switches graph building on in ``GraphGlobalEnv`` and
    starts a fresh node list. Leaving it switches graph building off, resets
    the node id counter to -1 and stores the collected node list on this
    instance as ``graph_nodes``.

    ``__enter__`` returns the context object itself so that the collected
    nodes are reachable through the ``as`` target:

    >>> with GraphContext() as ctx:
    >>>     output = model(colo_input_tensor)
    >>> ctx.graph_nodes
    """
    # Class-level default; replaced by an instance attribute in __exit__.
    graph_nodes = []

    def __enter__(self):
        """Turn graph building on and start an empty node list.

        Returns:
            GraphContext: this context object, for use as the ``as`` target.
        """
        env = GraphGlobalEnv()
        env.graph_building = True
        env.graph_node_list = []
        return self

    def __exit__(self, *exc_info):
        """Turn graph building off and keep the nodes collected so far.

        Returns ``None``, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        env = GraphGlobalEnv()
        env.graph_building = False
        env.node_id = -1
        self.graph_nodes = env.graph_node_list
class GraphNode:
    """A node of the computing graph with links to its neighbours.

    Each node takes its ``id`` from ``GraphGlobalEnv().get_node_id()`` when it
    is created. Links are only recorded while ``GraphGlobalEnv().graph_building``
    is true.
    """

    def __init__(self) -> None:
        self.prev_nodes = []
        self.post_nodes = []
        self.id = GraphGlobalEnv().get_node_id()

    def add_prev_node(self, node):
        """Record ``node`` in ``prev_nodes``; a no-op when not graph building."""
        if not GraphGlobalEnv().graph_building:
            return
        self.prev_nodes.append(node)

    def add_post_node(self, node):
        """Record ``node`` in ``post_nodes``; a no-op when not graph building."""
        if not GraphGlobalEnv().graph_building:
            return
        self.post_nodes.append(node)

    def post_node_empty(self) -> bool:
        """Return True when no post node has been recorded."""
        return not self.post_nodes
class GraphOpNode(GraphNode):
    """A graph node for an operator, registered in the global node list.

    Args:
        op_type: value stored as ``_op_type`` and shown by :meth:`print`.
        param_list: value stored as ``_param_list``.
    """

    def __init__(self, op_type, param_list) -> None:
        super().__init__()
        self._op_type = op_type
        self._param_list = param_list
        # Every op node registers itself on construction.
        GraphGlobalEnv().add_graph_node(self)

    def add_prev_tensor(self, colo_tensor: ColoTensor):
        r"""
        Link this op node to the op nodes recorded on an input tensor.
        Op1 <- Activation (colo_tensor) Op2
        Op1 <- Op2

        Every node in the tensor's ``_graph_node.prev_nodes`` is added as a
        prev node of this op, and this op is added as its post node. A tensor
        without a ``_graph_node`` gets a fresh ``GraphNode`` first. Does
        nothing when graph building is off.
        """
        if not GraphGlobalEnv().graph_building:
            return
        assert isinstance(colo_tensor, ColoTensor)
        tensor_node = colo_tensor._graph_node
        if tensor_node is None:
            tensor_node = GraphNode()
            colo_tensor._graph_node = tensor_node
        for producer in tensor_node.prev_nodes:
            self.add_prev_node(producer)
            producer.add_post_node(self)

    def add_post_tensor(self, colo_tensor: ColoTensor):
        """
        Record this op node as a prev node of an output tensor's graph node.
        Op <- Activation (colo_tensor)

        A tensor without a ``_graph_node`` gets a fresh ``GraphNode`` first.
        Does nothing when graph building is off.
        """
        if not GraphGlobalEnv().graph_building:
            return
        assert isinstance(colo_tensor, ColoTensor)
        tensor_node = colo_tensor._graph_node
        if tensor_node is None:
            tensor_node = GraphNode()
            colo_tensor._graph_node = tensor_node
        tensor_node.add_prev_node(self)

    def print(self):
        """Print the op type, this node's id and the ids of its post and prev nodes."""
        print(
            f'GraphOpNode {self._op_type} {self.id}, post nodes {[node.id for node in self.post_nodes]}, prev node number {[node.id for node in self.prev_nodes]}'
        )