diff --git a/docs/sidebars.json b/docs/sidebars.json index 2732704a5..dd3a4e5ec 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -58,7 +58,8 @@ ] }, "features/pipeline_parallel", - "features/nvme_offload" + "features/nvme_offload", + "features/cluster_utils" ] }, { diff --git a/docs/source/en/features/cluster_utils.md b/docs/source/en/features/cluster_utils.md new file mode 100644 index 000000000..1903d64d2 --- /dev/null +++ b/docs/source/en/features/cluster_utils.md @@ -0,0 +1,32 @@ +# Cluster Utilities + +Author: [Hongxin Liu](https://github.com/ver217) + +**Prerequisite:** +- [Distributed Training](../concepts/distributed_training.md) + +## Introduction + +We provide a utility class `colossalai.cluster.DistCoordinator` to coordinate distributed training. It is useful for querying information about the cluster, such as the number of nodes and the number of processes per node. + +## API Reference + +{{ autodoc:colossalai.cluster.DistCoordinator }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }} + +{{ autodoc:colossalai.cluster.DistCoordinator.destroy }} + +{{ autodoc:colossalai.cluster.DistCoordinator.block_all }} + +{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }} diff --git a/docs/source/zh-Hans/features/cluster_utils.md b/docs/source/zh-Hans/features/cluster_utils.md new file mode 100644 index 000000000..ca787a869 --- /dev/null +++ b/docs/source/zh-Hans/features/cluster_utils.md @@ -0,0 +1,32 @@ +# 集群实用程序 + +作者: [Hongxin Liu](https://github.com/ver217) + +**前置教程:** +- [分布式训练](../concepts/distributed_training.md) + +## 引言 + +我们提供了一个实用程序类 `colossalai.cluster.DistCoordinator` 
来协调分布式训练。它对于获取有关集群的各种信息很有用,例如节点数、每个节点的进程数等。 + +## API 参考 + +{{ autodoc:colossalai.cluster.DistCoordinator }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }} + +{{ autodoc:colossalai.cluster.DistCoordinator.destroy }} + +{{ autodoc:colossalai.cluster.DistCoordinator.block_all }} + +{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }}