[doc] polish shardformer doc (#4779)

* fix example format in docstring

* polish shardformer doc
Baizhou Zhang
2023-09-26 10:57:47 +08:00
committed by GitHub
parent 26cd6d850c
commit a2db75546d
8 changed files with 215 additions and 68 deletions


@@ -229,16 +229,17 @@ class GeminiPlugin(DPPluginBase):
"""
Plugin for Gemini.
Example:
- >>> from colossalai.booster import Booster
- >>> from colossalai.booster.plugin import GeminiPlugin
- >>>
- >>> model, train_dataset, optimizer, criterion = ...
- >>> plugin = GeminiPlugin()
- >>> train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
- >>> booster = Booster(plugin=plugin)
- >>> model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```python
+ from colossalai.booster import Booster
+ from colossalai.booster.plugin import GeminiPlugin
+ model, train_dataset, optimizer, criterion = ...
+ plugin = GeminiPlugin()
+ train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
+ booster = Booster(plugin=plugin)
+ model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```
Args:
chunk_config_dict (dict, optional): chunk configuration dictionary.
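
For context, a minimal training-step sketch (illustrative only, not part of this commit) showing where the boosted objects from the docstring example above would typically be used; `booster.backward` is assumed here to perform the plugin-aware backward pass:

```python
# Illustrative sketch, not from the diff: a plain training loop over the
# objects returned by booster.boost(...) in the docstring example above,
# assuming the dataloader yields (inputs, labels) pairs.
for batch, labels in train_dataloader:
    outputs = model(batch)
    loss = criterion(outputs, labels)
    booster.backward(loss, optimizer)  # plugin-aware backward pass
    optimizer.step()
    optimizer.zero_grad()
```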


@@ -266,16 +266,17 @@ class HybridParallelPlugin(PipelinePluginBase):
Tensor parallel, pipeline parallel and data parallel(DDP/ZeRO) can be picked and combined in this plugin.
The size of tp and pp should be passed in by user, then the size of dp is automatically calculated from dp_size = world_size / (tp_size * pp_size).
Example:
- >>> from colossalai.booster import Booster
- >>> from colossalai.booster.plugin import HybridParallelPlugin
- >>> model, train_dataset, optimizer, criterion = ...
- >>> plugin = HybridParallelPlugin(tp_size=2, pp_size=2)
- >>> train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
- >>> booster = Booster(plugin=plugin)
- >>> model, optimizer, criterion, train_dataloader, _ = booster.boost(model, optimizer, criterion, train_dataloader)
+ ```python
+ from colossalai.booster import Booster
+ from colossalai.booster.plugin import HybridParallelPlugin
+ model, train_dataset, optimizer, criterion = ...
+ plugin = HybridParallelPlugin(tp_size=2, pp_size=2)
+ train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
+ booster = Booster(plugin=plugin)
+ model, optimizer, criterion, train_dataloader, _ = booster.boost(model, optimizer, criterion, train_dataloader)
+ ```
Args:
tp_size (int): The size of tensor parallelism. Tensor parallelism will not be used when tp_size is set to 1.
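
The dp_size relationship described in the docstring above can be checked with a quick sketch (illustrative only; the world size of 8 is an assumption):

```python
# Illustrative only: dp_size is derived from the total number of processes
# and the tensor-/pipeline-parallel sizes, as the docstring above states.
world_size = 8            # assumed number of processes in the job
tp_size, pp_size = 2, 2   # matches the docstring example
dp_size = world_size // (tp_size * pp_size)
print(dp_size)            # -> 2
```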


@@ -213,16 +213,17 @@ class LowLevelZeroPlugin(DPPluginBase):
"""
Plugin for low level zero.
Example:
- >>> from colossalai.booster import Booster
- >>> from colossalai.booster.plugin import LowLevelZeroPlugin
- >>>
- >>> model, train_dataset, optimizer, criterion = ...
- >>> plugin = LowLevelZeroPlugin()
- >>> train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
- >>> booster = Booster(plugin=plugin)
- >>> model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```python
+ from colossalai.booster import Booster
+ from colossalai.booster.plugin import LowLevelZeroPlugin
+ model, train_dataset, optimizer, criterion = ...
+ plugin = LowLevelZeroPlugin()
+ train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
+ booster = Booster(plugin=plugin)
+ model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```
Args:
strage (int, optional): ZeRO stage. Defaults to 1.


@@ -130,16 +130,17 @@ class TorchDDPPlugin(DPPluginBase):
"""
Plugin for PyTorch DDP.
Example:
- >>> from colossalai.booster import Booster
- >>> from colossalai.booster.plugin import TorchDDPPlugin
- >>>
- >>> model, train_dataset, optimizer, criterion = ...
- >>> plugin = TorchDDPPlugin()
- >>> train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
- >>> booster = Booster(plugin=plugin)
- >>> model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```python
+ from colossalai.booster import Booster
+ from colossalai.booster.plugin import TorchDDPPlugin
+ model, train_dataset, optimizer, criterion = ...
+ plugin = TorchDDPPlugin()
+ train_dataloader = plugin.prepare_dataloader(train_dataset, batch_size=8)
+ booster = Booster(plugin=plugin)
+ model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```
Args:
broadcast_buffers (bool, optional): Whether to broadcast buffers in the beginning of training. Defaults to True.


@@ -143,16 +143,17 @@ class TorchFSDPPlugin(DPPluginBase):
"""
Plugin for PyTorch FSDP.
Example:
- >>> from colossalai.booster import Booster
- >>> from colossalai.booster.plugin import TorchFSDPPlugin
- >>>
- >>> model, train_dataset, optimizer, criterion = ...
- >>> plugin = TorchFSDPPlugin()
- >>> train_dataloader = plugin.prepare_train_dataloader(train_dataset, batch_size=8)
- >>> booster = Booster(plugin=plugin)
- >>> model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```python
+ from colossalai.booster import Booster
+ from colossalai.booster.plugin import TorchFSDPPlugin
+ model, train_dataset, optimizer, criterion = ...
+ plugin = TorchFSDPPlugin()
+ train_dataloader = plugin.prepare_train_dataloader(train_dataset, batch_size=8)
+ booster = Booster(plugin=plugin)
+ model, optimizer, train_dataloader, criterion = booster.boost(model, optimizer, train_dataloader, criterion)
+ ```
Args:
See https://pytorch.org/docs/stable/fsdp.html for details.


@@ -20,14 +20,16 @@ class DistCoordinator(metaclass=SingletonMeta):
- master: the process with rank 0
- node master: the process with local rank 0 on the current node
Example:
- >>> from colossalai.cluster.dist_coordinator import DistCoordinator
- >>> coordinator = DistCoordinator()
- >>>
- >>> if coordinator.is_master():
- >>>     do_something()
- >>>
- >>> coordinator.print_on_master('hello world')
+ ```python
+ from colossalai.cluster.dist_coordinator import DistCoordinator
+ coordinator = DistCoordinator()
+ if coordinator.is_master():
+     do_something()
+ coordinator.print_on_master('hello world')
+ ```
Attributes:
rank (int): the rank of the current process
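
The master / node-master distinction described above boils down to rank 0 versus local rank 0; a minimal sketch (not ColossalAI code) using the environment variables set by torchrun:

```python
# Illustrative sketch, not part of this commit: how "master" and "node master"
# map onto the global rank and the per-node local rank.
import os

rank = int(os.environ.get("RANK", 0))              # global rank across the job
local_rank = int(os.environ.get("LOCAL_RANK", 0))  # rank within the current node

is_master = rank == 0             # "master": rank 0 of the whole job
is_node_master = local_rank == 0  # "node master": local rank 0 on this node
```
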
@@ -131,11 +133,13 @@ class DistCoordinator(metaclass=SingletonMeta):
other processes in the same process group. This is often useful when downloading is required
as we only want to download in one process to prevent file corruption.
Example:
- >>> from colossalai.cluster import DistCoordinator
- >>> dist_coordinator = DistCoordinator()
- >>> with dist_coordinator.priority_execution():
- >>>     dataset = CIFAR10(root='./data', download=True)
+ ```python
+ from colossalai.cluster import DistCoordinator
+ dist_coordinator = DistCoordinator()
+ with dist_coordinator.priority_execution():
+     dataset = CIFAR10(root='./data', download=True)
+ ```
Args:
executor_rank (int): the process rank to execute without blocking, all other processes will be blocked
@@ -174,13 +178,14 @@ class DistCoordinator(metaclass=SingletonMeta):
"""
A function wrapper that only executes the wrapped function on the master process (rank 0).
Example:
- >>> from colossalai.cluster import DistCoordinator
- >>> dist_coordinator = DistCoordinator()
- >>>
- >>> @dist_coordinator.on_master_only()
- >>> def print_on_master(msg):
- >>>     print(msg)
+ ```python
+ from colossalai.cluster import DistCoordinator
+ dist_coordinator = DistCoordinator()
+ @dist_coordinator.on_master_only()
+ def print_on_master(msg):
+     print(msg)
+ ```
"""
is_master = self.is_master(process_group)