diff --git a/docs/source/en/features/distributed_optimizers.md b/docs/source/en/features/distributed_optimizers.md index f95b23304..279bc8f9d 100644 --- a/docs/source/en/features/distributed_optimizers.md +++ b/docs/source/en/features/distributed_optimizers.md @@ -87,44 +87,42 @@ optim = DistGaloreAwamW( ## Plugin compatibility <table> <tr> - <th nowrap="nowrap">Model/Feature</th> - <th nowrap="nowrap" align="center" title="Lamb">Lamb</th> - <th nowrap="nowrap" align="center" title="GaLore">GaLore</th> - <th nowrap="nowrap" align="center" title="Adafactor">Adafactor</th> - <th nowrap="nowrap" align="center" title="CAME">CAME</th> + <th nowrap="nowrap">Optimizer/Plugin</th> + <th nowrap="nowrap" align="center">Hybrid Parallel Plugin</th> + <th nowrap="nowrap" align="center">Low Level Zero Plugin</th> + <th nowrap="nowrap" align="center">Torch DDP Plugin</th> + <th nowrap="nowrap" align="center">Gemini Plugin</th> + <th nowrap="nowrap" align="center">Moe Hybrid Plugin</th> </tr> <tr> - <td nowrap="nowrap">Hybrid Parallel<br />Plugin</td> + <td nowrap="nowrap" align="center" title="Lamb">Lamb</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Low Level Zero<br />Plugin</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">❌</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Torch DDP<br />Plugin</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Gemini<br />Plugin</td> - <td nowrap="nowrap" align="center">❌</td> - <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> </tr> <tr> - <td nowrap="nowrap">Moe Hybrid<br />Plugin</td> + <td nowrap="nowrap" align="center" title="GaLore">GaLore</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> + </tr> + <tr> + <td nowrap="nowrap" align="center" title="Adafactor">Adafactor</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">❌</td> + <td nowrap="nowrap" align="center">❌</td> + </tr> + <tr> + <td nowrap="nowrap" align="center" title="CAME">CAME</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> </tr> diff --git a/docs/source/en/features/shardformer.md b/docs/source/en/features/shardformer.md index 68d310f5c..40b8954b5 100644 --- a/docs/source/en/features/shardformer.md +++ b/docs/source/en/features/shardformer.md @@ -55,7 +55,7 @@ Model/Feature Compatibility Matrix: <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">❌</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> </tr> <tr> diff --git a/docs/source/zh-Hans/features/distributed_optimizers.md b/docs/source/zh-Hans/features/distributed_optimizers.md index 7a7068077..5761f8c55 100644 --- a/docs/source/zh-Hans/features/distributed_optimizers.md +++ b/docs/source/zh-Hans/features/distributed_optimizers.md @@ -84,44 +84,42 @@ optim = DistGaloreAwamW( ## 兼容性 <table> <tr> - <th nowrap="nowrap">Model/Feature</th> - <th nowrap="nowrap" align="center" title="Lamb">Lamb</th> - <th nowrap="nowrap" align="center" title="GaLore">GaLore</th> - <th nowrap="nowrap" align="center" title="Adafactor">Adafactor</th> - <th nowrap="nowrap" align="center" title="CAME">CAME</th> + <th nowrap="nowrap">Optimizer/Plugin</th> + <th nowrap="nowrap" align="center">Hybrid Parallel Plugin</th> + <th nowrap="nowrap" align="center">Low Level Zero Plugin</th> + <th nowrap="nowrap" align="center">Torch DDP Plugin</th> + <th nowrap="nowrap" align="center">Gemini Plugin</th> + <th nowrap="nowrap" align="center">Moe Hybrid Plugin</th> </tr> <tr> - <td nowrap="nowrap">Hybrid Parallel<br />Plugin</td> + <td nowrap="nowrap" align="center" title="Lamb">Lamb</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Low Level Zero<br />Plugin</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">❌</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Torch DDP<br />Plugin</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">✔️</td> - </tr> - <tr> - <td nowrap="nowrap">Gemini<br />Plugin</td> - <td nowrap="nowrap" align="center">❌</td> - <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> </tr> <tr> - <td nowrap="nowrap">Moe Hybrid<br />Plugin</td> + <td nowrap="nowrap" align="center" title="GaLore">GaLore</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> + </tr> + <tr> + <td nowrap="nowrap" align="center" title="Adafactor">Adafactor</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">❌</td> + <td nowrap="nowrap" align="center">❌</td> + </tr> + <tr> + <td nowrap="nowrap" align="center" title="CAME">CAME</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> <td nowrap="nowrap" align="center">❌</td> </tr> @@ -130,6 +128,7 @@ optim = DistGaloreAwamW( </tr> </table> + <!-- doc-test-command: colossalai run --nproc_per_node 4 distributed_optimizers.py --> ## API 参考 diff --git a/docs/source/zh-Hans/features/shardformer.md b/docs/source/zh-Hans/features/shardformer.md index 00e1a13d6..02290f3d6 100644 --- a/docs/source/zh-Hans/features/shardformer.md +++ b/docs/source/zh-Hans/features/shardformer.md @@ -51,7 +51,7 @@ Author: [Baizhou Zhang](https://github.com/Fridge003), [Bin Jia](https://github. <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">✔️</td> - <td nowrap="nowrap" align="center">❌</td> + <td nowrap="nowrap" align="center">✔️</td> <td nowrap="nowrap" align="center">❌</td> </tr> <tr>