diff --git a/colossalai/fx/passes/meta_info_prop.py b/colossalai/fx/passes/meta_info_prop.py
index 711439955..5137494ad 100644
--- a/colossalai/fx/passes/meta_info_prop.py
+++ b/colossalai/fx/passes/meta_info_prop.py
@@ -338,7 +338,7 @@ def metainfo_trace(gm: torch.fx.GraphModule, *args, verbose: bool = False, unit:
Returns:
torch.fx.GraphModule: The ``GraphModule`` annotated with MetaInfo.
"""
- device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+ device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
interp = MetaInfoProp(gm.to(device))
if is_compatible_with_meta():
from colossalai.fx.profiler import MetaTensor
diff --git a/examples/tutorial/auto_parallel/README.md b/examples/tutorial/auto_parallel/README.md
index bed488022..e93a8288b 100644
--- a/examples/tutorial/auto_parallel/README.md
+++ b/examples/tutorial/auto_parallel/README.md
@@ -15,3 +15,82 @@ export DATA=/path/to/data
```bash
colossalai run --nproc_per_node 4 auto_parallel_demo.py
```
+
+## Auto Checkpoint Benchmarking
+
+We provide three demos for testing the performance of auto checkpointing. The first two, `demo_resnet50.py` and `demo_gpt2_medium.py`, show the solver's ability to search for a checkpoint strategy that fits within a given memory budget.
+
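+Under the hood, each demo traces the model into a `torch.fx` graph, annotates it with meta information, and asks the rotor solver for an activation checkpoint schedule that fits the budget. Below is a minimal sketch of that flow using the same APIs as `bench_utils.py`; the ResNet-50 model and the 8 GB budget are illustrative, and a distributed environment must already be launched via `colossalai.launch`.
+
+```python
+import torch
+import torchvision.models as tm
+
+from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor
+from colossalai.fx import metainfo_trace, symbolic_trace
+
+model = tm.resnet50()
+gm = symbolic_trace(model)                                             # trace into a torch.fx graph
+gm = metainfo_trace(gm, torch.empty(128, 3, 224, 224, device='meta'))  # annotate nodes with MetaInfo
+solver = CheckpointSolverRotor(gm.graph, free_memory=8 * 1024**3)      # memory budget in bytes (illustrative)
+gm.graph = solver.solve()                                              # rewrite the graph with checkpoints
+```
+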
+The usage of the above two tests is as follows:
+```bash
+python demo_resnet50.py --help
+usage: ResNet50 Auto Activation Benchmark [-h] [--batch_size BATCH_SIZE] [--num_steps NUM_STEPS] [--sample_points SAMPLE_POINTS] [--free_memory FREE_MEMORY]
+ [--start_factor START_FACTOR]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --batch_size BATCH_SIZE
+ batch size for benchmark, default 128
+ --num_steps NUM_STEPS
+ number of test steps for benchmark, default 5
+ --sample_points SAMPLE_POINTS
+ number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15
+ --free_memory FREE_MEMORY
+ maximum memory budget in MB for benchmark, default 11000 MB
+ --start_factor START_FACTOR
+ start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 4
+
+# run with default settings
+python demo_resnet50.py
+
+python demo_gpt2_medium.py --help
+usage: GPT2 medium Auto Activation Benchmark [-h] [--batch_size BATCH_SIZE] [--num_steps NUM_STEPS] [--sample_points SAMPLE_POINTS] [--free_memory FREE_MEMORY]
+ [--start_factor START_FACTOR]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --batch_size BATCH_SIZE
+ batch size for benchmark, default 8
+ --num_steps NUM_STEPS
+ number of test steps for benchmark, default 5
+ --sample_points SAMPLE_POINTS
+ number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15
+ --free_memory FREE_MEMORY
+ maximum memory budget in MB for benchmark, default 56000 MB
+ --start_factor START_FACTOR
+ start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 10
+
+# run with default settings
+python demo_gpt2_medium.py
+```
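+
+For both demos, the sampled memory budgets are spaced evenly between `free_memory / start_factor` and `free_memory`, mirroring the `np.linspace` call in `bench_utils.py`. A small sketch with the ResNet-50 defaults (illustrative numbers, expressed in MB for readability):
+
+```python
+import numpy as np
+
+free_memory, start_factor, sample_points = 11000, 4, 15    # MB, defaults of demo_resnet50.py
+budgets = np.linspace(free_memory // start_factor, free_memory, sample_points)
+print(budgets[0], budgets[-1])    # 2750.0 11000.0
+```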
+
+Here are some results for your reference:
+
+### ResNet 50
+
+
+### GPT2 Medium
+
+
+We also provide `demo_resnet152.py` to demonstrate the benefit of auto activation checkpointing with large batch sizes. Its usage is listed as follows:
+```bash
+python demo_resnet152.py --help
+usage: ResNet152 Auto Activation Through Put Benchmark [-h] [--num_steps NUM_STEPS]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --num_steps NUM_STEPS
+ number of test steps for benchmark, default 5
+
+# run with default settings
+python demo_resnet152.py
+```
+
+Here are some results from our end for your reference:
+```bash
+===============test summary================
+batch_size: 512, peak memory: 73314.392 MB, through put: 254.286 images/s
+batch_size: 1024, peak memory: 73316.216 MB, through put: 397.608 images/s
+batch_size: 2048, peak memory: 72927.837 MB, through put: 277.429 images/s
+```
+
+The above tests will output the test summary and a plot of the benchmarking results.
diff --git a/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb b/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb
deleted file mode 100644
index cacf5d5f3..000000000
--- a/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb
+++ /dev/null
@@ -1,878 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/lcsjy/.conda/envs/autoparallel/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
[11/10/22 18:04:14] INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:1 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m[11/10/22 18:04:14]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m1\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:1 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m1\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:2 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m2\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:2 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m2\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:3 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m3\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:3 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m3\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:4 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m4\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:4 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m4\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:5 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m5\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:5 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m5\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:6 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m6\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:6 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m6\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:7 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m7\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:7 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m7\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- " store_based_barrier_key:8 to store for rank: 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n",
- "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m8\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - torch.distributed.distributed_c10d - INFO: Rank 0 : Completed store-based \n",
- " barrier for key:store_based_barrier_key:8 with 1 nodes. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n",
- "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m8\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: \n",
- " /home/lcsjy/ColossalAI/colossalai/context/ parallel_context.py :521 set_device \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/context/\u001b[0m\u001b[95mparallel_context.py\u001b[0m:\u001b[1;36m521\u001b[0m set_device \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: process rank 0 is bound to device 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: process rank \u001b[1;36m0\u001b[0m is bound to device \u001b[1;36m0\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: \n",
- " /home/lcsjy/ColossalAI/colossalai/context/ parallel_context.py :557 set_seed \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/context/\u001b[0m\u001b[95mparallel_context.py\u001b[0m:\u001b[1;36m557\u001b[0m set_seed \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: initialized seed on rank 0 , numpy: 1024 , python \n",
- " random: 1024 , ParallelMode.DATA: 1024 , ParallelMode.TENSOR: 1024 ,the default parallel \n",
- " seed is ParallelMode.DATA. \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: initialized seed on rank \u001b[1;36m0\u001b[0m, numpy: \u001b[1;36m1024\u001b[0m, python \n",
- "\u001b[2;36m \u001b[0m random: \u001b[1;36m1024\u001b[0m, ParallelMode.DATA: \u001b[1;36m1024\u001b[0m, ParallelMode.TENSOR: \u001b[1;36m1024\u001b[0m,the default parallel \n",
- "\u001b[2;36m \u001b[0m seed is ParallelMode.DATA. \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: /home/lcsjy/ColossalAI/colossalai/ initialize.py :117 \n",
- " launch \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \u001b[35m/home/lcsjy/ColossalAI/colossalai/\u001b[0m\u001b[95minitialize.py\u001b[0m:\u001b[1;36m117\u001b[0m \n",
- "\u001b[2;36m \u001b[0m launch \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " INFO colossalai - colossalai - INFO: Distributed environment is initialized, data parallel \n",
- " size: 1 , pipeline parallel size: 1 , tensor parallel size: 1 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: Distributed environment is initialized, data parallel \n",
- "\u001b[2;36m \u001b[0m size: \u001b[1;36m1\u001b[0m, pipeline parallel size: \u001b[1;36m1\u001b[0m, tensor parallel size: \u001b[1;36m1\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import time\n",
- "import torchvision.models as tm\n",
- "import torch\n",
- "import colossalai\n",
- "from colossalai.fx import symbolic_trace, metainfo_trace\n",
- "from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor\n",
- "from functools import partial\n",
- "from colossalai.utils import free_port\n",
- "\n",
- "from bench_utils import bench, bench_rotor\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "colossalai.launch(config={}, rank=0, world_size=1, host='localhost', port=free_port(), backend='nccl')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### ResNet152 with batch size = 512 fails"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(78990.4404296875, inf)"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def data_gen(batch_size, shape, device='cuda'):\n",
- " data = torch.empty(batch_size, *shape, device=device)\n",
- " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n",
- " return {'x': data}, label\n",
- "\n",
- "model = tm.resnet152()\n",
- "gm = symbolic_trace(model)\n",
- "gm = metainfo_trace(gm, torch.empty(512, 3, 224, 224, device='meta'))\n",
- "bench(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=512, shape=(3, 224, 224)), num_steps=5)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### ResNet152 with batch size = 2048 succeeds "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(74495.8486328125, 5634.262561798096)"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def data_gen(batch_size, shape, device='cuda'):\n",
- " data = torch.empty(batch_size, *shape, device=device)\n",
- " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n",
- " return {'x': data}, label\n",
- "\n",
- "model = tm.resnet152()\n",
- "gm = symbolic_trace(model)\n",
- "gm = metainfo_trace(gm, torch.empty(2048, 3, 224, 224, device='meta'))\n",
- "solver = CheckpointSolverRotor(gm.graph, free_memory=torch.cuda.mem_get_info(device=0)[0] * 0.95)\n",
- "gm.graph = solver.solve()\n",
- "bench(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=2048, shape=(3, 224, 224)), num_steps=5)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Benchmarking on ResNet18"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "[11/10/22 18:04:20] WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m[11/10/22 18:04:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "[11/10/22 18:04:21] WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m[11/10/22 18:04:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "[11/10/22 18:04:22] WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m[11/10/22 18:04:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "[11/10/22 18:04:23] WARNING colossalai - colossalai - WARNING: \n",
- " /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ ckpt_solver_rotor.py :82 \n",
- " solve \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m[11/10/22 18:04:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n",
- "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n",
- "\u001b[2;36m \u001b[0m solve \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- " WARNING colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- " chain from index 0 to 14 with memory 500 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n",
- "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "def data_gen(batch_size, shape, device='cuda'):\n",
- " data = torch.empty(batch_size, *shape, device=device)\n",
- " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n",
- " return (data, ), label\n",
- "\n",
- "model = tm.resnet18()\n",
- "gm = symbolic_trace(model)\n",
- "gm = metainfo_trace(gm, torch.empty(128, 3, 224, 224, device='meta'))\n",
- "peak_hist, step_hist = bench_rotor(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=128, shape=(3, 224, 224)), num_steps=5, sample_points=20, free_memory=2700 * 1024**2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAArEAAAKTCAYAAAAOvlAQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvJElEQVR4nO3df5BV9X34/9eVZRfF5TaKK4Ir/kjQRUwQZSBkI0EjP0TBZYaiYxQCsbEBCST1E62xHW3aJTTNdGIaCXHdsaEqo+JWhRjZjiAO2hJQEycUcKMsVQzByK4UC9E93z8y3m+uu/zYVYQ3PB4zZ+bes+/z4+57Dnl6e/Y0l2VZFgAAkJBjDvUJAABAZ4lYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEhOyaE+gY9TW1tbvP7661FeXh65XO5Qnw4AAB+QZVm8/fbb0bdv3zjmmL1/33pURezrr78elZWVh/o0AADYjy1btsSpp566158fVRFbXl4eEX/8pfTq1esQnw0AAB/U2toalZWVhW7bm6MqYt+/haBXr14iFgDgMLa/Wz/9YRcAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELkLC2tiza2rJDfRoAHzsRC5CotrYszvzrZXHmXy8TssBRR8QCJOr3u/Z0+BrgaCBiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAktOpiD399NMjl8u1W2bOnFkYs379+pgwYULk8/koLy+P4cOHR3Nzc+Hnb7zxRlx77bXRp0+f6NmzZwwZMiQeeuih/R77Rz/6UZxxxhnRo0ePuOCCC2LVqlWdOXUAAI4gnYrYNWvWxNatWwvL8uXLIyJi8uTJERHR1NQU1dXVcc4558SKFSvixRdfjNtuuy169OhR2Me1114bGzZsiEcffTR+9atfxaRJk2LKlCnx/PPP7/W4ixcvjjlz5sStt94azz//fHz+85+PcePGFcUxAABHj1yWZVlXN54zZ048/vjjsWnTpsjlcnHVVVdF9+7d46c//eletzn++OPjrrvuimuvvbaw7sQTT4z58+fHjBkzOtxm2LBhMWTIkLjrrrsK66qqquLKK6+M2travR5r9+7dsXv37sL71tbWqKysjJaWlujVq1dnPirAYWf7zt1x4XcaIyLiF9/+YvQ+vuwQnxHAh9fa2hr5fH6/vdble2L37NkTixYtiunTp0cul4u2trZYunRpDBgwIMaMGRMVFRUxbNiwaGhoKNquuro6Fi9eHL///e+jra0tHnjggdi9e3d84Qtf2Otx1q5dG6NHjy5aP3r06Fi9evU+z7G2tjby+Xxhqays7OrHBQDgMNLliG1oaIgdO3bEtGnTIiJi27ZtsXPnzpg3b16MHTs2nnzyyaipqYlJkybFypUrC9stXrw43n333TjxxBOjrKwsvvrVr8YjjzwSZ511VofH2b59e7z33ntx8sknF60/+eST44033tjnOd5yyy3R0tJSWLZs2dLVjwsAwGGkpKsb1tXVxbhx46Jv374REdHW1hYRERMnToy5c+dGRMTgwYNj9erVsWDBghg5cmRERHz729+Ot956KxobG6N3797R0NAQkydPjlWrVsV555231+Plcrmi91mWtVv3QWVlZVFW5v+8BgBwpOlSxG7evDkaGxtjyZIlhXW9e/eOkpKSGDhwYNHYqqqqeOaZZyLij3/49cMf/jBeeumlOPfccyMi4jOf+UysWrUq/uVf/iUWLFjQ7li9e/eObt26tfvWddu2be2+nQUA4OjQpdsJ6uvro6KiIsaPH19YV1paGkOHDo0NGzYUjd24cWP0798/IiJ27dr1x4MeU3zYbt26Fb7J/aDS0tK44IILCk9CeN/y5ctjxIgRXTl9AAAS1+lvYtva2qK+vj6mTp0aJSXFm990000xZcqUuOiii2LUqFHxxBNPxGOPPRYrVqyIiIhzzjknPvnJT8ZXv/rV+N73vhcnnnhiNDQ0xPLly+Pxxx8v7OeSSy6JmpqamDVrVkREfOMb34hrr702LrzwwvjsZz8bCxcujObm5rjhhhs+xEcHACBVnY7YxsbGaG5ujunTp7f7WU1NTSxYsCBqa2tj9uzZcfbZZ8fDDz8c1dXVERHRvXv3WLZsWdx8881xxRVXxM6dO+OTn/xk3HvvvXHZZZcV9tPU1BTbt28vvJ8yZUq8+eabcccdd8TWrVtj0KBBsWzZssI3vAAAHF0+1HNiU3Ogzx0DSIHnxAJHooP+nFgAADhURCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAiTq2O7dOnwNcDQQsQCJyuU6fg1wNBCxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELECi2rKOXwMcDUQsQKLe+t89Hb4GOBqIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWIBEtWVZh68BjgYiFiBRv//fPR2+BjgaiFgAAJIjYgEASE6nIvb000+PXC7Xbpk5c2ZhzPr162PChAmRz+ejvLw8hg8fHs3NzRER8eqrr3a4fS6XiwcffHCvx3333Xfj29/+dpxxxhlx7LHHxplnnhl33HFHtLW1dfFjAwCQspLODF6zZk289957hfcvvfRSXHrppTF58uSIiGhqaorq6uqYMWNG3H777ZHP52P9+vXRo0ePiIiorKyMrVu3Fu1z4cKFMX/+/Bg3btxej/vd7343FixYEPfee2+ce+658Ytf/CK+/OUvRz6fj69//eud+QgAABwBOhWxJ510UtH7efPmxVlnnRUjR46MiIhbb701Lrvsspg/f35hzJlnnll43a1bt+j
Tp0/RPh555JGYMmVKHH/88Xs97rPPPhsTJ06M8ePHR8QfvxG+//774xe/+EVnTh8AgCNEl++J3bNnTyxatCimT58euVwu2traYunSpTFgwIAYM2ZMVFRUxLBhw6KhoWGv+1i7dm288MILMWPGjH0eq7q6Ov7jP/4jNm7cGBERL774YjzzzDNx2WWX7XO73bt3R2tra9ECAED6uhyxDQ0NsWPHjpg2bVpERGzbti127twZ8+bNi7Fjx8aTTz4ZNTU1MWnSpFi5cmWH+6irq4uqqqoYMWLEPo/1rW99K66++uo455xzonv37nH++efHnDlz4uqrr97ndrW1tZHP5wtLZWVllz4rAACHly5HbF1dXYwbNy769u0bEVH4I6uJEyfG3LlzY/DgwXHzzTfH5ZdfHgsWLGi3/TvvvBP33Xfffr+FjYhYvHhxLFq0KO67775Yt25d3HvvvfG9730v7r333n1ud8stt0RLS0th2bJlSxc+KQAAh5tO3RP7vs2bN0djY2MsWbKksK53795RUlISAwcOLBpbVVUVzzzzTLt9PPTQQ7Fr16647rrr9nu8m266KW6++ea46qqrIiLivPPOi82bN0dtbW1MnTp1r9uVlZVFWVnZgX4sAAAS0aVvYuvr66OioqLwh1YREaWlpTF06NDYsGFD0diNGzdG//792+2jrq4uJkyY0O6PxTqya9euOOaY4lPt1q2bR2wBR7UTepZ2+BrgaNDpb2Lb2tqivr4+pk6dGiUlxZvfdNNNMWXKlLjoooti1KhR8cQTT8Rjjz0WK1asKBr38ssvx9NPPx3Lli3r8BiXXHJJ1NTUxKxZsyIi4oorroi///u/j9NOOy3OPffceP755+P73/9+TJ8+vbOnD3DEOCaX6/A1wNGg0xHb2NgYzc3NHQZkTU1NLFiwIGpra2P27Nlx9tlnx8MPPxzV1dVF4+65557o169fjB49usNjNDU1xfbt2wvv77zzzrjtttvia1/7Wmzbti369u0bX/3qV+Nv/uZvOnv6AAAcAXJZlmWH+iQ+Lq2trZHP56OlpSV69ep1qE8H4EPZ8vtd8fn5T0VExKr/NyoqTzjuEJ8RwId3oL3W5acTAADAoSJiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5nYrY008/PXK5XLtl5syZhTHr16+PCRMmRD6fj/Ly8hg+fHg0NzdHRMSrr77a4fa5XC4efPDBfR77tddeiy996Utx4oknxnHHHReDBw+OtWvXduEjAwCQupLODF6zZk289957hfcvvfRSXHrppTF58uSIiGhqaorq6uqYMWNG3H777ZHP52P9+vXRo0ePiIiorKyMrVu3Fu1z4cKFMX/+/Bg3btxej/vWW2/F5z73uRg1alT87Gc/i4qKimhqaoo/+7M/68zpAwBwhOhUxJ500klF7+fNmxdnnXVWjBw5MiIibr311rjsssti/vz5hTFnnnlm4XW3bt2iT58+Rft45JFHYsqUKXH88cfv9bjf/e53o7KyMurr6wvrTj/99M6cOgAAR5Au3xO7Z8+eWLRoUUyfPj1yuVy0tbXF0qVLY8CAATFmzJioqKiIYcOGRUNDw173sXbt2njhhRdixowZ+zzWo48+GhdeeGFMnjw5Kioq4vzzz4+f/OQn+z3H3bt3R2tra9ECAED6uhyxDQ0NsWPHjpg2bVpERGzbti127twZ8+bNi7Fjx8aTTz4ZNTU1MWnSpFi5cmWH+6irq4uqqqoYMWLEPo/1m9/8Ju6666741Kc+FT//+c/jhhtuiNmzZ8e//uu/7nO72trayOfzhaWysrJLnxUAgMNLLsuyrCsbjhkzJkpLS+Oxxx6LiIjXX389+vXrF1dffXXcd999hXETJkyInj17xv3331+0/TvvvBOnnHJK3HbbbfHNb35zn8cqLS2NCy+8MFavXl1YN3v27FizZk08++yze91u9+7dsXv37sL71tbWqKysjJaWlujVq1enPi/A4WbL73fF5+c/FRERq/7fqKg84bhDfEYAH15ra2vk8/n99lqXvondvHlzNDY2xle+8pXCut69e0dJSUkMHDiwaGxVVVXh6QR/6qGHHopdu3bFddddt9/jnXLKKQe83z9VVlYWvXr1KloAAEhflyK2vr4+KioqYvz48YV1paWlMXTo0NiwYUPR2I0bN0b//v3b7aOuri4mTJjQ7o/FOvK5z33ugPcLAMCRr1NPJ4iIaGtri/r6+pg6dWqUlBRvftNNN8WUKVPioosuilGjRsUTTzwRjz32WKxYsaJo3MsvvxxPP/10LFu2rMNjXHLJJVFTUxOzZs2KiIi5c+fGiBEj4h/+4R/iz//8z+O//uu/YuHChbFw4cLOnj4AAEeATn8T29jYGM3NzTF9+vR2P6upqYkFCxbE/Pnz47zzzou77747Hn744aiuri4ad88990S/fv1i9OjRHR6jqakptm/fXng/dOjQeOSRR+L++++PQYMGxd/93d/FP//zP8c111zT2dMHAOAI0OU/7ErRgd4oDJACf9gFHIkO6h92AQDAoSRiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYgUZ/oWdrha4CjgYgFSNQxuY5fAxwNRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRC
wAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMnpVMSefvrpkcvl2i0zZ84sjFm/fn1MmDAh8vl8lJeXx/Dhw6O5uTkiIl599dUOt8/lcvHggw8e0DnU1tZGLpeLOXPmdObUAQA4gpR0ZvCaNWvivffeK7x/6aWX4tJLL43JkydHRERTU1NUV1fHjBkz4vbbb498Ph/r16+PHj16REREZWVlbN26tWifCxcujPnz58e4ceMO6PgLFy6MT3/60505bQAAjjCditiTTjqp6P28efPirLPOipEjR0ZExK233hqXXXZZzJ8/vzDmzDPPLLzu1q1b9OnTp2gfjzzySEyZMiWOP/74fR57586dcc0118RPfvKT+M53vtOZ0wYA4AjT5Xti9+zZE4sWLYrp06dHLpeLtra2WLp0aQwYMCDGjBkTFRUVMWzYsGhoaNjrPtauXRsvvPBCzJgxY7/HmzlzZowfPz6++MUvHvA57t69O1pbW4sWAADS1+WIbWhoiB07dsS0adMiImLbtm2xc+fOmDdvXowdOzaefPLJqKmpiUmTJsXKlSs73EddXV1UVVXFiBEj9nmsBx54INatWxe1tbWdOsfa2trI5/OFpbKyslPbAwBweOpyxNbV1cW4ceOib9++ERHR1tYWERETJ06MuXPnxuDBg+Pmm2+Oyy+/PBYsWNBu+3feeSfuu+++/X4Lu2XLlvj6178eixYtKtxbe6BuueWWaGlpKSxbtmzp1PYAAByeOnVP7Ps2b94cjY2NsWTJksK63r17R0lJSQwcOLBobFVVVTzzzDPt9vHQQw/Frl274rrrrtvnsdauXRvbtm2LCy64oLDuvffei6effjp++MMfxu7du6Nbt24dbltWVhZlZWWd+WgAACSgSxFbX18fFRUVMX78+MK60tLSGDp0aGzYsKFo7MaNG6N///7t9lFXVxcTJkxo98diH3TJJZfEr371q6J1X/7yl+Occ86Jb33rW3sNWAAAjlydjti2traor6+PqVOnRklJ8eY33XRTTJkyJS666KIYNWpUPPHEE/HYY4/FihUrisa9/PLL8fTTT8eyZcs6PMYll1wSNTU1MWvWrCgvL49BgwYV/bxnz55x4okntlsPAMDRodP3xDY2NkZzc3NMnz693c9qampiwYIFMX/+/DjvvPPi7rvvjocffjiqq6uLxt1zzz3Rr1+/GD16dIfHaGpqiu3bt3f21AAAOErksizLDvVJfFxaW1sjn89HS0tL9OrV61CfDsCHsmvPuzHwb34eERG/vmNMHFfapTvEAA4rB9prXX46AQAAHCoiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACS06mIPf300yOXy7VbZs6cWRizfv36mDBhQuTz+SgvL4/hw4dHc3NzRES8+uqrHW6fy+XiwQcf3Otxa2trY+jQoVFeXh4VFRVx5ZVXxoYNG7r4kQEASF2nInbNmjWxdevWwrJ8+fKIiJg8eXJERDQ1NUV1dXWcc845sWLFinjxxRfjtttuix49ekRERGVlZdH2W7dujdtvvz169uwZ48aN2+txV65cGTNnzoznnnsuli9fHu+++26MHj06/vd//7ernxsAgITlsizLurrxnDlz4vHHH49NmzZFLpeLq666Krp37x4//elPD3gf559/fgwZMiTq6uoOeJvf/e53UVFREStXroyLLrrogLdrbW2NfD4fLS0t0atXrwPeDuBwtGvPuzHwb34eERG/vmNMHFdacojPCODDO9Be6/I9sXv27IlFixbF9OnTI5fLRVtbWyxdujQGDBgQY8aMiYqKihg2bFg0NDTsdR9r166NF154IWbMmNGpY7e0tERExAknnLDPcbt3747W1taiBQCA9HU5YhsaGmLHjh0xbdq0iIjYtm1b7Ny5M+bNmxdjx46NJ598MmpqamLSpEmxcuXKDvdRV1cXVVVVMWLEiAM+bpZl8Y1vfCOqq6tj0KBB+xxbW1sb+Xy+sFRWVh7wcQAAOHx1OWLr6upi3Lhx0bdv34iIaGtri4iIiRMnxty5c2Pw4MFx8803x+WXXx4LFixot/0777wT9913X6e/hZ01a1b88pe/jPvvv3+/Y2+55ZZoaWkpLFu2bOnUsQAAODx16QaqzZs3R2NjYyxZsqSwrnfv3lFSUhIDBw4sGltVVRXPPPNMu3089NBDsWvXrrjuuusO+Lg33nhjPProo/H000/Hqaeeut/xZWVlUVZWdsD7BwAgDV2K2Pr6+qioqIjx48cX1pWWlsbQoUPbPfpq48aN0b9//3b7qKuriwkTJsRJJ5203+NlWRY33nhjPPLII7FixYo444wzunLaAAAcITodsW1tbVFfXx9Tp06NkpLizW+66aaYMmVKXHTRRTFq1Kh44okn4rHHHosVK1YUjXv55Zfj6aefjmXLlnV4jEsuuSRqampi1qxZERExc+bMuO++++Lf//3fo7y8PN54442IiMjn83Hsscd29iMAAJC4Tt8T29jYGM3NzTF9+vR2P6upq
YkFCxbE/Pnz47zzzou77747Hn744aiuri4ad88990S/fv1i9OjRHR6jqakptm/fXnh/1113RUtLS3zhC1+IU045pbAsXry4s6cPAMAR4EM9JzY1nhMLHEk8JxY4Eh3058QCAMChImIBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDklh/oEAOiaY7t3i1/fMabwGuBoImIBEpXL5eK4Uv+MA0cntxMAAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJCcTkXs6aefHrlcrt0yc+bMwpj169fHhAkTIp/PR3l5eQwfPjyam5sjIuLVV1/tcPtcLhcPPvjgPo/9ox/9KM4444zo0aNHXHDBBbFq1aoufFwAAI4EnYrYNWvWxNatWwvL8uXLIyJi8uTJERHR1NQU1dXVcc4558SKFSvixRdfjNtuuy169OgRERGVlZVF22/dujVuv/326NmzZ4wbN26vx128eHHMmTMnbr311nj++efj85//fIwbN64QxwAAHF1yWZZlXd14zpw58fjjj8emTZsil8vFVVddFd27d4+f/vSnB7yP888/P4YMGRJ1dXV7HTNs2LAYMmRI3HXXXYV1VVVVceWVV0Ztbe0BH6u1tTXy+Xy0tLREr169Dng7AAA+Hgfaa12+J3bPnj2xaNGimD59euRyuWhra4ulS5fGgAEDYsyYMVFRURHDhg2LhoaGve5j7dq18cILL8SMGTP2eZy1a9fG6NGji9aPHj06Vq9evc9z3L17d7S2thYtAACkr8sR29DQEDt27Ihp06ZFRMS2bdti586dMW/evBg7dmw8+eSTUVNTE5MmTYqVK1d2uI+6urqoqqqKESNG7PU427dvj/feey9OPvnkovUnn3xyvPHGG/s8x9ra2sjn84WlsrKycx8SAIDDUpcjtq6uLsaNGxd9+/aNiIi2traIiJg4cWLMnTs3Bg8eHDfffHNcfvnlsWDBgnbbv/POO3Hfffft81vYP5XL5YreZ1nWbt0H3XLLLdHS0lJYtmzZckDHAgDg8FbSlY02b94cjY2NsWTJksK63r17R0lJSQwcOLBobFVVVTzzzDPt9vHQQw/Frl274rrrrtvnsXr37h3dunVr963rtm3b2n07+0FlZWVRVla2v48DAEBiuhSx9fX1UVFREePHjy+sKy0tjaFDh8aGDRuKxm7cuDH69+/fbh91dXUxYcKEOOmkk/Z5rNLS0rjgggti+fLlUVNTU1i/fPnymDhxYqfO+/2/YXNvLADA4en9TtvvsweyTnrvvfey0047LfvWt77V7mdLlizJunfvni1cuDDbtGlTduedd2bdunXLVq1aVTRu06ZNWS6Xy372s591eIyLL744u/POOwvvH3jggax79+5ZXV1d9utf/zqbM2dO1rNnz+zVV1/t1Llv2bIliwiLxWKxWCwWy2G+bNmyZZ9d1+lvYhsbG6O5uTmmT5/e7mc1NTWxYMGCqK2tjdmzZ8fZZ58dDz/8cFRXVxeNu+eee6Jfv37tnjjwvqampti+fXvh/ZQpU+LNN9+MO+64I7Zu3RqDBg2KZcuWdfgN77707ds3tmzZEuXl5fu9nzYlra2tUVlZGVu2bPHosMOUOUqDeTr8maM0mKfD3+E8R1mWxdtvv134u6u9+VDPieXw4Pm3hz9zlAbzdPgzR2kwT4e/I2GOuvx0AgAAOFRELAAAyRGxR4CysrL427/9W48TO4yZozSYp8OfOUqDeTr8HQlz5J5YAACS45tYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IPgdra2hg6dGiUl5dHRUVFXHnllbFhw4aiMdOmTYtcLle0DB8+vGjMF77whXZjrrrqqqIxb731Vlx77bWRz+cjn8/HtddeGzt27Cga09zcHFdccUX07NkzevfuHbNnz449e/YclM+eigOZo4iI9evXx4QJEyKfz0d5eXkMHz48mpubCz/fvXt33HjjjdG7d+/o2bNnTJgwIf7nf/6naB/mqOs+qnlyLR08BzJHH/zdv7/84z/+Y2GMa+ng+qjmybV08BzIHO3cuTNmzZoVp556ahx77LFRVVUVd911V9GYI+payvjYjRkzJquvr89eeuml7IUXXsjGjx+fnXbaadnOnTsLY6ZOnZqNHTs227p1a2F58803i/YzcuTI7Prrry8as2PHjqIxY8eOzQYNGpStXr06W716dTZo0KDs8ssvL/z83XffzQYNGpSNGjUqW7duXbZ8+fKsb9++2axZsw7uL+EwdyBz9PLLL2cnnHBCdtNNN2Xr1q3Lmpqasscffzz77W9/Wxhzww03ZP369cuWL1+erVu3Lhs1alT2mc98Jnv33XcLY8xR131U8+RaOngOZI7+9Pe+devW7J577slyuVzW1NRUGONaOrg+qnlyLR08BzJHX/nKV7Kzzjore+qpp7JXXnkl+/GPf5x169Yta2hoKIw5kq4lEXsY2LZtWxYR2cqVKwvrpk6dmk2cOHGf240cOTL7+te/vtef//rXv84iInvuuecK65599tksIrL//u//zrIsy5YtW5Ydc8wx2WuvvVYYc//992dlZWVZS0tL1z7QEaijOZoyZUr2pS99aa/b7NixI+vevXv2wAMPFNa99tpr2THHHJM98cQTWZaZo49aV+Ypy1xLH6eO5uiDJk6cmF188cWF966lj19X5inLXEsfp47m6Nxzz83uuOOOonFDhgzJvv3tb2dZduRdS24nOAy0tLRERMQJJ5xQtH7FihVRUVERAwYMiOuvvz62bdvWbtt/+7d/i969e8e5554bf/VXfxVvv/124WfPPvts5PP5GDZsWGHd8OHDI5/Px+rVqwtjBg0aFH379i2MGTNmTOzevTvWrl37kX7OlH1wjtra2mLp0qUxYMCAGDNmTFRUVMSwYcOioaGhsM3atWvjD3/4Q4wePbqw
rm/fvjFo0KCi3785+uh0ZZ7e51r6eOzt37v3/fa3v42lS5fGjBkzCutcSx+/rszT+1xLH4+O5qi6ujoeffTReO211yLLsnjqqadi48aNMWbMmIg48q6lko/tSHQoy7L4xje+EdXV1TFo0KDC+nHjxsXkyZOjf//+8corr8Rtt90WF198caxdu7bw/yLummuuiTPOOCP69OkTL730Utxyyy3x4osvxvLlyyMi4o033oiKiop2x6yoqIg33nijMObkk08u+vknPvGJKC0tLYw52nU0R9u2bYudO3fGvHnz4jvf+U5897vfjSeeeCImTZoUTz31VIwcOTLeeOONKC0tjU984hNF+zv55JOLfv/m6KPR1XmKcC19XPb2792fuvfee6O8vDwmTZpUWOda+nh1dZ4iXEsfl73N0Q9+8IO4/vrr49RTT42SkpI45phj4u67747q6uqIOPKuJRF7iM2aNSt++ctfxjPPPFO0fsqUKYXXgwYNigsvvDD69+8fS5cuLfyjcf311xeN+dSnPhUXXnhhrFu3LoYMGRIRf7wR/4OyLCtafyBjjmYdzVFbW1tEREycODHmzp0bERGDBw+O1atXx4IFCwpx1JGu/P7N0f59mHlyLX089vbv3Z+655574pprrokePXrsd3+upYPjw8yTa+njsbc5+sEPfhDPPfdcPProo9G/f/94+umn42tf+1qccsop8cUvfnGv+0v1WnI7wSF04403xqOPPhpPPfVUnHrqqfsce8opp0T//v1j06ZNex0zZMiQ6N69e2FMnz594re//W27cb/73e8K/wXVp0+fdv/V9NZbb8Uf/vCHdv+VdTTa2xz17t07SkpKYuDAgUXjq6qqCn/13qdPn9izZ0+89dZbRWO2bdtW9Ps3Rx/eh5mnjriWPnoH8u/dqlWrYsOGDfGVr3ylaL1r6ePzYeapI66lj97e5uidd96Jv/7rv47vf//7ccUVV8SnP/3pmDVrVkyZMiW+973vRcQReC19bHffUtDW1pbNnDkz69u3b7Zx48YD2mb79u1ZWVlZdu+99+51zK9+9auim7zfvzn7P//zPwtjnnvuuQ5vzn799dcLYx544IGj/gb6A5mjz372s+3+YOjKK6/Mrr766izL/v8b6BcvXlz4+euvv97hDfTmqGs+innqiGvpo9OZf++mTp2aXXDBBe3Wu5YOvo9injriWvro7G+OWlpasojIli1bVrT+L/7iL7JLL700y7Ij71oSsYfAX/7lX2b5fD5bsWJF0WNIdu3alWVZlr399tvZN7/5zWz16tXZK6+8kj311FPZZz/72axfv35Za2trlmV/fGzQ7bffnq1ZsyZ75ZVXsqVLl2bnnHNOdv7557d7TManP/3p7Nlnn82effbZ7LzzzuvwMRmXXHJJtm7duqyxsTE79dRTj/pHmexvjrIsy5YsWZJ17949W7hwYbZp06bszjvvzLp165atWrWqMOaGG27ITj311KyxsTFbt25ddvHFF3f4KBNz1DUfxTy5lg6uA5mjLPvj/wAfd9xx2V133dXhflxLB9dHMU+upYPrQOZo5MiR2bnnnps99dRT2W9+85usvr4+69GjR/ajH/2oMOZIupZE7CEQER0u9fX1WZZl2a5du7LRo0dnJ510Uta9e/fstNNOy6ZOnZo1NzcX9tHc3JxddNFF2QknnJCVlpZmZ511VjZ79ux2z5J98803s2uuuSYrLy/PysvLs2uuuSZ76623isZs3rw5Gz9+fHbsscdmJ5xwQjZr1qzs//7v/w72r+Gwtr85el9dXV32yU9+MuvRo0f2mc98puhZfFmWZe+88042a9as7IQTTsiOPfbY7PLLLy+axywzRx/GRzFPrqWD60Dn6Mc//nF27LHHtnum6PtcSwfXRzFPrqWD60DmaOvWrdm0adOyvn37Zj169MjOPvvs7J/+6Z+ytra2wpgj6VrKZVmWfbQ3KAAAwMHlD7sAAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5Px/+stDv7Sfnq4AAAAASUVORK5CYII=",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "plt.figure(figsize=(8, 8))\n",
- "plt.plot(peak_hist, step_hist)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[540.0,\n",
- " 653.6842105263158,\n",
- " 767.3684210526316,\n",
- " 881.0526315789474,\n",
- " 994.7368421052631,\n",
- " 1108.421052631579,\n",
- " 1222.1052631578948,\n",
- " 1335.7894736842104,\n",
- " 1449.4736842105262,\n",
- " 1563.157894736842,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625,\n",
- " 26711.86572265625]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "peak_hist"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.10.6 ('autoparallel': conda)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.6"
- },
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "cc0ad6865167fb9a52c12f0fd0c8203c9a7690797bfee612a871d56b9d2024ce"
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/tutorial/auto_parallel/bench_utils.py b/examples/tutorial/auto_parallel/bench_utils.py
index 365e07e21..d9d656b85 100644
--- a/examples/tutorial/auto_parallel/bench_utils.py
+++ b/examples/tutorial/auto_parallel/bench_utils.py
@@ -1,16 +1,33 @@
import time
+from copy import deepcopy
from functools import partial
from typing import Callable, Tuple
import numpy as np
import torch
+import torch.nn as nn
import torchvision.models as tm
+from transformers import GPT2Config, GPT2LMHeadModel
from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor
from colossalai.fx import metainfo_trace
-def bench(gm: torch.fx.GraphModule, criterion: torch.nn.Module, data_gen: Callable, num_steps: int = 5):
+def bench(gm: torch.fx.GraphModule,
+ criterion: torch.nn.Module,
+ data_gen: Callable,
+          num_steps: int = 5) -> Tuple[float, float]:
+ """Benchmarking a given graph module
+
+ Args:
+ gm (torch.fx.GraphModule): The graph module to benchmark.
+ criterion (torch.nn.Module): Loss function.
+ data_gen (Callable): Data generator.
+ num_steps (int, optional): Number of test steps. Defaults to 5.
+
+ Returns:
+        Tuple[float, float]: peak memory in MB and step time in ms.
+ """
gm.train()
gm.cuda()
step_time = float('inf')
@@ -39,7 +56,8 @@ def bench(gm: torch.fx.GraphModule, criterion: torch.nn.Module, data_gen: Callab
del args, label, output, loss
gm.to("cpu")
torch.cuda.empty_cache()
- return (torch.cuda.max_memory_allocated(device="cuda") - cached) / 1024**2, step_time * 1.0e3
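+    # peak memory is reported relative to the pre-run baseline `cached`, converted to MB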
+ peak_mem = (torch.cuda.max_memory_allocated(device="cuda") - cached) / 1024**2
+ return peak_mem, step_time * 1.0e3
def bench_rotor(gm: torch.fx.GraphModule,
@@ -47,19 +65,92 @@ def bench_rotor(gm: torch.fx.GraphModule,
data_gen: Callable,
num_steps: int = 5,
sample_points: int = 20,
- free_memory: int = torch.cuda.mem_get_info()[0]):
+ free_memory: int = torch.cuda.mem_get_info()[0],
+                start_factor: int = 4) -> Tuple[np.ndarray, list, list]:
+ """Auto Checkpoint Rotor Algorithm benchmarking
+ Benchmarks the Auto Checkpoint Rotor Algorithm for a given graph module and data.
+
+ Args:
+ gm (torch.fx.GraphModule): The graph module to benchmark.
+ criterion (torch.nn.Module): Loss function.
+ data_gen (Callable): Data generator.
+ num_steps (int, optional): Number of test steps. Defaults to 5.
+ sample_points (int, optional): Number of sample points. Defaults to 20.
+        free_memory (int, optional): Max memory budget in bytes. Defaults to torch.cuda.mem_get_info()[0].
+ start_factor (int, optional): Start memory budget factor for benchmark, the start memory budget
+ will be free_memory / start_factor. Defaults to 4.
+
+ Returns:
+        Tuple[np.ndarray, list, list]: budgets vector (MB), peak memory vector (MB), and step time vector (ms).
+ """
peak_hist, step_hist = [], []
- for budget in np.linspace(free_memory // 5, free_memory, sample_points):
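+    # keep a pristine copy of the graph; the solver rewrites gm.graph on every iteration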
+ raw_graph = deepcopy(gm.graph)
+ for budget in np.linspace(free_memory // start_factor, free_memory, sample_points):
gm = metainfo_trace(gm, *data_gen()[0])
solver = CheckpointSolverRotor(gm.graph, free_memory=budget)
try:
- gm.graph = solver.solve()
- peak_memory, step_time = bench(gm,
- criterion,
- partial(data_gen, batch_size=2048, shape=(3, 224, 224)),
- num_steps=num_steps)
+ gm.graph = solver.solve(verbose=False)
+ peak_memory, step_time = bench(gm, criterion, data_gen, num_steps=num_steps)
except:
peak_memory, step_time = budget / 1024**2, float('inf')
peak_hist.append(peak_memory)
step_hist.append(step_time)
- return peak_hist, step_hist
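+        # restore the original graph before trying the next budget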
+ gm.graph = deepcopy(raw_graph)
+ return np.linspace(free_memory // start_factor, free_memory, sample_points) / 1024**2, peak_hist, step_hist
+
+
+class GPTLMModel(nn.Module):
+ """
+ GPT Model
+ """
+
+ def __init__(self,
+ hidden_size=768,
+ num_layers=12,
+ num_attention_heads=12,
+ max_seq_len=1024,
+ vocab_size=50257,
+ checkpoint=False):
+ super().__init__()
+ self.checkpoint = checkpoint
+ self.model = GPT2LMHeadModel(
+ GPT2Config(n_embd=hidden_size,
+ n_layer=num_layers,
+ n_head=num_attention_heads,
+ n_positions=max_seq_len,
+ n_ctx=max_seq_len,
+ vocab_size=vocab_size))
+ if checkpoint:
+ self.model.gradient_checkpointing_enable()
+
+ def forward(self, input_ids, attention_mask):
+ # Only return lm_logits
+ return self.model(input_ids=input_ids, attention_mask=attention_mask, use_cache=not self.checkpoint)[0]
+
+
+class GPTLMLoss(nn.Module):
+ """
+ GPT Loss
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.loss_fn = nn.CrossEntropyLoss()
+
+ def forward(self, logits, labels):
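+        # shift so that tokens < n predict token n (standard causal LM objective)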
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ return self.loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
+
+
+def gpt2_medium(checkpoint=False):
+ return GPTLMModel(hidden_size=1024, num_layers=24, num_attention_heads=16, checkpoint=checkpoint)
+
+
+def gpt2_xl(checkpoint=False):
+ return GPTLMModel(hidden_size=1600, num_layers=48, num_attention_heads=32, checkpoint=checkpoint)
+
+
+def gpt2_6b(checkpoint=False):
+ return GPTLMModel(hidden_size=4096, num_layers=30, num_attention_heads=16, checkpoint=checkpoint)
diff --git a/examples/tutorial/auto_parallel/demo_gpt2_medium.py b/examples/tutorial/auto_parallel/demo_gpt2_medium.py
new file mode 100644
index 000000000..2739a4c2e
--- /dev/null
+++ b/examples/tutorial/auto_parallel/demo_gpt2_medium.py
@@ -0,0 +1,108 @@
+import time
+from argparse import ArgumentParser
+from functools import partial
+
+import matplotlib.pyplot as plt
+import torch
+import torch.multiprocessing as mp
+import torchvision.models as tm
+from bench_utils import GPTLMLoss, bench_rotor, gpt2_medium
+
+import colossalai
+from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor
+from colossalai.fx import metainfo_trace, symbolic_trace
+from colossalai.utils import free_port
+
+
+def data_gen(batch_size, seq_len, vocab_size, device='cuda:0'):
+ """
+ Generate random data for benchmarking
+ """
+ input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), device=device)
+ attention_mask = torch.ones_like(input_ids, device=device)
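+    # the all-ones attention mask also serves as dummy labels; the benchmark measures memory/time only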
+ return (input_ids, attention_mask), attention_mask
+
+
+def _gpt2_benchmark(rank, world_size, port, batch_size, num_steps, sample_points, free_memory, start_factor):
+ colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+ model = gpt2_medium()
+
+ # trace and benchmark
+ data, mask = data_gen(batch_size, 1024, 50257, device='meta')[0]
+ gm = symbolic_trace(model, meta_args={'input_ids': data, 'attention_mask': mask})
+ gm = metainfo_trace(gm, data, mask)
+ budgets, peak_hist, step_hist = bench_rotor(gm,
+ GPTLMLoss(),
+ partial(data_gen, batch_size=batch_size, seq_len=1024,
+ vocab_size=50257),
+ num_steps=num_steps,
+ sample_points=sample_points,
+ free_memory=free_memory,
+ start_factor=start_factor)
+
+ # print summary
+ print("==============test summary==============")
+ for budget, peak, step in zip(budgets, peak_hist, step_hist):
+ print(f'memory budget: {budget:.3f} MB, peak memory: {peak:.3f} MB, step time: {step:.3f} ms')
+
+ # plot valid results
+ fig, axs = plt.subplots(1, 2, figsize=(16, 8))
+ valid_idx = next(i for i, step in enumerate(step_hist) if step != float('inf'))
+
+ # plot peak memory vs. budget memory
+ axs[0].plot(budgets[valid_idx:], peak_hist[valid_idx:])
+ axs[0].plot([budgets[valid_idx], budgets[-1]], [budgets[valid_idx], budgets[-1]], linestyle='--')
+ axs[0].set_xlabel("Budget Memory (MB)")
+ axs[0].set_ylabel("Peak Memory (MB)")
+ axs[0].set_title("Peak Memory vs. Budget Memory")
+
+ # plot relative step time vs. peak memory
+ axs[1].plot(peak_hist[valid_idx:], [step_time / step_hist[-1] for step_time in step_hist[valid_idx:]])
+ axs[1].plot([peak_hist[valid_idx], peak_hist[-1]], [1.0, 1.0], linestyle='--')
+ axs[1].set_xlabel("Peak Memory (MB)")
+ axs[1].set_ylabel("Relative Step Time")
+ axs[1].set_title("Step Time vs. Peak Memory")
+ axs[1].set_ylim(0.8, 1.5)
+
+ # save plot
+ fig.savefig("gpt2_benchmark.png")
+
+
+def gpt2_benchmark(batch_size, num_steps, sample_points, free_memory, start_factor):
+ world_size = 1
+ run_func_module = partial(_gpt2_benchmark,
+ world_size=world_size,
+ port=free_port(),
+ batch_size=batch_size,
+ num_steps=num_steps,
+ sample_points=sample_points,
+ free_memory=free_memory,
+ start_factor=start_factor)
+ mp.spawn(run_func_module, nprocs=world_size)
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser("GPT2 medium Auto Activation Benchmark")
+ parser.add_argument("--batch_size", type=int, default=8, help="batch size for benchmark, default 8")
+ parser.add_argument("--num_steps", type=int, default=5, help="number of test steps for benchmark, default 5")
+ parser.add_argument(
+ "--sample_points",
+ type=int,
+ default=15,
+ help=
+ "number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15"
+ )
+ parser.add_argument("--free_memory",
+ type=int,
+ default=56000,
+ help="maximum memory budget in MB for benchmark, default 56000 MB")
+ parser.add_argument(
+ "--start_factor",
+ type=int,
+ default=10,
+ help=
+ "start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 10"
+ )
+ args = parser.parse_args()
+
+ gpt2_benchmark(args.batch_size, args.num_steps, args.sample_points, args.free_memory * 1024**2, args.start_factor)
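
If only a single memory budget is of interest, the pieces used by `_gpt2_benchmark` compose directly without the sweep. A sketch under the same assumptions (single process, one CUDA device, `colossalai.launch` already called as in the demo; the 20 GB budget is arbitrary):

```python
from functools import partial

from bench_utils import GPTLMLoss, bench, gpt2_medium
from demo_gpt2_medium import data_gen

from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor
from colossalai.fx import metainfo_trace, symbolic_trace

model = gpt2_medium()
data, mask = data_gen(8, 1024, 50257, device='meta')[0]
gm = symbolic_trace(model, meta_args={'input_ids': data, 'attention_mask': mask})
gm = metainfo_trace(gm, data, mask)

# solve once for a fixed budget instead of sweeping sample points
gm.graph = CheckpointSolverRotor(gm.graph, free_memory=20 * 1024**3).solve(verbose=False)
peak, step = bench(gm, GPTLMLoss(), partial(data_gen, batch_size=8, seq_len=1024, vocab_size=50257), num_steps=5)
print(f'peak memory: {peak:.3f} MB, step time: {step:.3f} ms')
```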
diff --git a/examples/tutorial/auto_parallel/demo_resnet152.py b/examples/tutorial/auto_parallel/demo_resnet152.py
new file mode 100644
index 000000000..5861371e8
--- /dev/null
+++ b/examples/tutorial/auto_parallel/demo_resnet152.py
@@ -0,0 +1,74 @@
+from argparse import ArgumentParser
+from copy import deepcopy
+from functools import partial
+
+import matplotlib.pyplot as plt
+import torch
+import torch.multiprocessing as mp
+import torchvision.models as tm
+from bench_utils import bench
+
+import colossalai
+from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor
+from colossalai.fx import metainfo_trace, symbolic_trace
+from colossalai.utils import free_port
+
+
+def data_gen(batch_size, shape, device='cuda'):
+ """
+ Generate random data for benchmarking
+ """
+ data = torch.empty(batch_size, *shape, device=device)
+ label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)
+ return (data,), label
+
+
+def _resnet152_benchmark(rank, world_size, port, num_steps):
+ """Resnet152 benchmark
+ This benchmark test the through put of Resnet152 with our activation solver given the memory budget of 95% of
+ maximum GPU memory, and with the batch size of [512, 1024, 2048]
+ """
+ colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+ model = tm.resnet152()
+ gm = symbolic_trace(model)
+ raw_graph = deepcopy(gm.graph)
+ peak_mems, throughputs, batch_sizes = [], [], [512, 1024, 2048]
+ for batch_size in batch_sizes:
+ gm = metainfo_trace(gm, torch.empty(batch_size, 3, 224, 224, device='meta'))
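+ # plan checkpoints against 95% of the free device memory; torch.cuda.mem_get_info() returns (free, total)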
+ solver = CheckpointSolverRotor(gm.graph, free_memory=torch.cuda.mem_get_info()[0] * 0.95)
+ gm.graph = solver.solve()
+ peak_mem, step_time = bench(gm,
+ torch.nn.CrossEntropyLoss(),
+ partial(data_gen, batch_size=batch_size, shape=(3, 224, 224)),
+ num_steps=num_steps)
+ peak_mems.append(peak_mem)
+ throughputs.append(batch_size / step_time * 1.0e3)
+ gm.graph = deepcopy(raw_graph)
+
+ # print results
+ print("===============test summary================")
+ for batch_size, peak_mem, throughput in zip(batch_sizes, peak_mems, throughputs):
+ print(f'batch_size: {batch_size}, peak memory: {peak_mem:.3f} MB, throughput: {throughput:.3f} images/s')
+
+ plt.plot(batch_sizes, throughputs)
+ plt.xlabel("batch size")
+ plt.ylabel("throughput (images/s)")
+ plt.title("ResNet152 benchmark")
+ plt.savefig("resnet152_benchmark.png")
+
+
+def resnet152_benchmark(num_steps):
+ world_size = 1
+ run_func_module = partial(_resnet152_benchmark, world_size=world_size, port=free_port(), num_steps=num_steps)
+ mp.spawn(run_func_module, nprocs=world_size)
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser("ResNet152 Auto Activation Through Put Benchmark")
+ parser.add_argument("--num_steps", type=int, default=5, help="number of test steps for benchmark, default 5")
+ args = parser.parse_args()
+
+ resnet152_benchmark(args.num_steps)
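
As a quick check of the throughput conversion used above (`batch_size / step_time * 1.0e3`, with `step_time` in ms), a worked example with made-up numbers:

```python
batch_size, step_time_ms = 1024, 800.0        # hypothetical measurement
throughput = batch_size / step_time_ms * 1.0e3
print(throughput)                             # 1280.0 images/s
```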
diff --git a/examples/tutorial/auto_parallel/demo_resnet50.py b/examples/tutorial/auto_parallel/demo_resnet50.py
new file mode 100644
index 000000000..4cbd53eba
--- /dev/null
+++ b/examples/tutorial/auto_parallel/demo_resnet50.py
@@ -0,0 +1,107 @@
+from argparse import ArgumentParser
+from functools import partial
+
+import matplotlib.pyplot as plt
+import torch
+import torch.multiprocessing as mp
+import torchvision.models as tm
+from bench_utils import bench_rotor
+
+import colossalai
+from colossalai.fx import metainfo_trace, symbolic_trace
+from colossalai.utils import free_port
+
+
+def data_gen(batch_size, shape, device='cuda'):
+ """
+ Generate random data for benchmarking
+ """
+ data = torch.empty(batch_size, *shape, device=device)
+ label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)
+ return (data,), label
+
+
+def _resnet50_benchmark(rank, world_size, port, batch_size, num_steps, sample_points, free_memory, start_factor):
+ colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+ model = tm.resnet50()
+
+ # trace and benchmark
+ gm = symbolic_trace(model)
+ gm = metainfo_trace(gm, torch.empty(batch_size, 3, 224, 224, device='meta'))
+ budgets, peak_hist, step_hist = bench_rotor(gm,
+ torch.nn.CrossEntropyLoss(),
+ partial(data_gen, batch_size=batch_size, shape=(3, 224, 224)),
+ num_steps=num_steps,
+ sample_points=sample_points,
+ free_memory=free_memory,
+ start_factor=start_factor)
+
+ # print summary
+ print("==============test summary==============")
+ for budget, peak, step in zip(budgets, peak_hist, step_hist):
+ print(f'memory budget: {budget:.3f} MB, peak memory: {peak:.3f} MB, step time: {step:.3f} ms')
+
+ # plot valid results
+ fig, axs = plt.subplots(1, 2, figsize=(16, 8))
+ valid_idx = next(i for i, step in enumerate(step_hist) if step != float('inf'))
+
+ # plot peak memory vs. budget memory
+ axs[0].plot(budgets[valid_idx:], peak_hist[valid_idx:])
+ axs[0].plot([budgets[valid_idx], budgets[-1]], [budgets[valid_idx], budgets[-1]], linestyle='--')
+ axs[0].set_xlabel("Budget Memory (MB)")
+ axs[0].set_ylabel("Peak Memory (MB)")
+ axs[0].set_title("Peak Memory vs. Budget Memory")
+
+ # plot relative step time vs. peak memory
+ axs[1].plot(peak_hist[valid_idx:], [step_time / step_hist[-1] for step_time in step_hist[valid_idx:]])
+ axs[1].plot([peak_hist[valid_idx], peak_hist[-1]], [1.0, 1.0], linestyle='--')
+ axs[1].set_xlabel("Peak Memory (MB)")
+ axs[1].set_ylabel("Relative Step Time")
+ axs[1].set_title("Step Time vs. Peak Memory")
+ axs[1].set_ylim(0.8, 1.5)
+
+ # save plot
+ fig.savefig("resnet50_benchmark.png")
+
+
+def resnet50_benchmark(batch_size, num_steps, sample_points, free_memory, start_factor):
+ world_size = 1
+ run_func_module = partial(_resnet50_benchmark,
+ world_size=world_size,
+ port=free_port(),
+ batch_size=batch_size,
+ num_steps=num_steps,
+ sample_points=sample_points,
+ free_memory=free_memory,
+ start_factor=start_factor)
+ mp.spawn(run_func_module, nprocs=world_size)
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser("ResNet50 Auto Activation Benchmark")
+ parser.add_argument("--batch_size", type=int, default=128, help="batch size for benchmark, default 128")
+ parser.add_argument("--num_steps", type=int, default=5, help="number of test steps for benchmark, default 5")
+ parser.add_argument(
+ "--sample_points",
+ type=int,
+ default=15,
+ help=
+ "number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15"
+ )
+ parser.add_argument("--free_memory",
+ type=int,
+ default=11000,
+ help="maximum memory budget in MB for benchmark, default 11000 MB")
+ parser.add_argument(
+ "--start_factor",
+ type=int,
+ default=4,
+ help=
+ "start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 4"
+ )
+ args = parser.parse_args()
+
+ resnet50_benchmark(args.batch_size, args.num_steps, args.sample_points, args.free_memory * 1024**2,
+ args.start_factor)
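
The `valid_idx` logic in both sweep demos skips leading budgets the solver could not satisfy (recorded with infinite step time) so the plots start at the first feasible point. A minimal illustration with made-up values:

```python
step_hist = [float('inf'), float('inf'), 42.0, 40.0, 39.5]
valid_idx = next(i for i, step in enumerate(step_hist) if step != float('inf'))
print(valid_idx, step_hist[valid_idx:])    # 2 [42.0, 40.0, 39.5]
```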
diff --git a/examples/tutorial/auto_parallel/imgs/gpt2_benchmark.png b/examples/tutorial/auto_parallel/imgs/gpt2_benchmark.png
new file mode 100644
index 000000000..eec121758
Binary files /dev/null and b/examples/tutorial/auto_parallel/imgs/gpt2_benchmark.png differ
diff --git a/examples/tutorial/auto_parallel/imgs/resnet50_benchmark.png b/examples/tutorial/auto_parallel/imgs/resnet50_benchmark.png
new file mode 100644
index 000000000..0208c54fb
Binary files /dev/null and b/examples/tutorial/auto_parallel/imgs/resnet50_benchmark.png differ