diff --git a/colossalai/fx/passes/meta_info_prop.py b/colossalai/fx/passes/meta_info_prop.py index 711439955..5137494ad 100644 --- a/colossalai/fx/passes/meta_info_prop.py +++ b/colossalai/fx/passes/meta_info_prop.py @@ -338,7 +338,7 @@ def metainfo_trace(gm: torch.fx.GraphModule, *args, verbose: bool = False, unit: Returns: torch.fx.GraphModule: The ``GraphModule`` annotated with MetaInfo. """ - device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu') interp = MetaInfoProp(gm.to(device)) if is_compatible_with_meta(): from colossalai.fx.profiler import MetaTensor diff --git a/examples/tutorial/auto_parallel/README.md b/examples/tutorial/auto_parallel/README.md index bed488022..e93a8288b 100644 --- a/examples/tutorial/auto_parallel/README.md +++ b/examples/tutorial/auto_parallel/README.md @@ -15,3 +15,82 @@ export DATA=/path/to/data ```bash colossalai run --nproc_per_node 4 auto_parallel_demo.py ``` + +## Auto Checkpoint Benchmarking + +We prepare three demos for you to test the performance of auto checkpoint. The tests `demo_resnet50.py` and `demo_gpt2_medium.py` show the ability of the solver to search for a checkpoint strategy that fits within the given memory budget. 
+ +The usage of the above two tests is shown below: +```bash +python demo_resnet50.py --help +usage: ResNet50 Auto Activation Benchmark [-h] [--batch_size BATCH_SIZE] [--num_steps NUM_STEPS] [--sample_points SAMPLE_POINTS] [--free_memory FREE_MEMORY] + [--start_factor START_FACTOR] + +optional arguments: + -h, --help show this help message and exit + --batch_size BATCH_SIZE + batch size for benchmark, default 128 + --num_steps NUM_STEPS + number of test steps for benchmark, default 5 + --sample_points SAMPLE_POINTS + number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15 + --free_memory FREE_MEMORY + maximum memory budget in MB for benchmark, default 11000 MB + --start_factor START_FACTOR + start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 4 + +# run with default settings +python demo_resnet50.py + +python demo_gpt2_medium.py --help +usage: GPT2 medium Auto Activation Benchmark [-h] [--batch_size BATCH_SIZE] [--num_steps NUM_STEPS] [--sample_points SAMPLE_POINTS] [--free_memory FREE_MEMORY] + [--start_factor START_FACTOR] + +optional arguments: + -h, --help show this help message and exit + --batch_size BATCH_SIZE + batch size for benchmark, default 8 + --num_steps NUM_STEPS + number of test steps for benchmark, default 5 + --sample_points SAMPLE_POINTS + number of sample points for benchmark from start memory budget to maximum memory budget (free_memory), default 15 + --free_memory FREE_MEMORY + maximum memory budget in MB for benchmark, default 56000 MB + --start_factor START_FACTOR + start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor, default 10 + +# run with default settings +python demo_gpt2_medium.py +``` + +Here are some results for your reference: + +### ResNet 50 +![](./imgs/resnet50_benchmark.png) + +### GPT2 Medium +![](./imgs/gpt2_benchmark.png) + +We also prepare the demo `demo_resnet152.py` to demonstrate 
the benefit of auto activation with large batch sizes. The usage is as follows: +```bash +python demo_resnet152.py --help +usage: ResNet152 Auto Activation Through Put Benchmark [-h] [--num_steps NUM_STEPS] + +optional arguments: + -h, --help show this help message and exit + --num_steps NUM_STEPS + number of test steps for benchmark, default 5 + +# run with default settings +python demo_resnet152.py +``` + +Here are some results from our runs for your reference: +```bash +===============test summary================ +batch_size: 512, peak memory: 73314.392 MB, through put: 254.286 images/s +batch_size: 1024, peak memory: 73316.216 MB, through put: 397.608 images/s +batch_size: 2048, peak memory: 72927.837 MB, through put: 277.429 images/s +``` + +The above tests will output the test summary and a plot of the benchmarking results. diff --git a/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb b/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb deleted file mode 100644 index cacf5d5f3..000000000 --- a/examples/tutorial/auto_parallel/auto_ckpt_demo.ipynb +++ /dev/null @@ -1,878 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/lcsjy/.conda/envs/autoparallel/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/10/22 18:04:14] INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:1 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/10/22 18:04:14]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m1\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:1 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m1\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:2 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m2\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:2 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m2\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:3 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m3\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:3 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m3\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:4 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m4\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:4 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m4\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:5 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m5\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:5 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m5\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:6 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m6\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:6 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m6\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:7 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m7\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:7 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m7\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Added key:                    \n",
-       "                             store_based_barrier_key:8 to store for rank: 0                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Added key: \n", - "\u001b[2;36m \u001b[0m store_based_barrier_key:\u001b[1;36m8\u001b[0m to store for rank: \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - torch.distributed.distributed_c10d - INFO: Rank 0: Completed store-based \n",
-       "                             barrier for key:store_based_barrier_key:8 with 1 nodes.                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - torch.distributed.distributed_c10d - INFO: Rank \u001b[1;36m0\u001b[0m: Completed store-based \n", - "\u001b[2;36m \u001b[0m barrier for key:store_based_barrier_key:\u001b[1;36m8\u001b[0m with \u001b[1;36m1\u001b[0m nodes. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO:                                                       \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/context/parallel_context.py:521 set_device          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/context/\u001b[0m\u001b[95mparallel_context.py\u001b[0m:\u001b[1;36m521\u001b[0m set_device \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO: process rank 0 is bound to device 0                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: process rank \u001b[1;36m0\u001b[0m is bound to device \u001b[1;36m0\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO:                                                       \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/context/parallel_context.py:557 set_seed            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/context/\u001b[0m\u001b[95mparallel_context.py\u001b[0m:\u001b[1;36m557\u001b[0m set_seed \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO: initialized seed on rank 0, numpy: 1024, python       \n",
-       "                             random: 1024, ParallelMode.DATA: 1024, ParallelMode.TENSOR: 1024,the default parallel \n",
-       "                             seed is ParallelMode.DATA.                                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: initialized seed on rank \u001b[1;36m0\u001b[0m, numpy: \u001b[1;36m1024\u001b[0m, python \n", - "\u001b[2;36m \u001b[0m random: \u001b[1;36m1024\u001b[0m, ParallelMode.DATA: \u001b[1;36m1024\u001b[0m, ParallelMode.TENSOR: \u001b[1;36m1024\u001b[0m,the default parallel \n", - "\u001b[2;36m \u001b[0m seed is ParallelMode.DATA. \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO: /home/lcsjy/ColossalAI/colossalai/initialize.py:117   \n",
-       "                             launch                                                                                \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: \u001b[35m/home/lcsjy/ColossalAI/colossalai/\u001b[0m\u001b[95minitialize.py\u001b[0m:\u001b[1;36m117\u001b[0m \n", - "\u001b[2;36m \u001b[0m launch \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     colossalai - colossalai - INFO: Distributed environment is initialized, data parallel \n",
-       "                             size: 1, pipeline parallel size: 1, tensor parallel size: 1                           \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m colossalai - colossalai - INFO: Distributed environment is initialized, data parallel \n", - "\u001b[2;36m \u001b[0m size: \u001b[1;36m1\u001b[0m, pipeline parallel size: \u001b[1;36m1\u001b[0m, tensor parallel size: \u001b[1;36m1\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import time\n", - "import torchvision.models as tm\n", - "import torch\n", - "import colossalai\n", - "from colossalai.fx import symbolic_trace, metainfo_trace\n", - "from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor\n", - "from functools import partial\n", - "from colossalai.utils import free_port\n", - "\n", - "from bench_utils import bench, bench_rotor\n", - "import matplotlib.pyplot as plt\n", - "\n", - "colossalai.launch(config={}, rank=0, world_size=1, host='localhost', port=free_port(), backend='nccl')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ResNet152 with batch size = 512 fails" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(78990.4404296875, inf)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def data_gen(batch_size, shape, device='cuda'):\n", - " data = torch.empty(batch_size, *shape, device=device)\n", - " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n", - " return {'x': data}, label\n", - "\n", - "model = tm.resnet152()\n", - "gm = symbolic_trace(model)\n", - "gm = metainfo_trace(gm, torch.empty(512, 3, 224, 224, device='meta'))\n", - "bench(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=512, shape=(3, 224, 224)), num_steps=5)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ResNet152 with batch size = 2048 succeeds " - ] - }, - { - "cell_type": 
"code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(74495.8486328125, 5634.262561798096)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def data_gen(batch_size, shape, device='cuda'):\n", - " data = torch.empty(batch_size, *shape, device=device)\n", - " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n", - " return {'x': data}, label\n", - "\n", - "model = tm.resnet152()\n", - "gm = symbolic_trace(model)\n", - "gm = metainfo_trace(gm, torch.empty(2048, 3, 224, 224, device='meta'))\n", - "solver = CheckpointSolverRotor(gm.graph, free_memory=torch.cuda.mem_get_info(device=0)[0] * 0.95)\n", - "gm.graph = solver.solve()\n", - "bench(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=2048, shape=(3, 224, 224)), num_steps=5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Benchmarking on ResNet18" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/10/22 18:04:20] WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/10/22 18:04:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/10/22 18:04:21] WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/10/22 18:04:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/10/22 18:04:22] WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/10/22 18:04:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/10/22 18:04:23] WARNING  colossalai - colossalai - WARNING:                                                    \n",
-       "                             /home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py:82    \n",
-       "                             solve                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/10/22 18:04:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: \n", - "\u001b[2;36m \u001b[0m \u001b[35m/home/lcsjy/ColossalAI/colossalai/auto_parallel/checkpoint/\u001b[0m\u001b[95mckpt_solver_rotor.py\u001b[0m:\u001b[1;36m82\u001b[0m \n", - "\u001b[2;36m \u001b[0m solve \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this     \n",
-       "                             chain from index 0 to 14 with memory 500                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m colossalai - colossalai - WARNING: Checkpoint solver failed: Can not process this \n", - "\u001b[2;36m \u001b[0m chain from index \u001b[1;36m0\u001b[0m to \u001b[1;36m14\u001b[0m with memory \u001b[1;36m500\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def data_gen(batch_size, shape, device='cuda'):\n", - " data = torch.empty(batch_size, *shape, device=device)\n", - " label = torch.empty(batch_size, dtype=torch.long, device=device).random_(1000)\n", - " return (data, ), label\n", - "\n", - "model = tm.resnet18()\n", - "gm = symbolic_trace(model)\n", - "gm = metainfo_trace(gm, torch.empty(128, 3, 224, 224, device='meta'))\n", - "peak_hist, step_hist = bench_rotor(gm, torch.nn.CrossEntropyLoss(), partial(data_gen, batch_size=128, shape=(3, 224, 224)), num_steps=5, sample_points=20, free_memory=2700 * 1024**2)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAArEAAAKTCAYAAAAOvlAQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvJElEQVR4nO3df5BV9X34/9eVZRfF5TaKK4Ir/kjQRUwQZSBkI0EjP0TBZYaiYxQCsbEBCST1E62xHW3aJTTNdGIaCXHdsaEqo+JWhRjZjiAO2hJQEycUcKMsVQzByK4UC9E93z8y3m+uu/zYVYQ3PB4zZ+bes+/z4+57Dnl6e/Y0l2VZFgAAkJBjDvUJAABAZ4lYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEhOyaE+gY9TW1tbvP7661FeXh65XO5Qnw4AAB+QZVm8/fbb0bdv3zjmmL1/33pURezrr78elZWVh/o0AADYjy1btsSpp566158fVRFbXl4eEX/8pfTq1esQnw0AAB/U2toalZWVhW7bm6MqYt+/haBXr14iFgDgMLa/Wz/9YRcAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELkLC2tiza2rJDfRoAHzsRC5CotrYszvzrZXHmXy8TssBRR8QCJOr3u/Z0+BrgaCBiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAktOpiD399NMjl8u1W2bOnFkYs379+pgwYULk8/koLy+P4cOHR3Nzc+Hnb7zxRlx77bXRp0+f6NmzZwwZMiQeeuih/R77Rz/6UZxxxhnRo0ePuOCCC2LVqlWdOXUAAI4gnYrYNWvWxNatWwvL8uXLIyJi8uTJERHR1NQU1dXVcc4558SKFSvixRdfjNtuuy169OhR2Me1114bGzZsiEcffTR+9atfxaRJk2LKlCnx/PPP7/W4ixcvjjlz5sStt94azz//fHz+85+PcePGFcUxAABHj1yWZVlXN54zZ048/vjjsWnTpsjlcnHVVVdF9+7d46c//eletzn++OPjrrvuimuvvbaw7sQTT4z58+fHjBkzOtxm2LBhMWTIkLjrrrsK66qqquLKK6+M2travR5r9+7dsXv37sL71tbWqKysjJaWlujVq1dnPirAYWf7zt1x4XcaIyLiF9/+YvQ+vuwQnxHAh9fa2hr5fH6/vdble2L37NkTixYtiunTp0cul4u2trZYunRpDBgwIMaMGRMVFRUxbNiwaGhoKNquuro6Fi9eHL///e+jra0tHnjggdi9e3d84Qtf2Otx1q5dG6NHjy5aP3r06Fi9evU+z7G2tjby+Xxhqays7OrHBQDgMNLliG1oaIgdO3bEtGnTIiJi27ZtsXPnzpg3b16MHTs2nnzyyaipqYlJkybFypUrC9stXrw43n333TjxxBOjrKwsvvrVr8YjjzwSZ511VofH2b59e7z33ntx8sknF60/+eST44033tjnOd5yyy3R0tJSWLZs2dLVjwsAwGGkpKsb1tXVxbhx46Jv374REdHW1hYRERMnToy5c+dGRMTgwYNj9erVsWDBghg5cmRERHz729+Ot956KxobG6N3797R0NAQkydPjlWrVsV555231+Plcrmi91mWtVv3QWVlZVFW5v+8BgBwpOlSxG7evDkaGxtjyZIlhXW9e/eOkpKSGDhwYNHYqqqqeOaZZyLij3/49cMf/jBeeumlOPfccyMi4jOf+UysWrUq/uVf/iUWLFjQ7li9e/eObt26tfvWddu2be2+nQUA4OjQpdsJ6uvro6KiIsaPH19YV1paGkOHDo0NGzYUjd24cWP0798
/IiJ27dr1x4MeU3zYbt26Fb7J/aDS0tK44IILCk9CeN/y5ctjxIgRXTl9AAAS1+lvYtva2qK+vj6mTp0aJSXFm990000xZcqUuOiii2LUqFHxxBNPxGOPPRYrVqyIiIhzzjknPvnJT8ZXv/rV+N73vhcnnnhiNDQ0xPLly+Pxxx8v7OeSSy6JmpqamDVrVkREfOMb34hrr702LrzwwvjsZz8bCxcujObm5rjhhhs+xEcHACBVnY7YxsbGaG5ujunTp7f7WU1NTSxYsCBqa2tj9uzZcfbZZ8fDDz8c1dXVERHRvXv3WLZsWdx8881xxRVXxM6dO+OTn/xk3HvvvXHZZZcV9tPU1BTbt28vvJ8yZUq8+eabcccdd8TWrVtj0KBBsWzZssI3vAAAHF0+1HNiU3Ogzx0DSIHnxAJHooP+nFgAADhURCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAiTq2O7dOnwNcDQQsQCJyuU6fg1wNBCxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELECi2rKOXwMcDUQsQKLe+t89Hb4GOBqIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWIBEtWVZh68BjgYiFiBRv//fPR2+BjgaiFgAAJIjYgEASE6nIvb000+PXC7Xbpk5c2ZhzPr162PChAmRz+ejvLw8hg8fHs3NzRER8eqrr3a4fS6XiwcffHCvx3333Xfj29/+dpxxxhlx7LHHxplnnhl33HFHtLW1dfFjAwCQspLODF6zZk289957hfcvvfRSXHrppTF58uSIiGhqaorq6uqYMWNG3H777ZHP52P9+vXRo0ePiIiorKyMrVu3Fu1z4cKFMX/+/Bg3btxej/vd7343FixYEPfee2+ce+658Ytf/CK+/OUvRz6fj69//eud+QgAABwBOhWxJ510UtH7efPmxVlnnRUjR46MiIhbb701Lrvsspg/f35hzJlnnll43a1bt+jTp0/RPh555JGYMmVKHH/88Xs97rPPPhsTJ06M8ePHR8QfvxG+//774xe/+EVnTh8AgCNEl++J3bNnTyxatCimT58euVwu2traYunSpTFgwIAYM2ZMVFRUxLBhw6KhoWGv+1i7dm288MILMWPGjH0eq7q6Ov7jP/4jNm7cGBERL774YjzzzDNx2WWX7XO73bt3R2tra9ECAED6uhyxDQ0NsWPHjpg2bVpERGzbti127twZ8+bNi7Fjx8aTTz4ZNTU1MWnSpFi5cmWH+6irq4uqqqoYMWLEPo/1rW99K66++uo455xzonv37nH++efHnDlz4uqrr97ndrW1tZHP5wtLZWVllz4rAACHly5HbF1dXYwbNy769u0bEVH4I6uJEyfG3LlzY/DgwXHzzTfH5ZdfHgsWLGi3/TvvvBP33Xfffr+FjYhYvHhxLFq0KO67775Yt25d3HvvvfG
9730v7r333n1ud8stt0RLS0th2bJlSxc+KQAAh5tO3RP7vs2bN0djY2MsWbKksK53795RUlISAwcOLBpbVVUVzzzzTLt9PPTQQ7Fr16647rrr9nu8m266KW6++ea46qqrIiLivPPOi82bN0dtbW1MnTp1r9uVlZVFWVnZgX4sAAAS0aVvYuvr66OioqLwh1YREaWlpTF06NDYsGFD0diNGzdG//792+2jrq4uJkyY0O6PxTqya9euOOaY4lPt1q2bR2wBR7UTepZ2+BrgaNDpb2Lb2tqivr4+pk6dGiUlxZvfdNNNMWXKlLjoooti1KhR8cQTT8Rjjz0WK1asKBr38ssvx9NPPx3Lli3r8BiXXHJJ1NTUxKxZsyIi4oorroi///u/j9NOOy3OPffceP755+P73/9+TJ8+vbOnD3DEOCaX6/A1wNGg0xHb2NgYzc3NHQZkTU1NLFiwIGpra2P27Nlx9tlnx8MPPxzV1dVF4+65557o169fjB49usNjNDU1xfbt2wvv77zzzrjtttvia1/7Wmzbti369u0bX/3qV+Nv/uZvOnv6AAAcAXJZlmWH+iQ+Lq2trZHP56OlpSV69ep1qE8H4EPZ8vtd8fn5T0VExKr/NyoqTzjuEJ8RwId3oL3W5acTAADAoSJiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5nYrY008/PXK5XLtl5syZhTHr16+PCRMmRD6fj/Ly8hg+fHg0NzdHRMSrr77a4fa5XC4efPDBfR77tddeiy996Utx4oknxnHHHReDBw+OtWvXduEjAwCQupLODF6zZk289957hfcvvfRSXHrppTF58uSIiGhqaorq6uqYMWNG3H777ZHP52P9+vXRo0ePiIiorKyMrVu3Fu1z4cKFMX/+/Bg3btxej/vWW2/F5z73uRg1alT87Gc/i4qKimhqaoo/+7M/68zpAwBwhOhUxJ500klF7+fNmxdnnXVWjBw5MiIibr311rjsssti/vz5hTFnnnlm4XW3bt2iT58+Rft45JFHYsqUKXH88cfv9bjf/e53o7KyMurr6wvrTj/99M6cOgAAR5Au3xO7Z8+eWLRoUUyfPj1yuVy0tbXF0qVLY8CAATFmzJioqKiIYcOGRUNDw173sXbt2njhhRdixowZ+zzWo48+GhdeeGFMnjw5Kioq4vzzz4+f/OQn+z3H3bt3R2tra9ECAED6uhyxDQ0NsWPHjpg2bVpERGzbti127twZ8+bNi7Fjx8aTTz4ZNTU1MWnSpFi5cmWH+6irq4uqqqoYMWLEPo/1m9/8Ju6666741Kc+FT//+c/
jhhtuiNmzZ8e//uu/7nO72trayOfzhaWysrJLnxUAgMNLLsuyrCsbjhkzJkpLS+Oxxx6LiIjXX389+vXrF1dffXXcd999hXETJkyInj17xv3331+0/TvvvBOnnHJK3HbbbfHNb35zn8cqLS2NCy+8MFavXl1YN3v27FizZk08++yze91u9+7dsXv37sL71tbWqKysjJaWlujVq1enPi/A4WbL73fF5+c/FRERq/7fqKg84bhDfEYAH15ra2vk8/n99lqXvondvHlzNDY2xle+8pXCut69e0dJSUkMHDiwaGxVVVXh6QR/6qGHHopdu3bFddddt9/jnXLKKQe83z9VVlYWvXr1KloAAEhflyK2vr4+KioqYvz48YV1paWlMXTo0NiwYUPR2I0bN0b//v3b7aOuri4mTJjQ7o/FOvK5z33ugPcLAMCRr1NPJ4iIaGtri/r6+pg6dWqUlBRvftNNN8WUKVPioosuilGjRsUTTzwRjz32WKxYsaJo3MsvvxxPP/10LFu2rMNjXHLJJVFTUxOzZs2KiIi5c+fGiBEj4h/+4R/iz//8z+O//uu/YuHChbFw4cLOnj4AAEeATn8T29jYGM3NzTF9+vR2P6upqYkFCxbE/Pnz47zzzou77747Hn744aiuri4ad88990S/fv1i9OjRHR6jqakptm/fXng/dOjQeOSRR+L++++PQYMGxd/93d/FP//zP8c111zT2dMHAOAI0OU/7ErRgd4oDJACf9gFHIkO6h92AQDAoSRiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYAgOSIWAAAkiNiAQBIjogFACA5IhYgUZ/oWdrha4CjgYgFSNQxuY5fAxwNRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMkRsQAAJEfEAgCQHBELAEByRCwAAMnpVMSefvrpkcvl2i0zZ84sjFm/fn1MmDAh8vl8lJeXx/Dhw6O5uTkiIl599dUOt8/lcvHggw8e0DnU1tZGLpeLOXPmdObUAQA4gpR0ZvCaNWvivffeK7x/6aWX4tJLL43JkydHRERTU1NUV1fHjBkz4vbbb498Ph/r16+PHj16REREZWVlbN26tWifCxcujPnz58e4ceMO6PgLFy6MT3/60505bQAAjjCditiTTjqp6P28efPirLPOipEjR0ZExK233hqXXXZZzJ8/vzDmzDPPLLzu1q1b9OnTp2gfjzzySEyZMiWOP/74fR57586dcc0118RPfvKT+M53vtOZ0wYA4AjT5Xti9+zZE4sWLYrp06dHLpeLtra2WLp0aQwYMCDGjBkTFRUVMWzYsGhoaNjrPtauXRsvvPBCzJgxY7/HmzlzZowfPz6++MUvHvA57t69O1pbW4sWAADS1+WIbWhoiB07dsS0adMiImLbtm2xc+fOmDdvXowdOzaefPL
JqKmpiUmTJsXKlSs73EddXV1UVVXFiBEj9nmsBx54INatWxe1tbWdOsfa2trI5/OFpbKyslPbAwBweOpyxNbV1cW4ceOib9++ERHR1tYWERETJ06MuXPnxuDBg+Pmm2+Oyy+/PBYsWNBu+3feeSfuu+++/X4Lu2XLlvj6178eixYtKtxbe6BuueWWaGlpKSxbtmzp1PYAAByeOnVP7Ps2b94cjY2NsWTJksK63r17R0lJSQwcOLBobFVVVTzzzDPt9vHQQw/Frl274rrrrtvnsdauXRvbtm2LCy64oLDuvffei6effjp++MMfxu7du6Nbt24dbltWVhZlZWWd+WgAACSgSxFbX18fFRUVMX78+MK60tLSGDp0aGzYsKFo7MaNG6N///7t9lFXVxcTJkxo98diH3TJJZfEr371q6J1X/7yl+Occ86Jb33rW3sNWAAAjlydjti2traor6+PqVOnRklJ8eY33XRTTJkyJS666KIYNWpUPPHEE/HYY4/FihUrisa9/PLL8fTTT8eyZcs6PMYll1wSNTU1MWvWrCgvL49BgwYV/bxnz55x4okntlsPAMDRodP3xDY2NkZzc3NMnz693c9qampiwYIFMX/+/DjvvPPi7rvvjocffjiqq6uLxt1zzz3Rr1+/GD16dIfHaGpqiu3bt3f21AAAOErksizLDvVJfFxaW1sjn89HS0tL9OrV61CfDsCHsmvPuzHwb34eERG/vmNMHFfapTvEAA4rB9prXX46AQAAHCoiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACS06mIPf300yOXy7VbZs6cWRizfv36mDBhQuTz+SgvL4/hw4dHc3NzRES8+uqrHW6fy+XiwQcf3Otxa2trY+jQoVFeXh4VFRVx5ZVXxoYNG7r4kQEASF2nInbNmjWxdevWwrJ8+fKIiJg8eXJERDQ1NUV1dXWcc845sWLFinjxxRfjtttuix49ekRERGVlZdH2W7dujdtvvz169uwZ48aN2+txV65cGTNnzoznnnsuli9fHu+++26MHj06/vd//7ernxsAgITlsizLurrxnDlz4vHHH49NmzZFLpeLq666Krp37x4//elPD3gf559/fgwZMiTq6uoOeJvf/e53UVFREStXroyLLrrogLdrbW2NfD4fLS0t0atXrwPeDuBwtGvPuzHwb34eERG/vmNMHFdacojPCODDO9Be6/I9sXv27IlFixbF9OnTI5fLRVtbWyxdujQGDBgQY8aMiYqKihg2bFg0NDTsdR9r166NF154IWbMmNGpY7e0tERExAknnLDPcbt3747W1ta
iBQCA9HU5YhsaGmLHjh0xbdq0iIjYtm1b7Ny5M+bNmxdjx46NJ598MmpqamLSpEmxcuXKDvdRV1cXVVVVMWLEiAM+bpZl8Y1vfCOqq6tj0KBB+xxbW1sb+Xy+sFRWVh7wcQAAOHx1OWLr6upi3Lhx0bdv34iIaGtri4iIiRMnxty5c2Pw4MFx8803x+WXXx4LFixot/0777wT9913X6e/hZ01a1b88pe/jPvvv3+/Y2+55ZZoaWkpLFu2bOnUsQAAODx16QaqzZs3R2NjYyxZsqSwrnfv3lFSUhIDBw4sGltVVRXPPPNMu3089NBDsWvXrrjuuusO+Lg33nhjPProo/H000/Hqaeeut/xZWVlUVZWdsD7BwAgDV2K2Pr6+qioqIjx48cX1pWWlsbQoUPbPfpq48aN0b9//3b7qKuriwkTJsRJJ5203+NlWRY33nhjPPLII7FixYo444wzunLaAAAcITodsW1tbVFfXx9Tp06NkpLizW+66aaYMmVKXHTRRTFq1Kh44okn4rHHHosVK1YUjXv55Zfj6aefjmXLlnV4jEsuuSRqampi1qxZERExc+bMuO++++Lf//3fo7y8PN54442IiMjn83Hsscd29iMAAJC4Tt8T29jYGM3NzTF9+vR2P6upqYkFCxbE/Pnz47zzzou77747Hn744aiuri4ad88990S/fv1i9OjRHR6jqakptm/fXnh/1113RUtLS3zhC1+IU045pbAsXry4s6cPAMAR4EM9JzY1nhMLHEk8JxY4Eh3058QCAMChImIBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDklh/oEAOiaY7t3i1/fMabwGuBoImIBEpXL5eK4Uv+MA0cntxMAAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJAcEQsAQHJELAAAyRGxAAAkR8QCAJCcTkXs6aefHrlcrt0yc+bMwpj169fHhAkTIp/PR3l5eQwfPjyam5sjIuLVV1/tcPtcLhcPPvjgPo/9ox/9KM4444zo0aNHXHDBBbFq1aoufFwAAI4EnYrYNWvWxNatWwvL8uXLIyJi8uTJERHR1NQU1dXVcc4558SKFSvixRdfjNtuuy169OgRERGVlZVF22/dujVuv/326NmzZ4wbN26vx128eHHMmTMnbr311nj++efj85//fIwbN64QxwAAHF1yWZZlXd14zpw58fjjj8emTZsil8vFVVddFd27d4+f/vSnB7yP888/P4YMGRJ1dXV7HTNs2LAYMmRI3HXXXYV1VVVVceWVV0Ztbe0BH6u1tTXy+Xy0tLREr169Dng7AAA+Hgfaa12+J3bPnj2xaNGimD59euRyuWhra4ulS5fGgAEDYsyYMVFRURHDhg2LhoaGve5j7dq18cILL8SMGTP2eZy1a9f
G6NGji9aPHj06Vq9evc9z3L17d7S2thYtAACkr8sR29DQEDt27Ihp06ZFRMS2bdti586dMW/evBg7dmw8+eSTUVNTE5MmTYqVK1d2uI+6urqoqqqKESNG7PU427dvj/feey9OPvnkovUnn3xyvPHGG/s8x9ra2sjn84WlsrKycx8SAIDDUpcjtq6uLsaNGxd9+/aNiIi2traIiJg4cWLMnTs3Bg8eHDfffHNcfvnlsWDBgnbbv/POO3Hfffft81vYP5XL5YreZ1nWbt0H3XLLLdHS0lJYtmzZckDHAgDg8FbSlY02b94cjY2NsWTJksK63r17R0lJSQwcOLBobFVVVTzzzDPt9vHQQw/Frl274rrrrtvnsXr37h3dunVr963rtm3b2n07+0FlZWVRVla2v48DAEBiuhSx9fX1UVFREePHjy+sKy0tjaFDh8aGDRuKxm7cuDH69+/fbh91dXUxYcKEOOmkk/Z5rNLS0rjgggti+fLlUVNTU1i/fPnymDhxYqfO+/2/YXNvLADA4en9TtvvsweyTnrvvfey0047LfvWt77V7mdLlizJunfvni1cuDDbtGlTduedd2bdunXLVq1aVTRu06ZNWS6Xy372s591eIyLL744u/POOwvvH3jggax79+5ZXV1d9utf/zqbM2dO1rNnz+zVV1/t1Llv2bIliwiLxWKxWCwWy2G+bNmyZZ9d1+lvYhsbG6O5uTmmT5/e7mc1NTWxYMGCqK2tjdmzZ8fZZ58dDz/8cFRXVxeNu+eee6Jfv37tnjjwvqampti+fXvh/ZQpU+LNN9+MO+64I7Zu3RqDBg2KZcuWdfgN77707ds3tmzZEuXl5fu9nzYlra2tUVlZGVu2bPHosMOUOUqDeTr8maM0mKfD3+E8R1mWxdtvv134u6u9+VDPieXw4Pm3hz9zlAbzdPgzR2kwT4e/I2GOuvx0AgAAOFRELAAAyRGxR4CysrL427/9W48TO4yZozSYp8OfOUqDeTr8HQlz5J5YAACS45tYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IPgdra2hg6dGiUl5dHRUVFXHnllbFhw4aiMdOmTYtcLle0DB8+vGjMF77whXZjrrrqqqIxb731Vlx77bWRz+cjn8/HtddeGzt27Cga09zcHFdccUX07NkzevfuHbNnz449e/YclM+eigOZo4iI9evXx4QJEyKfz0d5eXkMHz48mpubCz/fvXt33HjjjdG7d+/o2bNnTJgwIf7nf/6naB/mqOs+qnlyLR08BzJHH/zdv7/84z/+Y2GMa+ng+qjmybV08BzIHO3cuTNmzZoVp556ahx77LFRVVUVd911V9GYI+payvjYjRkzJquvr89eeuml7IUXXsjGjx+fnXbaadnOnTsLY6ZOnZqNHTs227p1a2F58803i/YzcuTI7Prrry8as2PHjqIxY8eOzQYNGpStXr06W716dTZo0KDs8ssvL/z83XffzQYNGpSNGjUqW7duXbZ8+fKsb9++2axZsw7uL+EwdyBz9PLLL2cnnHBCdtNNN2Xr1q3Lmpqasscffzz77W9/Wxhzww03ZP369cuWL1+erVu3Lhs1alT2mc98Jnv33XcLY8xR131U8+RaOngOZI7+9Pe+devW7J577slyuVzW1NRUGONaOrg+qnlyLR08BzJHX/nKV7Kzzjore+qpp7JXXnkl+/GPf5x169Yta2hoKIw5kq4lEXsY2LZtWxYR2cqVKwvrpk6dmk2cOHGf240cOTL7+te/vtef//rXv84iInvuuecK65599tksIrL//u//zrIsy5YtW5Ydc8wx2WuvvVYYc//992dlZWVZS0tL1z7QEaijOZoyZUr2pS99aa/b7NixI+vevXv2wAMPFNa99tpr2THHHJM98cQTWZaZo49aV+Ypy1xLH6eO5uiDJk6cmF188cWF966lj19X5inLXEsfp47m6Nxzz83uuOOOonFDhgzJvv3tb2dZduRdS24
nOAy0tLRERMQJJ5xQtH7FihVRUVERAwYMiOuvvz62bdvWbtt/+7d/i969e8e5554bf/VXfxVvv/124WfPPvts5PP5GDZsWGHd8OHDI5/Px+rVqwtjBg0aFH379i2MGTNmTOzevTvWrl37kX7OlH1wjtra2mLp0qUxYMCAGDNmTFRUVMSwYcOioaGhsM3atWvjD3/4Q4wePbqwrm/fvjFo0KCi3785+uh0ZZ7e51r6eOzt37v3/fa3v42lS5fGjBkzCutcSx+/rszT+1xLH4+O5qi6ujoeffTReO211yLLsnjqqadi48aNMWbMmIg48q6lko/tSHQoy7L4xje+EdXV1TFo0KDC+nHjxsXkyZOjf//+8corr8Rtt90WF198caxdu7bw/yLummuuiTPOOCP69OkTL730Utxyyy3x4osvxvLlyyMi4o033oiKiop2x6yoqIg33nijMObkk08u+vknPvGJKC0tLYw52nU0R9u2bYudO3fGvHnz4jvf+U5897vfjSeeeCImTZoUTz31VIwcOTLeeOONKC0tjU984hNF+zv55JOLfv/m6KPR1XmKcC19XPb2792fuvfee6O8vDwmTZpUWOda+nh1dZ4iXEsfl73N0Q9+8IO4/vrr49RTT42SkpI45phj4u67747q6uqIOPKuJRF7iM2aNSt++ctfxjPPPFO0fsqUKYXXgwYNigsvvDD69+8fS5cuLfyjcf311xeN+dSnPhUXXnhhrFu3LoYMGRIRf7wR/4OyLCtafyBjjmYdzVFbW1tEREycODHmzp0bERGDBw+O1atXx4IFCwpx1JGu/P7N0f59mHlyLX089vbv3Z+655574pprrokePXrsd3+upYPjw8yTa+njsbc5+sEPfhDPPfdcPProo9G/f/94+umn42tf+1qccsop8cUvfnGv+0v1WnI7wSF04403xqOPPhpPPfVUnHrqqfsce8opp0T//v1j06ZNex0zZMiQ6N69e2FMnz594re//W27cb/73e8K/wXVp0+fdv/V9NZbb8Uf/vCHdv+VdTTa2xz17t07SkpKYuDAgUXjq6qqCn/13qdPn9izZ0+89dZbRWO2bdtW9Ps3Rx/eh5mnjriWPnoH8u/dqlWrYsOGDfGVr3ylaL1r6ePzYeapI66lj97e5uidd96Jv/7rv47vf//7ccUVV8SnP/3pmDVrVkyZMiW+973vRcQReC19bHffUtDW1pbNnDkz69u3b7Zx48YD2mb79u1ZWVlZdu+99+51zK9+9auim7zfvzn7P//zPwtjnnvuuQ5vzn799dcLYx544IGj/gb6A5mjz372s+3+YOjKK6/Mrr766izL/v8b6BcvXlz4+euvv97hDfTmqGs+innqiGvpo9OZf++mTp2aXXDBBe3Wu5YOvo9injriWvro7G+OWlpasojIli1bVrT+L/7iL7JLL700y7Ij71oSsYfAX/7lX2b5fD5bsWJF0WNIdu3alWVZlr399tvZN7/5zWz16tXZK6+8kj311FPZZz/72axfv35Za2trlmV/fGzQ7bffnq1ZsyZ75ZVXsqVLl2bnnHNOdv7557d7TManP/3p7Nlnn82effbZ7LzzzuvwMRmXXHJJtm7duqyxsTE79dRTj/pHmexvjrIsy5YsWZJ17949W7hwYbZp06bszjvvzLp165atWrWqMOaGG27ITj311KyxsTFbt25ddvHFF3f4KBNz1DUfxTy5lg6uA5mjLPvj/wAfd9xx2V133dXhflxLB9dHMU+upYPrQOZo5MiR2bnnnps99dRT2W9+85usvr4+69GjR/ajH/2oMOZIupZE7CEQER0u9fX1WZZl2a5du7LRo0dnJ510Uta9e/fstNNOy6ZOnZo1NzcX9tHc3JxddNFF2QknnJCVlpZmZ511VjZ79ux2z5J98803s2uuuSYrLy/PysvLs2uuuSZ76623isZs3rw5Gz9+fHbsscdmJ5xwQjZr1qzs//7v/w72r+Gwtr85el9
dXV32yU9+MuvRo0f2mc98puhZfFmWZe+88042a9as7IQTTsiOPfbY7PLLLy+axywzRx/GRzFPrqWD60Dn6Mc//nF27LHHtnum6PtcSwfXRzFPrqWD60DmaOvWrdm0adOyvn37Zj169MjOPvvs7J/+6Z+ytra2wpgj6VrKZVmWfbQ3KAAAwMHlD7sAAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5IhYAACSI2IBAEiOiAUAIDkiFgCA5Px/+stDv7Sfnq4AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.figure(figsize=(8, 8))\n", - "plt.plot(peak_hist, step_hist)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[540.0,\n", - " 653.6842105263158,\n", - " 767.3684210526316,\n", - " 881.0526315789474,\n", - " 994.7368421052631,\n", - " 1108.421052631579,\n", - " 1222.1052631578948,\n", - " 1335.7894736842104,\n", - " 1449.4736842105262,\n", - " 1563.157894736842,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625,\n", - " 26711.86572265625]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "peak_hist" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.6 ('autoparallel': conda)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "cc0ad6865167fb9a52c12f0fd0c8203c9a7690797bfee612a871d56b9d2024ce" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/tutorial/auto_parallel/bench_utils.py b/examples/tutorial/auto_parallel/bench_utils.py index 365e07e21..d9d656b85 100644 --- a/examples/tutorial/auto_parallel/bench_utils.py +++ b/examples/tutorial/auto_parallel/bench_utils.py @@ -1,16 +1,33 @@ import time +from copy import deepcopy from functools import partial from typing import Callable, Tuple import numpy as np import torch +import torch.nn as nn import torchvision.models as tm +from 
transformers import GPT2Config, GPT2LMHeadModel from colossalai.auto_parallel.checkpoint import CheckpointSolverRotor from colossalai.fx import metainfo_trace -def bench(gm: torch.fx.GraphModule, criterion: torch.nn.Module, data_gen: Callable, num_steps: int = 5): +def bench(gm: torch.fx.GraphModule, + criterion: torch.nn.Module, + data_gen: Callable, + num_steps: int = 5) -> Tuple[int, int]: + """Benchmarking a given graph module + + Args: + gm (torch.fx.GraphModule): The graph module to benchmark. + criterion (torch.nn.Module): Loss function. + data_gen (Callable): Data generator. + num_steps (int, optional): Number of test steps. Defaults to 5. + + Returns: + Tuple[int, int]: peak memory in MB and step time in MS. + """ gm.train() gm.cuda() step_time = float('inf') @@ -39,7 +56,8 @@ def bench(gm: torch.fx.GraphModule, criterion: torch.nn.Module, data_gen: Callab del args, label, output, loss gm.to("cpu") torch.cuda.empty_cache() - return (torch.cuda.max_memory_allocated(device="cuda") - cached) / 1024**2, step_time * 1.0e3 + peak_mem = (torch.cuda.max_memory_allocated(device="cuda") - cached) / 1024**2 + return peak_mem, step_time * 1.0e3 def bench_rotor(gm: torch.fx.GraphModule, @@ -47,19 +65,92 @@ def bench_rotor(gm: torch.fx.GraphModule, data_gen: Callable, num_steps: int = 5, sample_points: int = 20, - free_memory: int = torch.cuda.mem_get_info()[0]): + free_memory: int = torch.cuda.mem_get_info()[0], + start_factor: int = 4) -> Tuple[np.array, list, list]: + """Auto Checkpoint Rotor Algorithm benchmarking + Benchmarks the Auto Checkpoint Rotor Algorithm for a given graph module and data. + + Args: + gm (torch.fx.GraphModule): The graph module to benchmark. + criterion (torch.nn.Module): Loss function. + data_gen (Callable): Data generator. + num_steps (int, optional): Number of test steps. Defaults to 5. + sample_points (int, optional): Number of sample points. Defaults to 20. + free_memory (int, optional): Max memory budget in Byte. 
Defaults to torch.cuda.mem_get_info()[0]. + start_factor (int, optional): Start memory budget factor for benchmark, the start memory budget + will be free_memory / start_factor. Defaults to 4. + + Returns: + Tuple[np.array, list, list]: return budgets vector (MB), peak memory vector (MB), step time vector (MS). + """ peak_hist, step_hist = [], [] - for budget in np.linspace(free_memory // 5, free_memory, sample_points): + raw_graph = deepcopy(gm.graph) + for budget in np.linspace(free_memory // start_factor, free_memory, sample_points): gm = metainfo_trace(gm, *data_gen()[0]) solver = CheckpointSolverRotor(gm.graph, free_memory=budget) try: - gm.graph = solver.solve() - peak_memory, step_time = bench(gm, - criterion, - partial(data_gen, batch_size=2048, shape=(3, 224, 224)), - num_steps=num_steps) + gm.graph = solver.solve(verbose=False) + peak_memory, step_time = bench(gm, criterion, data_gen, num_steps=num_steps) except: peak_memory, step_time = budget / 1024**2, float('inf') peak_hist.append(peak_memory) step_hist.append(step_time) - return peak_hist, step_hist + gm.graph = deepcopy(raw_graph) + return np.linspace(free_memory // start_factor, free_memory, sample_points) / 1024**2, peak_hist, step_hist + + +class GPTLMModel(nn.Module): + """ + GPT Model + """ + + def __init__(self, + hidden_size=768, + num_layers=12, + num_attention_heads=12, + max_seq_len=1024, + vocab_size=50257, + checkpoint=False): + super().__init__() + self.checkpoint = checkpoint + self.model = GPT2LMHeadModel( + GPT2Config(n_embd=hidden_size, + n_layer=num_layers, + n_head=num_attention_heads, + n_positions=max_seq_len, + n_ctx=max_seq_len, + vocab_size=vocab_size)) + if checkpoint: + self.model.gradient_checkpointing_enable() + + def forward(self, input_ids, attention_mask): + # Only return lm_logits + return self.model(input_ids=input_ids, attention_mask=attention_mask, use_cache=not self.checkpoint)[0] + + +class GPTLMLoss(nn.Module): + """ + GPT Loss + """ + + def __init__(self): + 
def _gpt2_benchmark(rank, world_size, port, batch_size, num_steps, sample_points, free_memory, start_factor):
    """Worker entry point: benchmark GPT2-medium under a range of memory budgets.

    Launches colossalai for this process, traces ``gpt2_medium()`` with meta
    tensors, runs ``bench_rotor`` and then prints and plots the
    (budget, peak memory, step time) triples it returns.

    Args:
        rank: process rank, forwarded to ``colossalai.launch``.
        world_size: number of processes, forwarded to ``colossalai.launch``.
        port: port used by ``colossalai.launch`` on ``localhost``.
        batch_size: batch size used for tracing and for the generated data.
        num_steps: forwarded unchanged to ``bench_rotor``.
        sample_points: forwarded unchanged to ``bench_rotor``.
        free_memory: forwarded unchanged to ``bench_rotor``. The script's
            ``__main__`` block passes the ``--free_memory`` value multiplied
            by ``1024**2``.
        start_factor: forwarded unchanged to ``bench_rotor``.

    Side effects:
        Prints a summary to stdout and, if at least one sample has a finite
        step time, writes ``gpt2_benchmark.png`` to the working directory.
    """
    colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
    model = gpt2_medium()

    # trace and benchmark
    # Only the model inputs (first element of data_gen's return) are needed for
    # tracing; they live on the 'meta' device so no real memory is allocated.
    data, mask = data_gen(batch_size, 1024, 50257, device='meta')[0]
    gm = symbolic_trace(model, meta_args={'input_ids': data, 'attention_mask': mask})
    gm = metainfo_trace(gm, data, mask)
    budgets, peak_hist, step_hist = bench_rotor(gm,
                                                GPTLMLoss(),
                                                partial(data_gen, batch_size=batch_size, seq_len=1024,
                                                        vocab_size=50257),
                                                num_steps=num_steps,
                                                sample_points=sample_points,
                                                free_memory=free_memory,
                                                start_factor=start_factor)

    # print summary
    print("==============test summary==============")
    for budget, peak, step in zip(budgets, peak_hist, step_hist):
        print(f'memory budget: {budget:.3f} MB, peak memory: {peak:.3f} MB, step time: {step:.3f} MS')

    # Locate the first sample with a finite step time. An ``inf`` step time is
    # treated as an invalid sample (presumably a budget that could not be
    # satisfied -- confirm against bench_rotor).
    valid_idx = next((idx for idx, step in enumerate(step_hist) if step != float("inf")), None)
    if valid_idx is None:
        # Nothing to plot; previously this case surfaced as a bare StopIteration.
        print("no valid benchmark result (every step time is inf), skip plotting")
        return

    # plot valid results
    fig, axs = plt.subplots(1, 2, figsize=(16, 8))

    # plot peak memory vs. budget memory (dashed line is the y = x reference)
    axs[0].plot(budgets[valid_idx:], peak_hist[valid_idx:])
    axs[0].plot([budgets[valid_idx], budgets[-1]], [budgets[valid_idx], budgets[-1]], linestyle='--')
    axs[0].set_xlabel("Budget Memory (MB)")
    axs[0].set_ylabel("Peak Memory (MB)")
    axs[0].set_title("Peak Memory vs. Budget Memory")

    # plot relative step time vs. peak memory; step times are normalised by the
    # last sample's step time (dashed line marks ratio 1.0)
    axs[1].plot(peak_hist[valid_idx:], [step_time / step_hist[-1] for step_time in step_hist[valid_idx:]])
    axs[1].plot([peak_hist[valid_idx], peak_hist[-1]], [1.0, 1.0], linestyle='--')
    axs[1].set_xlabel("Peak Memory (MB)")
    axs[1].set_ylabel("Relative Step Time")
    axs[1].set_title("Step Time vs. Peak Memory")
    axs[1].set_ylim(0.8, 1.5)

    # save plot
    fig.savefig("gpt2_benchmark.png")
def _resnet152_benchmark(rank, world_size, port, num_steps):
    """Worker entry point: measure ResNet152 throughput with the rotor checkpoint solver.

    For each batch size in ``[512, 1024, 2048]`` the traced model is annotated
    with meta information, a checkpoint schedule is solved with a budget of
    95% of the free GPU memory reported by ``torch.cuda.mem_get_info()``, and
    the resulting module is benchmarked. The graph is restored to its
    untouched copy before the next batch size.

    Args:
        rank: process rank, forwarded to ``colossalai.launch``.
        world_size: number of processes, forwarded to ``colossalai.launch``.
        port: port used by ``colossalai.launch`` on ``localhost``.
        num_steps: forwarded unchanged to ``bench``.

    Side effects:
        Prints a summary to stdout and writes ``resnet152_benchmark.png``.
    """
    colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')

    image_shape = (3, 224, 224)
    batch_sizes = [512, 1024, 2048]
    peak_mems = []
    through_puts = []

    gm = symbolic_trace(tm.resnet152())
    # Keep an untouched copy so every batch size starts from the same graph.
    pristine_graph = deepcopy(gm.graph)

    for bs in batch_sizes:
        meta_input = torch.empty(bs, *image_shape, device='meta')
        gm = metainfo_trace(gm, meta_input)

        # Budget: 95% of the memory currently reported as free.
        budget = torch.cuda.mem_get_info()[0] * 0.95
        rotor = CheckpointSolverRotor(gm.graph, free_memory=budget)
        gm.graph = rotor.solve()

        criterion = torch.nn.CrossEntropyLoss()
        make_batch = partial(data_gen, batch_size=bs, shape=image_shape)
        peak_mem, step_time = bench(gm, criterion, make_batch, num_steps=num_steps)

        peak_mems.append(peak_mem)
        # The 1.0e3 factor matches the "images/s" label printed below
        # (step_time is presumably in milliseconds -- confirm against bench).
        through_puts.append(bs / step_time * 1.0e3)

        gm.graph = deepcopy(pristine_graph)

    # print results
    print("===============test summary================")
    for idx, bs in enumerate(batch_sizes):
        peak_mem = peak_mems[idx]
        through_put = through_puts[idx]
        print(f'batch_size: {int(bs)}, peak memory: {peak_mem:.3f} MB, through put: {through_put:.3f} images/s')

    plt.plot(batch_sizes, through_puts)
    plt.xlabel("batch size")
    plt.ylabel("through put (images/s)")
    plt.title("Resnet152 benchmark")
    plt.savefig("resnet152_benchmark.png")
def _resnet50_benchmark(rank, world_size, port, batch_size, num_steps, sample_points, free_memory, start_factor):
    """Worker entry point: benchmark ResNet50 under a range of memory budgets.

    Launches colossalai for this process, traces ``tm.resnet50()`` with a meta
    tensor of shape ``(batch_size, 3, 224, 224)``, runs ``bench_rotor`` and
    then prints and plots the (budget, peak memory, step time) triples it
    returns.

    Args:
        rank: process rank, forwarded to ``colossalai.launch``.
        world_size: number of processes, forwarded to ``colossalai.launch``.
        port: port used by ``colossalai.launch`` on ``localhost``.
        batch_size: batch size used for tracing and for the generated data.
        num_steps: forwarded unchanged to ``bench_rotor``.
        sample_points: forwarded unchanged to ``bench_rotor``.
        free_memory: forwarded unchanged to ``bench_rotor``. The script's
            ``__main__`` block passes the ``--free_memory`` value multiplied
            by ``1024**2``.
        start_factor: forwarded unchanged to ``bench_rotor``.

    Side effects:
        Prints a summary to stdout and, if at least one sample has a finite
        step time, writes ``resnet50_benchmark.png`` to the working directory.
    """
    colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
    model = tm.resnet50()

    # trace and benchmark
    gm = symbolic_trace(model)
    gm = metainfo_trace(gm, torch.empty(batch_size, 3, 224, 224, device='meta'))
    budgets, peak_hist, step_hist = bench_rotor(gm,
                                                torch.nn.CrossEntropyLoss(),
                                                partial(data_gen, batch_size=batch_size, shape=(3, 224, 224)),
                                                num_steps=num_steps,
                                                sample_points=sample_points,
                                                free_memory=free_memory,
                                                start_factor=start_factor)

    # print summary
    print("==============test summary==============")
    for budget, peak, step in zip(budgets, peak_hist, step_hist):
        print(f'memory budget: {budget:.3f} MB, peak memory: {peak:.3f} MB, step time: {step:.3f} MS')

    # Locate the first sample with a finite step time. An ``inf`` step time is
    # treated as an invalid sample (presumably a budget that could not be
    # satisfied -- confirm against bench_rotor).
    valid_idx = next((idx for idx, step in enumerate(step_hist) if step != float("inf")), None)
    if valid_idx is None:
        # Nothing to plot; previously this case surfaced as a bare StopIteration.
        print("no valid benchmark result (every step time is inf), skip plotting")
        return

    # plot valid results
    fig, axs = plt.subplots(1, 2, figsize=(16, 8))

    # plot peak memory vs. budget memory (dashed line is the y = x reference)
    axs[0].plot(budgets[valid_idx:], peak_hist[valid_idx:])
    axs[0].plot([budgets[valid_idx], budgets[-1]], [budgets[valid_idx], budgets[-1]], linestyle='--')
    axs[0].set_xlabel("Budget Memory (MB)")
    axs[0].set_ylabel("Peak Memory (MB)")
    axs[0].set_title("Peak Memory vs. Budget Memory")

    # plot relative step time vs. peak memory; step times are normalised by the
    # last sample's step time (dashed line marks ratio 1.0)
    axs[1].plot(peak_hist[valid_idx:], [step_time / step_hist[-1] for step_time in step_hist[valid_idx:]])
    axs[1].plot([peak_hist[valid_idx], peak_hist[-1]], [1.0, 1.0], linestyle='--')
    axs[1].set_xlabel("Peak Memory (MB)")
    axs[1].set_ylabel("Relative Step Time")
    axs[1].set_title("Step Time vs. Peak Memory")
    axs[1].set_ylim(0.8, 1.5)

    # save plot
    fig.savefig("resnet50_benchmark.png")
b/examples/tutorial/auto_parallel/imgs/resnet50_benchmark.png differ