diff --git a/colossalai/zero/gemini/chunk/manager.py b/colossalai/zero/gemini/chunk/manager.py
index 341790a72..c7bdd5e1f 100644
--- a/colossalai/zero/gemini/chunk/manager.py
+++ b/colossalai/zero/gemini/chunk/manager.py
@@ -83,7 +83,7 @@ class ChunkManager:
         if chunk_group:
             # the chunk group is not empty
             # close the last chunk
-            self.__close_one_chunk(chunk_group[-1])  # chunk[-1] 满了,所以关闭,不能再添加,然后同时scatter到ZeRO PG中
+            self.__close_one_chunk(chunk_group[-1])
 
         if tensor.numel() > chunk_size:
             chunk_size = tensor.numel()
diff --git a/colossalai/zero/gemini/gemini_hook.py b/colossalai/zero/gemini/gemini_hook.py
index e691b423b..450cb3ad6 100644
--- a/colossalai/zero/gemini/gemini_hook.py
+++ b/colossalai/zero/gemini/gemini_hook.py
@@ -33,7 +33,7 @@ class GeminiZeROHook(ColoParamOpHook):
         all_chunks = self._chunk_manager.get_chunks(params)
 
         # wait for prefetched chunks, filter those are not prefetched
-        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)  # 当前要fetch的chunk
+        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)
 
         # transfer state
         for p in params:
diff --git a/colossalai/zero/gemini/gemini_mgr.py b/colossalai/zero/gemini/gemini_mgr.py
index 85beafd32..11bde789c 100644
--- a/colossalai/zero/gemini/gemini_mgr.py
+++ b/colossalai/zero/gemini/gemini_mgr.py
@@ -125,7 +125,7 @@ class GeminiManager:
                 self._async_works[chunk].wait()
                 del self._async_works[chunk]
             else:
-                non_prefetched_chunks.append(chunk)  # 没在之前prefetch过,现在要prefetch的chunk
+                non_prefetched_chunks.append(chunk)
         return tuple(non_prefetched_chunks)
 
     def add_work(self, chunk: Chunk, work: dist.Work):
diff --git a/colossalai/zero/gemini/placement_policy.py b/colossalai/zero/gemini/placement_policy.py
index 9e9fb1f58..cfbf16d1b 100644
--- a/colossalai/zero/gemini/placement_policy.py
+++ b/colossalai/zero/gemini/placement_policy.py
@@ -113,10 +113,8 @@ class StaticPlacementPolicy(PlacementPolicy):
     def get_prefetch_chunks(self) -> List[Chunk]:
         if self.gemini_manager.is_warmup():  # no prefetch during warmup since we need compute_list
             return []
-        # 最多有多少个异步的work
         can_prefetch = self.max_prefetch - len(self.gemini_manager._async_works)
         prefetch = []
-        # static炸就炸了,dynamic可能需要我们要先分析当前运行时的内存情况,分配空间或者淘汰块
         for i in range(self.gemini_manager.compute_idx + 1, len(self.gemini_manager.compute_list)):
             for chunk in self.gemini_manager.compute_list[i]:
                 if len(prefetch) >= can_prefetch:
diff --git a/examples/language/gpt/gemini/demo.ipynb b/examples/language/gpt/gemini/demo.ipynb
deleted file mode 100644
index 09953b3a9..000000000
--- a/examples/language/gpt/gemini/demo.ipynb
+++ /dev/null
@@ -1,142 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Linear(in_features=10, out_features=5, bias=False) 50\n",
-      "Linear(in_features=5, out_features=10, bias=False) 50\n",
-      "Linear(in_features=10, out_features=10, bias=False) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "class Toy(nn.Module):\n",
-    " \n",
-    "    def __init__(self):\n",
-    "        super(Toy, self).__init__()\n",
-    "        self.fc1 = nn.Linear(10,5, bias=False)\n",
-    "        self.m3 = nn.Sequential(nn.Linear(5, 10, bias=False), nn.Linear(10,10, bias=False))\n",
-    "\n",
-    "t = Toy()\n",
-    "for mod in t.modules():\n",
-    "    for p in mod.parameters(recurse=False):\n",
-    "        print(mod, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([5, 10]) 50\n",
-      "torch.Size([10, 5]) 50\n",
-      "torch.Size([10, 10]) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "for p in t.parameters():\n",
-    "    print(p.shape, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'224'"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "conf_str = torch.__config__.parallel_info()\n",
-    "inter_str = conf_str.split(\"hardware_concurrency() : \")[1]\n",
-    "max_concurrency = inter_str.split(\"\\n\")[0]\n",
-    "max_concurrency"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 0\n",
-      "0 1\n",
-      "0 2\n",
-      "1 0\n",
-      "1 1\n",
-      "1 2\n"
-     ]
-    }
-   ],
-   "source": [
-    "for i in range(3):\n",
-    "    for j in range(3):\n",
-    "        print(i, j)\n",
-    "        if i == 1 and j == 2:break\n",
-    "    else:\n",
-    "        continue\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "colossalai-py310",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.14"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/language/gpt/gemini/train_gpt_demo.py b/examples/language/gpt/gemini/train_gpt_demo.py
index 667a0c77a..6db74231a 100644
--- a/examples/language/gpt/gemini/train_gpt_demo.py
+++ b/examples/language/gpt/gemini/train_gpt_demo.py
@@ -66,18 +66,18 @@ class GPTLMLoss(nn.Module):
 
 
 def get_cpu_mem():
-    return psutil.Process().memory_info().rss / 1024**2  # 返回值是B,转换成MB
+    return psutil.Process().memory_info().rss / 1024**2
 
 
 def get_gpu_mem():
-    return torch.cuda.memory_allocated() / 1024**2  # 转换成MB
+    return torch.cuda.memory_allocated() / 1024**2
 
 
 def get_mem_info(prefix=""):
     return f"{prefix}GPU memory usage: {get_gpu_mem():.2f} MB, CPU memory usage: {get_cpu_mem():.2f} MB"
 
 
-def get_model_size(model: nn.Module):  # 得到模型参数量
+def get_model_size(model: nn.Module):
     total_numel = 0
     for module in model.modules():
         for p in module.parameters(recurse=False):
diff --git a/tests/test_zero/test_gemini/test_optim.py b/tests/test_zero/test_gemini/test_optim.py
index 4e1fb988b..1c914ca0e 100644
--- a/tests/test_zero/test_gemini/test_optim.py
+++ b/tests/test_zero/test_gemini/test_optim.py
@@ -26,7 +26,7 @@ PLACEMENT_CONFIGS = [
         "offload_optim_frac": 1.0,
         "offload_param_frac": 1.0,
     },  # zero3-offload-all
-    # {"placement_policy": "auto"},
+    {"placement_policy": "auto"},
 ]
 
 # this model is large enough to slice to chunks