diff --git a/colossalai/zero/gemini/chunk/manager.py b/colossalai/zero/gemini/chunk/manager.py
index 341790a72..c7bdd5e1f 100644
--- a/colossalai/zero/gemini/chunk/manager.py
+++ b/colossalai/zero/gemini/chunk/manager.py
@@ -83,7 +83,7 @@ class ChunkManager:
         if chunk_group:
             # the chunk group is not empty
             # close the last chunk
-            self.__close_one_chunk(chunk_group[-1])  # chunk[-1] 满了,所以关闭,不能再添加,然后同时scatter到ZeRO PG中
+            self.__close_one_chunk(chunk_group[-1])
 
         if tensor.numel() > chunk_size:
             chunk_size = tensor.numel()
diff --git a/colossalai/zero/gemini/gemini_hook.py b/colossalai/zero/gemini/gemini_hook.py
index e691b423b..450cb3ad6 100644
--- a/colossalai/zero/gemini/gemini_hook.py
+++ b/colossalai/zero/gemini/gemini_hook.py
@@ -33,7 +33,7 @@ class GeminiZeROHook(ColoParamOpHook):
         all_chunks = self._chunk_manager.get_chunks(params)
 
         # wait for prefetched chunks, filter those are not prefetched
-        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)  # 当前要fetch的chunk
+        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)
 
         # transfer state
         for p in params:
diff --git a/colossalai/zero/gemini/gemini_mgr.py b/colossalai/zero/gemini/gemini_mgr.py
index 85beafd32..11bde789c 100644
--- a/colossalai/zero/gemini/gemini_mgr.py
+++ b/colossalai/zero/gemini/gemini_mgr.py
@@ -125,7 +125,7 @@ class GeminiManager:
                 self._async_works[chunk].wait()
                 del self._async_works[chunk]
             else:
-                non_prefetched_chunks.append(chunk)  # 没在之前prefetch过,现在要prefetch的chunk
+                non_prefetched_chunks.append(chunk)
         return tuple(non_prefetched_chunks)
 
     def add_work(self, chunk: Chunk, work: dist.Work):
diff --git a/colossalai/zero/gemini/placement_policy.py b/colossalai/zero/gemini/placement_policy.py
index 9e9fb1f58..cfbf16d1b 100644
--- a/colossalai/zero/gemini/placement_policy.py
+++ b/colossalai/zero/gemini/placement_policy.py
@@ -113,10 +113,8 @@ class StaticPlacementPolicy(PlacementPolicy):
     def get_prefetch_chunks(self) -> List[Chunk]:
         if self.gemini_manager.is_warmup():  # no prefetch during warmup since we need compute_list
             return []
-        # 最多有多少个异步的work
         can_prefetch = self.max_prefetch - len(self.gemini_manager._async_works)
         prefetch = []
-        # static炸就炸了,dynamic可能需要我们要先分析当前运行时的内存情况,分配空间或者淘汰块
         for i in range(self.gemini_manager.compute_idx + 1, len(self.gemini_manager.compute_list)):
             for chunk in self.gemini_manager.compute_list[i]:
                 if len(prefetch) >= can_prefetch:
diff --git a/examples/language/gpt/gemini/demo.ipynb b/examples/language/gpt/gemini/demo.ipynb
deleted file mode 100644
index 09953b3a9..000000000
--- a/examples/language/gpt/gemini/demo.ipynb
+++ /dev/null
@@ -1,142 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Linear(in_features=10, out_features=5, bias=False) 50\n",
-      "Linear(in_features=5, out_features=10, bias=False) 50\n",
-      "Linear(in_features=10, out_features=10, bias=False) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "class Toy(nn.Module):\n",
-    " \n",
-    "    def __init__(self):\n",
-    "        super(Toy, self).__init__()\n",
-    "        self.fc1 = nn.Linear(10,5, bias=False)\n",
-    "        self.m3 = nn.Sequential(nn.Linear(5, 10, bias=False), nn.Linear(10,10, bias=False))\n",
-    "\n",
-    "t = Toy()\n",
-    "for mod in t.modules():\n",
-    "    for p in mod.parameters(recurse=False):\n",
-    "        print(mod, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([5, 10]) 50\n",
-      "torch.Size([10, 5]) 50\n",
-      "torch.Size([10, 10]) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "for p in t.parameters():\n",
-    "    print(p.shape, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'224'"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "conf_str = torch.__config__.parallel_info()\n",
-    "inter_str = conf_str.split(\"hardware_concurrency() : \")[1]\n",
-    "max_concurrency = inter_str.split(\"\\n\")[0]\n",
-    "max_concurrency"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 0\n",
-      "0 1\n",
-      "0 2\n",
-      "1 0\n",
-      "1 1\n",
-      "1 2\n"
-     ]
-    }
-   ],
-   "source": [
-    "for i in range(3):\n",
-    "    for j in range(3):\n",
-    "        print(i, j)\n",
-    "        if i == 1 and j == 2:break\n",
-    "    else:\n",
-    "        continue\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "colossalai-py310",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.14"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/language/gpt/gemini/train_gpt_demo.py b/examples/language/gpt/gemini/train_gpt_demo.py
index 667a0c77a..6db74231a 100644
--- a/examples/language/gpt/gemini/train_gpt_demo.py
+++ b/examples/language/gpt/gemini/train_gpt_demo.py
@@ -66,18 +66,18 @@ class GPTLMLoss(nn.Module):
 
 
 def get_cpu_mem():
-    return psutil.Process().memory_info().rss / 1024**2  # 返回值是B,转换成MB
+    return psutil.Process().memory_info().rss / 1024**2
 
 
 def get_gpu_mem():
-    return torch.cuda.memory_allocated() / 1024**2  # 转换成MB
+    return torch.cuda.memory_allocated() / 1024**2
 
 
 def get_mem_info(prefix=""):
     return f"{prefix}GPU memory usage: {get_gpu_mem():.2f} MB, CPU memory usage: {get_cpu_mem():.2f} MB"
 
 
-def get_model_size(model: nn.Module):  # 得到模型参数量
+def get_model_size(model: nn.Module):
     total_numel = 0
     for module in model.modules():
         for p in module.parameters(recurse=False):
diff --git a/tests/test_zero/test_gemini/test_optim.py b/tests/test_zero/test_gemini/test_optim.py
index 4e1fb988b..1c914ca0e 100644
--- a/tests/test_zero/test_gemini/test_optim.py
+++ b/tests/test_zero/test_gemini/test_optim.py
@@ -26,7 +26,7 @@ PLACEMENT_CONFIGS = [
         "offload_optim_frac": 1.0,
         "offload_param_frac": 1.0,
     },  # zero3-offload-all
-    # {"placement_policy": "auto"},
+    {"placement_policy": "auto"},
 ]
 
 # this model is large enough to slice to chunks