diff --git a/colossalai/inference/core/engine.py b/colossalai/inference/core/engine.py
index 635c3f801..3f456e1f9 100644
--- a/colossalai/inference/core/engine.py
+++ b/colossalai/inference/core/engine.py
@@ -1,6 +1,6 @@
 import time
 from itertools import count
-from typing import Dict, List, Optional, Tuple, Union, Iterable
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import torch
diff --git a/colossalai/shardformer/layer/embedding.py b/colossalai/shardformer/layer/embedding.py
index cb7eceae4..93df5e522 100644
--- a/colossalai/shardformer/layer/embedding.py
+++ b/colossalai/shardformer/layer/embedding.py
@@ -248,7 +248,6 @@ class VocabParallelEmbedding1D(PaddingParallelModule):
             he initializer of weight, defaults to normal initializer.
 
     The ``args`` and ``kwargs`` used in :class:``torch.nn.functional.embedding`` should contain:
-    ::
 
         max_norm (float, optional): If given, each embedding vector with norm larger than max_norm is
                     renormalized to have norm max_norm. Note: this will modify weight in-place.
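
Note for reviewers: the docstring context in the second hunk states that extra ``kwargs`` are forwarded to ``torch.nn.functional.embedding``. A minimal standalone sketch (not part of this patch) of the documented ``max_norm`` behavior, including the in-place renormalization of the weight that the docstring warns about:

import torch
import torch.nn.functional as F

# Rows with deliberately large norms, so max_norm has a visible effect.
weight = torch.randn(10, 4) * 5.0
ids = torch.tensor([0, 3, 7])

# Looking up with max_norm=1.0 renormalizes any accessed row whose norm
# exceeds 1.0. Per the docstring, this modifies ``weight`` in place.
out = F.embedding(ids, weight, max_norm=1.0)

assert torch.all(out.norm(dim=-1) <= 1.0 + 1e-6)
# The same rows of the weight tensor itself were rewritten in place.
assert torch.all(weight[ids].norm(dim=-1) <= 1.0 + 1e-6)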