diff --git a/colossalai/inference/core/engine.py b/colossalai/inference/core/engine.py
index 635c3f801..3f456e1f9 100644
--- a/colossalai/inference/core/engine.py
+++ b/colossalai/inference/core/engine.py
@@ -1,6 +1,6 @@
 import time
 from itertools import count
-from typing import Dict, List, Optional, Tuple, Union, Iterable
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import torch
diff --git a/colossalai/shardformer/layer/embedding.py b/colossalai/shardformer/layer/embedding.py
index cb7eceae4..93df5e522 100644
--- a/colossalai/shardformer/layer/embedding.py
+++ b/colossalai/shardformer/layer/embedding.py
@@ -248,7 +248,6 @@ class VocabParallelEmbedding1D(PaddingParallelModule):
             he initializer of weight, defaults to normal initializer.
 
     The ``args`` and ``kwargs`` used in :class:``torch.nn.functional.embedding`` should contain:
-    ::
 
         max_norm (float, optional): If given, each embedding vector with norm larger than max_norm is
                     renormalized to have norm max_norm. Note: this will modify weight in-place.
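
Note for reviewers: the docstring context in the second hunk states that extra ``kwargs`` are forwarded to ``torch.nn.functional.embedding``. A minimal standalone sketch (not part of this patch) of the documented ``max_norm`` behavior, including the in-place renormalization of the weight that the docstring warns about:

import torch
import torch.nn.functional as F

# Rows with deliberately large norms, so max_norm has a visible effect.
weight = torch.randn(10, 4) * 5.0
ids = torch.tensor([0, 3, 7])

# Looking up with max_norm=1.0 renormalizes any accessed row whose norm
# exceeds 1.0. Per the docstring, this modifies ``weight`` in place.
out = F.embedding(ids, weight, max_norm=1.0)

assert torch.all(out.norm(dim=-1) <= 1.0 + 1e-6)
# The same rows of the weight tensor itself were rewritten in place.
assert torch.all(weight[ids].norm(dim=-1) <= 1.0 + 1e-6)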