diff --git a/colossalai/_analyzer/fx/codegen.py b/colossalai/_analyzer/fx/codegen.py
index cd244b22c..68a27d919 100644
--- a/colossalai/_analyzer/fx/codegen.py
+++ b/colossalai/_analyzer/fx/codegen.py
@@ -246,7 +246,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
 
 @compatibility(is_backward_compatible=True)
 class ActivationCheckpointCodeGen(CodeGen):
-    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
         free_vars: List[str] = []
         body: List[str] = []
         globals_: Dict[str, Any] = {}
diff --git a/colossalai/auto_parallel/offload/base_offload_module.py b/colossalai/auto_parallel/offload/base_offload_module.py
index f5e8e31f5..60de7743a 100644
--- a/colossalai/auto_parallel/offload/base_offload_module.py
+++ b/colossalai/auto_parallel/offload/base_offload_module.py
@@ -5,7 +5,7 @@ import torch
 import torch.nn as nn
 
 from colossalai.utils import _cast_float
-from colossalai.zero.legacy.gemini.tensor_utils import free_storage
+from colossalai.utils.common import free_storage
 
 from .region_manager import RegionManager
 from .util import GlobalRuntimeInfo
diff --git a/colossalai/auto_parallel/offload/region.py b/colossalai/auto_parallel/offload/region.py
index ea92c714c..a9f6f4c18 100644
--- a/colossalai/auto_parallel/offload/region.py
+++ b/colossalai/auto_parallel/offload/region.py
@@ -3,7 +3,8 @@ from typing import Dict, List, Tuple
 import torch
 from torch.fx import Node
 
-from colossalai.zero.legacy.gemini.tensor_utils import alloc_storage, free_storage
+from colossalai.utils.common import free_storage
+from colossalai.zero.gemini.chunk.chunk import alloc_storage
 
 
 class Region:
diff --git a/colossalai/autochunk/autochunk_codegen.py b/colossalai/autochunk/autochunk_codegen.py
index 9571fa2c1..07dbf8a79 100644
--- a/colossalai/autochunk/autochunk_codegen.py
+++ b/colossalai/autochunk/autochunk_codegen.py
@@ -372,7 +372,7 @@ if AUTOCHUNK_AVAILABLE:
             if print_progress:
                 get_logger().info("AutoChunk start codegen")
 
-        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
             free_vars: List[str] = []
             body: List[str] = []
             globals_: Dict[str, Any] = {}
diff --git a/colossalai/fx/codegen/activation_checkpoint_codegen.py b/colossalai/fx/codegen/activation_checkpoint_codegen.py
index dfb5754d7..28451bdd1 100644
--- a/colossalai/fx/codegen/activation_checkpoint_codegen.py
+++ b/colossalai/fx/codegen/activation_checkpoint_codegen.py
@@ -625,7 +625,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
 if CODEGEN_AVAILABLE:
 
     class ActivationCheckpointCodeGen(CodeGen):
-        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace) -> PythonCode:
+        def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
             free_vars: List[str] = []
             body: List[str] = []
             globals_: Dict[str, Any] = {}
diff --git a/examples/language/gpt/hybridparallelism/data.py b/examples/language/gpt/hybridparallelism/data.py
index ef51f938d..e5dc882bc 100644
--- a/examples/language/gpt/hybridparallelism/data.py
+++ b/examples/language/gpt/hybridparallelism/data.py
@@ -62,6 +62,8 @@ class GLUEDataBuilder:
         self.text_fields = self.task_text_field_map[task_name]
         self.num_labels = self.glue_task_num_labels[task_name]
         self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)
+        if not getattr(self.tokenizer, "pad_token", None):
+            self.tokenizer.pad_token = self.tokenizer.eos_token
         self.setup()
 
     def setup(self):
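
A note on the recurring `verbose=None` change (in `colossalai/_analyzer/fx/codegen.py`, `colossalai/autochunk/autochunk_codegen.py`, and `colossalai/fx/codegen/activation_checkpoint_codegen.py`): newer `torch.fx` releases forward a `verbose` argument when `Graph.python_code()` invokes the codegen, so an override that lacks the parameter fails with a `TypeError`. Below is a minimal sketch of a forward-compatible override; the subclass name and body are illustrative, not ColossalAI's implementation:

```python
from torch.fx.graph import CodeGen, PythonCode, _Namespace


class PassthroughCodeGen(CodeGen):
    # Accepting `verbose` (defaulted to None) keeps this override callable both
    # from older torch.fx, which omits the argument, and from newer torch.fx,
    # which forwards it from Graph.python_code().
    def _gen_python_code(self, nodes, root_module: str, namespace: _Namespace, verbose=None) -> PythonCode:
        # A real subclass would emit customized code here; this sketch simply
        # delegates to the stock implementation. `verbose` only toggles
        # annotation comments in the generated code, so dropping it is safe.
        return super()._gen_python_code(nodes, root_module, namespace)
```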
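
The import changes in `base_offload_module.py` and `region.py` track the move of the storage helpers out of `colossalai.zero.legacy`. For context, here is a rough sketch of the storage-resize trick such helpers are typically built on; the functions below are simplified stand-ins, while the real implementations live in `colossalai.utils.common` and `colossalai.zero.gemini.chunk.chunk`:

```python
import torch


def free_storage_sketch(tensor: torch.Tensor) -> None:
    """Release the tensor's backing memory while keeping its metadata."""
    if tensor.storage().size() > 0:
        # Only a tensor that owns its entire storage can drop it safely.
        assert tensor.storage_offset() == 0
        tensor.storage().resize_(0)


def alloc_storage_sketch(tensor: torch.Tensor) -> None:
    """Re-allocate backing memory; contents are undefined until rewritten."""
    if tensor.storage().size() == 0:
        tensor.storage().resize_(tensor.numel())


x = torch.randn(4, 4)
free_storage_sketch(x)   # shape and dtype survive, memory is released
alloc_storage_sketch(x)  # memory is back, values are garbage until refilled
```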
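
Finally, the guard added to `GLUEDataBuilder` is needed because GPT-2's tokenizer ships without a padding token, so any padded batch raises an error until one is assigned. A quick demonstration, using `gpt2` as a stand-in checkpoint:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
print(tokenizer.pad_token)  # None -- GPT-2 defines no padding token

# Without a pad token, padded batching raises:
#   ValueError: Asking to pad but the tokenizer does not have a padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # reuse EOS for padding

batch = tokenizer(["short", "a slightly longer sentence"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)  # both rows padded to the longest sequence
```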