mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 20:10:17 +00:00
[devops] remove post commit ci (#5566)
* [devops] remove post commit ci * [misc] run pre-commit on all files * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -50,7 +50,6 @@ def all_reduce_mean(x: float, world_size: int) -> float:
|
||||
|
||||
|
||||
class Timer:
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.start_time: Optional[float] = None
|
||||
self.duration: float = 0.0
|
||||
@@ -112,7 +111,7 @@ class PerformanceEvaluator:
|
||||
batch_size, seq_len = input_ids.shape
|
||||
|
||||
self.num_samples += batch_size
|
||||
self.flop += (batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint)))
|
||||
self.flop += batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint))
|
||||
|
||||
def on_fit_end(self) -> None:
|
||||
avg_duration = all_reduce_mean(self.timer.duration, self.world_size)
|
||||
@@ -122,5 +121,6 @@ class PerformanceEvaluator:
|
||||
if dist.get_rank() == 0:
|
||||
print(
|
||||
f"num_samples: {self.num_samples}, dp_world_size: {self.dp_world_size}, flop: {self.flop}, avg_duration: {avg_duration}, "
|
||||
f"avg_throughput: {avg_throughput}")
|
||||
f"avg_throughput: {avg_throughput}"
|
||||
)
|
||||
print(f"Throughput: {avg_throughput:.2f} samples/sec, TFLOPS per GPU: {avg_tflops_per_gpu:.2f}")
|
||||
|
@@ -16,17 +16,15 @@ def inference(args):
|
||||
tokenizer = T5Tokenizer.from_pretrained("google/umt5-small")
|
||||
if args.model == "test":
|
||||
config = LlamaConfig.from_pretrained("hpcai-tech/openmoe-base")
|
||||
set_openmoe_args(config,
|
||||
num_experts=config.num_experts,
|
||||
moe_layer_interval=config.moe_layer_interval,
|
||||
enable_kernel=True)
|
||||
set_openmoe_args(
|
||||
config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=True
|
||||
)
|
||||
model = OpenMoeForCausalLM(config)
|
||||
else:
|
||||
config = LlamaConfig.from_pretrained(f"hpcai-tech/openmoe-{args.model}")
|
||||
set_openmoe_args(config,
|
||||
num_experts=config.num_experts,
|
||||
moe_layer_interval=config.moe_layer_interval,
|
||||
enable_kernel=False)
|
||||
set_openmoe_args(
|
||||
config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=False
|
||||
)
|
||||
model = OpenMoeForCausalLM.from_pretrained(f"hpcai-tech/openmoe-{args.model}", config=config)
|
||||
model = model.eval().bfloat16()
|
||||
model = model.to(torch.cuda.current_device())
|
||||
|
@@ -172,9 +172,9 @@ def make_state_dict(converted_params):
|
||||
def load_t5x_weights_in_t5(model, config, t5x_checkpoint_path):
|
||||
"""Replaces the params in model witht the T5X converted params."""
|
||||
variables = checkpoints.load_t5x_checkpoint(t5x_checkpoint_path)
|
||||
converted = convert_t5x_to_pytorch(variables,
|
||||
num_layers=config.num_hidden_layers,
|
||||
moe_interval=config.moe_layer_interval)
|
||||
converted = convert_t5x_to_pytorch(
|
||||
variables, num_layers=config.num_hidden_layers, moe_interval=config.moe_layer_interval
|
||||
)
|
||||
state_dict = make_state_dict(converted)
|
||||
model.load_state_dict(state_dict, strict=True)
|
||||
|
||||
@@ -203,11 +203,9 @@ def convert_t5x_checkpoint_to_pytorch(t5x_checkpoint_path, config_file, pytorch_
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Converts a native T5X checkpoint into a PyTorch checkpoint.")
|
||||
# Required parameters
|
||||
parser.add_argument("--t5x_checkpoint_path",
|
||||
default=None,
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to the T5X checkpoint.")
|
||||
parser.add_argument(
|
||||
"--t5x_checkpoint_path", default=None, type=str, required=True, help="Path to the T5X checkpoint."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--config_file",
|
||||
default=None,
|
||||
@@ -215,10 +213,8 @@ if __name__ == "__main__":
|
||||
required=True,
|
||||
help="The config json file corresponding to the pre-trained T5 model.\nThis specifies the model architecture.",
|
||||
)
|
||||
parser.add_argument("--pytorch_dump_path",
|
||||
default=None,
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to the output PyTorch model.")
|
||||
parser.add_argument(
|
||||
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
convert_t5x_checkpoint_to_pytorch(args.t5x_checkpoint_path, args.config_file, args.pytorch_dump_path)
|
||||
|
Reference in New Issue
Block a user