[autochunk] support complete benchmark (#3121)

* refactor memory code

* don't log free var memory

* add memory align

* update chunk target

* update setting for new memory

* finish test

* update tracer

* fix typo

* update test

* add unet test

* add bench

* update bench

* update bench

* init

* support vit

* move to cpu

* add cpu benchmark
Xuanlei Zhao 2023-03-13 17:42:37 +08:00 committed by GitHub
parent 68577fbc43
commit 30dd13c450
3 changed files with 8 additions and 8 deletions


@@ -23,7 +23,7 @@ def _benchmark_evoformer_stack_gm(
     get_data: Any,
 ) -> None:
     # build model and input
-    model = get_model()
+    model = get_model().cpu().eval()
     meta_args, concrete_args = get_data(*data_args)
     if concrete_args is None:
         concrete_args = []
@@ -35,7 +35,7 @@ def _benchmark_evoformer_stack_gm(
         concrete_args={k: v for k, v in concrete_args},
     )
     interp = MetaInfoProp(meta_graph)
-    meta_tensors = [MetaTensor(i[1], fake_device="cuda:0") for i in meta_args] + [i[1] for i in concrete_args]
+    meta_tensors = [MetaTensor(i[1], fake_device="cpu") for i in meta_args] + [i[1] for i in concrete_args]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
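The recurring change is the `fake_device` argument: shape and memory propagation runs on fake tensors, and `fake_device` only controls which device those tensors claim to live on, so switching it from "cuda:0" to "cpu" keeps the whole analysis off the GPU. A minimal sketch of how the meta inputs are assembled from `(name, tensor)` pairs (which is what the `i[1]` indexing above walks over); the helper name is hypothetical and the import path is an assumption, not verbatim repo code:

```python
from colossalai.fx.profiler import MetaTensor  # import path assumed

def to_meta_inputs(meta_args, concrete_args):
    # Hypothetical helper: meta_args and concrete_args are (name, tensor) pairs.
    # Fake tensors carry shape/dtype only; fake_device="cpu" keeps the
    # propagation pass from ever allocating GPU memory.
    fake = [MetaTensor(t, fake_device="cpu") for _, t in meta_args]
    real = [t for _, t in concrete_args]
    return fake + real
```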


@@ -35,10 +35,9 @@ def _benchmark_autochunk_unet_gm(
         meta_args={k: v.to(torch.device("meta")) for k, v in meta_args},
         concrete_args={k: v for k, v in concrete_args},
     )
-    model = model.cuda().eval()
     interp = MetaInfoProp(meta_graph)
     meta_tensors = [i[1] for i in meta_args] + [i[1] for i in concrete_args]
-    meta_tensors = [MetaTensor(i, fake_device="cuda:0") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
+    meta_tensors = [MetaTensor(i, fake_device="cpu") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
@@ -142,6 +141,7 @@ if __name__ == "__main__":
         port=free_port(),
         backend="nccl",
     )
-    benchmark_autochunk_unet(batch=1, height=224 * 2, width=224 * 2)
     benchmark_autochunk_unet(batch=1, height=224 * 3, width=224 * 3)
     benchmark_autochunk_unet(batch=1, height=224 * 4, width=224 * 4)
+    benchmark_autochunk_unet(batch=1, height=224 * 5, width=224 * 5)
+    benchmark_autochunk_unet(batch=1, height=224 * 6, width=224 * 6)


@@ -22,7 +22,7 @@ def _benchmark_autochunk_gpt_gm(
     data: tuple,
     max_memory: int = None,
 ) -> None:
-    model = model.cuda().eval()
+    model = model.eval().cpu()
 
     # build model and input
     meta_args, concrete_args, sequence = data
@@ -37,7 +37,7 @@ def _benchmark_autochunk_gpt_gm(
     )
     interp = MetaInfoProp(meta_graph)
     meta_tensors = [meta_args[i] if i in meta_args else concrete_args[i] for i in sequence]
-    meta_tensors = [MetaTensor(i, fake_device="cuda:0") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
+    meta_tensors = [MetaTensor(i, fake_device="cpu") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
@@ -58,7 +58,7 @@ def _benchmark_autochunk_gpt_gm(
     # init inputs
     inputs = [meta_args[i] if i in meta_args else concrete_args[i] for i in sequence]
     inputs = [i.cuda() if isinstance(i, torch.Tensor) else i for i in inputs]
-    model.cuda().eval()
+    model.cuda()
 
     # bench
     para_mem = float(parameter_size(model)) / 1024**2 * 6
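Taken together, the three benchmarks converge on one pattern: trace and analyze the model on CPU (or meta) tensors, and move weights and inputs to the GPU only for the timed run, as the late `model.cuda()` above shows. A torch-only sketch of that pattern (the function below is illustrative, not code from this repo):

```python
import torch
import torch.fx

def time_gpu_forward(model: torch.nn.Module, example: torch.Tensor) -> float:
    # Tracing and graph analysis stay on CPU, so they cost no GPU memory.
    model = model.cpu().eval()
    gm = torch.fx.symbolic_trace(model)

    # Only the measured forward pass touches the GPU.
    gm.cuda()
    inputs = example.cuda()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    with torch.no_grad():
        gm(inputs)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end)  # milliseconds
```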