[inference] refactored config (#5376)

This commit is contained in:
Frank Lee
2024-02-08 14:04:14 +08:00
committed by GitHub
parent 1f8c7e7046
commit 9afa52061f
2 changed files with 32 additions and 22 deletions

View File

@@ -130,7 +130,6 @@ class InferenceEngine:
enable_flash_attention=False,
enable_jit_fused=False,
enable_sequence_parallelism=False,
extra_kwargs={"quant": self.inference_config.quant_mode},
)
shardformer = ShardFormer(shard_config=shardconfig)
shard_model, _ = shardformer.optimize(model, model_policy)