[example] diffuser, support quant inference for stable diffusion (#2186)

2025-09-10 13:30:19 +00:00 · 2022-12-23 16:06:29 +08:00
parent bc0e271e71
commit 1cf6d92d7c
3 changed files with 116 additions and 4 deletions
--- a/examples/images/diffusion/scripts/img2img.py
+++ b/examples/images/diffusion/scripts/img2img.py
@@ -22,6 +22,7 @@ from imwatermark import WatermarkEncoder
 from scripts.txt2img import put_watermark
 from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
+from utils import replace_module, getModelSize


 def chunk(it, size):
@@ -44,7 +45,6 @@ def load_model_from_config(config, ckpt, verbose=False):
        print("unexpected keys:")
        print(u)

-    model.cuda()
    model.eval()
    return model

@@ -183,6 +183,12 @@ def main():
        choices=["full", "autocast"],
        default="autocast"
    )
+    parser.add_argument(
+        "--use_int8",
+        type=bool,
+        default=False,
+        help="use int8 for inference",
+    )

    opt = parser.parse_args()
    seed_everything(opt.seed)
@@ -193,6 +199,12 @@ def main():
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)

+    # quantize model
+    if opt.use_int8:
+        model = replace_module(model)
+        # # to compute the model size
+        # getModelSize(model)
+    
    sampler = DDIMSampler(model)

    os.makedirs(opt.outdir, exist_ok=True)
@@ -280,3 +292,5 @@ def main():

 if __name__ == "__main__":
    main()
+    # # to report the peak CUDA memory allocated, in MiB
+    # print(torch.cuda.max_memory_allocated() / 1024 / 1024)