[Inference]ADD Bench Chatglm2 script (#4963)

* add bench chatglm * fix bug and make utils --------- Co-authored-by: CjhHa1 <cjh18671720497outlook.com>
2025-09-29 21:46:06 +00:00 · 2023-10-24 13:11:15 +08:00
parent 785802e809
commit c6cd629e7a
6 changed files with 160 additions and 98 deletions
--- a/examples/inference/_utils.py
+++ b/examples/inference/_utils.py
@@ -0,0 +1,19 @@
+def print_perf_stats(latency_set, config, bs, warmup=3):
+    # trim warmup queries
+    latency_set = list(latency_set)
+    latency_set = latency_set[warmup:]
+    count = len(latency_set)
+
+    if count > 0:
+        latency_set.sort()
+        avg = sum(latency_set) / count
+        num_layers = (
+            getattr(config, "num_layers") if hasattr(config, "num_layers") else getattr(config, "num_hidden_layers")
+        )
+        num_parameters = num_layers * config.hidden_size * config.hidden_size * 12
+        num_bytes = 2  # float16
+
+        print("Avg Per Token Latency: {0:8.2f} ms".format(avg * 1000))
+        print("Avg BW: {0:8.2f} GB/s".format(1 / avg * num_parameters * num_bytes / 1e9))
+        print("Avg flops: {0:8.2f} TFlops/s".format(1 / avg * num_parameters * num_bytes * bs / 1e12))
+        print("Avg Throughput: tokens/s: {}".format((1000 / (avg * 1000)) * bs))