[fp8] support gemini plugin (#5978)

* [fp8] refactor hook

* [fp8] support gemini plugin

* [example] add fp8 option for llama benchmark
Hongxin Liu
2024-08-09 14:09:48 +08:00
committed by GitHub
parent 4b9bec8176
commit 8241c0c054
7 changed files with 21 additions and 7 deletions
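For context, here is a minimal sketch of how fp8 training might be switched on with the Gemini plugin through the Booster API. The `use_fp8=True` flag name is an assumption inferred from this commit's title rather than a documented signature, so verify it against the current GeminiPlugin before relying on it; the rest uses standard ColossalAI calls.

import torch
import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import GeminiPlugin
from colossalai.nn.optimizer import HybridAdam

# Launch the distributed environment (run the script with torchrun or `colossalai run`).
colossalai.launch_from_torch()

# `use_fp8=True` is the assumed name of the new fp8 switch on GeminiPlugin (see note above).
plugin = GeminiPlugin(use_fp8=True)
booster = Booster(plugin=plugin)

model = torch.nn.Linear(1024, 1024)
optimizer = HybridAdam(model.parameters(), lr=1e-3)

# Booster wraps the model and optimizer so Gemini manages memory, and the fp8 hook
# can rewrite eligible linear ops during forward/backward.
model, optimizer, *_ = booster.boost(model, optimizer)

The benchmark change mentioned in the commit message exposes the same switch as a command-line option in the llama benchmark script.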


@@ -652,5 +652,5 @@ class _LinearFp8(torch.autograd.Function):
         return x_grad.reshape(ctx.x_shape), w_grad, bias_grad
 
 
-def linear_fp8(x: torch.Tensor, w: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor:
-    return _LinearFp8.apply(x, w, bias)
+def linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor:
+    return _LinearFp8.apply(input, weight, bias)
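The rename in this hunk makes `linear_fp8`'s signature match `torch.nn.functional.linear` (`input`, `weight`, `bias`), so an op-rewriting hook can substitute it without breaking callers that pass keyword arguments. The sketch below is a hypothetical, simplified illustration of that idea, not the actual ColossalAI hook: `linear_fp8` here just falls through to the eager kernel, and `patch_linear_fp8` is an invented helper.

import torch
import torch.nn.functional as F

_true_linear = F.linear  # keep a handle on the original eager kernel

# Stand-in for the real fp8 kernel: only the signature matters here. After the
# rename it mirrors F.linear(input, weight, bias=None), so keyword calls survive.
def linear_fp8(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None) -> torch.Tensor:
    # A real implementation would dispatch to an fp8 autograd.Function instead.
    return _true_linear(input, weight, bias)

class patch_linear_fp8:
    """Minimal hook: route F.linear to linear_fp8 for the duration of a block."""

    def __enter__(self):
        self._orig = F.linear
        F.linear = linear_fp8
        return self

    def __exit__(self, *exc):
        F.linear = self._orig

if __name__ == "__main__":
    layer = torch.nn.Linear(8, 4)
    x = torch.randn(2, 8)
    with patch_linear_fp8():
        y = layer(x)  # nn.Linear calls F.linear internally, now routed to linear_fp8
        z = F.linear(input=x, weight=layer.weight, bias=layer.bias)  # keyword call also works
    print(y.shape, z.shape)  # torch.Size([2, 4]) torch.Size([2, 4])

With the old parameter names (`x`, `w`), the keyword-argument call above would raise a TypeError once the swap is in place; matching the names of `F.linear` is what keeps the substitution transparent to existing call sites.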