mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-05 10:30:29 +00:00
speedup: just use mat*vec shaders for mat*mat
So far my from-scratch mat*mat shaders are still slower than simply running more invocations of the existing Metal-ported mat*vec shaders. It should be theoretically possible to write a mat*mat shader that is faster (for actual mat*mat cases) than an optimal mat*vec, but it must be at *least* as fast as the mat*vec op and must also take special care to be cache-friendly and save memory bandwidth, since the number of compute ops is the same.
This commit is contained in:
@@ -239,11 +239,6 @@ if (LLAMA_KOMPUTE)
|
||||
kompute/op_rmsnorm.comp
|
||||
kompute/op_diagmask.comp
|
||||
kompute/op_mul_mat_mat_f32.comp
|
||||
kompute/op_mul_mat_mat_f16.comp
|
||||
kompute/op_mul_mat_mat_q8_0.comp
|
||||
kompute/op_mul_mat_mat_q4_0.comp
|
||||
kompute/op_mul_mat_mat_q4_1.comp
|
||||
kompute/op_mul_mat_mat_q6_k.comp
|
||||
kompute/op_mul_mat_f16.comp
|
||||
kompute/op_mul_mat_q8_0.comp
|
||||
kompute/op_mul_mat_q4_0.comp
|
||||
@@ -275,11 +270,6 @@ if (LLAMA_KOMPUTE)
|
||||
shaderop_rmsnorm.h
|
||||
shaderop_diagmask.h
|
||||
shaderop_mul_mat_mat_f32.h
|
||||
shaderop_mul_mat_mat_f16.h
|
||||
shaderop_mul_mat_mat_q8_0.h
|
||||
shaderop_mul_mat_mat_q4_0.h
|
||||
shaderop_mul_mat_mat_q4_1.h
|
||||
shaderop_mul_mat_mat_q6_k.h
|
||||
shaderop_mul_mat_f16.h
|
||||
shaderop_mul_mat_q8_0.h
|
||||
shaderop_mul_mat_q4_0.h
|
||||
|
Reference in New Issue
Block a user