bump llama.cpp version + needed fixes for that

This commit is contained in:
Aaron Miller
2023-07-14 11:10:41 -04:00
committed by Adam Treat
parent 33c22be2aa
commit 0bc2274869
6 changed files with 38 additions and 33 deletions

View File

@@ -1,6 +1,7 @@
#pragma once
#include <cstdint>
#include <cstddef>
#include <vector>
#include <ggml.h>
struct llm_buffer {
@@ -34,3 +35,14 @@ struct llm_kv_cache {
}
}
};
#if LLAMA_DATE >= 230519
// Plans and then executes `graph` with `n_threads` threads.
//
// ggml_graph_plan() reports how many bytes of work memory the computation
// wants (work_size). When that is non-zero, `buf` is resized to that many
// bytes and its address is handed to the plan as work_data before
// ggml_graph_compute() is invoked. When it is zero, `buf` is left untouched.
//
// NOTE(review): whatever ggml_graph_compute() returns is not propagated;
// this wrapper is declared void.
inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan cplan = ggml_graph_plan(graph, n_threads);

    const bool needs_work_buffer = cplan.work_size > 0;
    if (needs_work_buffer) {
        // Back the plan's work area with the caller-provided buffer.
        buf.resize(cplan.work_size);
        cplan.work_data = buf.addr;
    }

    ggml_graph_compute(graph, &cplan);
}
#endif