backend: use ggml_new_graph for GGML backend v2 (#1719)

Jared Van Bortel authored on 2023-12-06 14:38:53 -05:00; committed by GitHub
parent fb3b1ceba2
commit dfd8ef0186
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
2 changed files with 13 additions and 13 deletions
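
In GGML backend v2, a compute graph can no longer be zero-initialized on the stack; it has to be allocated from the ggml context with ggml_new_graph(), and every call site passes the resulting pointer instead of &gf. Below is a minimal sketch of the new pattern, mirroring the plan/compute flow in gptj_eval; the buffer size, tensors, and thread count are illustrative and not taken from the patch.

#include "ggml.h"

#include <cstdint>
#include <memory>

void sketch() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16u*1024u*1024u, // illustrative scratch size
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    // Before: struct ggml_cgraph gf = {};   (stack object, no longer supported)
    // After: the graph is allocated inside the ggml context.
    struct ggml_cgraph * gf = ggml_new_graph(ctx0);

    // Illustrative tensors, just to have something to compute.
    struct ggml_tensor * a   = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 8);
    struct ggml_tensor * b   = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 8);
    struct ggml_tensor * out = ggml_add(ctx0, a, b);

    // Call sites now take the pointer directly instead of &gf.
    ggml_build_forward_expand(gf, out);

    // Plan the computation, provide a work buffer if one is needed, and run.
    struct ggml_cplan plan = ggml_graph_plan(gf, /*n_threads=*/4);
    std::unique_ptr<uint8_t[]> work;
    if (plan.work_size > 0) {
        work.reset(new uint8_t[plan.work_size]);
        plan.work_data = work.get();
    }
    ggml_graph_compute(gf, &plan);

    ggml_free(ctx0);
}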


@@ -317,7 +317,7 @@ void bert_eval(
     };
 
     struct ggml_context *ctx0 = ggml_init(params);
-    struct ggml_cgraph gf = {};
+    struct ggml_cgraph *gf = ggml_new_graph(ctx0);
 
     // Embeddings. word_embeddings + token_type_embeddings + position_embeddings
     struct ggml_tensor *token_layer = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -448,10 +448,10 @@ void bert_eval(
     ggml_tensor *output = inpL;
 
     // run the computation
-    ggml_build_forward_expand(&gf, output);
+    ggml_build_forward_expand(gf, output);
     //ggml_graph_compute_g4a()
-    ggml_graph_compute_g4a(ctx->work_buf, &gf, n_threads);
-    //ggml_graph_compute(ctx0, &gf);
+    ggml_graph_compute_g4a(ctx->work_buf, gf, n_threads);
+    //ggml_graph_compute(ctx0, gf);
 
     // float *dat = ggml_get_data_f32(output);
@@ -460,7 +460,7 @@ void bert_eval(
 #ifdef GGML_PERF
     // print timing information per ggml operation (for debugging purposes)
     // requires GGML_PERF to be defined
-    ggml_graph_print(&gf);
+    ggml_graph_print(gf);
 #endif
 
     if (!mem_req_mode) {


@@ -343,7 +343,7 @@ bool gptj_eval(
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
-    struct ggml_cgraph gf = {};
+    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
 
     // KQ_pos - contains the positions
     struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -395,8 +395,8 @@ bool gptj_eval(
                         (   n_ctx)*ggml_element_size(model.kv_self.v),
                         (il*n_ctx)*ggml_element_size(model.kv_self.v)*n_embd + n_past*ggml_element_size(model.kv_self.v));
 
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+                ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+                ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
             }
 
             // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -515,22 +515,22 @@ bool gptj_eval(
     // logits -> probs
     //inpL = ggml_soft_max(ctx0, inpL);
 
-    ggml_build_forward_expand(&gf, inpL);
+    ggml_build_forward_expand(gf, inpL);
 
     // run the computation
     {
        std::unique_ptr<uint8_t []> data;
-       auto plan = ggml_graph_plan(&gf, n_threads);
+       auto plan = ggml_graph_plan(gf, n_threads);
        if (plan.work_size > 0) {
            data.reset(new uint8_t[plan.work_size]);
            plan.work_data = data.get();
        }
-       ggml_graph_compute(&gf, &plan);
+       ggml_graph_compute(gf, &plan);
     }
 
     //if (n_past%100 == 0) {
-    //    ggml_graph_print   (&gf);
-    //    ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot");
+    //    ggml_graph_print   (gf);
+    //    ggml_graph_dump_dot(gf, NULL, "gpt-2.dot");
     //}
 
     //embd_w.resize(n_vocab*N);