Mirror of https://github.com/nomic-ai/gpt4all.git
backend: factor out common elements in model code (#1089)
* backend: factor out common structs in model code, prepping to hack on these by hopefully leaving fewer places to fix the same bug; rename
* use common buffer wrapper instead of manual malloc
* fix replit compile warnings
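A minimal sketch of the second point, assuming a per-model backend that previously sized and freed its scratch buffer by hand; the example_model_state struct and the 16 MiB size are illustrative assumptions, while llm_buffer itself is defined in the new header below:

#include "llmodel_shared.h"

// Hypothetical backend state: previously this held a raw malloc'd pointer
// plus a matching free() in a destructor; with the shared wrapper the
// allocation and the cleanup live in one place.
struct example_model_state {
    llm_buffer buf_compute;   // replaces: uint8_t * buf_compute = (uint8_t *) malloc(...)
};

int main() {
    example_model_state state;
    state.buf_compute.resize(16u * 1024 * 1024);  // 16 MiB scratch; released by ~llm_buffer()
    return 0;
}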
gpt4all-backend/llmodel_shared.h (new file, 36 lines added)
@@ -0,0 +1,36 @@
#pragma once
#include <cstdint>
#include <cstddef>
#include <ggml.h>

struct llm_buffer {
    uint8_t * addr = NULL;
    size_t size = 0;

    void resize(size_t size) {
        delete[] addr;
        addr = new uint8_t[size];
        this->size = size;
    }

    ~llm_buffer() {
        delete[] addr;
    }
};

struct llm_kv_cache {
    struct ggml_tensor * k;
    struct ggml_tensor * v;

    struct ggml_context * ctx = NULL;

    llm_buffer buf;

    int n; // number of tokens currently in the cache

    ~llm_kv_cache() {
        if (ctx) {
            ggml_free(ctx);
        }
    }
};
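For context, a hedged sketch of how a backend might allocate its KV cache through these shared structs once the header is in place; the kv_cache_init name, the sizing formula, and the 2 MiB slack constant are illustrative assumptions modeled on the ggml-style backends, not code from this commit.

#include "llmodel_shared.h"
#include <ggml.h>

// Illustrative sketch: size the backing buffer, hand it to ggml as a
// preallocated arena, then create the K and V tensors inside that arena.
static bool kv_cache_init(llm_kv_cache & cache, ggml_type wtype,
                          int n_embd, int n_layer, int n_ctx) {
    const int64_t n_mem      = (int64_t) n_layer * n_ctx;
    const int64_t n_elements = (int64_t) n_embd * n_mem;

    // One K and one V element per position, plus slack for ggml's bookkeeping.
    cache.buf.resize(2u * n_elements * ggml_type_size(wtype) + 2u * 1024 * 1024);

    struct ggml_init_params params;
    params.mem_size   = cache.buf.size;
    params.mem_buffer = cache.buf.addr;
    params.no_alloc   = false;

    cache.ctx = ggml_init(params);
    if (!cache.ctx) {
        return false;
    }

    cache.k = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
    cache.v = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
    cache.n = 0;

    return true;
}

Keeping the buffer inside llm_kv_cache means the arena outlives the tensors ggml places in it: the ~llm_kv_cache destructor frees the ggml context first, and the llm_buffer member then releases the underlying memory.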