Mirror of https://github.com/nomic-ai/gpt4all.git
backend: factor out common elements in model code (#1089)
* backend: factor out common structs in model code, prepping to hack on these by hopefully leaving fewer places to fix the same bug; rename
* use common buffer wrapper instead of manual malloc
* fix replit compile warnings
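A minimal sketch of the second point, assuming a per-model backend that previously sized and freed its scratch buffer by hand; the example_model_state struct and the 16 MiB size are illustrative assumptions, while llm_buffer itself is defined in the new header below:

#include "llmodel_shared.h"

// Hypothetical backend state: previously this held a raw malloc'd pointer
// plus a matching free() in a destructor; with the shared wrapper the
// allocation and the cleanup live in one place.
struct example_model_state {
    llm_buffer buf_compute;   // replaces: uint8_t * buf_compute = (uint8_t *) malloc(...)
};

int main() {
    example_model_state state;
    state.buf_compute.resize(16u * 1024 * 1024);  // 16 MiB scratch; released by ~llm_buffer()
    return 0;
}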
gpt4all-backend/llmodel_shared.h (new file, 36 lines added)
@@ -0,0 +1,36 @@
#pragma once
#include <cstdint>
#include <cstddef>
#include <ggml.h>

struct llm_buffer {
    uint8_t * addr = NULL;
    size_t size = 0;

    void resize(size_t size) {
        delete[] addr;
        addr = new uint8_t[size];
        this->size = size;
    }

    ~llm_buffer() {
        delete[] addr;
    }
};

struct llm_kv_cache {
    struct ggml_tensor * k;
    struct ggml_tensor * v;

    struct ggml_context * ctx = NULL;

    llm_buffer buf;

    int n; // number of tokens currently in the cache

    ~llm_kv_cache() {
        if (ctx) {
            ggml_free(ctx);
        }
    }
};
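For context, a hedged sketch of how a backend might allocate its KV cache through these shared structs once the header is in place; the kv_cache_init name, the sizing formula, and the 2 MiB slack constant are illustrative assumptions modeled on the ggml-style backends, not code from this commit.

#include "llmodel_shared.h"
#include <ggml.h>

// Illustrative sketch: size the backing buffer, hand it to ggml as a
// preallocated arena, then create the K and V tensors inside that arena.
static bool kv_cache_init(llm_kv_cache & cache, ggml_type wtype,
                          int n_embd, int n_layer, int n_ctx) {
    const int64_t n_mem      = (int64_t) n_layer * n_ctx;
    const int64_t n_elements = (int64_t) n_embd * n_mem;

    // One K and one V element per position, plus slack for ggml's bookkeeping.
    cache.buf.resize(2u * n_elements * ggml_type_size(wtype) + 2u * 1024 * 1024);

    struct ggml_init_params params;
    params.mem_size   = cache.buf.size;
    params.mem_buffer = cache.buf.addr;
    params.no_alloc   = false;

    cache.ctx = ggml_init(params);
    if (!cache.ctx) {
        return false;
    }

    cache.k = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
    cache.v = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
    cache.n = 0;

    return true;
}

Keeping the buffer inside llm_kv_cache means the arena outlives the tensors ggml places in it: the ~llm_kv_cache destructor frees the ggml context first, and the llm_buffer member then releases the underlying memory.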