backend: rebase llama.cpp on upstream as of Sep 26th (#2998)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-11-02 23:13:29 +00:00 · 2024-09-27 12:05:59 -04:00
parent 8bd937eb68
commit f9d6be8afb
16 changed files with 165 additions and 600 deletions
--- a/gpt4all-backend/include/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/include/gpt4all-backend/llmodel.h
@@ -7,6 +7,7 @@
 #include <cstdint>
 #include <functional>
 #include <optional>
+#include <span>
 #include <stdexcept>
 #include <string>
 #include <string_view>
@@ -149,9 +150,9 @@ public:
    virtual bool isEmbeddingModel(const std::string &modelPath) const { (void)modelPath; return false; }
    virtual bool isModelLoaded() const = 0;
    virtual size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0;
-    virtual size_t stateSize() const { return 0; }
-    virtual size_t saveState(uint8_t *dest) const { (void)dest; return 0; }
-    virtual size_t restoreState(const uint8_t *src) { (void)src; return 0; }
+    virtual size_t stateSize() const = 0;
+    virtual size_t saveState(std::span<uint8_t> dest) const = 0;
+    virtual size_t restoreState(std::span<const uint8_t> src) = 0;

    // This method requires the model to return true from supportsCompletion otherwise it will throw
    // an error
@@ -215,7 +216,8 @@ protected:
    virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
    virtual bool isSpecialToken(Token id) const = 0;
    virtual std::string tokenToString(Token id) const = 0;
-    virtual Token sampleToken(PromptContext &ctx) const = 0;
+    virtual void initSampler(PromptContext &ctx) = 0;
+    virtual Token sampleToken() const = 0;
    virtual bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const = 0;
    virtual void shiftContext(PromptContext &promptCtx) = 0;
    virtual int32_t contextLength() const = 0;
--- a/gpt4all-backend/include/gpt4all-backend/llmodel_c.h
+++ b/gpt4all-backend/include/gpt4all-backend/llmodel_c.h
@@ -148,18 +148,20 @@ uint64_t llmodel_get_state_size(llmodel_model model);
 * NOTE: This state data is specific to the type of model you have created.
 * @param model A pointer to the llmodel_model instance.
 * @param dest A pointer to the destination.
- * @return the number of bytes copied
+ * @param size The size of the destination buffer, in bytes.
+ * @return The number of bytes copied, or zero on error.
 */
-uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest);
+uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest, uint64_t size);

 /**
 * Restores the internal state of the model using data from the specified address.
 * NOTE: This state data is specific to the type of model you have created.
 * @param model A pointer to the llmodel_model instance.
- * @param src A pointer to the src.
- * @return the number of bytes read
+ * @param src A pointer to the state data.
+ * @param size The size of the source data, in bytes.
+ * @return The number of bytes read, or zero on error.
 */
-uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src);
+uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src, size_t size);

 /**
 * Generate a response using the model.