remove old llama.cpp submodules

2025-09-06 11:00:48 +00:00 · 2023-10-05 11:59:23 -04:00
parent cc6db61c93
commit d87573ea75
5 changed files with 0 additions and 20 deletions
--- a/gpt4all-backend/llama.cpp-230511
+++ b/gpt4all-backend/llama.cpp-230511
--- a/gpt4all-backend/llama.cpp-230519
+++ b/gpt4all-backend/llama.cpp-230519
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -39,15 +39,10 @@ const char *modelType_ = "LLaMA";
 struct gpt_params {
    int32_t seed          = -1;   // RNG seed
    int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
-#if LLAMA_DATE <= 230511
-    int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
-#endif

-#if LLAMA_DATE >= 230519
    // sampling parameters
    float   tfs_z         = 1.0f; // 1.0 = disabled
    float   typical_p     = 1.0f; // 1.0 = disabled
-#endif

    std::string prompt = "";

@@ -57,7 +52,6 @@ struct gpt_params {
    bool use_mlock         = false; // use mlock to keep model in memory
 };

-#if LLAMA_DATE >= 230519
 static int llama_sample_top_p_top_k(
        llama_context *ctx,
        const llama_token *last_n_tokens_data,
@@ -85,7 +79,6 @@ static int llama_sample_top_p_top_k(
    llama_sample_temperature(ctx, &candidates_p, temp);
    return llama_sample_token(ctx, &candidates_p);
 }
-#endif

 struct LLamaPrivate {
    const std::string modelPath;
@@ -150,9 +143,6 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 #else
    d_ptr->params.use_mlock  = params.use_mlock;
 #endif
-#if LLAMA_DATE <= 230511
-    d_ptr->params.n_parts  = params.n_parts;
-#endif
 #ifdef GGML_USE_METAL
    std::cerr << "llama.cpp: using Metal" << std::endl;
    // metal always runs the whole model if n_gpu_layers is not 0, at least
--- a/gpt4all-backend/llmodel_shared.h
+++ b/gpt4all-backend/llmodel_shared.h
@@ -80,7 +80,6 @@ struct llm_kv_cache {
    }
 };

-#if LLAMA_DATE >= 230519
 inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
    if (plan.work_size > 0) {
@@ -89,4 +88,3 @@ inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_t
    }
    ggml_graph_compute(graph, &plan);
 }
-#endif