When regenerating, erase the previous response and prompt from the context.

Adam Treat 2023-04-14 20:34:42 -04:00
parent aa836fa6d5
commit f8005cff45
4 changed files with 13 additions and 5 deletions
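The mechanism: LLMObject gains an m_responseTokens counter that is bumped once per response callback, including the empty per-prompt-token polls added in GPTJ::prompt below, so by the end of a turn it covers both the prompt and the generated reply. On regenerate, resetResponse() rewinds s_ctx.n_past by that count and erases the last m_responseTokens entries of s_ctx.logits, so the previous turn no longer influences the next generation. A minimal standalone sketch of the idea, with simplified names and types that are not the project's actual classes (in particular, logits here grows by exactly one entry per token, which need not match the real PromptContext layout):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct PromptContext {
    std::vector<float> logits; // grows by one entry per token in this sketch
    int32_t n_past = 0;        // number of tokens currently in the model's context
};

struct Responder {
    std::string response;
    uint32_t responseTokens = 0;

    // Called once per token; an empty string is a poll issued for prompt tokens.
    bool onToken(const std::string &token) {
        ++responseTokens;              // counts prompt polls and response tokens alike
        if (!token.empty())
            response += token;
        return true;                   // returning false would ask generation to stop
    }

    // Regenerate: drop the previous turn's tokens from the shared context.
    void resetResponse(PromptContext &ctx) {
        ctx.n_past -= static_cast<int32_t>(responseTokens);
        ctx.logits.erase(ctx.logits.end() - responseTokens, ctx.logits.end());
        responseTokens = 0;
        response.clear();
    }
};

int main() {
    PromptContext ctx;
    Responder r;
    auto evalOne = [&](const std::string &token) {
        ctx.logits.push_back(0.0f);    // stand-in for one model evaluation step
        ++ctx.n_past;
        r.onToken(token);
    };
    for (int i = 0; i < 3; ++i) evalOne("");    // prompt tokens: empty polls
    evalOne("Hello"); evalOne(" world");        // generated tokens
    r.resetResponse(ctx);
    std::cout << "n_past after reset: " << ctx.n_past << "\n"; // prints 0
}

The real resetResponse() in llm.cpp below does the same bookkeeping against the shared s_ctx.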

gptj.cpp

@@ -707,7 +707,9 @@ void GPTJ::prompt(const std::string &prompt, std::function<bool(const std::strin
             std::cerr << "GPT-J ERROR: Failed to process prompt\n";
             return;
         }
-        // We pass a null string to see if the user has asked us to stop...
-        if (!response(""))
-            return;
+        // We pass a null string for each token to see if the user has asked us to stop...
+        size_t tokens = batch_end - i;
+        for (size_t t = 0; t < tokens; ++t)
+            if (!response(""))
+                return;
         ctx.n_past += batch.size();
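The loop now polls the response callback once per prompt token with an empty string, so the caller can abort during long prompt ingestion and, with the counter above, account for prompt tokens in the rollback. A compilable sketch of the surrounding batch loop shape, using assumed names (evalBatch stands in for gptj_eval, whose real signature differs):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

struct Ctx { int32_t n_past = 0; };

static bool evalBatch(Ctx &, const std::vector<int> &) { return true; } // stand-in for gptj_eval

void ingestPrompt(Ctx &ctx, const std::vector<int> &embd_inp, size_t n_batch,
                  const std::function<bool(const std::string &)> &response) {
    for (size_t i = 0; i < embd_inp.size(); i += n_batch) {
        size_t batch_end = std::min(i + n_batch, embd_inp.size());
        std::vector<int> batch(embd_inp.begin() + i, embd_inp.begin() + batch_end);
        if (!evalBatch(ctx, batch))
            return;
        size_t tokens = batch_end - i;
        for (size_t t = 0; t < tokens; ++t)
            if (!response(""))               // empty poll, one per prompt token
                return;
        ctx.n_past += static_cast<int32_t>(batch.size());
    }
}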

gptj.h

@@ -15,8 +15,8 @@ public:
     bool loadModel(const std::string &modelPath, std::istream &fin) override;
     bool isModelLoaded() const override;
     void prompt(const std::string &prompt, std::function<bool(const std::string&)> response,
-        PromptContext &ctx, int32_t n_predict = 200, int32_t top_k = 40, float top_p = 0.9f,
-        float temp = 0.9f, int32_t n_batch = 9) override;
+        PromptContext &ctx, int32_t n_predict = 200, int32_t top_k = 50400, float top_p = 1.0f,
+        float temp = 0.0f, int32_t n_batch = 9) override;

 private:
     GPTJPrivate *d_ptr;
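On the new defaults: 50400 is GPT-J's vocabulary size, so a top_k of 50400 keeps every candidate token, and top_p = 1.0f likewise leaves nucleus truncation inactive. A tiny illustration, not project code, of why a top-k cut with k at least the candidate count filters nothing:

#include <algorithm>
#include <functional>
#include <vector>

// Keep the k largest logits; when k >= logits.size() nothing is filtered out,
// which is the effect of top_k = 50400 against GPT-J's 50400-token vocabulary.
std::vector<float> topK(std::vector<float> logits, size_t k) {
    if (k >= logits.size())
        return logits;
    std::partial_sort(logits.begin(), logits.begin() + k, logits.end(),
                      std::greater<float>());
    logits.resize(k);
    return logits;
}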

llm.cpp

@@ -19,6 +19,7 @@ static LLModel::PromptContext s_ctx;
 LLMObject::LLMObject()
     : QObject{nullptr}
     , m_llmodel(new GPTJ)
+    , m_responseTokens(0)
 {
     moveToThread(&m_llmThread);
     connect(&m_llmThread, &QThread::started, this, &LLMObject::loadModel);
@@ -64,6 +65,9 @@ bool LLMObject::isModelLoaded() const

 void LLMObject::resetResponse()
 {
+    s_ctx.n_past -= m_responseTokens;
+    s_ctx.logits.erase(s_ctx.logits.end() -= m_responseTokens, s_ctx.logits.end());
+    m_responseTokens = 0;
     m_response = std::string();
     emit responseChanged();
 }
@@ -89,6 +93,7 @@ bool LLMObject::handleResponse(const std::string &response)
     printf("%s", response.c_str());
     fflush(stdout);
 #endif
+    ++m_responseTokens;
     if (!response.empty()) {
         m_response.append(response);
         emit responseChanged();

llm.h

@@ -41,6 +41,7 @@ private:
 private:
     LLModel *m_llmodel;
     std::string m_response;
+    quint32 m_responseTokens;
     QString m_modelName;
     QThread m_llmThread;
     std::atomic<bool> m_stopGenerating;