Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-07-04 11:08:01 +00:00)

commit 5be5314ace (parent bafbed9c6b)

rename LLModel -> ModelBackend, EmbLLModel -> EmbCapableBackend

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
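For orientation, the sketch below condenses the class relationships as they stand after this commit. It is abridged from the hunks that follow (member lists trimmed, Qt-dependent pieces reduced to comments) and is not code from the repository.

    // Abridged sketch of the renamed hierarchy; names and signatures taken from the hunks below.
    #include <cstdint>
    #include <string>

    class ModelBackend {                 // formerly LLModel
    public:
        using Token = int32_t;
        struct PromptContext { float contextErase = 0.5f; /* sampling state elided */ };
        virtual ~ModelBackend() {}
        virtual bool supportsCompletion() const { return true; }
        virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
    protected:
        explicit ModelBackend() {}
    };

    // The cancel-callback type now lives at namespace scope instead of inside the class.
    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);

    class EmbCapableBackend : virtual public ModelBackend {   // formerly EmbLLModel
    public:
        virtual bool supportsCompletion() const = 0;
        virtual bool supportsEmbedding() const = 0;
        virtual size_t embeddingSize() const = 0;
    };

    // LlamaCppBackend (local llama.cpp models) derives from EmbCapableBackend;
    // ChatAPI (gpt4all-chat's remote-API client) derives from QObject and ModelBackend.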
@@ -138,7 +138,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
 endforeach()

 add_library(llmodel
-    llmodel.h
+    model_backend.h
     llamacpp_backend.h llamacpp_backend.cpp
     llamacpp_backend_manager.h llamacpp_backend_manager.cpp
     llmodel_c.h llmodel_c.cpp
@@ -1,6 +1,6 @@
 #pragma once

-#include "llmodel.h"
+#include "model_backend.h"

 #include <algorithm>
 #include <cassert>
@@ -17,7 +17,7 @@ using namespace std::string_literals;
 class LlamaCppBackendManager;


-class LlamaCppBackend : public EmbLLModel {
+class LlamaCppBackend : public EmbCapableBackend {
 public:
     struct GPUDevice {
         const char *backend;
@@ -1,7 +1,7 @@
 #define LLAMACPP_BACKEND_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
 #include "llamacpp_backend_impl.h"

-#include "llmodel.h"
+#include "model_backend.h"

 #include <ggml.h>
 #include <llama.h>
@@ -242,7 +242,7 @@ struct LlamaPrivate {
     llama_model_params model_params;
     llama_context_params ctx_params;
     int64_t n_threads = 0;
-    std::vector<LLModel::Token> end_tokens;
+    std::vector<ModelBackend::Token> end_tokens;
     const char *backend_name = nullptr;
 };

@@ -528,11 +528,11 @@ size_t LlamaCppBackendImpl::restoreState(const uint8_t *src)
     return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
 }

-std::vector<LLModel::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<ModelBackend::Token> LlamaCppBackendImpl::tokenize(PromptContext &ctx, const std::string &str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
-    std::vector<LLModel::Token> fres(str.length() + 4);
+    std::vector<ModelBackend::Token> fres(str.length() + 4);
     int32_t fres_len = llama_tokenize_gpt4all(
         d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
         /*parse_special*/ special, /*insert_space*/ insertSpace
@@ -565,7 +565,7 @@ std::string LlamaCppBackendImpl::tokenToString(Token id) const
     return std::string(result.data(), result.size());
 }

-LLModel::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
+ModelBackend::Token LlamaCppBackendImpl::sampleToken(PromptContext &promptCtx) const
 {
     const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
     return llama_sample_top_p_top_k(d_ptr->ctx,
@@ -627,7 +627,7 @@ int32_t LlamaCppBackendImpl::contextLength() const
     return llama_n_ctx(d_ptr->ctx);
 }

-const std::vector<LLModel::Token> &LlamaCppBackendImpl::endTokens() const
+const std::vector<ModelBackend::Token> &LlamaCppBackendImpl::endTokens() const
 {
     return d_ptr->end_tokens;
 }
@@ -825,7 +825,7 @@ void llama_batch_add(
     batch.n_tokens++;
 }

-static void batch_add_seq(llama_batch &batch, const std::vector<LLModel::Token> &tokens, int seq_id)
+static void batch_add_seq(llama_batch &batch, const std::vector<ModelBackend::Token> &tokens, int seq_id)
 {
     for (unsigned i = 0; i < tokens.size(); i++) {
         llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
@@ -909,7 +909,7 @@ void LlamaCppBackendImpl::embed(

 void LlamaCppBackendImpl::embed(
     const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb
 ) {
     if (!d_ptr->model)
         throw std::logic_error("no model is loaded");
@@ -967,9 +967,9 @@ double getL2NormScale(T *start, T *end)

 void LlamaCppBackendImpl::embedInternal(
     const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
-    size_t *tokenCount, bool doMean, bool atlas, EmbLLModel::EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
+    size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb, const EmbModelSpec *spec
 ) {
-    typedef std::vector<LLModel::Token> TokenString;
+    typedef std::vector<ModelBackend::Token> TokenString;
     static constexpr int32_t atlasMaxLength = 8192;
     static constexpr int chunkOverlap = 8; // Atlas overlaps chunks of input by 8 tokens

@@ -1217,7 +1217,7 @@ DLL_EXPORT bool is_arch_supported(const char *arch)
     return std::find(KNOWN_ARCHES.begin(), KNOWN_ARCHES.end(), std::string(arch)) < KNOWN_ARCHES.end();
 }

-DLL_EXPORT LLModel *construct()
+DLL_EXPORT LlamaCppBackend *construct()
 {
     llama_log_set(llama_log_callback, nullptr);
 #ifdef GGML_USE_CUDA
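Most of the hunks above are mechanical: every out-of-line signature that spells the token type switches its qualifier from LLModel:: to ModelBackend::. The standalone snippet below (generic names, not repository code) illustrates why the qualifier is needed outside the class body but not inside it.

    #include <cstdint>
    #include <vector>

    struct ModelBackend {
        using Token = int32_t;
    };

    struct Impl : ModelBackend {
        Token sample() const;                  // inside the class, plain Token resolves fine
        std::vector<Token> tokenize() const;
    };

    // At namespace scope the leading return type is looked up before Impl:: is seen,
    // so it must be qualified -- which is why the rename touches every out-of-line
    // definition such as LlamaCppBackendImpl::sampleToken() and ::tokenize().
    ModelBackend::Token Impl::sample() const { return 0; }
    std::vector<ModelBackend::Token> Impl::tokenize() const { return {}; }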
@@ -2,7 +2,7 @@

 #include "llamacpp_backend.h"
 #include "llamacpp_backend_manager.h"
-#include "llmodel.h"
+#include "model_backend.h"

 #include <algorithm>
 #include <cstdio>
@@ -18,7 +18,7 @@

 struct LLModelWrapper {
     LlamaCppBackend *llModel = nullptr;
-    LLModel::PromptContext promptContext;
+    ModelBackend::PromptContext promptContext;
     ~LLModelWrapper() { delete llModel; }
 };

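LLModelWrapper is the bridge between the C API and the C++ backend: the opaque handle owns the LlamaCppBackend pointer together with the ModelBackend::PromptContext that persists across calls. The sketch below shows that wrapper pattern in isolation; the entry-point names are invented for illustration and are not the llmodel_c functions.

    #include <string>

    // Stand-ins for the real classes; see the headers changed in this commit.
    struct ModelBackend { struct PromptContext { int n_past = 0; }; virtual ~ModelBackend() {} };
    struct LlamaCppBackend : ModelBackend { bool loadModel(const std::string &) { return true; } };

    // Opaque handle handed across the C boundary, mirroring LLModelWrapper above.
    struct LLModelWrapper {
        LlamaCppBackend *llModel = nullptr;
        ModelBackend::PromptContext promptContext;
        ~LLModelWrapper() { delete llModel; }
    };

    extern "C" {
    // Illustrative C entry points (hypothetical names, not the llmodel_c API).
    void *wrapper_create() {
        auto *w = new LLModelWrapper;
        w->llModel = new LlamaCppBackend;
        return w;
    }
    void wrapper_destroy(void *handle) { delete static_cast<LLModelWrapper *>(handle); }
    }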
@@ -10,7 +10,7 @@

 #define LLMODEL_MAX_PROMPT_BATCH 128

-class LLModel {
+class ModelBackend {
 public:
     using Token = int32_t;

@@ -29,7 +29,7 @@ public:
         float contextErase = 0.5f; // percent of context to erase if we exceed the context window
     };

-    virtual ~LLModel() {}
+    virtual ~ModelBackend() {}

     virtual bool supportsCompletion() const { return true; }
     virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
@@ -50,13 +50,13 @@ public:
                         std::string *fakeReply = nullptr) = 0;

 protected:
-    explicit LLModel() {}
+    explicit ModelBackend() {}
 };

-class EmbLLModel: virtual public LLModel {
-public:
-    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
+using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);

+class EmbCapableBackend : virtual public ModelBackend {
+public:
     virtual bool supportsCompletion() const = 0;
     virtual bool supportsEmbedding() const = 0;
     virtual size_t embeddingSize() const = 0;
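With EmbedCancelCallback now declared at namespace scope, any free function with the matching signature can be passed as the cancelCb argument of embed(); a true return value is presumably treated as a cancellation request. A small usage sketch, with the callback body invented for illustration:

    #include <cstdio>

    // Signature as declared in the header after this commit.
    using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);

    // Hypothetical callback: log the batch sizes and never cancel.
    static bool logAndContinue(unsigned *batchSizes, unsigned nBatch, const char *backend)
    {
        for (unsigned i = 0; i < nBatch; i++)
            std::printf("batch %u: %u tokens on %s\n", i, batchSizes[i], backend);
        return false; // returning true would request cancellation
    }

    // Passed as the cancelCb parameter of EmbCapableBackend::embed(), e.g.:
    //   model->embed(texts, out, prefix, dims, &tokenCount, doMean, atlas, &logAndContinue);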
@@ -55,7 +55,7 @@ def copy_prebuilt_C_lib(src_dir, dest_dir, dest_build_dir):


 # NOTE: You must provide correct path to the prebuilt llmodel C library.
-# Specifically, the llmodel.h and C shared library are needed.
+# Specifically, the model_backend.h and C shared library are needed.
 copy_prebuilt_C_lib(SRC_CLIB_DIRECTORY,
                     DEST_CLIB_DIRECTORY,
                     DEST_CLIB_BUILD_DIRECTORY)
@@ -1,4 +1,4 @@
-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "prompt.h"
 #include <atomic>
@@ -1,7 +1,7 @@
 #ifndef PREDICT_WORKER_H
 #define PREDICT_WORKER_H

-#include "llmodel.h"
+#include "model_backend.h"
 #include "llmodel_c.h"
 #include "napi.h"
 #include <atomic>
@@ -1,6 +1,6 @@
 #include "chatapi.h"

-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"

 #include <QCoreApplication>
 #include <QGuiApplication>
@@ -170,7 +170,7 @@ bool ChatAPI::callResponse(int32_t token, const std::string& string)
 }

 void ChatAPIWorker::request(const QString &apiKey,
-                            LLModel::PromptContext *promptCtx,
+                            ModelBackend::PromptContext *promptCtx,
                             const QByteArray &array)
 {
     m_ctx = promptCtx;
@@ -1,7 +1,7 @@
 #ifndef CHATAPI_H
 #define CHATAPI_H

-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"

 #include <QByteArray>
 #include <QNetworkReply>
@@ -33,7 +33,7 @@ public:
     QString currentResponse() const { return m_currentResponse; }

     void request(const QString &apiKey,
-                 LLModel::PromptContext *promptCtx,
+                 ModelBackend::PromptContext *promptCtx,
                  const QByteArray &array);

 Q_SIGNALS:
@@ -46,12 +46,12 @@ private Q_SLOTS:

 private:
     ChatAPI *m_chat;
-    LLModel::PromptContext *m_ctx;
+    ModelBackend::PromptContext *m_ctx;
     QNetworkAccessManager *m_networkManager;
     QString m_currentResponse;
 };

-class ChatAPI : public QObject, public LLModel {
+class ChatAPI : public QObject, public ModelBackend {
     Q_OBJECT
 public:
     ChatAPI();
@@ -83,7 +83,7 @@ public:

 Q_SIGNALS:
     void request(const QString &apiKey,
-                 LLModel::PromptContext *ctx,
+                 ModelBackend::PromptContext *ctx,
                  const QByteArray &array);

 private:
@@ -94,7 +94,7 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }

-void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+void LLModelInfo::resetModel(ChatLLM *cllm, ModelBackend *model) {
     this->model.reset(model);
     fallbackReason.reset();
     emit cllm->loadedModelInfoChanged();
@@ -647,7 +647,7 @@ void ChatLLM::resetContext()
 {
     resetResponse();
     m_processedSystemPrompt = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();
 }

 QString ChatLLM::response() const
@@ -902,7 +902,7 @@ void ChatLLM::generateName()
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     m_llModelInfo.model->prompt(chatNamePrompt.toStdString(), promptTemplate.toStdString(),
                                 promptFunc, responseFunc, /*allowContextShift*/ false, ctx);
     std::string trimmed = trim_whitespace(m_nameResponse);
@@ -998,7 +998,7 @@ void ChatLLM::generateQuestions(qint64 elapsed)
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     auto promptFunc = std::bind(&ChatLLM::handleQuestionPrompt, this, std::placeholders::_1);
     auto responseFunc = std::bind(&ChatLLM::handleQuestionResponse, this, std::placeholders::_1, std::placeholders::_2);
-    LLModel::PromptContext ctx = m_ctx;
+    ModelBackend::PromptContext ctx = m_ctx;
     QElapsedTimer totalTime;
     totalTime.start();
     m_llModelInfo.model->prompt(suggestedFollowUpPrompt, promptTemplate.toStdString(), promptFunc, responseFunc,
@@ -1225,7 +1225,7 @@ void ChatLLM::processSystemPrompt()

     // Start with a whole new context
     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();

     auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);

@@ -1278,7 +1278,7 @@ void ChatLLM::processRestoreStateFromText()
     emit restoringFromTextChanged();

     m_stopGenerating = false;
-    m_ctx = LLModel::PromptContext();
+    m_ctx = ModelBackend::PromptContext();

     auto promptFunc = std::bind(&ChatLLM::handleRestoreStateFromTextPrompt, this, std::placeholders::_1);

@@ -4,7 +4,7 @@
 #include "modellist.h"

 #include "../gpt4all-backend/llamacpp_backend.h"
-#include "../gpt4all-backend/llmodel.h"
+#include "../gpt4all-backend/model_backend.h"

 #include <QByteArray>
 #include <QElapsedTimer>
@@ -39,14 +39,14 @@ enum LLModelType {
 };

 struct LLModelInfo {
-    std::unique_ptr<LLModel> model;
+    std::unique_ptr<ModelBackend> model;
     QFileInfo fileInfo;
     std::optional<QString> fallbackReason;

     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state

-    void resetModel(ChatLLM *cllm, LLModel *model = nullptr);
+    void resetModel(ChatLLM *cllm, ModelBackend *model = nullptr);
 };

 class TokenTimer : public QObject {
@@ -218,7 +218,7 @@ private:
     bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);

 protected:
-    LLModel::PromptContext m_ctx;
+    ModelBackend::PromptContext m_ctx;
     quint32 m_promptTokens;
     quint32 m_promptResponseTokens;

@@ -243,7 +243,7 @@ private:
     bool m_processedSystemPrompt;
     bool m_restoreStateFromText;
     // m_pristineLoadedState is set if saveSate is unnecessary, either because:
-    // - an unload was queued during LLModel::restoreState()
+    // - an unload was queued during ModelBackend::restoreState()
     // - the chat will be restored from text and hasn't been interacted with yet
     bool m_pristineLoadedState = false;
     QVector<QPair<QString, QString>> m_stateFromText;
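LLModelInfo now owns its model through std::unique_ptr<ModelBackend>, so ChatLLM can hold either a local llama.cpp model or the remote ChatAPI client behind one pointer type. A minimal sketch of that ownership pattern, using stand-in types rather than the chat code:

    #include <memory>

    struct ModelBackend { virtual ~ModelBackend() = default; };
    struct LlamaCppBackend : ModelBackend {};
    struct ChatAPI : ModelBackend {};   // the real ChatAPI also derives from QObject

    struct LLModelInfo {
        std::unique_ptr<ModelBackend> model;
        void resetModel(ModelBackend *m = nullptr) { model.reset(m); }  // simplified resetModel()
    };

    int main() {
        LLModelInfo info;
        info.resetModel(new LlamaCppBackend); // local model
        info.resetModel(new ChatAPI);         // or a remote backend, through the same pointer type
    }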
@@ -193,7 +193,7 @@ std::vector<float> EmbeddingLLMWorker::generateQueryEmbedding(const QString &tex
     try {
         m_model->embed({text.toStdString()}, embedding.data(), /*isRetrieval*/ true);
     } catch (const std::exception &e) {
-        qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+        qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
         return {};
     }

@@ -287,7 +287,7 @@ void EmbeddingLLMWorker::docEmbeddingsRequested(const QVector<EmbeddingChunk> &c
     try {
         m_model->embed(batchTexts, result.data() + j * m_model->embeddingSize(), /*isRetrieval*/ false);
     } catch (const std::exception &e) {
-        qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+        qWarning() << "WARNING: LlamaCppBackend::embed failed:" << e.what();
         return;
     }
 }