Replit Model (#713)

* porting over replit code model to gpt4all

* replaced memory with kv_self struct (see the first sketch after this list)

* continuing debug

* welp it built but a lot of sus things

* working model loading and somewhat working generate.. need to format response?

* revert back to semi working version

* finally got rid of weird formatting

* figured out problem is with python bindings - this is good to go for testing

* addressing PR feedback

* output refactor

* fixed prompt response collection

* cleanup

* addressing PR comments

* building replit backend with new ggml version code

* chatllm replit and clean python files

* cleanup

* updated replit to match new llmodel api

* match llmodel api and change size_t to Token (see the second sketch after this list)

* resolve PR comments

* replit model commit comment
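
A side note on the "replaced memory with kv_self struct" item: the port moves from the example code's ad-hoc memory tensors to a llama.cpp-style key/value cache struct. The following is only a rough sketch of that pattern on top of the ggml API; the names replit_kv_cache and kv_cache_init are hypothetical and need not match the actual backend added in this commit.

#include <cstdint>
#include <vector>

#include "ggml.h"

// Hypothetical sketch of a llama.cpp-style KV cache; field names are illustrative.
struct replit_kv_cache {
    struct ggml_tensor  * k   = nullptr;  // cached keys,   n_embd * n_ctx * n_layer elements
    struct ggml_tensor  * v   = nullptr;  // cached values, same element count as k
    struct ggml_context * ctx = nullptr;  // ggml context that owns k and v
    std::vector<uint8_t>  buf;            // backing buffer handed to ggml_init
    int n = 0;                            // number of tokens currently in the cache
};

// Allocate the cache once at model-load time instead of scattering "memory" tensors
// through the evaluation code.
static bool kv_cache_init(replit_kv_cache & cache, int n_embd, int n_layer, int n_ctx) {
    const int64_t n_elements = (int64_t) n_embd * n_layer * n_ctx;

    // room for two f16 tensors plus a little ggml bookkeeping overhead
    cache.buf.resize(2u * n_elements * ggml_type_size(GGML_TYPE_F16) + 2u * 1024 * 1024);

    struct ggml_init_params params;
    params.mem_size   = cache.buf.size();
    params.mem_buffer = cache.buf.data();
    params.no_alloc   = false;

    cache.ctx = ggml_init(params);
    if (!cache.ctx)
        return false;

    cache.k = ggml_new_tensor_1d(cache.ctx, GGML_TYPE_F16, n_elements);
    cache.v = ggml_new_tensor_1d(cache.ctx, GGML_TYPE_F16, n_elements);
    return true;
}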
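
On the "change size_t to Token" item: the backend's token handling switches from size_t indices to the Token id type used by the llmodel API. Below is a minimal sketch of the shape of that change; the class body is illustrative only and is not copied from the real llmodel header.

#include <cstdint>
#include <string>
#include <vector>

class LLModel {
public:
    using Token = int32_t;  // token ids are fixed-width signed ints rather than size_t

    // before: std::vector<size_t> tokenize(const std::string & prompt);
    virtual std::vector<Token> tokenize(const std::string & prompt) = 0;

    // before: size_t sampleToken(); the sampler returns a Token id as well
    virtual Token sampleToken() = 0;

    virtual ~LLModel() = default;
};
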
Author: Richard Guo
Date: 2023-06-06 17:09:00 -04:00
Committed by: GitHub
Parent: ef35eb496f
Commit: c4706d0c14
8 changed files with 1140 additions and 6 deletions


@@ -18,6 +18,7 @@
 #define MPT_INTERNAL_STATE_VERSION 0
 #define GPTJ_INTERNAL_STATE_VERSION 0
+#define REPLIT_INTERNAL_STATE_VERSION 0
 #define LLAMA_INTERNAL_STATE_VERSION 0

 static QString modelFilePath(const QString &modelName, bool isChatGPT)
@@ -226,6 +227,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         case 'L': m_modelType = LLModelType::LLAMA_; break;
         case 'G': m_modelType = LLModelType::GPTJ_; break;
         case 'M': m_modelType = LLModelType::MPT_; break;
+        case 'R': m_modelType = LLModelType::REPLIT_; break;
         default: delete std::exchange(m_modelInfo.model, nullptr);
         }
     } else {
@@ -561,6 +563,7 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
     if (version > 1) {
         stream << m_modelType;
         switch (m_modelType) {
+        case REPLIT_: stream << REPLIT_INTERNAL_STATE_VERSION; break;
         case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break;
         case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break;
         case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break;


@@ -13,6 +13,7 @@ enum LLModelType {
     GPTJ_,
     LLAMA_,
     CHATGPT_,
+    REPLIT_
 };

 struct LLModelInfo {