Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-10-31 13:51:43 +00:00.
	Latest rebase on llama.cpp with gguf support.
@@ -226,9 +226,9 @@ size_t LLamaModel::restoreState(const uint8_t *src)
 
 std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str) const
 {
-    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos());
+    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->ctx));
     std::vector<LLModel::Token> fres(str.size()+4);
-    auto fres_len = llama_tokenize(d_ptr->ctx, str.c_str(), fres.data(), fres.size(), useBOS);
+    auto fres_len = llama_tokenize(d_ptr->ctx, str.c_str(), str.length(), fres.data(), fres.size(), useBOS);
     fres.resize(fres_len);
     return fres;
 }
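In the gguf-era llama.cpp C API, llama_tokenize takes the input length explicitly and, when the output buffer is too small, returns the negative of the required token count, so callers can size the buffer in two passes. A minimal sketch of that pattern, assuming only the llama.h calls visible above (this commit instead over-allocates str.size()+4 up front):

    #include "llama.h"
    #include <string>
    #include <vector>

    // Two-pass tokenization: retry with the exact size reported via the
    // negative return value. Hypothetical helper, not part of this commit.
    static std::vector<llama_token> tokenize_two_pass(llama_context *ctx, const std::string &str, bool add_bos) {
        std::vector<llama_token> tokens(str.size() + 4); // same initial guess as the code above
        int n = llama_tokenize(ctx, str.c_str(), str.length(), tokens.data(), tokens.size(), add_bos);
        if (n < 0) { // buffer too small; -n is the count actually needed
            tokens.resize(-n);
            n = llama_tokenize(ctx, str.c_str(), str.length(), tokens.data(), tokens.size(), add_bos);
        }
        tokens.resize(n);
        return tokens;
    }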
@@ -250,10 +250,10 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
     // When we recalculate context we could have erased the original BOS token... we need to replace it
-    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos());
+    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->ctx));
     if (useBOS) {
         std::vector<int32_t> myTokens;
-        myTokens.push_back(llama_token_bos());
+        myTokens.push_back(llama_token_bos(d_ptr->ctx));
         myTokens.insert(myTokens.end(), tokens.begin(), tokens.end());
         ctx.n_past += 1;
         return llama_eval(d_ptr->ctx, myTokens.data(), myTokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
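The BOS-repair logic in evalTokens is self-contained; distilled into a standalone helper it reads as below. A sketch under the same llama.h assumptions; the helper name is illustrative and not part of the gpt4all API:

    #include "llama.h"
    #include <vector>

    // When evaluation restarts from position 0, context recalculation may have
    // erased the original BOS token, so re-prepend it before evaluating.
    static std::vector<llama_token> with_bos_if_needed(llama_context *ctx, std::vector<llama_token> tokens, int n_past) {
        const bool needBOS = n_past == 0 && (tokens.empty() || tokens.front() != llama_token_bos(ctx));
        if (needBOS)
            tokens.insert(tokens.begin(), llama_token_bos(ctx));
        return tokens;
    }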
@@ -268,7 +268,7 @@ int32_t LLamaModel::contextLength() const
 
 const std::vector<LLModel::Token> &LLamaModel::endTokens() const
 {
-    static const std::vector<LLModel::Token> fres = {llama_token_eos()};
+    static const std::vector<LLModel::Token> fres = {llama_token_eos(d_ptr->ctx)};
     return fres;
 }
 
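These three hunks make the same API adjustment: with gguf, special-token IDs are stored in the model's metadata rather than being fixed constants, so llama_token_bos and llama_token_eos now take the context they should consult. A minimal sketch, assuming the gguf-era llama.h signatures:

    #include "llama.h"
    #include <cstdio>

    // Print the special tokens a loaded model actually declares; the IDs come
    // from the gguf metadata of whatever model backs ctx.
    static void print_special_tokens(llama_context *ctx) {
        std::printf("BOS id: %d\n", llama_token_bos(ctx));
        std::printf("EOS id: %d\n", llama_token_eos(ctx));
    }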
@@ -351,6 +351,16 @@ bool LLamaModel::usingGPUDevice()
     return false;
 }
 
+std::string get_arch_name(gguf_context *ctx_gguf) {
+    std::string arch_name;
+    const int kid = gguf_find_key(ctx_gguf, "general.architecture");
+    enum gguf_type ktype = gguf_get_kv_type(ctx_gguf, kid);
+    if (ktype != (GGUF_TYPE_STRING)) {
+        throw std::runtime_error("ERROR: Can't get general architecture from gguf file.");
+    }
+    return gguf_get_val_str(ctx_gguf, kid);
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
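For context, the new get_arch_name helper can be exercised against a model file with a small harness like the one below. A sketch that assumes only the gguf_* API used in this commit; the command-line handling is illustrative:

    #include "ggml.h"
    #include <cstdio>
    #include <stdexcept>
    #include <string>

    std::string get_arch_name(gguf_context *ctx_gguf); // the helper added above

    int main(int argc, char **argv) {
        if (argc < 2) { std::fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }
        // no_alloc with a null ggml context: read metadata only, skip tensor data
        struct gguf_init_params params = { /*.no_alloc = */ true, /*.ctx = */ NULL };
        gguf_context *ctx_gguf = gguf_init_from_file(argv[1], params);
        if (!ctx_gguf) { std::fprintf(stderr, "not a gguf file\n"); return 1; }
        std::printf("architecture: %s\n", get_arch_name(ctx_gguf).c_str());
        gguf_free(ctx_gguf);
        return 0;
    }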
@@ -370,39 +380,42 @@ DLL_EXPORT const char *get_build_variant() {
     return GGML_BUILD_VARIANT;
 }
 
-DLL_EXPORT bool magic_match(std::istream& f) {
-    // Check magic
-    uint32_t magic = 0;
-    f.read(reinterpret_cast<char*>(&magic), sizeof(magic));
-    if (magic != 0x67676a74) return false;
-    // Check version
-    uint32_t version = 0;
-    f.read(reinterpret_cast<char*>(&version), sizeof(version));
-    if (!(version LLAMA_VERSIONS)) {
+DLL_EXPORT bool magic_match(const char * fname) {
+
+    struct ggml_context * ctx_meta = NULL;
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx      = */ &ctx_meta,
+    };
+    gguf_context *ctx_gguf = gguf_init_from_file(fname, params);
+    if (!ctx_gguf)
         return false;
-    }
-    llama_file_hparams hparams;
-    f.read(reinterpret_cast<char*>(&hparams), sizeof(hparams));
-    if (!(hparams.n_vocab >= 32000 && hparams.n_vocab <= 32100)) {
-        return false; // not a llama.
-    }
+
+    bool isValid = gguf_get_version(ctx_gguf) <= 2;
+    isValid = get_arch_name(ctx_gguf) != "llama" ? false : isValid;
+
 #ifdef GGML_USE_METAL
     // Check quant supported on metal
-    // skip fields
-    switch(hparams.ftype) {
-        // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
-        case LLAMA_FTYPE_MOSTLY_F16:
-        case LLAMA_FTYPE_MOSTLY_Q2_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_0:
-        case LLAMA_FTYPE_MOSTLY_Q6_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M:
-            return true;
-        default: // unsupported quant-type for Metal
-            return false;
-    }
+    const int n_tensors = gguf_get_n_tensors(ctx_gguf);
+    for (int i = 0; i < n_tensors; i++) {
+        const char * name = gguf_get_tensor_name(ctx_gguf, i);
+        struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
+        switch(meta->type) {
+            // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
+            case LLAMA_FTYPE_MOSTLY_F16:
+            case LLAMA_FTYPE_MOSTLY_Q2_K:
+            case LLAMA_FTYPE_MOSTLY_Q4_0:
+            case LLAMA_FTYPE_MOSTLY_Q6_K:
+            case LLAMA_FTYPE_MOSTLY_Q4_K_S:
+            case LLAMA_FTYPE_MOSTLY_Q4_K_M:
+                break;
+            default: // unsupported quant-type for Metal
+                isValid = false;
+        }
+    }
 #endif
-    return true;
+
+    gguf_free(ctx_gguf);
+    return isValid;
 }
 
 DLL_EXPORT LLModel *construct() {
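magic_match is exported so the model loader can probe each backend library for a file it can handle; the signature changes from std::istream& to a plain path because the gguf parser wants to open the file itself. A POSIX-only sketch of such probing, assuming the symbol is exported unmangled; the library and model paths are placeholders:

    #include <cstdio>
    #include <dlfcn.h>

    // Probe one backend shared library: does it claim this model file?
    // Only the exported name (magic_match) comes from this file; the rest is assumed.
    int main(int argc, char **argv) {
        if (argc < 3) { std::fprintf(stderr, "usage: %s libllamamodel.so model.gguf\n", argv[0]); return 1; }
        void *lib = dlopen(argv[1], RTLD_NOW | RTLD_LOCAL);
        if (!lib) { std::fprintf(stderr, "dlopen: %s\n", dlerror()); return 1; }
        auto magic_match = reinterpret_cast<bool (*)(const char *)>(dlsym(lib, "magic_match"));
        if (!magic_match) { std::fprintf(stderr, "dlsym: %s\n", dlerror()); dlclose(lib); return 1; }
        std::printf("%s %s handle %s\n", argv[1], magic_match(argv[2]) ? "can" : "cannot", argv[2]);
        dlclose(lib);
        return 0;
    }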