Mirror of https://github.com/nomic-ai/gpt4all.git
Implement configurable context length (#1749)
@@ -138,7 +138,7 @@ const LLModel::Implementation* LLModel::Implementation::implementation(const cha
     return nullptr;
 }
 
-LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
+LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant, int n_ctx) {
     if (!has_at_least_minimal_hardware()) {
         std::cerr << "LLModel ERROR: CPU does not support AVX\n";
         return nullptr;
@@ -154,7 +154,11 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
     if(impl) {
         LLModel* metalimpl = impl->m_construct();
         metalimpl->m_implementation = impl;
-        size_t req_mem = metalimpl->requiredMem(modelPath);
+        /* TODO(cebtenzzre): after we fix requiredMem, we should change this to happen at
+         * load time, not construct time. right now n_ctx is incorrectly hardcoded 2048 in
+         * most (all?) places where this is called, causing underestimation of required
+         * memory. */
+        size_t req_mem = metalimpl->requiredMem(modelPath, n_ctx);
         float req_to_total = (float) req_mem / (float) total_mem;
         // on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
         if (req_to_total >= 0.53) {
@@ -165,6 +169,8 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
             }
         }
     }
+#else
+    (void)n_ctx;
 #endif
 
     if (!impl) {
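For context, the change threads a caller-supplied context length (n_ctx) through LLModel::Implementation::construct instead of leaving it hardcoded. A minimal caller sketch follows; the header name, the model path, and the "auto" build variant are illustrative assumptions, not taken from this diff:

    #include <iostream>
    #include "llmodel.h"  // assumed header declaring LLModel (not shown in this diff)

    int main() {
        // Construct an implementation with a 4096-token context window
        // via the new n_ctx parameter.
        LLModel *model = LLModel::Implementation::construct(
            "/path/to/model.bin",  // hypothetical model file
            "auto",                // assumed build-variant value
            4096);                 // n_ctx: the newly configurable context length
        if (!model) {
            std::cerr << "construct failed\n";
            return 1;
        }
        delete model;
        return 0;
    }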
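The Apple Silicon path keeps its fallback heuristic: if the model's estimated memory footprint is too large a share of system RAM, the Metal implementation is discarded. A standalone sketch of that ratio check (the function name is hypothetical; the 0.53 threshold and the 13B data points come from the comment in the diff):

    #include <cstddef>

    // Returns true when the model is too large for the Metal backend:
    // on a 16GB M2, a 13B q4_0 (ratio ~0.52) fits but a 13B q4_K_M (~0.55) does not.
    bool exceedsMetalBudget(size_t req_mem, size_t total_mem) {
        float req_to_total = (float) req_mem / (float) total_mem;
        return req_to_total >= 0.53f;
    }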