Implement configurable context length (#1749)

This commit is contained in:
Jared Van Bortel
2023-12-16 17:58:15 -05:00
committed by GitHub
parent 7aa0f779de
commit d1c56b8b28
31 changed files with 291 additions and 135 deletions

View File

@@ -138,7 +138,7 @@ const LLModel::Implementation* LLModel::Implementation::implementation(const cha
return nullptr;
}
LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant, int n_ctx) {
if (!has_at_least_minimal_hardware()) {
std::cerr << "LLModel ERROR: CPU does not support AVX\n";
return nullptr;
@@ -154,7 +154,11 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
if(impl) {
LLModel* metalimpl = impl->m_construct();
metalimpl->m_implementation = impl;
size_t req_mem = metalimpl->requiredMem(modelPath);
/* TODO(cebtenzzre): after we fix requiredMem, we should change this to happen at
* load time, not construct time. right now n_ctx is incorrectly hardcoded 2048 in
* most (all?) places where this is called, causing underestimation of required
* memory. */
size_t req_mem = metalimpl->requiredMem(modelPath, n_ctx);
float req_to_total = (float) req_mem / (float) total_mem;
// on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
if (req_to_total >= 0.53) {
@@ -165,6 +169,8 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
}
}
}
#else
(void)n_ctx;
#endif
if (!impl) {