Implement configurable context length (#1749)

This commit is contained in:
Jared Van Bortel
2023-12-16 17:58:15 -05:00
committed by GitHub
parent 7aa0f779de
commit d1c56b8b28
31 changed files with 291 additions and 135 deletions

View File

@@ -714,8 +714,9 @@ Bert::~Bert() {
bert_free(d_ptr->ctx);
}
bool Bert::loadModel(const std::string &modelPath)
bool Bert::loadModel(const std::string &modelPath, int n_ctx)
{
(void)n_ctx;
d_ptr->ctx = bert_load_from_file(modelPath.c_str());
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
d_ptr->modelLoaded = d_ptr->ctx != nullptr;
@@ -728,8 +729,10 @@ bool Bert::isModelLoaded() const
return d_ptr->modelLoaded;
}
size_t Bert::requiredMem(const std::string &/*modelPath*/)
// Always returns 0: neither modelPath nor n_ctx is consulted by this implementation.
// NOTE(review): n_ctx is presumably the requested context length (per the commit
// title "Implement configurable context length") — confirm against the caller.
size_t Bert::requiredMem(const std::string &modelPath, int n_ctx)
{
(void)modelPath; // explicitly marked unused
(void)n_ctx; // explicitly marked unused
return 0;
}