typescript: async generator and token stream (#1897)

Signed-off-by: Tare Ebelo <75279482+TareHimself@users.noreply.github.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: jacob <jacoobes@sern.dev>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
commit a153cc5b25 (parent ef518fae3e)
Author: TareHimself
Date: 2024-02-24 17:50:14 -05:00
Committed by: GitHub
14 changed files with 1517 additions and 955 deletions
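
The diff below is the C++ (N-API) half of the change. As a rough sketch of what it enables on the TypeScript side, a prompt's tokens can now be consumed through an async generator instead of arriving as one final string (the generate method name here is illustrative, not confirmed by this diff):

    // Illustrative only: streaming tokens via an async generator.
    // `model.generate` stands in for whatever streaming entry point the
    // binding exposes; this diff only shows the native plumbing behind it.
    for await (const token of model.generate("Why is the sky blue?")) {
        process.stdout.write(token); // print each token as it arrives
    }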


@@ -3,9 +3,9 @@
 Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
   Napi::Function self = DefineClass(env, "LLModel", {
-    InstanceMethod("type", &NodeModelWrapper::getType),
+    InstanceMethod("type", &NodeModelWrapper::GetType),
     InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
-    InstanceMethod("name", &NodeModelWrapper::getName),
+    InstanceMethod("name", &NodeModelWrapper::GetName),
     InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
     InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
     InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
@@ -28,14 +28,14 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
 Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
 {
   auto env = info.Env();
-  return Napi::Number::New(env, static_cast<uint32_t>( llmodel_required_mem(GetInference(), full_model_path.c_str(), 2048, 100) ));
+  return Napi::Number::New(env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers) ));
 }
 Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo& info)
 {
   auto env = info.Env();
   int num_devices = 0;
-  auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str());
+  auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers);
   llmodel_gpu_device* all_devices = llmodel_available_gpu_devices(GetInference(), mem_size, &num_devices);
   if(all_devices == nullptr) {
     Napi::Error::New(
@@ -70,7 +70,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
   return js_array;
 }
-Napi::Value NodeModelWrapper::getType(const Napi::CallbackInfo& info)
+Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo& info)
 {
   if(type.empty()) {
     return info.Env().Undefined();
@@ -132,6 +132,9 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
     library_path = ".";
   }
   device = config_object.Get("device").As<Napi::String>();
+  nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
+  nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();
 }
 llmodel_set_implementation_search_path(library_path.c_str());
 const char* e;
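
The three keys read above come straight from the JS config object, so the model is presumably loaded with an options object along these lines (the loadModel name is an assumption; the device, nCtx, and ngl keys are exactly those read by the C++):

    // Option keys match the config_object reads above; the loader
    // function name is an assumption, not confirmed by this diff.
    const model = await loadModel("mistral-7b.gguf", {
        device: "gpu", // anything other than "cpu" triggers GPU init
        nCtx: 2048,    // context length, forwarded to llmodel_loadModel
        ngl: 100,      // GPU layer count, stored as nGpuLayers
    });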
@@ -148,20 +151,17 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
     return;
   }
   if(device != "cpu") {
-    size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str());
-    std::cout << "Initiating GPU\n";
+    size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(),nCtx, nGpuLayers);
     auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
-    if(success) {
-      std::cout << "GPU init successfully\n";
-    } else {
+    if(!success) {
       //https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
       //Haven't implemented this but it is still open to contribution
       std::cout << "WARNING: Failed to init GPU\n";
     }
   }
-  auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), 2048, 100);
+  auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
   if(!success) {
     Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
     return;
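
Note the asymmetry above: a failed GPU init only logs a warning and falls through to a normal load, while a failed llmodel_loadModel throws into JavaScript. From the calling side that might look like this (loader name again an assumption):

    // The "Failed to load model at given path" error above surfaces in JS
    // as an ordinary exception, so a bad path is catchable.
    try {
        await loadModel("./does-not-exist.gguf", { device: "cpu", nCtx: 2048, ngl: 100 });
    } catch (err) {
        console.error(err); // Error: Failed to load model at given path
    }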
@@ -254,6 +254,9 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
   .repeat_last_n = 10,
   .context_erase = 0.5
 };
+PromptWorkerConfig promptWorkerConfig;
 if(info[1].IsObject())
 {
   auto inputObject = info[1].As<Napi::Object>();
@@ -285,29 +288,33 @@
   if(inputObject.Has("context_erase"))
     promptContext.context_erase = inputObject.Get("context_erase").As<Napi::Number>().FloatValue();
 }
-//copy to protect llmodel resources when splitting to new thread
-llmodel_prompt_context copiedPrompt = promptContext;
+else
+{
+  Napi::Error::New(info.Env(), "Missing Prompt Options").ThrowAsJavaScriptException();
+  return info.Env().Undefined();
+}
-std::string copiedQuestion = question;
-PromptWorkContext pc = {
-  copiedQuestion,
-  inference_,
-  copiedPrompt,
-  ""
-};
-auto threadSafeContext = new TsfnContext(env, pc);
-threadSafeContext->tsfn = Napi::ThreadSafeFunction::New(
-  env,                          // Environment
-  info[2].As<Napi::Function>(), // JS function from caller
-  "PromptCallback",             // Resource name
-  0,                            // Max queue size (0 = unlimited).
-  1,                            // Initial thread count
-  threadSafeContext,            // Context,
-  FinalizerCallback,            // Finalizer
-  (void*)nullptr                // Finalizer data
-);
-threadSafeContext->nativeThread = std::thread(threadEntry, threadSafeContext);
-return threadSafeContext->deferred_.Promise();
+if(info.Length() >= 3 && info[2].IsFunction()){
+  promptWorkerConfig.bHasTokenCallback = true;
+  promptWorkerConfig.tokenCallback = info[2].As<Napi::Function>();
+}
+//copy to protect llmodel resources when splitting to new thread
+// llmodel_prompt_context copiedPrompt = promptContext;
+promptWorkerConfig.context = promptContext;
+promptWorkerConfig.model = GetInference();
+promptWorkerConfig.mutex = &inference_mutex;
+promptWorkerConfig.prompt = question;
+promptWorkerConfig.result = "";
+auto worker = new PromptWorker(env, promptWorkerConfig);
+worker->Queue();
+return worker->GetPromise();
 }
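
The rewrite above swaps the hand-rolled std::thread plus Napi::ThreadSafeFunction for a queued PromptWorker that owns both the returned promise and an optional per-token callback. On the TypeScript side, a per-token callback is typically bridged into the async generator named in the commit title with a small queue; here is a minimal sketch of that pattern, assuming a start function that fires the callback once per token and resolves when generation ends (not the binding's actual source):

    // Sketch: bridge a per-token callback into an async generator.
    async function* tokenStream(
        startPrompt: (onToken: (token: string) => void) => Promise<void>,
    ): AsyncGenerator<string> {
        const queue: string[] = [];
        let done = false;
        let wake: (() => void) | undefined;

        const finished = startPrompt((token) => {
            queue.push(token); // tokens arrive from the native callback
            wake?.();          // wake a waiting consumer
            wake = undefined;
        });
        // Mark the stream done whether the native call resolves or rejects;
        // a rejection is re-thrown by the final await below.
        finished.catch(() => {}).finally(() => {
            done = true;
            wake?.();
            wake = undefined;
        });

        while (!done || queue.length > 0) {
            if (queue.length > 0) {
                yield queue.shift()!;
            } else {
                await new Promise<void>((resolve) => (wake = resolve));
            }
        }
        await finished; // surface any native error to the consumer
    }

A caller then simply writes for await (const t of tokenStream(start)) { ... }, which is the async-generator shape this commit's title refers to.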
void NodeModelWrapper::Dispose(const Napi::CallbackInfo& info) {
llmodel_model_destroy(inference_);
@@ -321,7 +328,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
   }
 }
-Napi::Value NodeModelWrapper::getName(const Napi::CallbackInfo& info) {
+Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo& info) {
   return Napi::String::New(info.Env(), name);
 }
 Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo& info) {