Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-06 02:50:36 +00:00)
typescript: async generator and token stream (#1897)
Signed-off-by: Tare Ebelo <75279482+TareHimself@users.noreply.github.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: jacob <jacoobes@sern.dev>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
@@ -3,9 +3,9 @@
 Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
   Napi::Function self = DefineClass(env, "LLModel", {
-    InstanceMethod("type", &NodeModelWrapper::getType),
+    InstanceMethod("type", &NodeModelWrapper::GetType),
     InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
-    InstanceMethod("name", &NodeModelWrapper::getName),
+    InstanceMethod("name", &NodeModelWrapper::GetName),
     InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
     InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
     InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
@@ -28,14 +28,14 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
 Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
 {
   auto env = info.Env();
-  return Napi::Number::New(env, static_cast<uint32_t>( llmodel_required_mem(GetInference(), full_model_path.c_str(), 2048, 100) ));
+  return Napi::Number::New(env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers) ));

 }
 Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo& info)
 {
   auto env = info.Env();
   int num_devices = 0;
-  auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str());
+  auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers);
   llmodel_gpu_device* all_devices = llmodel_available_gpu_devices(GetInference(), mem_size, &num_devices);
   if(all_devices == nullptr) {
     Napi::Error::New(
@@ -70,7 +70,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
   return js_array;
 }

-Napi::Value NodeModelWrapper::getType(const Napi::CallbackInfo& info)
+Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo& info)
 {
   if(type.empty()) {
     return info.Env().Undefined();
@@ -132,6 +132,9 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
       library_path = ".";
     }
     device = config_object.Get("device").As<Napi::String>();
+
+    nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
+    nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();
   }
   llmodel_set_implementation_search_path(library_path.c_str());
   const char* e;
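The two added reads above assume NodeModelWrapper carries matching members; the class declaration is not part of this excerpt. A minimal sketch of what it presumably declares, keeping the old hard-coded values as defaults (names inferred from the usage here, not taken from the committed header):

// Hypothetical excerpt of the NodeModelWrapper declaration (not shown in this
// diff). The defaults mirror the 2048/100 literals the old call sites used.
class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper> {
  // ... existing members ...
private:
  int nCtx = 2048;       // context length, overridden by config.nCtx
  int nGpuLayers = 100;  // GPU layers to offload, overridden by config.ngl
};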
@@ -148,20 +151,17 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
       return;
     }
     if(device != "cpu") {
-      size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str());
-      std::cout << "Initiating GPU\n";
+      size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);

       auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
-      if(success) {
-        std::cout << "GPU init successfully\n";
-      } else {
+      if(!success) {
         //https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
         //Haven't implemented this but it is still open to contribution
         std::cout << "WARNING: Failed to init GPU\n";
       }
     }

-    auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), 2048, 100);
+    auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
     if(!success) {
       Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
       return;
@@ -254,6 +254,9 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
       .repeat_last_n = 10,
       .context_erase = 0.5
     };
+
+    PromptWorkerConfig promptWorkerConfig;
+
     if(info[1].IsObject())
     {
       auto inputObject = info[1].As<Napi::Object>();
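PromptWorkerConfig is defined in the bindings' prompt-worker sources, which are not shown in this excerpt. Judging only from the fields the next hunk assigns, a plausible shape is sketched below; field names and types are assumptions, and the committed definition may differ.

#include <napi.h>
#include <mutex>
#include <string>
#include "llmodel_c.h"

// Hypothetical sketch of PromptWorkerConfig, inferred from the assignments in
// the following hunk; not the committed header.
struct PromptWorkerConfig {
  Napi::Function tokenCallback;      // JS callback passed as info[2]
  bool bHasTokenCallback = false;    // set only when a callback was supplied
  llmodel_prompt_context context;    // prompt context copied from promptContext
  llmodel_model model;               // GetInference()
  std::mutex* mutex = nullptr;       // guards the shared llmodel instance
  std::string prompt;                // the user prompt (question)
  std::string result;                // accumulated response text
};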
@@ -285,29 +288,33 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
       if(inputObject.Has("context_erase"))
         promptContext.context_erase = inputObject.Get("context_erase").As<Napi::Number>().FloatValue();
     }
-    //copy to protect llmodel resources when splitting to new thread
-    llmodel_prompt_context copiedPrompt = promptContext;
+    else
+    {
+      Napi::Error::New(info.Env(), "Missing Prompt Options").ThrowAsJavaScriptException();
+      return info.Env().Undefined();
+    }

-    std::string copiedQuestion = question;
-    PromptWorkContext pc = {
-      copiedQuestion,
-      inference_,
-      copiedPrompt,
-      ""
-    };
-    auto threadSafeContext = new TsfnContext(env, pc);
-    threadSafeContext->tsfn = Napi::ThreadSafeFunction::New(
-      env,                           // Environment
-      info[2].As<Napi::Function>(),  // JS function from caller
-      "PromptCallback",              // Resource name
-      0,                             // Max queue size (0 = unlimited).
-      1,                             // Initial thread count
-      threadSafeContext,             // Context,
-      FinalizerCallback,             // Finalizer
-      (void*)nullptr                 // Finalizer data
-    );
-    threadSafeContext->nativeThread = std::thread(threadEntry, threadSafeContext);
-    return threadSafeContext->deferred_.Promise();
+    if(info.Length() >= 3 && info[2].IsFunction()){
+      promptWorkerConfig.bHasTokenCallback = true;
+      promptWorkerConfig.tokenCallback = info[2].As<Napi::Function>();
+    }
+
+
+    //copy to protect llmodel resources when splitting to new thread
+    // llmodel_prompt_context copiedPrompt = promptContext;
+    promptWorkerConfig.context = promptContext;
+    promptWorkerConfig.model = GetInference();
+    promptWorkerConfig.mutex = &inference_mutex;
+    promptWorkerConfig.prompt = question;
+    promptWorkerConfig.result = "";
+
+    auto worker = new PromptWorker(env, promptWorkerConfig);
+
+    worker->Queue();
+
+    return worker->GetPromise();
   }
   void NodeModelWrapper::Dispose(const Napi::CallbackInfo& info) {
     llmodel_model_destroy(inference_);
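PromptWorker itself is also defined outside this excerpt. From how it is used above (constructed with env and the config, Queue()'d, and asked for a promise), it is presumably a Napi::AsyncWorker that resolves a Promise::Deferred when prompting finishes and forwards streamed tokens to the JS callback through a thread-safe function. A minimal sketch of that pattern, reusing the hypothetical PromptWorkerConfig above, with all names and the token-callback shape assumed:

// Hypothetical AsyncWorker-based prompt worker; not the committed
// prompt_worker.cc, only an illustration of the pattern the diff switches to
// (queued worker + promise instead of std::thread + TsfnContext).
#include <napi.h>
#include <mutex>
#include <string>

class ExamplePromptWorker : public Napi::AsyncWorker {
public:
  ExamplePromptWorker(Napi::Env env, PromptWorkerConfig config)
      : Napi::AsyncWorker(env),
        config_(std::move(config)),
        deferred_(Napi::Promise::Deferred::New(env)) {
    if (config_.bHasTokenCallback) {
      // Wrap the JS callback so the worker thread can invoke it safely.
      tsfn_ = Napi::ThreadSafeFunction::New(env, config_.tokenCallback,
                                            "PromptTokenCallback", 0, 1);
    }
  }

  Napi::Promise GetPromise() { return deferred_.Promise(); }

  void Execute() override {
    // Runs off the main thread; serialize access to the shared llmodel.
    std::lock_guard<std::mutex> lock(*config_.mutex);
    // The real worker would call llmodel_prompt(...) here, pushing each
    // generated token through OnToken() and appending it to config_.result.
  }

  void OnToken(const std::string& token) {
    if (!config_.bHasTokenCallback) return;
    // Marshal the token back onto the JS thread and call the callback.
    tsfn_.BlockingCall(new std::string(token),
                       [](Napi::Env env, Napi::Function cb, std::string* data) {
                         cb.Call({Napi::String::New(env, *data)});
                         delete data;
                       });
  }

  void OnOK() override {
    if (config_.bHasTokenCallback) tsfn_.Release();
    deferred_.Resolve(Napi::String::New(Env(), config_.result));
  }

  void OnError(const Napi::Error& e) override {
    if (config_.bHasTokenCallback) tsfn_.Release();
    deferred_.Reject(e.Value());
  }

private:
  PromptWorkerConfig config_;
  Napi::Promise::Deferred deferred_;
  Napi::ThreadSafeFunction tsfn_;
};

The resolved string and the per-token callback are what the TypeScript layer can then expose as a token stream or async generator, per the commit title.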
@@ -321,7 +328,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
     }
   }

-  Napi::Value NodeModelWrapper::getName(const Napi::CallbackInfo& info) {
+  Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo& info) {
     return Napi::String::New(info.Env(), name);
   }
   Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo& info) {