typescript bindings maintenance (#2363)
* remove outdated comments
* simpler build from source
* update unix build script to create .so runtimes correctly
* configure ci build type, use RelWithDebInfo for dev build script
* add clean script
* fix streamed token decoding / emoji
* remove deprecated nCtx
* update typings
* readme, spelling fixes
* cuda/backend logic changes + name napi methods like their js counterparts
* convert llmodel example into a test, separate test suite that can run in ci
* update examples / naming
* update deps, remove the need for binding.ci.gyp, make the node-gyp-build fallback easier to test
* make sure the assert-backend-sources.js script is published, but not the others
* build correctly on windows (regression on node-gyp-build)
* codespell
* make sure dlhandle.cpp gets linked correctly
* add include for check_cxx_compiler_flag call during aarch64 builds
* x86 > arm64 cross compilation of runtimes and bindings
* default to cpu instead of kompute on arm64
* formatting, more minimal example

Signed-off-by: limez <limez@protonmail.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: jacob <jacoobes@sern.dev>
This commit is contained in: parent f001897a1a, commit a602f7fde7
@@ -570,7 +570,7 @@ jobs:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
sudo apt-get install -y cmake build-essential g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Build Libraries
command: |
@@ -578,14 +578,19 @@ jobs:
cd gpt4all-backend
mkdir -p runtimes/build
cd runtimes/build
cmake ../..
cmake --build . --parallel --config Release
cmake ../.. -DCMAKE_BUILD_TYPE=Release
cmake --build . --parallel
mkdir ../linux-x64
cp -L *.so ../linux-x64 # otherwise persist_to_workspace seems to mess symlinks
cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
cmake --build . --parallel
mkdir ../linux-arm64
cp -L *.so ../linux-arm64
- persist_to_workspace:
root: gpt4all-backend
paths:
- runtimes/linux-x64/*.so
- runtimes/linux-arm64/*.so

build-bindings-backend-macos:
macos:
@@ -896,6 +901,11 @@ jobs:
- checkout
- attach_workspace:
at: /tmp/gpt4all-backend
- run:
name: Install dependencies
command: |
sudo apt-get update
sudo apt-get install -y g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu
- node/install:
install-yarn: true
node-version: "18.16"
@@ -908,18 +918,24 @@ jobs:
- run:
command: |
cd gpt4all-bindings/typescript
yarn prebuildify -t 18.16.0 --napi
yarn build:prebuilds
- run:
command: |
mkdir -p gpt4all-backend/prebuilds/linux-x64
mkdir -p gpt4all-backend/runtimes/linux-x64
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so gpt4all-backend/runtimes/linux-x64
cp gpt4all-bindings/typescript/prebuilds/linux-x64/*.node gpt4all-backend/prebuilds/linux-x64
mkdir -p gpt4all-backend/prebuilds/linux-arm64
mkdir -p gpt4all-backend/runtimes/linux-arm64
cp /tmp/gpt4all-backend/runtimes/linux-arm64/*-*.so gpt4all-backend/runtimes/linux-arm64
cp gpt4all-bindings/typescript/prebuilds/linux-arm64/*.node gpt4all-backend/prebuilds/linux-arm64
- persist_to_workspace:
root: gpt4all-backend
paths:
- prebuilds/linux-x64/*.node
- runtimes/linux-x64/*-*.so
- prebuilds/linux-arm64/*.node
- runtimes/linux-arm64/*-*.so
build-nodejs-macos:
macos:
xcode: "14.0.0"
@@ -1030,12 +1046,10 @@ jobs:

cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/

# Fallback build if user is not on above prebuilds
mv -f binding.ci.gyp binding.gyp

mkdir gpt4all-backend
# copy the backend source we depend on to make fallback builds work
mkdir backend
cd ../../gpt4all-backend
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/backend/

# Test install
- node/install-packages:
@@ -1045,7 +1059,7 @@ jobs:
- run:
command: |
cd gpt4all-bindings/typescript
yarn run test
yarn run test:ci
- run:
command: |
cd gpt4all-bindings/typescript
@@ -79,6 +79,7 @@ if (LLMODEL_ROCM)
endif()

set(CMAKE_VERBOSE_MAKEFILE ON)
include(CheckCXXCompilerFlag)

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
gpt4all-backend/toolchains/linux-arm64-toolchain.cmake (new file, 11 lines)
@@ -0,0 +1,11 @@
# Toolchain to crosscompile runtimes for arm64 on jammy x86_64
# You may have to `sudo apt-get install g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu`

set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12)
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12)

# Supported backends
set(LLMODEL_CUDA off)
set(LLMODEL_KOMPUTE off)
gpt4all-bindings/typescript/.gitignore (1 line added)
@@ -8,4 +8,5 @@ prebuilds/
!.yarn/sdks
!.yarn/versions
runtimes/
backend/
compile_flags.txt
gpt4all-bindings/typescript/.npmignore
@@ -1,4 +1,5 @@
test/
spec/
scripts/
scripts/*
!scripts/assert-backend-sources.js
build
@@ -188,6 +188,8 @@ model.dispose();
* python 3
* On Windows and Linux, building GPT4All requires the complete Vulkan SDK. You may download it from here: https://vulkan.lunarg.com/sdk/home
* macOS users do not need Vulkan, as GPT4All will use Metal instead.
* CUDA Toolkit >= 11.4 (you can bypass this by adding a custom flag to the build step)
  - Windows: compiling with CUDA is difficult if the Visual Studio IDE is NOT present.

### Build (from source)

@@ -196,23 +198,29 @@ git clone https://github.com/nomic-ai/gpt4all.git
cd gpt4all-bindings/typescript
```

* The below shell commands assume the current working directory is `typescript`.

* To Build and Rebuild:

```sh
node scripts/prebuild.js
```
* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
The llama.cpp git submodule for gpt4all may be absent or outdated. Make sure to run

```sh
git submodule update --init --recursive
```

The below shell commands assume the current working directory is `typescript`.

Using yarn

```sh
yarn build:backend
yarn install
yarn build
```
This will build platform-dependent dynamic libraries, which will be located in runtimes/(platform)/native

Using npm

```sh
npm install
npm run build
```

The `build:runtimes` script will create runtime libraries for your platform in `runtimes` and `build:prebuilds` will create the bindings in `prebuilds`. `build` is a shortcut for both.

### Test

@@ -259,7 +267,7 @@ yarn test

This package has been stabilizing over time, and breaking changes may happen until the api stabilizes. Here's the current todo list:

* \[ ] Purely offline. Per the gui, which can be run completely offline, the bindings should be as well.
* \[x] [Purely offline](#Offline-usage). Per the gui, which can be run completely offline, the bindings should be as well.
* \[ ] NPM bundle size reduction via optionalDependencies strategy (need help)
  * Should include prebuilds to avoid painful node-gyp errors
* \[x] createChatSession ( the python equivalent to create\_chat\_session )
@@ -276,7 +284,7 @@ This package has been stabilizing over time development, and breaking changes ma
This repository serves as the new bindings for nodejs users.
- If you were a user of [these bindings](https://github.com/nomic-ai/gpt4all-ts), they are outdated.
- Version 4 includes the following breaking changes
  * `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a float32array.
  * `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a Float32Array.
  * Removed deprecated types `ModelType` and `ModelFile`
  * Removed deprecated initialization of a model by string path only
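To illustrate the v4 breaking change listed above, here is a minimal sketch of consuming the new `EmbeddingResult` shape. The model filename is illustrative, and the field names (`embeddings`, `n_prompt_tokens`) are taken from the napi wrapper in this diff; treat it as an assumption-laden example, not official documentation.

```js
import { loadModel, createEmbedding } from "gpt4all";

// Illustrative model name; any embedding-capable GGUF from the model list should work.
const embedder = await loadModel("nomic-embed-text-v1.5.f16.gguf", { type: "embedding" });

// v4: the result is an EmbeddingResult object, not a raw Float32Array.
const single = createEmbedding(embedder, "Hello world");
console.log(single.n_prompt_tokens);   // tokens consumed by the prompt
console.log(single.embeddings.length); // Float32Array for a single input

// Passing an array of texts yields one Float32Array per input.
const batch = createEmbedding(embedder, ["first text", "second text"]);
console.log(batch.embeddings.length);  // 2

embedder.dispose();
```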
@ -1,62 +0,0 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "gpt4all", # gpt4all-ts will cause compile error
|
||||
"include_dirs": [
|
||||
"<!@(node -p \"require('node-addon-api').include\")",
|
||||
"gpt4all-backend",
|
||||
],
|
||||
"sources": [
|
||||
# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
|
||||
#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
|
||||
#"../../gpt4all-backend/llama.cpp/ggml.c",
|
||||
#"../../gpt4all-backend/llama.cpp/llama.cpp",
|
||||
# "../../gpt4all-backend/utils.cpp",
|
||||
"gpt4all-backend/llmodel_c.cpp",
|
||||
"gpt4all-backend/llmodel.cpp",
|
||||
"prompt.cc",
|
||||
"index.cc",
|
||||
],
|
||||
"conditions": [
|
||||
['OS=="mac"', {
|
||||
'xcode_settings': {
|
||||
'GCC_ENABLE_CPP_EXCEPTIONS': 'YES'
|
||||
},
|
||||
'defines': [
|
||||
'LIB_FILE_EXT=".dylib"',
|
||||
'NAPI_CPP_EXCEPTIONS',
|
||||
],
|
||||
'cflags_cc': [
|
||||
"-fexceptions"
|
||||
]
|
||||
}],
|
||||
['OS=="win"', {
|
||||
'defines': [
|
||||
'LIB_FILE_EXT=".dll"',
|
||||
'NAPI_CPP_EXCEPTIONS',
|
||||
],
|
||||
"msvs_settings": {
|
||||
"VCCLCompilerTool": {
|
||||
"AdditionalOptions": [
|
||||
"/std:c++20",
|
||||
"/EHsc",
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
['OS=="linux"', {
|
||||
'defines': [
|
||||
'LIB_FILE_EXT=".so"',
|
||||
'NAPI_CPP_EXCEPTIONS',
|
||||
],
|
||||
'cflags_cc!': [
|
||||
'-fno-rtti',
|
||||
],
|
||||
'cflags_cc': [
|
||||
'-std=c++2a',
|
||||
'-fexceptions'
|
||||
]
|
||||
}]
|
||||
]
|
||||
}]
|
||||
}
|
@ -1,19 +1,15 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "gpt4all", # gpt4all-ts will cause compile error
|
||||
"target_name": "gpt4all",
|
||||
"include_dirs": [
|
||||
"<!@(node -p \"require('node-addon-api').include\")",
|
||||
"../../gpt4all-backend",
|
||||
"backend",
|
||||
],
|
||||
"sources": [
|
||||
# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
|
||||
#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
|
||||
#"../../gpt4all-backend/llama.cpp/ggml.c",
|
||||
#"../../gpt4all-backend/llama.cpp/llama.cpp",
|
||||
# "../../gpt4all-backend/utils.cpp",
|
||||
"../../gpt4all-backend/llmodel_c.cpp",
|
||||
"../../gpt4all-backend/llmodel.cpp",
|
||||
"backend/llmodel_c.cpp",
|
||||
"backend/llmodel.cpp",
|
||||
"backend/dlhandle.cpp",
|
||||
"prompt.cc",
|
||||
"index.cc",
|
||||
],
|
||||
|
@ -3,23 +3,24 @@
|
||||
|
||||
Napi::Function NodeModelWrapper::GetClass(Napi::Env env)
|
||||
{
|
||||
Napi::Function self = DefineClass(env, "LLModel",
|
||||
{InstanceMethod("type", &NodeModelWrapper::GetType),
|
||||
InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
|
||||
InstanceMethod("name", &NodeModelWrapper::GetName),
|
||||
InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
|
||||
InstanceMethod("infer", &NodeModelWrapper::Infer),
|
||||
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
|
||||
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
|
||||
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
|
||||
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
|
||||
InstanceMethod("initGpuByString", &NodeModelWrapper::InitGpuByString),
|
||||
InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
|
||||
InstanceMethod("listGpu", &NodeModelWrapper::GetGpuDevices),
|
||||
InstanceMethod("memoryNeeded", &NodeModelWrapper::GetRequiredMemory),
|
||||
InstanceMethod("dispose", &NodeModelWrapper::Dispose)});
|
||||
Napi::Function self = DefineClass(
|
||||
env, "LLModel",
|
||||
{InstanceMethod("load", &NodeModelWrapper::Load),
|
||||
InstanceMethod("initGpu", &NodeModelWrapper::InitGpu),
|
||||
InstanceMethod("infer", &NodeModelWrapper::Infer),
|
||||
InstanceMethod("embed", &NodeModelWrapper::Embed),
|
||||
InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
|
||||
InstanceMethod("getType", &NodeModelWrapper::GetType),
|
||||
InstanceMethod("getName", &NodeModelWrapper::GetName),
|
||||
InstanceMethod("getStateSize", &NodeModelWrapper::GetStateSize),
|
||||
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
|
||||
InstanceMethod("getThreadCount", &NodeModelWrapper::GetThreadCount),
|
||||
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
|
||||
InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
|
||||
InstanceMethod("getGpuDevices", &NodeModelWrapper::GetGpuDevices),
|
||||
InstanceMethod("getRequiredMemory", &NodeModelWrapper::GetRequiredMemory),
|
||||
InstanceMethod("dispose", &NodeModelWrapper::Dispose)});
|
||||
// Keep a static reference to the constructor
|
||||
//
|
||||
Napi::FunctionReference *constructor = new Napi::FunctionReference();
|
||||
*constructor = Napi::Persistent(self);
|
||||
env.SetInstanceData(constructor);
|
||||
@ -29,13 +30,13 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
return Napi::Number::New(
|
||||
env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers)));
|
||||
env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers)));
|
||||
}
|
||||
Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
int num_devices = 0;
|
||||
auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers);
|
||||
auto mem_size = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
|
||||
llmodel_gpu_device *all_devices = llmodel_available_gpu_devices(mem_size, &num_devices);
|
||||
if (all_devices == nullptr)
|
||||
{
|
||||
@ -63,6 +64,7 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
|
||||
js_gpu_device["heapSize"] = static_cast<uint32_t>(gpu_device.heapSize);
|
||||
js_gpu_device["name"] = gpu_device.name;
|
||||
js_gpu_device["vendor"] = gpu_device.vendor;
|
||||
js_gpu_device["backend"] = gpu_device.backend;
|
||||
|
||||
js_array[i] = js_gpu_device;
|
||||
}
|
||||
@ -71,35 +73,13 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
|
||||
|
||||
Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo &info)
|
||||
{
|
||||
if (type.empty())
|
||||
if (model_type.empty())
|
||||
{
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
return Napi::String::New(info.Env(), type);
|
||||
return Napi::String::New(info.Env(), model_type);
|
||||
}
|
||||
|
||||
Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());
|
||||
|
||||
std::string gpu_device_identifier = info[1].As<Napi::String>();
|
||||
|
||||
size_t converted_value;
|
||||
if (memory_required <= std::numeric_limits<size_t>::max())
|
||||
{
|
||||
converted_value = static_cast<size_t>(memory_required);
|
||||
}
|
||||
else
|
||||
{
|
||||
Napi::Error::New(env, "invalid number for memory size. Exceeded bounds for memory.")
|
||||
.ThrowAsJavaScriptException();
|
||||
return env.Undefined();
|
||||
}
|
||||
|
||||
auto result = llmodel_gpu_init_gpu_device_by_string(GetInference(), converted_value, gpu_device_identifier.c_str());
|
||||
return Napi::Boolean::New(env, result);
|
||||
}
|
||||
Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo &info)
|
||||
{
|
||||
return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference()));
|
||||
@ -110,82 +90,61 @@ NodeModelWrapper::NodeModelWrapper(const Napi::CallbackInfo &info) : Napi::Objec
|
||||
auto env = info.Env();
|
||||
auto config_object = info[0].As<Napi::Object>();
|
||||
|
||||
// sets the directory where models (gguf files) are to be searched
|
||||
llmodel_set_implementation_search_path(
|
||||
config_object.Has("library_path") ? config_object.Get("library_path").As<Napi::String>().Utf8Value().c_str()
|
||||
: ".");
|
||||
// sets the directories where runtime libs are to be searched
|
||||
llmodel_set_implementation_search_path(config_object.Has("librariesPath")
|
||||
? config_object.Get("librariesPath").As<Napi::String>().Utf8Value().c_str()
|
||||
: ".");
|
||||
|
||||
std::string model_name = config_object.Get("model_name").As<Napi::String>();
|
||||
fs::path model_path = config_object.Get("model_path").As<Napi::String>().Utf8Value();
|
||||
std::string full_weight_path = (model_path / fs::path(model_name)).string();
|
||||
model_file = config_object.Get("modelFile").As<Napi::String>().Utf8Value();
|
||||
model_name = model_file.substr(model_file.find_last_of("/\\") + 1);
|
||||
backend = config_object.Get("backend").As<Napi::String>().Utf8Value();
|
||||
n_ctx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
|
||||
n_gpu_layers = config_object.Get("nGpuLayers").As<Napi::Number>().Int32Value();
|
||||
|
||||
name = model_name.empty() ? model_path.filename().string() : model_name;
|
||||
full_model_path = full_weight_path;
|
||||
nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
|
||||
nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();
|
||||
|
||||
const char *e;
|
||||
inference_ = llmodel_model_create2(full_weight_path.c_str(), "auto", &e);
|
||||
const char *err;
|
||||
inference_ = llmodel_model_create2(model_file.c_str(), backend.c_str(), &err);
|
||||
if (!inference_)
|
||||
{
|
||||
Napi::Error::New(env, e).ThrowAsJavaScriptException();
|
||||
Napi::Error::New(env, err).ThrowAsJavaScriptException();
|
||||
return;
|
||||
}
|
||||
if (GetInference() == nullptr)
|
||||
{
|
||||
std::cerr << "Tried searching libraries in \"" << llmodel_get_implementation_search_path() << "\"" << std::endl;
|
||||
std::cerr << "Tried searching for model weight in \"" << full_weight_path << "\"" << std::endl;
|
||||
std::cerr << "Tried using model weights in \"" << model_file << "\"" << std::endl;
|
||||
std::cerr << "Do you have runtime libraries installed?" << std::endl;
|
||||
Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
|
||||
return;
|
||||
}
|
||||
|
||||
std::string device = config_object.Get("device").As<Napi::String>();
|
||||
if (device != "cpu")
|
||||
{
|
||||
size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
|
||||
|
||||
auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
|
||||
if (!success)
|
||||
{
|
||||
// https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
|
||||
// Haven't implemented this but it is still open to contribution
|
||||
std::cout << "WARNING: Failed to init GPU\n";
|
||||
}
|
||||
}
|
||||
|
||||
auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
|
||||
if (!success)
|
||||
{
|
||||
Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
|
||||
return;
|
||||
}
|
||||
// optional
|
||||
if (config_object.Has("model_type"))
|
||||
if (config_object.Has("modelType"))
|
||||
{
|
||||
type = config_object.Get("model_type").As<Napi::String>();
|
||||
model_type = config_object.Get("modelType").As<Napi::String>();
|
||||
}
|
||||
};
|
||||
|
||||
// NodeModelWrapper::~NodeModelWrapper() {
|
||||
// if(GetInference() != nullptr) {
|
||||
// std::cout << "Debug: deleting model\n";
|
||||
// llmodel_model_destroy(inference_);
|
||||
// std::cout << (inference_ == nullptr);
|
||||
// }
|
||||
// }
|
||||
// void NodeModelWrapper::Finalize(Napi::Env env) {
|
||||
// if(inference_ != nullptr) {
|
||||
// std::cout << "Debug: deleting model\n";
|
||||
//
|
||||
// }
|
||||
// }
|
||||
Napi::Value NodeModelWrapper::Load(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
auto success = llmodel_loadModel(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
|
||||
return Napi::Boolean::New(env, success);
|
||||
}
|
||||
|
||||
Napi::Value NodeModelWrapper::InitGpu(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
auto device = info[0].As<Napi::String>().Utf8Value();
|
||||
size_t mem_required = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
|
||||
auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem_required, device.c_str());
|
||||
return Napi::Boolean::New(env, success);
|
||||
}
|
||||
|
||||
Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo &info)
|
||||
{
|
||||
return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference()));
|
||||
}
|
||||
|
||||
Napi::Value NodeModelWrapper::StateSize(const Napi::CallbackInfo &info)
|
||||
Napi::Value NodeModelWrapper::GetStateSize(const Napi::CallbackInfo &info)
|
||||
{
|
||||
// Implement the binding for the stateSize method
|
||||
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
|
||||
@ -220,7 +179,7 @@ Napi::Array ChunkedFloatPtr(float *embedding_ptr, int embedding_size, int text_l
|
||||
return result;
|
||||
}
|
||||
|
||||
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
|
||||
Napi::Value NodeModelWrapper::Embed(const Napi::CallbackInfo &info)
|
||||
{
|
||||
auto env = info.Env();
|
||||
|
||||
@ -256,7 +215,7 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
|
||||
str_ptrs.push_back(text_arr[i].c_str());
|
||||
str_ptrs.push_back(nullptr);
|
||||
const char *_err = nullptr;
|
||||
float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size,
|
||||
float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size,
|
||||
prefix.IsUndefined() ? nullptr : prefix.As<Napi::String>().Utf8Value().c_str(),
|
||||
dimensionality, &token_count, do_mean, atlas, nullptr, &_err);
|
||||
if (!embeds)
|
||||
@ -271,9 +230,12 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
|
||||
llmodel_free_embedding(embeds);
|
||||
auto res = Napi::Object::New(env);
|
||||
res.Set("n_prompt_tokens", token_count);
|
||||
if(is_single_text) {
|
||||
if (is_single_text)
|
||||
{
|
||||
res.Set("embeddings", embedmat.Get(static_cast<uint32_t>(0)));
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
res.Set("embeddings", embedmat);
|
||||
}
|
||||
|
||||
@ -308,7 +270,7 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
|
||||
llmodel_prompt_context promptContext = {.logits = nullptr,
|
||||
.tokens = nullptr,
|
||||
.n_past = 0,
|
||||
.n_ctx = nCtx,
|
||||
.n_ctx = n_ctx,
|
||||
.n_predict = 4096,
|
||||
.top_k = 40,
|
||||
.top_p = 0.9f,
|
||||
@ -323,6 +285,12 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
|
||||
|
||||
auto inputObject = info[1].As<Napi::Object>();
|
||||
|
||||
if (!inputObject.Has("promptTemplate"))
|
||||
{
|
||||
Napi::Error::New(info.Env(), "Missing Prompt Template").ThrowAsJavaScriptException();
|
||||
return info.Env().Undefined();
|
||||
}
|
||||
|
||||
if (inputObject.Has("logits") || inputObject.Has("tokens"))
|
||||
{
|
||||
Napi::Error::New(info.Env(), "Invalid input: 'logits' or 'tokens' properties are not allowed")
|
||||
@ -425,9 +393,9 @@ void NodeModelWrapper::SetThreadCount(const Napi::CallbackInfo &info)
|
||||
|
||||
Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo &info)
|
||||
{
|
||||
return Napi::String::New(info.Env(), name);
|
||||
return Napi::String::New(info.Env(), model_name);
|
||||
}
|
||||
Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo &info)
|
||||
Napi::Value NodeModelWrapper::GetThreadCount(const Napi::CallbackInfo &info)
|
||||
{
|
||||
return Napi::Number::New(info.Env(), llmodel_threadCount(GetInference()));
|
||||
}
|
||||
|
@ -16,30 +16,28 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>
|
||||
|
||||
public:
|
||||
NodeModelWrapper(const Napi::CallbackInfo &);
|
||||
// virtual ~NodeModelWrapper();
|
||||
Napi::Value GetType(const Napi::CallbackInfo &info);
|
||||
Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
|
||||
Napi::Value StateSize(const Napi::CallbackInfo &info);
|
||||
// void Finalize(Napi::Env env) override;
|
||||
Napi::Value Load(const Napi::CallbackInfo &info);
|
||||
Napi::Value InitGpu(const Napi::CallbackInfo &info);
|
||||
/**
|
||||
* Prompting the model. This entails spawning a new thread and adding the response tokens
|
||||
* into a thread local string variable.
|
||||
*/
|
||||
Napi::Value Infer(const Napi::CallbackInfo &info);
|
||||
void SetThreadCount(const Napi::CallbackInfo &info);
|
||||
void Dispose(const Napi::CallbackInfo &info);
|
||||
Napi::Value Embed(const Napi::CallbackInfo &info);
|
||||
Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetType(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetName(const Napi::CallbackInfo &info);
|
||||
Napi::Value ThreadCount(const Napi::CallbackInfo &info);
|
||||
Napi::Value GenerateEmbedding(const Napi::CallbackInfo &info);
|
||||
Napi::Value HasGpuDevice(const Napi::CallbackInfo &info);
|
||||
Napi::Value ListGpus(const Napi::CallbackInfo &info);
|
||||
Napi::Value InitGpuByString(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetStateSize(const Napi::CallbackInfo &info);
|
||||
void SetThreadCount(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetThreadCount(const Napi::CallbackInfo &info);
|
||||
/*
|
||||
* The path that is used to search for the dynamic libraries
|
||||
*/
|
||||
Napi::Value GetLibraryPath(const Napi::CallbackInfo &info);
|
||||
Napi::Value HasGpuDevice(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
|
||||
Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
|
||||
void Dispose(const Napi::CallbackInfo &info);
|
||||
/**
|
||||
* Creates the LLModel class
|
||||
*/
|
||||
@ -54,10 +52,10 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>
|
||||
|
||||
std::mutex inference_mutex;
|
||||
|
||||
std::string type;
|
||||
// corresponds to LLModel::name() in typescript
|
||||
std::string name;
|
||||
int nCtx{};
|
||||
int nGpuLayers{};
|
||||
std::string full_model_path;
|
||||
std::string model_type;
|
||||
std::string model_name;
|
||||
std::string model_file;
|
||||
std::string backend;
|
||||
int n_ctx{};
|
||||
int n_gpu_layers{};
|
||||
};
|
||||
|
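As a quick reference for the napi method renames in `index.cc`/`index.h` above, the sketch below exercises the low-level wrapper through `model.llm`, the way the removed `spec/llmodel.mjs` example did. The model filename is illustrative and the exact call sites are an assumption based on this diff.

```js
import { loadModel } from "gpt4all";

const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" }); // illustrative model
const llm = model.llm; // native LLModel wrapper

// napi methods are now named like their JS counterparts:
console.log("name:", llm.getName());                   // was name()
console.log("type:", llm.getType());                   // was type()
console.log("state size:", llm.getStateSize());        // was stateSize()
console.log("threads:", llm.getThreadCount());         // was threadCount()
console.log("required mem:", llm.getRequiredMemory()); // was memoryNeeded()
console.log("gpu devices:", llm.getGpuDevices());      // was listGpu()

model.dispose();
```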
@ -5,32 +5,38 @@
|
||||
"main": "src/gpt4all.js",
|
||||
"repository": "nomic-ai/gpt4all",
|
||||
"scripts": {
|
||||
"install": "node-gyp-build",
|
||||
"install": "node ./scripts/assert-backend-sources.js && node-gyp-build",
|
||||
"test:ci": "jest test/ci.test.js",
|
||||
"test": "jest",
|
||||
"build:backend": "node scripts/build.js",
|
||||
"build": "node-gyp-build",
|
||||
"clean": "rimraf build runtimes prebuilds backend",
|
||||
"prebuild": "npm run clean",
|
||||
"build": "npm run build:runtimes && npm run build:prebuilds",
|
||||
"build:runtimes": "node scripts/build.js",
|
||||
"build:prebuilds": "node scripts/assert-backend-sources.js && node scripts/prebuild.js",
|
||||
"docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md"
|
||||
},
|
||||
"files": [
|
||||
"binding.gyp",
|
||||
"src/**/*",
|
||||
"runtimes/**/*",
|
||||
"binding.gyp",
|
||||
"prebuilds/**/*",
|
||||
"backend/**/*",
|
||||
"scripts/assert-backend-sources.js",
|
||||
"*.h",
|
||||
"*.cc",
|
||||
"gpt4all-backend/**/*"
|
||||
"*.cc"
|
||||
],
|
||||
"dependencies": {
|
||||
"md5-file": "^5.0.0",
|
||||
"node-addon-api": "^6.1.0",
|
||||
"node-gyp-build": "^4.6.0"
|
||||
"node-addon-api": "^8.0.0",
|
||||
"node-gyp-build": "~4.8.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.1.5",
|
||||
"@types/node": "^20.12.12",
|
||||
"documentation": "^14.0.2",
|
||||
"jest": "^29.5.0",
|
||||
"prebuildify": "^5.0.1",
|
||||
"prettier": "^2.8.8"
|
||||
"jest": "^29.7.0",
|
||||
"prebuildify": "^6.0.1",
|
||||
"prettier": "^3.2.5",
|
||||
"rimraf": "^5.0.7"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"node-gyp": "9.x.x"
|
||||
|
@ -131,7 +131,8 @@ bool PromptWorker::ResponseCallback(int32_t token_id, const std::string token)
|
||||
// Transform native data into JS data, passing it to the provided
|
||||
// `jsCallback` -- the TSFN's JavaScript function.
|
||||
auto token_id = Napi::Number::New(env, value->tokenId);
|
||||
auto token = Napi::String::New(env, value->token);
|
||||
auto token = Napi::Uint8Array::New(env, value->token.size());
|
||||
memcpy(token.Data(), value->token.data(), value->token.size());
|
||||
auto jsResult = jsCallback.Call({token_id, token}).ToBoolean();
|
||||
promise.set_value(jsResult);
|
||||
}
|
||||
|
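The `prompt.cc` change above is the heart of the "fix streamed token decoding / emoji" item: the response callback now hands JavaScript the raw UTF-8 bytes of each token instead of a possibly broken string. Below is a minimal sketch of the buffering idea using Node's built-in `TextDecoder`; it is not the `TokenDecoder` the bindings actually ship, just an illustration of why bytes must be accumulated until they decode cleanly.

```js
// Illustrative incremental decoder: a single emoji may be split across several
// tokens, so raw bytes are buffered until they form a complete UTF-8 sequence.
class IncrementalUtf8Decoder {
    constructor(onText) {
        this.onText = onText;
        this.buffer = new Uint8Array(0);
    }
    push(tokenId, bytes) {
        // append the new token bytes to any leftover bytes from previous tokens
        const merged = new Uint8Array(this.buffer.length + bytes.length);
        merged.set(this.buffer);
        merged.set(bytes, this.buffer.length);
        try {
            // fatal: true makes decode() throw on an incomplete multi-byte sequence
            const text = new TextDecoder("utf-8", { fatal: true }).decode(merged);
            this.buffer = new Uint8Array(0);
            this.onText(text);
        } catch {
            this.buffer = merged; // wait for the rest of the character
        }
    }
}

// A 4-byte emoji split across two "tokens" only prints once it is complete.
const decoder = new IncrementalUtf8Decoder((text) => process.stdout.write(text));
decoder.push(1, new Uint8Array([0xf0, 0x9f]));
decoder.push(2, new Uint8Array([0x98, 0x80])); // prints the full emoji
```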
@ -0,0 +1,47 @@
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Copies the shared llmodel sources from gpt4all-backend into the backend folder.
|
||||
// These are dependencies of the bindings and will be required in case node-gyp-build
|
||||
// cannot find a prebuild. This script is used in the package install hook and will
|
||||
// be executed BOTH when `yarn install` is run in the root folder AND when the package
|
||||
// is installed as a dependency in another project.
|
||||
|
||||
const backendDeps = [
|
||||
"llmodel.h",
|
||||
"llmodel.cpp",
|
||||
"llmodel_c.cpp",
|
||||
"llmodel_c.h",
|
||||
"sysinfo.h",
|
||||
"dlhandle.h",
|
||||
"dlhandle.cpp",
|
||||
];
|
||||
|
||||
const sourcePath = path.resolve(__dirname, "../../../gpt4all-backend");
|
||||
const destPath = path.resolve(__dirname, "../backend");
|
||||
|
||||
// Silently ignore if the backend sources are not available.
|
||||
// When the package is installed as a dependency, gpt4all-backend will not be present.
|
||||
if (fs.existsSync(sourcePath)) {
|
||||
if (!fs.existsSync(destPath)) {
|
||||
fs.mkdirSync(destPath);
|
||||
}
|
||||
for (const file of backendDeps) {
|
||||
const sourceFile = path.join(sourcePath, file);
|
||||
const destFile = path.join(destPath, file);
|
||||
if (fs.existsSync(sourceFile)) {
|
||||
console.info(`Copying ${sourceFile} to ${destFile}`);
|
||||
fs.copyFileSync(sourceFile, destFile); // overwrite
|
||||
} else {
|
||||
throw new Error(`File ${sourceFile} does not exist`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// assert that the backend sources are present
|
||||
for (const file of backendDeps) {
|
||||
const destFile = path.join(destPath, file);
|
||||
if (!fs.existsSync(destFile)) {
|
||||
throw new Error(`File ${destFile} does not exist`);
|
||||
}
|
||||
}
|
@ -1,12 +1,42 @@
|
||||
#!/bin/sh
|
||||
# Build script for Unix-like systems (Linux, macOS).
|
||||
# Script assumes the current working directory is the bindings project root.
|
||||
|
||||
SYSNAME=$(uname -s)
|
||||
PLATFORM=$(uname -m)
|
||||
|
||||
# Allows overriding target sysname and platform via args
|
||||
# If not provided, the current system's sysname and platform will be used
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--sysname=*)
|
||||
SYSNAME="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
--platform=*)
|
||||
PLATFORM="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
echo "Unknown argument: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$SYSNAME" = "Linux" ]; then
|
||||
BASE_DIR="runtimes/linux-x64"
|
||||
if [ "$PLATFORM" = "x86_64" ]; then
|
||||
BASE_DIR="runtimes/linux-x64"
|
||||
elif [ "$PLATFORM" = "aarch64" ]; then
|
||||
BASE_DIR="runtimes/linux-arm64"
|
||||
else
|
||||
echo "Unsupported platform: $PLATFORM" >&2
|
||||
exit 1
|
||||
fi
|
||||
LIB_EXT="so"
|
||||
elif [ "$SYSNAME" = "Darwin" ]; then
|
||||
BASE_DIR="runtimes/osx"
|
||||
BASE_DIR="runtimes/darwin"
|
||||
LIB_EXT="dylib"
|
||||
elif [ -n "$SYSNAME" ]; then
|
||||
echo "Unsupported system: $SYSNAME" >&2
|
||||
@ -22,8 +52,24 @@ BUILD_DIR="$BASE_DIR/build"
|
||||
rm -rf "$BASE_DIR"
|
||||
mkdir -p "$NATIVE_DIR" "$BUILD_DIR"
|
||||
|
||||
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" &&
|
||||
cmake --build "$BUILD_DIR" -j --config Release && {
|
||||
if [ "$PLATFORM" = "x86_64" ]; then
|
||||
echo "Building for x86_64"
|
||||
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
|
||||
fi
|
||||
|
||||
if [ "$PLATFORM" = "aarch64" ]; then
|
||||
if [ "$(uname -m)" != "aarch64" ]; then
|
||||
echo "Cross-compiling for aarch64"
|
||||
cmake -S ../../gpt4all-backend \
|
||||
-B "$BUILD_DIR" \
|
||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
|
||||
-DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
|
||||
else
|
||||
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
|
||||
fi
|
||||
fi
|
||||
|
||||
cmake --build "$BUILD_DIR" --parallel && {
|
||||
cp "$BUILD_DIR"/libgptj*.$LIB_EXT "$NATIVE_DIR"/
|
||||
cp "$BUILD_DIR"/libllama*.$LIB_EXT "$NATIVE_DIR"/
|
||||
}
|
@ -1,22 +1,21 @@
|
||||
const prebuildify = require("prebuildify");
|
||||
|
||||
async function createPrebuilds(combinations) {
|
||||
for (const { platform, arch } of combinations) {
|
||||
async function createPrebuilds(configs) {
|
||||
for (const config of configs) {
|
||||
const opts = {
|
||||
platform,
|
||||
arch,
|
||||
napi: true,
|
||||
targets: ["18.16.0"]
|
||||
targets: ["18.16.0"],
|
||||
...config,
|
||||
};
|
||||
try {
|
||||
await createPrebuild(opts);
|
||||
console.log(
|
||||
`Build succeeded for platform ${opts.platform} and architecture ${opts.arch}`
|
||||
`Build succeeded for platform ${opts.platform} and architecture ${opts.arch}`,
|
||||
);
|
||||
} catch (err) {
|
||||
console.error(
|
||||
`Error building for platform ${opts.platform} and architecture ${opts.arch}:`,
|
||||
err
|
||||
err,
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -24,6 +23,17 @@ async function createPrebuilds(combinations) {
|
||||
|
||||
function createPrebuild(opts) {
|
||||
return new Promise((resolve, reject) => {
|
||||
// if this prebuild is cross-compiling for arm64 on a non-arm64 machine,
|
||||
// set the CXX and CC environment variables to the cross-compilers
|
||||
if (
|
||||
opts.arch === "arm64" &&
|
||||
process.arch !== "arm64" &&
|
||||
process.platform === "linux"
|
||||
) {
|
||||
process.env.CXX = "aarch64-linux-gnu-g++-12";
|
||||
process.env.CC = "aarch64-linux-gnu-gcc-12";
|
||||
}
|
||||
|
||||
prebuildify(opts, (err) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
@ -35,22 +45,18 @@ function createPrebuild(opts) {
|
||||
}
|
||||
|
||||
let prebuildConfigs;
|
||||
if(process.platform === 'win32') {
|
||||
prebuildConfigs = [
|
||||
{ platform: "win32", arch: "x64" }
|
||||
];
|
||||
} else if(process.platform === 'linux') {
|
||||
//Unsure if darwin works, need mac tester!
|
||||
prebuildConfigs = [
|
||||
{ platform: "linux", arch: "x64" },
|
||||
//{ platform: "linux", arch: "arm64" },
|
||||
//{ platform: "linux", arch: "armv7" },
|
||||
]
|
||||
} else if(process.platform === 'darwin') {
|
||||
if (process.platform === "win32") {
|
||||
prebuildConfigs = [{ platform: "win32", arch: "x64" }];
|
||||
} else if (process.platform === "linux") {
|
||||
prebuildConfigs = [
|
||||
{ platform: "darwin", arch: "x64" },
|
||||
{ platform: "darwin", arch: "arm64" },
|
||||
]
|
||||
{ platform: "linux", arch: "x64" },
|
||||
{ platform: "linux", arch: "arm64" },
|
||||
];
|
||||
} else if (process.platform === "darwin") {
|
||||
prebuildConfigs = [
|
||||
{ platform: "darwin", arch: "x64" },
|
||||
{ platform: "darwin", arch: "arm64" },
|
||||
];
|
||||
}
|
||||
|
||||
createPrebuilds(prebuildConfigs)
|
||||
|
@ -2,7 +2,6 @@ import { loadModel, createCompletion } from "../src/gpt4all.js";
|
||||
|
||||
const model = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
|
||||
verbose: true,
|
||||
device: "gpu",
|
||||
});
|
||||
|
||||
const chat = await model.createChatSession();
|
||||
@ -12,8 +11,6 @@ await createCompletion(
|
||||
"Why are bananas rather blue than bread at night sometimes?",
|
||||
{
|
||||
verbose: true,
|
||||
nPredict: 10,
|
||||
}
|
||||
);
|
||||
await createCompletion(chat, "Are you sure?", {
|
||||
verbose: true,
|
||||
});
|
||||
|
@ -7,12 +7,12 @@ const modelOptions = {
|
||||
verbose: true,
|
||||
};
|
||||
|
||||
const model1 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
|
||||
const model1 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
|
||||
...modelOptions,
|
||||
device: "gpu", // only one model can be on gpu
|
||||
});
|
||||
const model2 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
|
||||
const model3 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
|
||||
const model2 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
|
||||
const model3 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
|
||||
|
||||
const promptContext = {
|
||||
verbose: true,
|
||||
@ -27,3 +27,6 @@ const responses = await Promise.all([
|
||||
createCompletion(model3, "What is 1 + 3?", promptContext),
|
||||
]);
|
||||
console.log(responses.map((res) => res.choices[0].message));
|
||||
model1.dispose();
|
||||
model2.dispose();
|
||||
model3.dispose();
|
@ -1,61 +0,0 @@
|
||||
import {
|
||||
LLModel,
|
||||
createCompletion,
|
||||
DEFAULT_DIRECTORY,
|
||||
DEFAULT_LIBRARIES_DIRECTORY,
|
||||
loadModel,
|
||||
} from "../src/gpt4all.js";
|
||||
|
||||
const model = await loadModel("mistral-7b-openorca.gguf2.Q4_0.gguf", {
|
||||
verbose: true,
|
||||
device: "gpu",
|
||||
});
|
||||
const ll = model.llm;
|
||||
|
||||
try {
|
||||
class Extended extends LLModel {}
|
||||
} catch (e) {
|
||||
console.log("Extending from native class gone wrong " + e);
|
||||
}
|
||||
|
||||
console.log("state size " + ll.stateSize());
|
||||
|
||||
console.log("thread count " + ll.threadCount());
|
||||
ll.setThreadCount(5);
|
||||
|
||||
console.log("thread count " + ll.threadCount());
|
||||
ll.setThreadCount(4);
|
||||
console.log("thread count " + ll.threadCount());
|
||||
console.log("name " + ll.name());
|
||||
console.log("type: " + ll.type());
|
||||
console.log("Default directory for models", DEFAULT_DIRECTORY);
|
||||
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
|
||||
console.log("Has GPU", ll.hasGpuDevice());
|
||||
console.log("gpu devices", ll.listGpu());
|
||||
console.log("Required Mem in bytes", ll.memoryNeeded());
|
||||
|
||||
// to ingest a custom system prompt without using a chat session.
|
||||
await createCompletion(
|
||||
model,
|
||||
"<|im_start|>system\nYou are an advanced mathematician.\n<|im_end|>\n",
|
||||
{
|
||||
promptTemplate: "%1",
|
||||
nPredict: 0,
|
||||
special: true,
|
||||
}
|
||||
);
|
||||
const completion1 = await createCompletion(model, "What is 1 + 1?", {
|
||||
verbose: true,
|
||||
});
|
||||
console.log(`🤖 > ${completion1.choices[0].message.content}`);
|
||||
//Very specific:
|
||||
// tested on Ubuntu 22.0, Linux Mint, if I set nPast to 100, the app hangs.
|
||||
const completion2 = await createCompletion(model, "And if we add two?", {
|
||||
verbose: true,
|
||||
});
|
||||
console.log(`🤖 > ${completion2.choices[0].message.content}`);
|
||||
|
||||
//CALLING DISPOSE WILL INVALID THE NATIVE MODEL. USE THIS TO CLEANUP
|
||||
model.dispose();
|
||||
|
||||
console.log("model disposed, exiting...");
|
@ -1,7 +1,6 @@
|
||||
import { promises as fs } from "node:fs";
|
||||
import { loadModel, createCompletion } from "../src/gpt4all.js";
|
||||
|
||||
const model = await loadModel("Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf", {
|
||||
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
|
||||
verbose: true,
|
||||
device: "gpu",
|
||||
});
|
||||
@ -12,14 +11,15 @@ const res = await createCompletion(
|
||||
{
|
||||
onPromptToken: (tokenId) => {
|
||||
console.debug("onPromptToken", { tokenId });
|
||||
// throwing an error will cancel
|
||||
// errors within the callback will cancel ingestion; inference will still run
|
||||
throw new Error("This is an error");
|
||||
// const foo = thisMethodDoesNotExist();
|
||||
// returning false will cancel as well
|
||||
// return false;
|
||||
},
|
||||
onResponseToken: (tokenId, token) => {
|
||||
console.debug("onResponseToken", { tokenId, token });
|
||||
onResponseTokens: ({ tokenIds, text }) => {
|
||||
// console.debug("onResponseToken", { tokenIds, text });
|
||||
process.stdout.write(text);
|
||||
// same applies here
|
||||
},
|
||||
}
|
37
gpt4all-bindings/typescript/spec/token-streaming-emoji.mjs
Normal file
37
gpt4all-bindings/typescript/spec/token-streaming-emoji.mjs
Normal file
@ -0,0 +1,37 @@
|
||||
import {
|
||||
loadModel,
|
||||
createCompletion,
|
||||
createCompletionStream,
|
||||
createCompletionGenerator,
|
||||
} from "../src/gpt4all.js";
|
||||
|
||||
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
|
||||
device: "cpu",
|
||||
});
|
||||
|
||||
const prompt = "Tell a short story but only use emojis. Three sentences max.";
|
||||
|
||||
const result = await createCompletion(model, prompt, {
|
||||
onResponseToken: (tokens) => {
|
||||
console.debug(tokens)
|
||||
},
|
||||
});
|
||||
|
||||
console.debug(result.choices[0].message);
|
||||
|
||||
process.stdout.write("### Stream:");
|
||||
const stream = createCompletionStream(model, prompt);
|
||||
stream.tokens.on("data", (data) => {
|
||||
process.stdout.write(data);
|
||||
});
|
||||
await stream.result;
|
||||
process.stdout.write("\n");
|
||||
|
||||
process.stdout.write("### Generator:");
|
||||
const gen = createCompletionGenerator(model, prompt);
|
||||
for await (const chunk of gen) {
|
||||
process.stdout.write(chunk);
|
||||
}
|
||||
|
||||
|
||||
model.dispose();
|
@ -38,8 +38,8 @@ process.stdout.write("\n");
|
||||
|
||||
process.stdout.write("### Callback:");
|
||||
await createCompletion(model, "Why not just callbacks?", {
|
||||
onResponseToken: (tokenId, token) => {
|
||||
process.stdout.write(token);
|
||||
onResponseTokens: ({ text }) => {
|
||||
process.stdout.write(text);
|
||||
},
|
||||
});
|
||||
process.stdout.write("\n");
|
@ -25,7 +25,7 @@ class ChatSession {
|
||||
const { messages, systemPrompt, ...sessionDefaultPromptContext } =
|
||||
chatSessionOpts;
|
||||
this.model = model;
|
||||
this.modelName = model.llm.name();
|
||||
this.modelName = model.llm.getName();
|
||||
this.messages = messages ?? [];
|
||||
this.systemPrompt = systemPrompt ?? model.config.systemPrompt;
|
||||
this.initialized = false;
|
||||
|
112
gpt4all-bindings/typescript/src/gpt4all.d.ts
vendored
112
gpt4all-bindings/typescript/src/gpt4all.d.ts
vendored
@ -5,10 +5,27 @@ interface LLModelOptions {
|
||||
/**
|
||||
* Model architecture. This argument currently does not have any functionality and is just used as a descriptive identifier for the user.
|
||||
*/
|
||||
type?: string;
|
||||
model_name: string;
|
||||
model_path: string;
|
||||
library_path?: string;
|
||||
modelType?: string;
|
||||
/**
|
||||
* Absolute path to the model file.
|
||||
*/
|
||||
modelFile: string;
|
||||
/**
|
||||
* Path to the llmodel implementation shared objects. This can be a single path or a list of paths separated by ';' delimiter.
|
||||
*/
|
||||
librariesPath?: string;
|
||||
/**
|
||||
* A string representing the implementation to use. One of 'auto', 'cpu', 'metal', 'kompute', or 'cuda'.
|
||||
*/
|
||||
backend: string;
|
||||
/**
|
||||
* The maximum window size of this model.
|
||||
*/
|
||||
nCtx: number;
|
||||
/**
|
||||
* Number of GPU layers to use (Vulkan)
|
||||
*/
|
||||
nGpuLayers: number;
|
||||
}
|
||||
|
||||
interface ModelConfig {
|
||||
@ -263,10 +280,10 @@ interface LLModelInferenceResult {
|
||||
interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
|
||||
/** Callback for response tokens, called for each generated token.
|
||||
* @param {number} tokenId The token id.
|
||||
* @param {string} token The token.
|
||||
* @param {Uint8Array} bytes The token bytes.
|
||||
* @returns {boolean | undefined} Whether to continue generating tokens.
|
||||
* */
|
||||
onResponseToken?: (tokenId: number, token: string) => boolean | void;
|
||||
onResponseToken?: (tokenId: number, bytes: Uint8Array) => boolean | void;
|
||||
/** Callback for prompt tokens, called for each input token in the prompt.
|
||||
* @param {number} tokenId The token id.
|
||||
* @returns {boolean | undefined} Whether to continue ingesting the prompt.
|
||||
@ -281,30 +298,42 @@ interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
|
||||
declare class LLModel {
|
||||
/**
|
||||
* Initialize a new LLModel.
|
||||
* @param {string} path Absolute path to the model file.
|
||||
* @throws {Error} If the model file does not exist.
|
||||
* @param {LLModelOptions} options LLModel options.
|
||||
* @throws {Error} If the model can't be loaded or necessary runtimes are not found.
|
||||
*/
|
||||
constructor(options: LLModelOptions);
|
||||
/**
|
||||
* Loads the LLModel.
|
||||
* @return {boolean} true if the model was loaded successfully, false otherwise.
|
||||
*/
|
||||
load(): boolean;
|
||||
|
||||
/**
|
||||
* Initiate a GPU by a string identifier. See LoadModelOptions.device for more information
|
||||
* @param {string} device 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
|
||||
* @return {boolean} true if the GPU was initialized successfully, false otherwise.
|
||||
*/
|
||||
initGpu(device: string): boolean;
|
||||
|
||||
/** undefined or user supplied */
|
||||
type(): string | undefined;
|
||||
getType(): string | undefined;
|
||||
|
||||
/** The name of the model. */
|
||||
name(): string;
|
||||
getName(): string;
|
||||
|
||||
/**
|
||||
* Get the size of the internal state of the model.
|
||||
* NOTE: This state data is specific to the type of model you have created.
|
||||
* @return the size in bytes of the internal state of the model
|
||||
*/
|
||||
stateSize(): number;
|
||||
getStateSize(): number;
|
||||
|
||||
/**
|
||||
* Get the number of threads used for model inference.
|
||||
* The default is the number of physical cores your computer has.
|
||||
* @returns The number of threads used for model inference.
|
||||
*/
|
||||
threadCount(): number;
|
||||
getThreadCount(): number;
|
||||
|
||||
/**
|
||||
* Set the number of threads used for model inference.
|
||||
@ -375,14 +404,6 @@ declare class LLModel {
|
||||
*/
|
||||
getLibraryPath(): string;
|
||||
|
||||
/**
|
||||
* Initiate a GPU by a string identifier.
|
||||
* @param {number} memory_required Should be in the range size_t or will throw
|
||||
* @param {string} device_name 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
|
||||
* read LoadModelOptions.device for more information
|
||||
*/
|
||||
initGpuByString(memory_required: number, device_name: string): boolean;
|
||||
|
||||
/**
|
||||
* From C documentation
|
||||
* @returns True if a GPU device is successfully initialized, false otherwise.
|
||||
@ -391,11 +412,10 @@ declare class LLModel {
|
||||
|
||||
/**
|
||||
* GPUs that are usable for this LLModel
|
||||
* @param {number} nCtx Maximum size of context window
|
||||
* @throws if hasGpuDevice returns false (i think)
|
||||
* @returns
|
||||
* @throws if gpu device list is not available
|
||||
* @returns an array of GpuDevice objects
|
||||
*/
|
||||
listGpu(nCtx: number): GpuDevice[];
|
||||
getGpuDevices(): GpuDevice[];
|
||||
|
||||
/**
|
||||
* delete and cleanup the native model
|
||||
@ -414,6 +434,7 @@ interface GpuDevice {
|
||||
heapSize: number;
|
||||
name: string;
|
||||
vendor: string;
|
||||
backend: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -443,13 +464,15 @@ interface LoadModelOptions {
|
||||
/**
|
||||
* The processing unit on which the model will run. It can be set to
|
||||
* - "cpu": Model will run on the central processing unit.
|
||||
* - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
|
||||
* - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
|
||||
* - "kompute": Model will run using the kompute (vulkan) gpu backend
|
||||
* - "cuda": Model will run using the cuda gpu backend
|
||||
* - "gpu": Use Metal on ARM64 macOS, otherwise the same as "kompute"
|
||||
* - "amd", "nvidia": Use the best GPU provided by the Kompute backend from this vendor.
|
||||
* - "gpu name": Model will run on the GPU that matches the name if it's available.
|
||||
* Note: If a GPU device lacks sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All
|
||||
* instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the
|
||||
* model.
|
||||
* @default "cpu"
|
||||
* @default Metal on ARM64 macOS, "cpu" otherwise.
|
||||
*/
|
||||
device?: string;
|
||||
/**
|
||||
@ -458,10 +481,16 @@ interface LoadModelOptions {
|
||||
*/
|
||||
nCtx?: number;
|
||||
/**
|
||||
* Number of gpu layers needed
|
||||
* Number of GPU layers to use (Vulkan)
|
||||
* @default 100
|
||||
* @alias ngl
|
||||
*/
|
||||
nGpuLayers?: number;
|
||||
ngl?: number;
|
||||
/**
|
||||
* Number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
|
||||
*/
|
||||
nThreads?: number;
|
||||
}
|
||||
|
||||
interface InferenceModelOptions extends LoadModelOptions {
|
||||
@ -507,15 +536,33 @@ interface CompletionProvider {
|
||||
): Promise<InferenceResult>;
|
||||
}
|
||||
|
||||
interface CompletionTokens {
|
||||
/** The token ids. */
|
||||
tokenIds: number[];
|
||||
/** The token text. May be an empty string. */
|
||||
text: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for creating a completion.
|
||||
*/
|
||||
interface CompletionOptions extends LLModelInferenceOptions {
|
||||
interface CompletionOptions extends Partial<LLModelPromptContext> {
|
||||
/**
|
||||
* Indicates if verbose logging is enabled.
|
||||
* @default false
|
||||
*/
|
||||
verbose?: boolean;
|
||||
|
||||
/** Called every time new tokens can be decoded to text.
|
||||
* @param {CompletionTokens} tokens The token ids and decoded text.
|
||||
* @returns {boolean | undefined} Whether to continue generating tokens.
|
||||
* */
|
||||
onResponseTokens?: (tokens: CompletionTokens) => boolean | void;
|
||||
/** Callback for prompt tokens, called for each input token in the prompt.
|
||||
* @param {number} tokenId The token id.
|
||||
* @returns {boolean | undefined} Whether to continue ingesting the prompt.
|
||||
* */
|
||||
onPromptToken?: (tokenId: number) => boolean | void;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -639,13 +686,6 @@ interface LLModelPromptContext {
|
||||
*/
|
||||
promptTemplate?: string;
|
||||
|
||||
/** The context window size. Do not use, it has no effect. See loadModel options.
|
||||
* THIS IS DEPRECATED!!!
|
||||
* Use loadModel's nCtx option instead.
|
||||
* @default 2048
|
||||
*/
|
||||
nCtx: number;
|
||||
|
||||
/** The top-k logits to sample from.
|
||||
* Top-K sampling selects the next token only from the top K most likely tokens predicted by the model.
|
||||
* It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit
|
||||
|
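The typings above introduce the batched `onResponseTokens` callback (token ids plus already-decoded text) on `CompletionOptions`. A minimal usage sketch follows, in the style of the spec files in this PR; the model name is illustrative.

```js
import { loadModel, createCompletion } from "gpt4all";

const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" }); // illustrative model

const result = await createCompletion(model, "Tell a short story using only emojis.", {
    // called whenever newly generated tokens can be decoded to text
    onResponseTokens: ({ tokenIds, text }) => {
        process.stdout.write(text);
        // return false; // returning false stops generation early
    },
});

console.log("\n", result.choices[0].message);
model.dispose();
```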
@ -37,9 +37,8 @@ async function loadModel(modelName, options = {}) {
|
||||
type: "inference",
|
||||
allowDownload: true,
|
||||
verbose: false,
|
||||
device: "cpu",
|
||||
nCtx: 2048,
|
||||
ngl: 100,
|
||||
nGpuLayers: options.ngl ?? 100,
|
||||
...options,
|
||||
};
|
||||
|
||||
@ -54,27 +53,77 @@ async function loadModel(modelName, options = {}) {
|
||||
typeof loadOptions.librariesPath === "string",
|
||||
"Libraries path should be a string"
|
||||
);
|
||||
const existingPaths = loadOptions.librariesPath
|
||||
const existingLibPaths = loadOptions.librariesPath
|
||||
.split(";")
|
||||
.filter(existsSync)
|
||||
.join(";");
|
||||
|
||||
const llmOptions = {
|
||||
model_name: appendBinSuffixIfMissing(modelName),
|
||||
model_path: loadOptions.modelPath,
|
||||
library_path: existingPaths,
|
||||
device: loadOptions.device,
|
||||
modelFile: modelConfig.path,
|
||||
librariesPath: existingLibPaths,
|
||||
nCtx: loadOptions.nCtx,
|
||||
ngl: loadOptions.ngl,
|
||||
nGpuLayers: loadOptions.nGpuLayers,
|
||||
};
|
||||
|
||||
let initDevice;
|
||||
if (process.platform === "darwin") {
|
||||
if (!loadOptions.device) {
|
||||
llmOptions.backend = "auto"; // 'auto' is effectively 'metal' due to currently non-functional fallback
|
||||
} else if (loadOptions.device === "cpu") {
|
||||
llmOptions.backend = "cpu";
|
||||
} else {
|
||||
if (process.arch !== "arm64" || loadOptions.device !== "gpu") {
|
||||
throw new Error(
|
||||
`Unknown device for this platform: ${loadOptions.device}`
|
||||
);
|
||||
}
|
||||
llmOptions.backend = "metal";
|
||||
}
|
||||
} else {
|
||||
// default to kompute. use cpu for arm64 because we currently don't build kompute runtimes for arm64
|
||||
llmOptions.backend = process.arch === "arm64" ? "cpu" : "kompute";
|
||||
if (!loadOptions.device || loadOptions.device === "cpu") {
|
||||
// use the default backend
|
||||
} else if (
|
||||
loadOptions.device === "cuda" ||
|
||||
loadOptions.device === "kompute"
|
||||
) {
|
||||
llmOptions.backend = loadOptions.device;
|
||||
initDevice = "gpu";
|
||||
} else if (loadOptions.device.startsWith("cuda:")) {
|
||||
llmOptions.backend = "cuda";
|
||||
initDevice = loadOptions.device.replace(/^cuda:/, "");
|
||||
} else {
|
||||
initDevice = loadOptions.device.replace(/^kompute:/, "");
|
||||
}
|
||||
}
|
||||
|
||||
    if (loadOptions.verbose) {
        console.debug("Creating LLModel:", {
            initDevice,
            llmOptions,
            modelConfig,
        });
    }
    const llmodel = new LLModel(llmOptions);
    if (initDevice) {
        const gpuInitSuccess = llmodel.initGpu(initDevice);
        if (!gpuInitSuccess) {
            const availableDevices = llmodel.getGpuDevices();
            const deviceNames = availableDevices
                .map((device) => device.name)
                .join(", ");
            console.warn(
                `Failed to initialize GPU device "${initDevice}" - Available devices: ${deviceNames}`
            );
        }
    }
    llmodel.load();

    if (loadOptions.nThreads) {
        llmodel.setThreadCount(loadOptions.nThreads);
    }

    if (loadOptions.type === "embedding") {
        return new EmbeddingModel(llmodel, modelConfig);
    } else if (loadOptions.type === "inference") {
@ -84,7 +133,7 @@ async function loadModel(modelName, options = {}) {
    }
}
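Note that a failed GPU initialization only logs a warning and loading continues; callers who want to fail hard could check the device themselves, using the same getters the test suite below exercises via model.llm (a sketch, not part of the diff):

const { loadModel } = require("gpt4all");

async function main() {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "gpu" });
    if (!model.llm.hasGpuDevice()) {
        const names = model.llm.getGpuDevices().map((d) => d.name).join(", ");
        throw new Error(`GPU requested but not initialized. Available devices: ${names}`);
    }
    model.dispose();
}
main();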
function createEmbedding(model, text, options={}) {
function createEmbedding(model, text, options = {}) {
    let {
        dimensionality = undefined,
        longTextMode = "mean",
@ -138,10 +187,7 @@ async function createCompletion(
        ...options,
    };

    const result = await provider.generate(
        input,
        completionOptions,
    );
    const result = await provider.generate(input, completionOptions);

    return {
        model: provider.modelName,
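For completeness, an embedding sketch using the options destructured above (the embedding model name and the dimensionality value are placeholders, and the exact return shape is not part of this diff):

const { loadModel, createEmbedding } = require("gpt4all");

async function main() {
    const embedder = await loadModel("nomic-embed-text-v1.5.f16.gguf", { type: "embedding" });
    const result = createEmbedding(embedder, "The quick brown fox jumps over the lazy dog", {
        dimensionality: 256,  // optional truncation, model permitting
        longTextMode: "mean", // default shown above
    });
    console.log(result);
    embedder.dispose();
}
main();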
@ -174,10 +220,10 @@ function createCompletionStream(

    const completionPromise = createCompletion(provider, input, {
        ...options,
        onResponseToken: (tokenId, token) => {
            completionStream.push(token);
            if (options.onResponseToken) {
                return options.onResponseToken(tokenId, token);
        onResponseTokens: (tokens) => {
            completionStream.push(tokens.text);
            if (options.onResponseTokens) {
                return options.onResponseTokens(tokens);
            }
        },
    }).then((result) => {
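Stream consumers now receive batched, fully decoded text via onResponseTokens({ tokenIds, text }) instead of per-token strings. Assuming createCompletionStream still returns an object with a tokens readable stream and a result promise, usage could look like this:

const { loadModel, createCompletionStream } = require("gpt4all");

async function main() {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });
    let generated = 0;
    const stream = createCompletionStream(model, "Name three colors.", {
        // text contains whole UTF-8 sequences, so emoji are never split mid-character
        onResponseTokens: ({ tokenIds, text }) => {
            generated += tokenIds.length;
        },
    });
    stream.tokens.on("data", (chunk) => process.stdout.write(chunk));
    await stream.result;
    console.log(`\n${generated} tokens generated`);
    model.dispose();
}
main();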
@ -11,7 +11,7 @@ class InferenceModel {
    constructor(llmodel, config) {
        this.llm = llmodel;
        this.config = config;
        this.modelName = this.llm.name();
        this.modelName = this.llm.getName();
    }

    async createChatSession(options) {
@ -90,6 +90,25 @@ class InferenceModel {

        let tokensGenerated = 0;

        const decoder = new TokenDecoder((tokenIds, text) => {
            let continueGeneration = true;
            tokensGenerated += tokenIds.length;

            if (options.onResponseTokens) {
                // catch here because if errors bubble through cpp they will lose stacktraces
                try {
                    // don't cancel the generation unless user explicitly returns false
                    continueGeneration =
                        options.onResponseTokens({ tokenIds, text }) !== false;
                } catch (err) {
                    console.error("Error in onResponseTokens callback", err);
                    continueGeneration = false;
                }
            }
            return continueGeneration;
        });
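As the comments note, generation stops only when the callback throws or explicitly returns false; anything else continues. A sketch of capping output from user code (the limit is arbitrary):

const { loadModel, createCompletion } = require("gpt4all");

async function main() {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });
    let count = 0;
    const res = await createCompletion(model, "Write a very long story.", {
        onResponseTokens: ({ tokenIds }) => {
            count += tokenIds.length;
            return count < 20; // false cancels generation once ~20 tokens were produced
        },
    });
    console.log(res.model, `stopped after ${count} tokens`);
    model.dispose();
}
main();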
        const result = await this.llm.infer(prompt, {
            ...promptContext,
            nPast,
@ -97,7 +116,7 @@ class InferenceModel {
                let continueIngestion = true;
                tokensIngested++;
                if (options.onPromptToken) {
                    // catch errors because if they go through cpp they will loose stacktraces
                    // catch here because if errors bubble through cpp they will lose stacktraces
                    try {
                        // don't cancel ingestion unless user explicitly returns false
                        continueIngestion =
@ -109,20 +128,8 @@ class InferenceModel {
                }
                return continueIngestion;
            },
            onResponseToken: (tokenId, token) => {
                let continueGeneration = true;
                tokensGenerated++;
                if (options.onResponseToken) {
                    try {
                        // don't cancel the generation unless user explicitly returns false
                        continueGeneration =
                            options.onResponseToken(tokenId, token) !== false;
                    } catch (err) {
                        console.error("Error in onResponseToken callback", err);
                        continueGeneration = false;
                    }
                }
                return continueGeneration;
            onResponseToken: (tokenId, bytes) => {
                return decoder.decode(tokenId, bytes);
            },
        });
@ -141,6 +148,63 @@ class InferenceModel {
    }
}
// see https://github.com/nomic-ai/gpt4all/pull/1281
class TokenDecoder {

    constructor(callback) {
        this.callback = callback;
        this.buffer = [];
        this.tokenIds = [];
        this.buffExpectingContBytes = 0;
        this.textDecoder = new TextDecoder();
    }

    decode(tokenId, bytes) {
        const decoded = [];
        this.tokenIds.push(tokenId);

        for (let i = 0; i < bytes.length; i++) {
            const byte = bytes[i];
            const bits = byte.toString(2).padStart(8, '0');
            const highOnes = bits.split('0')[0];

            if (highOnes.length === 1) {
                // Continuation byte
                this.buffer.push(byte);
                this.buffExpectingContBytes -= 1;
            } else {
                // Beginning of a byte sequence
                if (this.buffer.length > 0) {
                    decoded.push(this._decodeBuffer());
                    this.buffer = [];
                }

                this.buffer.push(byte);
                this.buffExpectingContBytes = Math.max(0, highOnes.length - 1);
            }

            if (this.buffExpectingContBytes <= 0) {
                // Received the whole sequence or an out-of-place continuation byte
                decoded.push(this._decodeBuffer());
                this.buffer = [];
                this.buffExpectingContBytes = 0;
            }
        }

        if (decoded.length === 0 && this.buffExpectingContBytes > 0) {
            // Wait for more continuation bytes
            return true;
        }
        const tokenIds = this.tokenIds;
        this.tokenIds = [];
        return this.callback(tokenIds, decoded.join(''));
    }

    _decodeBuffer() {
        return this.textDecoder.decode(new Uint8Array(this.buffer));
    }
}
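To illustrate why the decoder buffers continuation bytes: a 4-byte emoji split across two tokens only reaches the callback once the whole UTF-8 sequence has arrived. Token ids and the byte split are made up for the example; adjust the require path if TokenDecoder is not re-exported from the package root.

const { TokenDecoder } = require("gpt4all");

const decoder = new TokenDecoder((tokenIds, text) => {
    console.log(tokenIds, JSON.stringify(text)); // [101, 102] "👍"
    return true;
});

// U+1F44D (👍) is 0xF0 0x9F 0x91 0x8D in UTF-8
decoder.decode(101, new Uint8Array([0xf0, 0x9f])); // returns true: still expecting continuation bytes
decoder.decode(102, new Uint8Array([0x91, 0x8d])); // flushes the whole character to the callback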
class EmbeddingModel {
    llm;
    config;
@ -160,6 +224,7 @@ class EmbeddingModel {
}

module.exports = {
    TokenDecoder,
    InferenceModel,
    EmbeddingModel,
};
gpt4all-bindings/typescript/test/bindings.test.js (new file, 73 lines)
@ -0,0 +1,73 @@
const { loadModel } = require("../src/gpt4all.js");

// these tests require an internet connection / a real model
const testModel = "Phi-3-mini-4k-instruct.Q4_0.gguf";

describe("llmodel", () => {
    let model;

    test("load on cpu", async () => {
        model = await loadModel(testModel, {
            device: "cpu",
        });
    });

    test("getter working", async () => {
        const stateSize = model.llm.getStateSize();
        expect(stateSize).toBeGreaterThan(0);
        const name = model.llm.getName();
        expect(name).toBe(testModel);
        const type = model.llm.getType();
        expect(type).toBeUndefined();
        const devices = model.llm.getGpuDevices();
        expect(Array.isArray(devices)).toBe(true);
        const gpuEnabled = model.llm.hasGpuDevice();
        expect(gpuEnabled).toBe(false);
        const requiredMem = model.llm.getRequiredMemory();
        expect(typeof requiredMem).toBe('number');
        const threadCount = model.llm.getThreadCount();
        expect(threadCount).toBe(4);
    });

    test("setting thread count", () => {
        model.llm.setThreadCount(5);
        expect(model.llm.getThreadCount()).toBe(5);
    });

    test("cpu inference", async () => {
        const res = await model.llm.infer("what is the capital of france?", {
            temp: 0,
            promptTemplate: model.config.promptTemplate,
            nPredict: 10,
            onResponseToken: () => {
                return true;
            },
        });
        expect(res.text).toMatch(/paris/i);
    }, 10000);

    test("dispose and load model on gpu", async () => {
        model.dispose();
        model = await loadModel(testModel, {
            device: "gpu",
        });
        const gpuEnabled = model.llm.hasGpuDevice();
        expect(gpuEnabled).toBe(true);
    });

    test("gpu inference", async () => {
        const res = await model.llm.infer("what is the capital of france?", {
            temp: 0,
            promptTemplate: model.config.promptTemplate,
            nPredict: 10,
            onResponseToken: () => {
                return true;
            },
        });
        expect(res.text).toMatch(/paris/i);
    }, 10000);

    afterAll(() => {
        model.dispose();
    });
});
@ -2,7 +2,6 @@ const path = require("node:path");
const os = require("node:os");
const fsp = require("node:fs/promises");
const { existsSync } = require('node:fs');
const { LLModel } = require("node-gyp-build")(path.resolve(__dirname, ".."));
const {
    listModels,
    downloadModel,
@ -13,11 +12,8 @@ const {
    DEFAULT_LIBRARIES_DIRECTORY,
    DEFAULT_MODEL_LIST_URL,
} = require("../src/config.js");
const {
    loadModel,
    createPrompt,
    createCompletion,
} = require("../src/gpt4all.js");

// these tests do not require an internet connection or an actual model

describe("config", () => {
    test("default paths constants are available and correct", () => {
File diff suppressed because it is too large