TypeScript bindings maintenance (#2363)

* remove outdated comments

Signed-off-by: limez <limez@protonmail.com>

* simpler build from source

Signed-off-by: limez <limez@protonmail.com>

* update unix build script to create .so runtimes correctly

Signed-off-by: limez <limez@protonmail.com>

* configure ci build type, use RelWithDebInfo for dev build script

Signed-off-by: limez <limez@protonmail.com>

* add clean script

Signed-off-by: limez <limez@protonmail.com>

* fix streamed token decoding / emoji

Signed-off-by: limez <limez@protonmail.com>

* remove deprecated nCtx

Signed-off-by: limez <limez@protonmail.com>

* update typings

Signed-off-by: jacob <jacoobes@sern.dev>

update typings

Signed-off-by: jacob <jacoobes@sern.dev>

* readme, misspellings

Signed-off-by: jacob <jacoobes@sern.dev>

* cuda/backend logic changes + name napi methods like their js counterparts

Signed-off-by: limez <limez@protonmail.com>

* convert llmodel example into a test, separate test suite that can run in ci

Signed-off-by: limez <limez@protonmail.com>

* update examples / naming

Signed-off-by: limez <limez@protonmail.com>

* update deps, remove the need for binding.ci.gyp, make the node-gyp-build fallback easier to test

Signed-off-by: limez <limez@protonmail.com>

* make sure the assert-backend-sources.js script is published, but not the others

Signed-off-by: limez <limez@protonmail.com>

* build correctly on windows (regression on node-gyp-build)

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* codespell

Signed-off-by: limez <limez@protonmail.com>

* make sure dlhandle.cpp gets linked correctly

Signed-off-by: limez <limez@protonmail.com>

* add include for check_cxx_compiler_flag call during aarch64 builds

Signed-off-by: limez <limez@protonmail.com>

* x86 > arm64 cross compilation of runtimes and bindings

Signed-off-by: limez <limez@protonmail.com>

* default to cpu instead of kompute on arm64

Signed-off-by: limez <limez@protonmail.com>

* formatting, more minimal example

Signed-off-by: limez <limez@protonmail.com>

---------

Signed-off-by: limez <limez@protonmail.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: jacob <jacoobes@sern.dev>
Andreas Obersteiner 2024-06-03 18:12:55 +02:00 committed by GitHub
parent f001897a1a
commit a602f7fde7
30 changed files with 1112 additions and 873 deletions

View File

@ -570,7 +570,7 @@ jobs:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5 sudo apt-get install -y cmake build-essential g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run: - run:
name: Build Libraries name: Build Libraries
command: | command: |
@ -578,14 +578,19 @@ jobs:
cd gpt4all-backend cd gpt4all-backend
mkdir -p runtimes/build mkdir -p runtimes/build
cd runtimes/build cd runtimes/build
cmake ../.. cmake ../.. -DCMAKE_BUILD_TYPE=Release
cmake --build . --parallel --config Release cmake --build . --parallel
mkdir ../linux-x64 mkdir ../linux-x64
cp -L *.so ../linux-x64 # otherwise persist_to_workspace seems to mess symlinks cp -L *.so ../linux-x64 # otherwise persist_to_workspace seems to mess symlinks
cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
cmake --build . --parallel
mkdir ../linux-arm64
cp -L *.so ../linux-arm64
- persist_to_workspace: - persist_to_workspace:
root: gpt4all-backend root: gpt4all-backend
paths: paths:
- runtimes/linux-x64/*.so - runtimes/linux-x64/*.so
- runtimes/linux-arm64/*.so
build-bindings-backend-macos: build-bindings-backend-macos:
macos: macos:
@ -896,6 +901,11 @@ jobs:
- checkout - checkout
- attach_workspace: - attach_workspace:
at: /tmp/gpt4all-backend at: /tmp/gpt4all-backend
- run:
name: Install dependencies
command: |
sudo apt-get update
sudo apt-get install -y g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu
- node/install: - node/install:
install-yarn: true install-yarn: true
node-version: "18.16" node-version: "18.16"
@ -908,18 +918,24 @@ jobs:
- run: - run:
command: | command: |
cd gpt4all-bindings/typescript cd gpt4all-bindings/typescript
yarn prebuildify -t 18.16.0 --napi yarn build:prebuilds
- run: - run:
command: | command: |
mkdir -p gpt4all-backend/prebuilds/linux-x64 mkdir -p gpt4all-backend/prebuilds/linux-x64
mkdir -p gpt4all-backend/runtimes/linux-x64 mkdir -p gpt4all-backend/runtimes/linux-x64
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so gpt4all-backend/runtimes/linux-x64 cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so gpt4all-backend/runtimes/linux-x64
cp gpt4all-bindings/typescript/prebuilds/linux-x64/*.node gpt4all-backend/prebuilds/linux-x64 cp gpt4all-bindings/typescript/prebuilds/linux-x64/*.node gpt4all-backend/prebuilds/linux-x64
mkdir -p gpt4all-backend/prebuilds/linux-arm64
mkdir -p gpt4all-backend/runtimes/linux-arm64
cp /tmp/gpt4all-backend/runtimes/linux-arm64/*-*.so gpt4all-backend/runtimes/linux-arm64
cp gpt4all-bindings/typescript/prebuilds/linux-arm64/*.node gpt4all-backend/prebuilds/linux-arm64
- persist_to_workspace: - persist_to_workspace:
root: gpt4all-backend root: gpt4all-backend
paths: paths:
- prebuilds/linux-x64/*.node - prebuilds/linux-x64/*.node
- runtimes/linux-x64/*-*.so - runtimes/linux-x64/*-*.so
- prebuilds/linux-arm64/*.node
- runtimes/linux-arm64/*-*.so
build-nodejs-macos: build-nodejs-macos:
macos: macos:
xcode: "14.0.0" xcode: "14.0.0"
@ -1030,12 +1046,10 @@ jobs:
cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/ cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
# Fallback build if user is not on above prebuilds # copy the backend source we depend on to make fallback builds work
mv -f binding.ci.gyp binding.gyp mkdir backend
mkdir gpt4all-backend
cd ../../gpt4all-backend cd ../../gpt4all-backend
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/ mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/backend/
# Test install # Test install
- node/install-packages: - node/install-packages:
@ -1045,7 +1059,7 @@ jobs:
- run: - run:
command: | command: |
cd gpt4all-bindings/typescript cd gpt4all-bindings/typescript
yarn run test yarn run test:ci
- run: - run:
command: | command: |
cd gpt4all-bindings/typescript cd gpt4all-bindings/typescript

View File

@ -79,6 +79,7 @@ if (LLMODEL_ROCM)
endif() endif()
set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_VERBOSE_MAKEFILE ON)
include(CheckCXXCompilerFlag)
# Go through each build variant # Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS) foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)

View File

@ -0,0 +1,11 @@
# Toolchain to crosscompile runtimes for arm64 on jammy x86_64
# You may have to `sudo apt-get install g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu`
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12)
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12)
# Supported backends
set(LLMODEL_CUDA off)
set(LLMODEL_KOMPUTE off)

View File

@ -8,4 +8,5 @@ prebuilds/
!.yarn/sdks !.yarn/sdks
!.yarn/versions !.yarn/versions
runtimes/ runtimes/
backend/
compile_flags.txt compile_flags.txt

View File

@ -1,4 +1,5 @@
test/ test/
spec/ spec/
scripts/ scripts/*
!scripts/assert-backend-sources.js
build build

View File

@ -188,6 +188,8 @@ model.dispose();
* python 3 * python 3
* On Windows and Linux, building GPT4All requires the complete Vulkan SDK. You may download it from here: https://vulkan.lunarg.com/sdk/home * On Windows and Linux, building GPT4All requires the complete Vulkan SDK. You may download it from here: https://vulkan.lunarg.com/sdk/home
* macOS users do not need Vulkan, as GPT4All will use Metal instead. * macOS users do not need Vulkan, as GPT4All will use Metal instead.
* CUDA Toolkit >= 11.4 (you can bypass this by adding a custom flag to the build step)
- Windows: compiling with CUDA may fail if the Visual Studio IDE is not installed.
### Build (from source) ### Build (from source)
@ -196,23 +198,29 @@ git clone https://github.com/nomic-ai/gpt4all.git
cd gpt4all-bindings/typescript cd gpt4all-bindings/typescript
``` ```
* The below shell commands assume the current working directory is `typescript`. The llama.cpp git submodule for gpt4all may be absent or outdated. Make sure to run
* To Build and Rebuild:
```sh
node scripts/prebuild.js
```
* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
```sh ```sh
git submodule update --init --recursive git submodule update --init --recursive
``` ```
The below shell commands assume the current working directory is `typescript`.
Using yarn
```sh ```sh
yarn build:backend yarn install
yarn build
``` ```
This will build platform-dependent dynamic libraries, and will be located in runtimes/(platform)/native
Using npm
```sh
npm install
npm run build
```
The `build:runtimes` script will create runtime libraries for your platform in `runtimes` and `build:prebuilds` will create the bindings in `prebuilds`. `build` is a shortcut for both.
### Test ### Test
@ -259,7 +267,7 @@ yarn test
This package has been stabilizing over the course of development, and breaking changes may happen until the API stabilizes. Here's the todo list: This package has been stabilizing over the course of development, and breaking changes may happen until the API stabilizes. Here's the todo list:
* \[ ] Purely offline. Per the gui, which can be run completely offline, the bindings should be as well. * \[x] [Purely offline](#Offline-usage). Per the gui, which can be run completely offline, the bindings should be as well.
* \[ ] NPM bundle size reduction via optionalDependencies strategy (need help) * \[ ] NPM bundle size reduction via optionalDependencies strategy (need help)
* Should include prebuilds to avoid painful node-gyp errors * Should include prebuilds to avoid painful node-gyp errors
* \[x] createChatSession ( the python equivalent to create\_chat\_session ) * \[x] createChatSession ( the python equivalent to create\_chat\_session )
@ -276,7 +284,7 @@ This package has been stabilizing over time development, and breaking changes ma
This repository serves as the new bindings for nodejs users. This repository serves as the new bindings for nodejs users.
- If you were a user of [these bindings](https://github.com/nomic-ai/gpt4all-ts), they are outdated. - If you were a user of [these bindings](https://github.com/nomic-ai/gpt4all-ts), they are outdated.
- Version 4 includes the following breaking changes - Version 4 includes the following breaking changes
* `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a float32array. * `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a Float32Array.
* Removed deprecated types `ModelType` and `ModelFile` * Removed deprecated types `ModelType` and `ModelFile`
* Removed deprecated initiation of model by string path only * Removed deprecated initiation of model by string path only
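To illustrate the `createEmbedding` change, here is a minimal sketch of the new return shape. The model file name is a placeholder, the import path follows the spec examples in this repo, and the `embeddings` / `n_prompt_tokens` fields mirror the `Embed` binding further down in this diff.

```js
import { loadModel, createEmbedding } from "../src/gpt4all.js";

// Placeholder model file; any embedding-capable gguf model works here.
const embedder = await loadModel("<embedding-model>.gguf", { type: "embedding" });

// v4: an EmbeddingResult object instead of a bare Float32Array.
const single = createEmbedding(embedder, "Hello world");
console.log(single.n_prompt_tokens, single.embeddings.length);

// Passing several texts yields one embedding per text (see the Embed binding).
const batch = createEmbedding(embedder, ["first text", "second text"]);
console.log(batch.embeddings.length); // 2

embedder.dispose();
```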

View File

@ -1,62 +0,0 @@
{
"targets": [
{
"target_name": "gpt4all", # gpt4all-ts will cause compile error
"include_dirs": [
"<!@(node -p \"require('node-addon-api').include\")",
"gpt4all-backend",
],
"sources": [
# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
#"../../gpt4all-backend/llama.cpp/ggml.c",
#"../../gpt4all-backend/llama.cpp/llama.cpp",
# "../../gpt4all-backend/utils.cpp",
"gpt4all-backend/llmodel_c.cpp",
"gpt4all-backend/llmodel.cpp",
"prompt.cc",
"index.cc",
],
"conditions": [
['OS=="mac"', {
'xcode_settings': {
'GCC_ENABLE_CPP_EXCEPTIONS': 'YES'
},
'defines': [
'LIB_FILE_EXT=".dylib"',
'NAPI_CPP_EXCEPTIONS',
],
'cflags_cc': [
"-fexceptions"
]
}],
['OS=="win"', {
'defines': [
'LIB_FILE_EXT=".dll"',
'NAPI_CPP_EXCEPTIONS',
],
"msvs_settings": {
"VCCLCompilerTool": {
"AdditionalOptions": [
"/std:c++20",
"/EHsc",
],
},
},
}],
['OS=="linux"', {
'defines': [
'LIB_FILE_EXT=".so"',
'NAPI_CPP_EXCEPTIONS',
],
'cflags_cc!': [
'-fno-rtti',
],
'cflags_cc': [
'-std=c++2a',
'-fexceptions'
]
}]
]
}]
}

View File

@ -1,19 +1,15 @@
{ {
"targets": [ "targets": [
{ {
"target_name": "gpt4all", # gpt4all-ts will cause compile error "target_name": "gpt4all",
"include_dirs": [ "include_dirs": [
"<!@(node -p \"require('node-addon-api').include\")", "<!@(node -p \"require('node-addon-api').include\")",
"../../gpt4all-backend", "backend",
], ],
"sources": [ "sources": [
# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to "backend/llmodel_c.cpp",
#"../../gpt4all-backend/llama.cpp/examples/common.cpp", "backend/llmodel.cpp",
#"../../gpt4all-backend/llama.cpp/ggml.c", "backend/dlhandle.cpp",
#"../../gpt4all-backend/llama.cpp/llama.cpp",
# "../../gpt4all-backend/utils.cpp",
"../../gpt4all-backend/llmodel_c.cpp",
"../../gpt4all-backend/llmodel.cpp",
"prompt.cc", "prompt.cc",
"index.cc", "index.cc",
], ],

View File

@ -3,23 +3,24 @@
Napi::Function NodeModelWrapper::GetClass(Napi::Env env) Napi::Function NodeModelWrapper::GetClass(Napi::Env env)
{ {
Napi::Function self = DefineClass(env, "LLModel", Napi::Function self = DefineClass(
{InstanceMethod("type", &NodeModelWrapper::GetType), env, "LLModel",
InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded), {InstanceMethod("load", &NodeModelWrapper::Load),
InstanceMethod("name", &NodeModelWrapper::GetName), InstanceMethod("initGpu", &NodeModelWrapper::InitGpu),
InstanceMethod("stateSize", &NodeModelWrapper::StateSize), InstanceMethod("infer", &NodeModelWrapper::Infer),
InstanceMethod("infer", &NodeModelWrapper::Infer), InstanceMethod("embed", &NodeModelWrapper::Embed),
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount), InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding), InstanceMethod("getType", &NodeModelWrapper::GetType),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount), InstanceMethod("getName", &NodeModelWrapper::GetName),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath), InstanceMethod("getStateSize", &NodeModelWrapper::GetStateSize),
InstanceMethod("initGpuByString", &NodeModelWrapper::InitGpuByString), InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice), InstanceMethod("getThreadCount", &NodeModelWrapper::GetThreadCount),
InstanceMethod("listGpu", &NodeModelWrapper::GetGpuDevices), InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
InstanceMethod("memoryNeeded", &NodeModelWrapper::GetRequiredMemory), InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
InstanceMethod("dispose", &NodeModelWrapper::Dispose)}); InstanceMethod("getGpuDevices", &NodeModelWrapper::GetGpuDevices),
InstanceMethod("getRequiredMemory", &NodeModelWrapper::GetRequiredMemory),
InstanceMethod("dispose", &NodeModelWrapper::Dispose)});
// Keep a static reference to the constructor // Keep a static reference to the constructor
//
Napi::FunctionReference *constructor = new Napi::FunctionReference(); Napi::FunctionReference *constructor = new Napi::FunctionReference();
*constructor = Napi::Persistent(self); *constructor = Napi::Persistent(self);
env.SetInstanceData(constructor); env.SetInstanceData(constructor);
@ -29,13 +30,13 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo &info)
{ {
auto env = info.Env(); auto env = info.Env();
return Napi::Number::New( return Napi::Number::New(
env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers))); env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers)));
} }
Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
{ {
auto env = info.Env(); auto env = info.Env();
int num_devices = 0; int num_devices = 0;
auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers); auto mem_size = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
llmodel_gpu_device *all_devices = llmodel_available_gpu_devices(mem_size, &num_devices); llmodel_gpu_device *all_devices = llmodel_available_gpu_devices(mem_size, &num_devices);
if (all_devices == nullptr) if (all_devices == nullptr)
{ {
@ -63,6 +64,7 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
js_gpu_device["heapSize"] = static_cast<uint32_t>(gpu_device.heapSize); js_gpu_device["heapSize"] = static_cast<uint32_t>(gpu_device.heapSize);
js_gpu_device["name"] = gpu_device.name; js_gpu_device["name"] = gpu_device.name;
js_gpu_device["vendor"] = gpu_device.vendor; js_gpu_device["vendor"] = gpu_device.vendor;
js_gpu_device["backend"] = gpu_device.backend;
js_array[i] = js_gpu_device; js_array[i] = js_gpu_device;
} }
@ -71,35 +73,13 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo &info)
{ {
if (type.empty()) if (model_type.empty())
{ {
return info.Env().Undefined(); return info.Env().Undefined();
} }
return Napi::String::New(info.Env(), type); return Napi::String::New(info.Env(), model_type);
} }
Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo &info)
{
auto env = info.Env();
size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());
std::string gpu_device_identifier = info[1].As<Napi::String>();
size_t converted_value;
if (memory_required <= std::numeric_limits<size_t>::max())
{
converted_value = static_cast<size_t>(memory_required);
}
else
{
Napi::Error::New(env, "invalid number for memory size. Exceeded bounds for memory.")
.ThrowAsJavaScriptException();
return env.Undefined();
}
auto result = llmodel_gpu_init_gpu_device_by_string(GetInference(), converted_value, gpu_device_identifier.c_str());
return Napi::Boolean::New(env, result);
}
Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo &info)
{ {
return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference())); return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference()));
@ -110,82 +90,61 @@ NodeModelWrapper::NodeModelWrapper(const Napi::CallbackInfo &info) : Napi::Objec
auto env = info.Env(); auto env = info.Env();
auto config_object = info[0].As<Napi::Object>(); auto config_object = info[0].As<Napi::Object>();
// sets the directory where models (gguf files) are to be searched // sets the directories where runtime libs are to be searched
llmodel_set_implementation_search_path( llmodel_set_implementation_search_path(config_object.Has("librariesPath")
config_object.Has("library_path") ? config_object.Get("library_path").As<Napi::String>().Utf8Value().c_str() ? config_object.Get("librariesPath").As<Napi::String>().Utf8Value().c_str()
: "."); : ".");
std::string model_name = config_object.Get("model_name").As<Napi::String>(); model_file = config_object.Get("modelFile").As<Napi::String>().Utf8Value();
fs::path model_path = config_object.Get("model_path").As<Napi::String>().Utf8Value(); model_name = model_file.substr(model_file.find_last_of("/\\") + 1);
std::string full_weight_path = (model_path / fs::path(model_name)).string(); backend = config_object.Get("backend").As<Napi::String>().Utf8Value();
n_ctx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
n_gpu_layers = config_object.Get("nGpuLayers").As<Napi::Number>().Int32Value();
name = model_name.empty() ? model_path.filename().string() : model_name; const char *err;
full_model_path = full_weight_path; inference_ = llmodel_model_create2(model_file.c_str(), backend.c_str(), &err);
nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();
const char *e;
inference_ = llmodel_model_create2(full_weight_path.c_str(), "auto", &e);
if (!inference_) if (!inference_)
{ {
Napi::Error::New(env, e).ThrowAsJavaScriptException(); Napi::Error::New(env, err).ThrowAsJavaScriptException();
return; return;
} }
if (GetInference() == nullptr) if (GetInference() == nullptr)
{ {
std::cerr << "Tried searching libraries in \"" << llmodel_get_implementation_search_path() << "\"" << std::endl; std::cerr << "Tried searching libraries in \"" << llmodel_get_implementation_search_path() << "\"" << std::endl;
std::cerr << "Tried searching for model weight in \"" << full_weight_path << "\"" << std::endl; std::cerr << "Tried using model weights in \"" << model_file << "\"" << std::endl;
std::cerr << "Do you have runtime libraries installed?" << std::endl; std::cerr << "Do you have runtime libraries installed?" << std::endl;
Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException(); Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
return; return;
} }
std::string device = config_object.Get("device").As<Napi::String>();
if (device != "cpu")
{
size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
if (!success)
{
// https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
// Haven't implemented this but it is still open to contribution
std::cout << "WARNING: Failed to init GPU\n";
}
}
auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
if (!success)
{
Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
return;
}
// optional // optional
if (config_object.Has("model_type")) if (config_object.Has("modelType"))
{ {
type = config_object.Get("model_type").As<Napi::String>(); model_type = config_object.Get("modelType").As<Napi::String>();
} }
}; };
// NodeModelWrapper::~NodeModelWrapper() { Napi::Value NodeModelWrapper::Load(const Napi::CallbackInfo &info)
// if(GetInference() != nullptr) { {
// std::cout << "Debug: deleting model\n"; auto env = info.Env();
// llmodel_model_destroy(inference_); auto success = llmodel_loadModel(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
// std::cout << (inference_ == nullptr); return Napi::Boolean::New(env, success);
// } }
// }
// void NodeModelWrapper::Finalize(Napi::Env env) { Napi::Value NodeModelWrapper::InitGpu(const Napi::CallbackInfo &info)
// if(inference_ != nullptr) { {
// std::cout << "Debug: deleting model\n"; auto env = info.Env();
// auto device = info[0].As<Napi::String>().Utf8Value();
// } size_t mem_required = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
// } auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem_required, device.c_str());
return Napi::Boolean::New(env, success);
}
Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo &info)
{ {
return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference())); return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference()));
} }
Napi::Value NodeModelWrapper::StateSize(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::GetStateSize(const Napi::CallbackInfo &info)
{ {
// Implement the binding for the stateSize method // Implement the binding for the stateSize method
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference()))); return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
@ -220,7 +179,7 @@ Napi::Array ChunkedFloatPtr(float *embedding_ptr, int embedding_size, int text_l
return result; return result;
} }
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::Embed(const Napi::CallbackInfo &info)
{ {
auto env = info.Env(); auto env = info.Env();
@ -256,7 +215,7 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
str_ptrs.push_back(text_arr[i].c_str()); str_ptrs.push_back(text_arr[i].c_str());
str_ptrs.push_back(nullptr); str_ptrs.push_back(nullptr);
const char *_err = nullptr; const char *_err = nullptr;
float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size, float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size,
prefix.IsUndefined() ? nullptr : prefix.As<Napi::String>().Utf8Value().c_str(), prefix.IsUndefined() ? nullptr : prefix.As<Napi::String>().Utf8Value().c_str(),
dimensionality, &token_count, do_mean, atlas, nullptr, &_err); dimensionality, &token_count, do_mean, atlas, nullptr, &_err);
if (!embeds) if (!embeds)
@ -271,9 +230,12 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
llmodel_free_embedding(embeds); llmodel_free_embedding(embeds);
auto res = Napi::Object::New(env); auto res = Napi::Object::New(env);
res.Set("n_prompt_tokens", token_count); res.Set("n_prompt_tokens", token_count);
if(is_single_text) { if (is_single_text)
{
res.Set("embeddings", embedmat.Get(static_cast<uint32_t>(0))); res.Set("embeddings", embedmat.Get(static_cast<uint32_t>(0)));
} else { }
else
{
res.Set("embeddings", embedmat); res.Set("embeddings", embedmat);
} }
@ -308,7 +270,7 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
llmodel_prompt_context promptContext = {.logits = nullptr, llmodel_prompt_context promptContext = {.logits = nullptr,
.tokens = nullptr, .tokens = nullptr,
.n_past = 0, .n_past = 0,
.n_ctx = nCtx, .n_ctx = n_ctx,
.n_predict = 4096, .n_predict = 4096,
.top_k = 40, .top_k = 40,
.top_p = 0.9f, .top_p = 0.9f,
@ -323,6 +285,12 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
auto inputObject = info[1].As<Napi::Object>(); auto inputObject = info[1].As<Napi::Object>();
if (!inputObject.Has("promptTemplate"))
{
Napi::Error::New(info.Env(), "Missing Prompt Template").ThrowAsJavaScriptException();
return info.Env().Undefined();
}
if (inputObject.Has("logits") || inputObject.Has("tokens")) if (inputObject.Has("logits") || inputObject.Has("tokens"))
{ {
Napi::Error::New(info.Env(), "Invalid input: 'logits' or 'tokens' properties are not allowed") Napi::Error::New(info.Env(), "Invalid input: 'logits' or 'tokens' properties are not allowed")
@ -425,9 +393,9 @@ void NodeModelWrapper::SetThreadCount(const Napi::CallbackInfo &info)
Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo &info)
{ {
return Napi::String::New(info.Env(), name); return Napi::String::New(info.Env(), model_name);
} }
Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo &info) Napi::Value NodeModelWrapper::GetThreadCount(const Napi::CallbackInfo &info)
{ {
return Napi::Number::New(info.Env(), llmodel_threadCount(GetInference())); return Napi::Number::New(info.Env(), llmodel_threadCount(GetInference()));
} }
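The renamed NAPI methods above line up one-to-one with the JS names and split construction, GPU initialization, and weight loading into explicit steps. A rough sketch of that flow from JS, assuming a placeholder model path and the kompute backend; in practice `loadModel()` performs these calls for you (see the src/gpt4all.js changes below).

```js
import { LLModel, DEFAULT_LIBRARIES_DIRECTORY } from "../src/gpt4all.js";

// Construction no longer loads the weights; it only creates the native wrapper.
const llmodel = new LLModel({
    modelFile: "/path/to/model.gguf", // placeholder path
    librariesPath: DEFAULT_LIBRARIES_DIRECTORY,
    backend: "kompute",
    nCtx: 2048,
    nGpuLayers: 100,
});

// GPU initialization and weight loading are now separate, explicit calls.
if (!llmodel.initGpu("gpu")) {
    const names = llmodel.getGpuDevices().map((d) => d.name).join(", ");
    console.warn(`GPU init failed; available devices: ${names}`);
}
if (!llmodel.load()) {
    throw new Error("Failed to load model weights");
}

console.log(llmodel.getName(), llmodel.getStateSize(), llmodel.getThreadCount());
```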

View File

@ -16,30 +16,28 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>
public: public:
NodeModelWrapper(const Napi::CallbackInfo &); NodeModelWrapper(const Napi::CallbackInfo &);
// virtual ~NodeModelWrapper(); Napi::Value Load(const Napi::CallbackInfo &info);
Napi::Value GetType(const Napi::CallbackInfo &info); Napi::Value InitGpu(const Napi::CallbackInfo &info);
Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
Napi::Value StateSize(const Napi::CallbackInfo &info);
// void Finalize(Napi::Env env) override;
/** /**
* Prompting the model. This entails spawning a new thread and adding the response tokens * Prompting the model. This entails spawning a new thread and adding the response tokens
* into a thread local string variable. * into a thread local string variable.
*/ */
Napi::Value Infer(const Napi::CallbackInfo &info); Napi::Value Infer(const Napi::CallbackInfo &info);
void SetThreadCount(const Napi::CallbackInfo &info); Napi::Value Embed(const Napi::CallbackInfo &info);
void Dispose(const Napi::CallbackInfo &info); Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
Napi::Value GetType(const Napi::CallbackInfo &info);
Napi::Value GetName(const Napi::CallbackInfo &info); Napi::Value GetName(const Napi::CallbackInfo &info);
Napi::Value ThreadCount(const Napi::CallbackInfo &info); Napi::Value GetStateSize(const Napi::CallbackInfo &info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo &info); void SetThreadCount(const Napi::CallbackInfo &info);
Napi::Value HasGpuDevice(const Napi::CallbackInfo &info); Napi::Value GetThreadCount(const Napi::CallbackInfo &info);
Napi::Value ListGpus(const Napi::CallbackInfo &info);
Napi::Value InitGpuByString(const Napi::CallbackInfo &info);
Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
/* /*
* The path that is used to search for the dynamic libraries * The path that is used to search for the dynamic libraries
*/ */
Napi::Value GetLibraryPath(const Napi::CallbackInfo &info); Napi::Value GetLibraryPath(const Napi::CallbackInfo &info);
Napi::Value HasGpuDevice(const Napi::CallbackInfo &info);
Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
void Dispose(const Napi::CallbackInfo &info);
/** /**
* Creates the LLModel class * Creates the LLModel class
*/ */
@ -54,10 +52,10 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>
std::mutex inference_mutex; std::mutex inference_mutex;
std::string type; std::string model_type;
// corresponds to LLModel::name() in typescript std::string model_name;
std::string name; std::string model_file;
int nCtx{}; std::string backend;
int nGpuLayers{}; int n_ctx{};
std::string full_model_path; int n_gpu_layers{};
}; };

View File

@ -5,32 +5,38 @@
"main": "src/gpt4all.js", "main": "src/gpt4all.js",
"repository": "nomic-ai/gpt4all", "repository": "nomic-ai/gpt4all",
"scripts": { "scripts": {
"install": "node-gyp-build", "install": "node ./scripts/assert-backend-sources.js && node-gyp-build",
"test:ci": "jest test/ci.test.js",
"test": "jest", "test": "jest",
"build:backend": "node scripts/build.js", "clean": "rimraf build runtimes prebuilds backend",
"build": "node-gyp-build", "prebuild": "npm run clean",
"build": "npm run build:runtimes && npm run build:prebuilds",
"build:runtimes": "node scripts/build.js",
"build:prebuilds": "node scripts/assert-backend-sources.js && node scripts/prebuild.js",
"docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md" "docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md"
}, },
"files": [ "files": [
"binding.gyp",
"src/**/*", "src/**/*",
"runtimes/**/*", "runtimes/**/*",
"binding.gyp",
"prebuilds/**/*", "prebuilds/**/*",
"backend/**/*",
"scripts/assert-backend-sources.js",
"*.h", "*.h",
"*.cc", "*.cc"
"gpt4all-backend/**/*"
], ],
"dependencies": { "dependencies": {
"md5-file": "^5.0.0", "md5-file": "^5.0.0",
"node-addon-api": "^6.1.0", "node-addon-api": "^8.0.0",
"node-gyp-build": "^4.6.0" "node-gyp-build": "~4.8.0"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^20.1.5", "@types/node": "^20.12.12",
"documentation": "^14.0.2", "documentation": "^14.0.2",
"jest": "^29.5.0", "jest": "^29.7.0",
"prebuildify": "^5.0.1", "prebuildify": "^6.0.1",
"prettier": "^2.8.8" "prettier": "^3.2.5",
"rimraf": "^5.0.7"
}, },
"optionalDependencies": { "optionalDependencies": {
"node-gyp": "9.x.x" "node-gyp": "9.x.x"

View File

@ -131,7 +131,8 @@ bool PromptWorker::ResponseCallback(int32_t token_id, const std::string token)
// Transform native data into JS data, passing it to the provided // Transform native data into JS data, passing it to the provided
// `jsCallback` -- the TSFN's JavaScript function. // `jsCallback` -- the TSFN's JavaScript function.
auto token_id = Napi::Number::New(env, value->tokenId); auto token_id = Napi::Number::New(env, value->tokenId);
auto token = Napi::String::New(env, value->token); auto token = Napi::Uint8Array::New(env, value->token.size());
memcpy(token.Data(), value->token.data(), value->token.size());
auto jsResult = jsCallback.Call({token_id, token}).ToBoolean(); auto jsResult = jsCallback.Call({token_id, token}).ToBoolean();
promise.set_value(jsResult); promise.set_value(jsResult);
} }
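Handing the callback raw token bytes instead of a pre-decoded string is what fixes streamed emoji output: a multi-byte UTF-8 sequence can be split across two tokens, so decoding has to be done with a streaming decoder on the JS side. A minimal sketch of the idea, independent of the bindings' actual buffering logic:

```js
// Decode streamed token bytes with a streaming TextDecoder so that a multi-byte
// character split across two tokens still comes out intact.
const decoder = new TextDecoder("utf-8");
let text = "";
// Two chunks that together encode 😀 (U+1F600 = F0 9F 98 80).
for (const bytes of [new Uint8Array([0xf0, 0x9f]), new Uint8Array([0x98, 0x80])]) {
    text += decoder.decode(bytes, { stream: true }); // incomplete sequences are buffered
}
console.log(text); // 😀
```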

View File

@ -0,0 +1,47 @@
const fs = require("fs");
const path = require("path");
// Copies the shared llmodel sources from gpt4all-backend into the backend folder.
// These are dependencies of the bindings and will be required in case node-gyp-build
// cannot find a prebuild. This script is used in the package install hook and will
// be executed BOTH when `yarn install` is run in the root folder AND when the package
// is installed as a dependency in another project.
const backendDeps = [
"llmodel.h",
"llmodel.cpp",
"llmodel_c.cpp",
"llmodel_c.h",
"sysinfo.h",
"dlhandle.h",
"dlhandle.cpp",
];
const sourcePath = path.resolve(__dirname, "../../../gpt4all-backend");
const destPath = path.resolve(__dirname, "../backend");
// Silently ignore if the backend sources are not available.
// When the package is installed as a dependency, gpt4all-backend will not be present.
if (fs.existsSync(sourcePath)) {
if (!fs.existsSync(destPath)) {
fs.mkdirSync(destPath);
}
for (const file of backendDeps) {
const sourceFile = path.join(sourcePath, file);
const destFile = path.join(destPath, file);
if (fs.existsSync(sourceFile)) {
console.info(`Copying ${sourceFile} to ${destFile}`);
fs.copyFileSync(sourceFile, destFile); // overwrite
} else {
throw new Error(`File ${sourceFile} does not exist`);
}
}
}
// assert that the backend sources are present
for (const file of backendDeps) {
const destFile = path.join(destPath, file);
if (!fs.existsSync(destFile)) {
throw new Error(`File ${destFile} does not exist`);
}
}

View File

@ -1,12 +1,42 @@
#!/bin/sh #!/bin/sh
# Build script for Unix-like systems (Linux, macOS).
# Script assumes the current working directory is the bindings project root.
SYSNAME=$(uname -s) SYSNAME=$(uname -s)
PLATFORM=$(uname -m)
# Allows overriding target sysname and platform via args
# If not provided, the current system's sysname and platform will be used
while [ $# -gt 0 ]; do
case "$1" in
--sysname=*)
SYSNAME="${1#*=}"
shift
;;
--platform=*)
PLATFORM="${1#*=}"
shift
;;
*)
echo "Unknown argument: $1" >&2
exit 1
;;
esac
done
if [ "$SYSNAME" = "Linux" ]; then if [ "$SYSNAME" = "Linux" ]; then
BASE_DIR="runtimes/linux-x64" if [ "$PLATFORM" = "x86_64" ]; then
BASE_DIR="runtimes/linux-x64"
elif [ "$PLATFORM" = "aarch64" ]; then
BASE_DIR="runtimes/linux-arm64"
else
echo "Unsupported platform: $PLATFORM" >&2
exit 1
fi
LIB_EXT="so" LIB_EXT="so"
elif [ "$SYSNAME" = "Darwin" ]; then elif [ "$SYSNAME" = "Darwin" ]; then
BASE_DIR="runtimes/osx" BASE_DIR="runtimes/darwin"
LIB_EXT="dylib" LIB_EXT="dylib"
elif [ -n "$SYSNAME" ]; then elif [ -n "$SYSNAME" ]; then
echo "Unsupported system: $SYSNAME" >&2 echo "Unsupported system: $SYSNAME" >&2
@ -22,8 +52,24 @@ BUILD_DIR="$BASE_DIR/build"
rm -rf "$BASE_DIR" rm -rf "$BASE_DIR"
mkdir -p "$NATIVE_DIR" "$BUILD_DIR" mkdir -p "$NATIVE_DIR" "$BUILD_DIR"
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" && if [ "$PLATFORM" = "x86_64" ]; then
cmake --build "$BUILD_DIR" -j --config Release && { echo "Building for x86_64"
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
fi
if [ "$PLATFORM" = "aarch64" ]; then
if [ "$(uname -m)" != "aarch64" ]; then
echo "Cross-compiling for aarch64"
cmake -S ../../gpt4all-backend \
-B "$BUILD_DIR" \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
else
cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
fi
fi
cmake --build "$BUILD_DIR" --parallel && {
cp "$BUILD_DIR"/libgptj*.$LIB_EXT "$NATIVE_DIR"/ cp "$BUILD_DIR"/libgptj*.$LIB_EXT "$NATIVE_DIR"/
cp "$BUILD_DIR"/libllama*.$LIB_EXT "$NATIVE_DIR"/ cp "$BUILD_DIR"/libllama*.$LIB_EXT "$NATIVE_DIR"/
} }

View File

@ -1,22 +1,21 @@
const prebuildify = require("prebuildify"); const prebuildify = require("prebuildify");
async function createPrebuilds(combinations) { async function createPrebuilds(configs) {
for (const { platform, arch } of combinations) { for (const config of configs) {
const opts = { const opts = {
platform,
arch,
napi: true, napi: true,
targets: ["18.16.0"] targets: ["18.16.0"],
...config,
}; };
try { try {
await createPrebuild(opts); await createPrebuild(opts);
console.log( console.log(
`Build succeeded for platform ${opts.platform} and architecture ${opts.arch}` `Build succeeded for platform ${opts.platform} and architecture ${opts.arch}`,
); );
} catch (err) { } catch (err) {
console.error( console.error(
`Error building for platform ${opts.platform} and architecture ${opts.arch}:`, `Error building for platform ${opts.platform} and architecture ${opts.arch}:`,
err err,
); );
} }
} }
@ -24,6 +23,17 @@ async function createPrebuilds(combinations) {
function createPrebuild(opts) { function createPrebuild(opts) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
// if this prebuild is cross-compiling for arm64 on a non-arm64 machine,
// set the CXX and CC environment variables to the cross-compilers
if (
opts.arch === "arm64" &&
process.arch !== "arm64" &&
process.platform === "linux"
) {
process.env.CXX = "aarch64-linux-gnu-g++-12";
process.env.CC = "aarch64-linux-gnu-gcc-12";
}
prebuildify(opts, (err) => { prebuildify(opts, (err) => {
if (err) { if (err) {
reject(err); reject(err);
@ -35,22 +45,18 @@ function createPrebuild(opts) {
} }
let prebuildConfigs; let prebuildConfigs;
if(process.platform === 'win32') { if (process.platform === "win32") {
prebuildConfigs = [ prebuildConfigs = [{ platform: "win32", arch: "x64" }];
{ platform: "win32", arch: "x64" } } else if (process.platform === "linux") {
];
} else if(process.platform === 'linux') {
//Unsure if darwin works, need mac tester!
prebuildConfigs = [
{ platform: "linux", arch: "x64" },
//{ platform: "linux", arch: "arm64" },
//{ platform: "linux", arch: "armv7" },
]
} else if(process.platform === 'darwin') {
prebuildConfigs = [ prebuildConfigs = [
{ platform: "darwin", arch: "x64" }, { platform: "linux", arch: "x64" },
{ platform: "darwin", arch: "arm64" }, { platform: "linux", arch: "arm64" },
] ];
} else if (process.platform === "darwin") {
prebuildConfigs = [
{ platform: "darwin", arch: "x64" },
{ platform: "darwin", arch: "arm64" },
];
} }
createPrebuilds(prebuildConfigs) createPrebuilds(prebuildConfigs)

View File

@ -2,7 +2,6 @@ import { loadModel, createCompletion } from "../src/gpt4all.js";
const model = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", { const model = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
verbose: true, verbose: true,
device: "gpu",
}); });
const chat = await model.createChatSession(); const chat = await model.createChatSession();
@ -12,8 +11,6 @@ await createCompletion(
"Why are bananas rather blue than bread at night sometimes?", "Why are bananas rather blue than bread at night sometimes?",
{ {
verbose: true, verbose: true,
nPredict: 10,
} }
); );
await createCompletion(chat, "Are you sure?", {
verbose: true,
});

View File

@ -7,12 +7,12 @@ const modelOptions = {
verbose: true, verbose: true,
}; };
const model1 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", { const model1 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
...modelOptions, ...modelOptions,
device: "gpu", // only one model can be on gpu device: "gpu", // only one model can be on gpu
}); });
const model2 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions); const model2 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
const model3 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions); const model3 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
const promptContext = { const promptContext = {
verbose: true, verbose: true,
@ -27,3 +27,6 @@ const responses = await Promise.all([
createCompletion(model3, "What is 1 + 3?", promptContext), createCompletion(model3, "What is 1 + 3?", promptContext),
]); ]);
console.log(responses.map((res) => res.choices[0].message)); console.log(responses.map((res) => res.choices[0].message));
model1.dispose();
model2.dispose();
model3.dispose();

View File

@ -1,61 +0,0 @@
import {
LLModel,
createCompletion,
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,
loadModel,
} from "../src/gpt4all.js";
const model = await loadModel("mistral-7b-openorca.gguf2.Q4_0.gguf", {
verbose: true,
device: "gpu",
});
const ll = model.llm;
try {
class Extended extends LLModel {}
} catch (e) {
console.log("Extending from native class gone wrong " + e);
}
console.log("state size " + ll.stateSize());
console.log("thread count " + ll.threadCount());
ll.setThreadCount(5);
console.log("thread count " + ll.threadCount());
ll.setThreadCount(4);
console.log("thread count " + ll.threadCount());
console.log("name " + ll.name());
console.log("type: " + ll.type());
console.log("Default directory for models", DEFAULT_DIRECTORY);
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
console.log("Has GPU", ll.hasGpuDevice());
console.log("gpu devices", ll.listGpu());
console.log("Required Mem in bytes", ll.memoryNeeded());
// to ingest a custom system prompt without using a chat session.
await createCompletion(
model,
"<|im_start|>system\nYou are an advanced mathematician.\n<|im_end|>\n",
{
promptTemplate: "%1",
nPredict: 0,
special: true,
}
);
const completion1 = await createCompletion(model, "What is 1 + 1?", {
verbose: true,
});
console.log(`🤖 > ${completion1.choices[0].message.content}`);
//Very specific:
// tested on Ubuntu 22.0, Linux Mint, if I set nPast to 100, the app hangs.
const completion2 = await createCompletion(model, "And if we add two?", {
verbose: true,
});
console.log(`🤖 > ${completion2.choices[0].message.content}`);
//CALLING DISPOSE WILL INVALID THE NATIVE MODEL. USE THIS TO CLEANUP
model.dispose();
console.log("model disposed, exiting...");

View File

@ -1,7 +1,6 @@
import { promises as fs } from "node:fs";
import { loadModel, createCompletion } from "../src/gpt4all.js"; import { loadModel, createCompletion } from "../src/gpt4all.js";
const model = await loadModel("Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf", { const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
verbose: true, verbose: true,
device: "gpu", device: "gpu",
}); });
@ -12,14 +11,15 @@ const res = await createCompletion(
{ {
onPromptToken: (tokenId) => { onPromptToken: (tokenId) => {
console.debug("onPromptToken", { tokenId }); console.debug("onPromptToken", { tokenId });
// throwing an error will cancel // errors within the callback will cancel ingestion, inference will still run
throw new Error("This is an error"); throw new Error("This is an error");
// const foo = thisMethodDoesNotExist(); // const foo = thisMethodDoesNotExist();
// returning false will cancel as well // returning false will cancel as well
// return false; // return false;
}, },
onResponseToken: (tokenId, token) => { onResponseTokens: ({ tokenIds, text }) => {
console.debug("onResponseToken", { tokenId, token }); // console.debug("onResponseToken", { tokenIds, text });
process.stdout.write(text);
// same applies here // same applies here
}, },
} }

View File

@ -0,0 +1,37 @@
import {
loadModel,
createCompletion,
createCompletionStream,
createCompletionGenerator,
} from "../src/gpt4all.js";
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
device: "cpu",
});
const prompt = "Tell a short story but only use emojis. Three sentences max.";
const result = await createCompletion(model, prompt, {
onResponseToken: (tokens) => {
console.debug(tokens)
},
});
console.debug(result.choices[0].message);
process.stdout.write("### Stream:");
const stream = createCompletionStream(model, prompt);
stream.tokens.on("data", (data) => {
process.stdout.write(data);
});
await stream.result;
process.stdout.write("\n");
process.stdout.write("### Generator:");
const gen = createCompletionGenerator(model, prompt);
for await (const chunk of gen) {
process.stdout.write(chunk);
}
model.dispose();

View File

@ -38,8 +38,8 @@ process.stdout.write("\n");
process.stdout.write("### Callback:"); process.stdout.write("### Callback:");
await createCompletion(model, "Why not just callbacks?", { await createCompletion(model, "Why not just callbacks?", {
onResponseToken: (tokenId, token) => { onResponseTokens: ({ text }) => {
process.stdout.write(token); process.stdout.write(text);
}, },
}); });
process.stdout.write("\n"); process.stdout.write("\n");

View File

@ -25,7 +25,7 @@ class ChatSession {
const { messages, systemPrompt, ...sessionDefaultPromptContext } = const { messages, systemPrompt, ...sessionDefaultPromptContext } =
chatSessionOpts; chatSessionOpts;
this.model = model; this.model = model;
this.modelName = model.llm.name(); this.modelName = model.llm.getName();
this.messages = messages ?? []; this.messages = messages ?? [];
this.systemPrompt = systemPrompt ?? model.config.systemPrompt; this.systemPrompt = systemPrompt ?? model.config.systemPrompt;
this.initialized = false; this.initialized = false;

View File

@ -5,10 +5,27 @@ interface LLModelOptions {
/** /**
* Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user. * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
*/ */
type?: string; modelType?: string;
model_name: string; /**
model_path: string; * Absolute path to the model file.
library_path?: string; */
modelFile: string;
/**
* Path to the llmodel implementation shared objects. This can be a single path or a list of paths separated by ';' delimiter.
*/
librariesPath?: string;
/**
* A string representing the implementation to use. One of 'auto', 'cpu', 'metal', 'kompute', or 'cuda'.
*/
backend: string;
/**
* The maximum context window size of this model.
*/
nCtx: number;
/**
* Number of GPU layers to use (Vulkan)
*/
nGpuLayers: number;
} }
interface ModelConfig { interface ModelConfig {
@ -263,10 +280,10 @@ interface LLModelInferenceResult {
interface LLModelInferenceOptions extends Partial<LLModelPromptContext> { interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
/** Callback for response tokens, called for each generated token. /** Callback for response tokens, called for each generated token.
* @param {number} tokenId The token id. * @param {number} tokenId The token id.
* @param {string} token The token. * @param {Uint8Array} bytes The token bytes.
* @returns {boolean | undefined} Whether to continue generating tokens. * @returns {boolean | undefined} Whether to continue generating tokens.
* */ * */
onResponseToken?: (tokenId: number, token: string) => boolean | void; onResponseToken?: (tokenId: number, bytes: Uint8Array) => boolean | void;
/** Callback for prompt tokens, called for each input token in the prompt. /** Callback for prompt tokens, called for each input token in the prompt.
* @param {number} tokenId The token id. * @param {number} tokenId The token id.
* @returns {boolean | undefined} Whether to continue ingesting the prompt. * @returns {boolean | undefined} Whether to continue ingesting the prompt.
@ -281,30 +298,42 @@ interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
declare class LLModel { declare class LLModel {
/** /**
* Initialize a new LLModel. * Initialize a new LLModel.
* @param {string} path Absolute path to the model file. * @param {LLModelOptions} options LLModel options.
* @throws {Error} If the model file does not exist. * @throws {Error} If the model can't be loaded or necessary runtimes are not found.
*/ */
constructor(options: LLModelOptions); constructor(options: LLModelOptions);
/**
* Loads the LLModel.
* @return {boolean} true if the model was loaded successfully, false otherwise.
*/
load(): boolean;
/**
* Initiate a GPU by a string identifier. See LoadModelOptions.device for more information
* @param {string} device 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
* @return {boolean} true if the GPU was initialized successfully, false otherwise.
*/
initGpu(device: string): boolean;
/** undefined or user supplied */ /** undefined or user supplied */
type(): string | undefined; getType(): string | undefined;
/** The name of the model. */ /** The name of the model. */
name(): string; getName(): string;
/** /**
* Get the size of the internal state of the model. * Get the size of the internal state of the model.
* NOTE: This state data is specific to the type of model you have created. * NOTE: This state data is specific to the type of model you have created.
* @return the size in bytes of the internal state of the model * @return the size in bytes of the internal state of the model
*/ */
stateSize(): number; getStateSize(): number;
/** /**
* Get the number of threads used for model inference. * Get the number of threads used for model inference.
* The default is the number of physical cores your computer has. * The default is the number of physical cores your computer has.
* @returns The number of threads used for model inference. * @returns The number of threads used for model inference.
*/ */
threadCount(): number; getThreadCount(): number;
/** /**
* Set the number of threads used for model inference. * Set the number of threads used for model inference.
@ -375,14 +404,6 @@ declare class LLModel {
*/ */
getLibraryPath(): string; getLibraryPath(): string;
/**
* Initiate a GPU by a string identifier.
* @param {number} memory_required Should be in the range size_t or will throw
* @param {string} device_name 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
* read LoadModelOptions.device for more information
*/
initGpuByString(memory_required: number, device_name: string): boolean;
/** /**
* From C documentation * From C documentation
* @returns True if a GPU device is successfully initialized, false otherwise. * @returns True if a GPU device is successfully initialized, false otherwise.
@ -391,11 +412,10 @@ declare class LLModel {
/** /**
* GPUs that are usable for this LLModel * GPUs that are usable for this LLModel
* @param {number} nCtx Maximum size of context window * @throws if gpu device list is not available
* @throws if hasGpuDevice returns false (i think) * @returns an array of GpuDevice objects
* @returns
*/ */
listGpu(nCtx: number): GpuDevice[]; getGpuDevices(): GpuDevice[];
/** /**
* delete and cleanup the native model * delete and cleanup the native model
@ -414,6 +434,7 @@ interface GpuDevice {
heapSize: number; heapSize: number;
name: string; name: string;
vendor: string; vendor: string;
backend: string;
} }
/** /**
@ -443,13 +464,15 @@ interface LoadModelOptions {
/** /**
* The processing unit on which the model will run. It can be set to * The processing unit on which the model will run. It can be set to
* - "cpu": Model will run on the central processing unit. * - "cpu": Model will run on the central processing unit.
* - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor. * - "kompute": Model will run using the kompute (vulkan) gpu backend
* - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor. * - "cuda": Model will run using the cuda gpu backend
* - "gpu": Use Metal on ARM64 macOS, otherwise the same as "kompute"
* - "amd", "nvidia": Use the best GPU provided by the Kompute backend from this vendor.
* - "gpu name": Model will run on the GPU that matches the name if it's available. * - "gpu name": Model will run on the GPU that matches the name if it's available.
* Note: If a GPU device lacks sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All * Note: If a GPU device lacks sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All
* instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the * instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the
* model. * model.
* @default "cpu" * @default Metal on ARM64 macOS, "cpu" otherwise.
*/ */
device?: string; device?: string;
/** /**
@ -458,10 +481,16 @@ interface LoadModelOptions {
*/ */
nCtx?: number; nCtx?: number;
/** /**
* Number of gpu layers needed * Number of GPU layers to use (Vulkan)
* @default 100 * @default 100
* @alias ngl
*/ */
nGpuLayers?: number;
ngl?: number; ngl?: number;
/**
* Number of CPU threads used by GPT4All. If not set, the number of threads is determined automatically.
*/
nThreads?: number;
} }
interface InferenceModelOptions extends LoadModelOptions { interface InferenceModelOptions extends LoadModelOptions {
@ -507,15 +536,33 @@ interface CompletionProvider {
): Promise<InferenceResult>; ): Promise<InferenceResult>;
} }
interface CompletionTokens {
/** The token ids. */
tokenIds: number[];
/** The token text. May be an empty string. */
text: string;
}
/** /**
* Options for creating a completion. * Options for creating a completion.
*/ */
interface CompletionOptions extends LLModelInferenceOptions { interface CompletionOptions extends Partial<LLModelPromptContext> {
/** /**
* Indicates if verbose logging is enabled. * Indicates if verbose logging is enabled.
* @default false * @default false
*/ */
verbose?: boolean; verbose?: boolean;
/** Called every time new tokens can be decoded to text.
* @param {CompletionTokens} tokens The token ids and decoded text.
* @returns {boolean | undefined} Whether to continue generating tokens.
* */
onResponseTokens?: (tokens: CompletionTokens) => boolean | void;
/** Callback for prompt tokens, called for each input token in the prompt.
* @param {number} tokenId The token id.
* @returns {boolean | undefined} Whether to continue ingesting the prompt.
* */
onPromptToken?: (tokenId: number) => boolean | void;
} }
/** /**
@ -639,13 +686,6 @@ interface LLModelPromptContext {
*/ */
promptTemplate?: string; promptTemplate?: string;
/** The context window size. Do not use, it has no effect. See loadModel options.
* THIS IS DEPRECATED!!!
* Use loadModel's nCtx option instead.
* @default 2048
*/
nCtx: number;
/** The top-k logits to sample from. /** The top-k logits to sample from.
* Top-K sampling selects the next token only from the top K most likely tokens predicted by the model. * Top-K sampling selects the next token only from the top K most likely tokens predicted by the model.
* It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit * It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit
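The `CompletionTokens` shape and `onResponseTokens` replace the old per-token string callback: the bindings now report token ids together with whatever text could be decoded so far, which may be empty while a multi-byte character is still incomplete. A short usage sketch; the model name is borrowed from the spec examples and the import path follows spec/.

```js
import { loadModel, createCompletion } from "../src/gpt4all.js";

const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });

let written = 0;
await createCompletion(model, "Tell a short story about a robot gardener.", {
    onPromptToken: (tokenId) => {
        // returning false here would cancel prompt ingestion
    },
    onResponseTokens: ({ tokenIds, text }) => {
        process.stdout.write(text); // text may be "" while a character is incomplete
        written += text.length;
        if (written > 500) return false; // stop generating early
    },
});

model.dispose();
```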

View File

@ -37,9 +37,8 @@ async function loadModel(modelName, options = {}) {
type: "inference", type: "inference",
allowDownload: true, allowDownload: true,
verbose: false, verbose: false,
device: "cpu",
nCtx: 2048, nCtx: 2048,
ngl: 100, nGpuLayers: options.ngl ?? 100,
...options, ...options,
}; };
@ -54,27 +53,77 @@ async function loadModel(modelName, options = {}) {
typeof loadOptions.librariesPath === "string", typeof loadOptions.librariesPath === "string",
"Libraries path should be a string" "Libraries path should be a string"
); );
const existingPaths = loadOptions.librariesPath const existingLibPaths = loadOptions.librariesPath
.split(";") .split(";")
.filter(existsSync) .filter(existsSync)
.join(";"); .join(";");
const llmOptions = { const llmOptions = {
model_name: appendBinSuffixIfMissing(modelName), modelFile: modelConfig.path,
model_path: loadOptions.modelPath, librariesPath: existingLibPaths,
library_path: existingPaths,
device: loadOptions.device,
nCtx: loadOptions.nCtx, nCtx: loadOptions.nCtx,
ngl: loadOptions.ngl, nGpuLayers: loadOptions.nGpuLayers,
}; };
let initDevice;
if (process.platform === "darwin") {
if (!loadOptions.device) {
llmOptions.backend = "auto"; // 'auto' is effectively 'metal' because the fallback is currently non-functional
} else if (loadOptions.device === "cpu") {
llmOptions.backend = "cpu";
} else {
if (process.arch !== "arm64" || loadOptions.device !== "gpu") {
throw new Error(
`Unknown device for this platform: ${loadOptions.device}`
);
}
llmOptions.backend = "metal";
}
} else {
// default to kompute; use cpu for arm64 because we currently don't build kompute runtimes for arm64
llmOptions.backend = process.arch === "arm64" ? "cpu" : "kompute";
if (!loadOptions.device || loadOptions.device === "cpu") {
// use the default backend
} else if (
loadOptions.device === "cuda" ||
loadOptions.device === "kompute"
) {
llmOptions.backend = loadOptions.device;
initDevice = "gpu";
} else if (loadOptions.device.startsWith("cuda:")) {
llmOptions.backend = "cuda";
initDevice = loadOptions.device.replace(/^cuda:/, "");
} else {
initDevice = loadOptions.device.replace(/^kompute:/, "");
}
}
if (loadOptions.verbose) { if (loadOptions.verbose) {
console.debug("Creating LLModel:", { console.debug("Creating LLModel:", {
initDevice,
llmOptions, llmOptions,
modelConfig, modelConfig,
}); });
} }
const llmodel = new LLModel(llmOptions); const llmodel = new LLModel(llmOptions);
if (initDevice) {
const gpuInitSuccess = llmodel.initGpu(initDevice);
if (!gpuInitSuccess) {
const availableDevices = llmodel.getGpuDevices();
const deviceNames = availableDevices
.map((device) => device.name)
.join(", ");
console.warn(
`Failed to initialize GPU device "${initDevice}" - Available devices: ${deviceNames}`
);
}
}
llmodel.load();
if (loadOptions.nThreads) {
llmodel.setThreadCount(loadOptions.nThreads);
}
if (loadOptions.type === "embedding") { if (loadOptions.type === "embedding") {
return new EmbeddingModel(llmodel, modelConfig); return new EmbeddingModel(llmodel, modelConfig);
} else if (loadOptions.type === "inference") { } else if (loadOptions.type === "inference") {
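The backend selection above can be summarized as a pure function; this is a hypothetical restatement for clarity, not something the bindings export:

```js
// Mirrors the device -> backend mapping implemented in loadModel above (sketch only).
function resolveBackend(device, platform = process.platform, arch = process.arch) {
    if (platform === "darwin") {
        if (!device) return { backend: "auto" }; // effectively metal
        if (device === "cpu") return { backend: "cpu" };
        if (arch === "arm64" && device === "gpu") return { backend: "metal" };
        throw new Error(`Unknown device for this platform: ${device}`);
    }
    // no kompute runtimes are built for arm64 yet, so fall back to cpu there
    const defaultBackend = arch === "arm64" ? "cpu" : "kompute";
    if (!device || device === "cpu") return { backend: defaultBackend };
    if (device === "cuda" || device === "kompute") return { backend: device, initDevice: "gpu" };
    if (device.startsWith("cuda:")) return { backend: "cuda", initDevice: device.slice("cuda:".length) };
    return { backend: defaultBackend, initDevice: device.replace(/^kompute:/, "") };
}
```

When `initDevice` is set, it is passed to `llmodel.initGpu()` before `llmodel.load()`, as in the hunk above.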
@ -84,7 +133,7 @@ async function loadModel(modelName, options = {}) {
} }
} }
function createEmbedding(model, text, options={}) { function createEmbedding(model, text, options = {}) {
let { let {
dimensionality = undefined, dimensionality = undefined,
longTextMode = "mean", longTextMode = "mean",
@ -138,10 +187,7 @@ async function createCompletion(
...options, ...options,
}; };
const result = await provider.generate( const result = await provider.generate(input, completionOptions);
input,
completionOptions,
);
return { return {
model: provider.modelName, model: provider.modelName,
@ -174,10 +220,10 @@ function createCompletionStream(
const completionPromise = createCompletion(provider, input, { const completionPromise = createCompletion(provider, input, {
...options, ...options,
onResponseToken: (tokenId, token) => { onResponseTokens: (tokens) => {
completionStream.push(token); completionStream.push(tokens.text);
if (options.onResponseToken) { if (options.onResponseTokens) {
return options.onResponseToken(tokenId, token); return options.onResponseTokens(tokens);
} }
}, },
}).then((result) => { }).then((result) => {
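With `onResponseToken` renamed to `onResponseTokens`, streaming still pushes decoded text chunks; a usage sketch, assuming `createCompletionStream` keeps returning an object with a `tokens` readable stream and a `result` promise:

```js
const { loadModel, createCompletionStream } = require("gpt4all");

async function main() {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });

    const stream = createCompletionStream(model, "Tell me a short joke.");
    stream.tokens.on("data", (chunk) => process.stdout.write(chunk));
    await stream.result;

    model.dispose();
}

main();
```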

View File

@ -11,7 +11,7 @@ class InferenceModel {
constructor(llmodel, config) { constructor(llmodel, config) {
this.llm = llmodel; this.llm = llmodel;
this.config = config; this.config = config;
this.modelName = this.llm.name(); this.modelName = this.llm.getName();
} }
async createChatSession(options) { async createChatSession(options) {
@ -90,6 +90,25 @@ class InferenceModel {
let tokensGenerated = 0; let tokensGenerated = 0;
const decoder = new TokenDecoder((tokenIds, text) => {
let continueGeneration = true;
tokensGenerated += tokenIds.length;
if (options.onResponseTokens) {
// catch here because if errors bubble through cpp they will lose stacktraces
try {
// don't cancel the generation unless user explicitly returns false
continueGeneration =
options.onResponseTokens({ tokenIds, text }) !== false;
} catch (err) {
console.error("Error in onResponseTokens callback", err);
continueGeneration = false;
}
}
return continueGeneration;
});
const result = await this.llm.infer(prompt, { const result = await this.llm.infer(prompt, {
...promptContext, ...promptContext,
nPast, nPast,
@ -97,7 +116,7 @@ class InferenceModel {
let continueIngestion = true; let continueIngestion = true;
tokensIngested++; tokensIngested++;
if (options.onPromptToken) { if (options.onPromptToken) {
// catch errors because if they go through cpp they will loose stacktraces // catch here because if errors bubble through cpp they will lose stacktraces
try { try {
// don't cancel ingestion unless user explicitly returns false // don't cancel ingestion unless user explicitly returns false
continueIngestion = continueIngestion =
@ -109,20 +128,8 @@ class InferenceModel {
} }
return continueIngestion; return continueIngestion;
}, },
onResponseToken: (tokenId, token) => { onResponseToken: (tokenId, bytes) => {
let continueGeneration = true; return decoder.decode(tokenId, bytes);
tokensGenerated++;
if (options.onResponseToken) {
try {
// don't cancel the generation unless user explicitly returns false
continueGeneration =
options.onResponseToken(tokenId, token) !== false;
} catch (err) {
console.error("Error in onResponseToken callback", err);
continueGeneration = false;
}
}
return continueGeneration;
}, },
}); });
@ -141,6 +148,63 @@ class InferenceModel {
} }
} }
// see https://github.com/nomic-ai/gpt4all/pull/1281
class TokenDecoder {
constructor(callback) {
this.callback = callback;
this.buffer = [];
this.tokenIds = [];
this.buffExpectingContBytes = 0;
this.textDecoder = new TextDecoder();
}
decode(tokenId, bytes) {
const decoded = [];
this.tokenIds.push(tokenId);
for (let i = 0; i < bytes.length; i++) {
const byte = bytes[i];
const bits = byte.toString(2).padStart(8, '0');
const highOnes = bits.split('0')[0];
if (highOnes.length === 1) {
// Continuation byte
this.buffer.push(byte);
this.buffExpectingContBytes -= 1;
} else {
// Beginning of a byte sequence
if (this.buffer.length > 0) {
decoded.push(this._decodeBuffer());
this.buffer = [];
}
this.buffer.push(byte);
this.buffExpectingContBytes = Math.max(0, highOnes.length - 1);
}
if (this.buffExpectingContBytes <= 0) {
// Received the whole sequence or an out-of-place continuation byte
decoded.push(this._decodeBuffer());
this.buffer = [];
this.buffExpectingContBytes = 0;
}
}
if (decoded.length === 0 && this.buffExpectingContBytes > 0) {
// Wait for more continuation bytes
return true;
}
const tokenIds = this.tokenIds;
this.tokenIds = [];
return this.callback(tokenIds, decoded.join(''));
}
_decodeBuffer() {
return this.textDecoder.decode(new Uint8Array(this.buffer));
}
}
class EmbeddingModel { class EmbeddingModel {
llm; llm;
config; config;
@ -160,6 +224,7 @@ class EmbeddingModel {
} }
module.exports = { module.exports = {
TokenDecoder,
InferenceModel, InferenceModel,
EmbeddingModel, EmbeddingModel,
}; };
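The exported `TokenDecoder` buffers raw token bytes until they form a complete UTF-8 sequence, which is what fixes streamed emoji. A small sketch; the require path and token ids are assumptions for illustration:

```js
const { TokenDecoder } = require("gpt4all/src/models.js"); // path is an assumption

const decoder = new TokenDecoder((tokenIds, text) => {
    console.log(tokenIds, JSON.stringify(text));
    return true; // keep generating
});

// "😀" (U+1F600) is F0 9F 98 80 in UTF-8; assume the model split it across two tokens.
decoder.decode(101, new Uint8Array([0xf0, 0x9f])); // incomplete sequence, buffered; returns true
decoder.decode(102, new Uint8Array([0x98, 0x80])); // completes it; callback fires with [101, 102] and "😀"
```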

View File

@ -0,0 +1,73 @@
const { loadModel } = require("../src/gpt4all.js");
// these tests require an internet connection / a real model
const testModel = "Phi-3-mini-4k-instruct.Q4_0.gguf";
describe("llmodel", () => {
let model;
test("load on cpu", async () => {
model = await loadModel(testModel, {
device: "cpu",
});
});
test("getter working", async () => {
const stateSize = model.llm.getStateSize();
expect(stateSize).toBeGreaterThan(0);
const name = model.llm.getName();
expect(name).toBe(testModel);
const type = model.llm.getType();
expect(type).toBeUndefined();
const devices = model.llm.getGpuDevices();
expect(Array.isArray(devices)).toBe(true);
const gpuEnabled = model.llm.hasGpuDevice();
expect(gpuEnabled).toBe(false);
const requiredMem = model.llm.getRequiredMemory();
expect(typeof requiredMem).toBe('number');
const threadCount = model.llm.getThreadCount();
expect(threadCount).toBe(4);
});
test("setting thread count", () => {
model.llm.setThreadCount(5);
expect(model.llm.getThreadCount()).toBe(5);
});
test("cpu inference", async () => {
const res = await model.llm.infer("what is the capital of france?", {
temp: 0,
promptTemplate: model.config.promptTemplate,
nPredict: 10,
onResponseToken: () => {
return true;
},
});
expect(res.text).toMatch(/paris/i);
}, 10000);
test("dispose and load model on gpu", async () => {
model.dispose();
model = await loadModel(testModel, {
device: "gpu",
});
const gpuEnabled = model.llm.hasGpuDevice();
expect(gpuEnabled).toBe(true);
});
test("gpu inference", async () => {
const res = await model.llm.infer("what is the capital of france?", {
temp: 0,
promptTemplate: model.config.promptTemplate,
nPredict: 10,
onResponseToken: () => {
return true;
},
});
expect(res.text).toMatch(/paris/i);
}, 10000);
afterAll(() => {
model.dispose();
});
});
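The suite above requires a real model and network access, unlike the tests in the next file. Outside a test runner, roughly the same smoke check could look like this (sketch, reusing the calls from the suite):

```js
const { loadModel } = require("gpt4all");

async function smokeTest() {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });

    const res = await model.llm.infer("what is the capital of france?", {
        temp: 0,
        promptTemplate: model.config.promptTemplate,
        nPredict: 10,
        onResponseToken: () => true,
    });

    console.log(res.text); // expected to mention Paris
    model.dispose();
}

smokeTest();
```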

View File

@ -2,7 +2,6 @@ const path = require("node:path");
const os = require("node:os"); const os = require("node:os");
const fsp = require("node:fs/promises"); const fsp = require("node:fs/promises");
const { existsSync } = require('node:fs'); const { existsSync } = require('node:fs');
const { LLModel } = require("node-gyp-build")(path.resolve(__dirname, ".."));
const { const {
listModels, listModels,
downloadModel, downloadModel,
@ -13,11 +12,8 @@ const {
DEFAULT_LIBRARIES_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY,
DEFAULT_MODEL_LIST_URL, DEFAULT_MODEL_LIST_URL,
} = require("../src/config.js"); } = require("../src/config.js");
const {
loadModel, // these tests do not require an internet connection or an actual model
createPrompt,
createCompletion,
} = require("../src/gpt4all.js");
describe("config", () => { describe("config", () => {
test("default paths constants are available and correct", () => { test("default paths constants are available and correct", () => {

File diff suppressed because it is too large