Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-06-25)
typescript bindings maintenance (#2363)
* remove outdated comments
* simpler build from source
* update unix build script to create .so runtimes correctly
* configure ci build type, use RelWithDebInfo for dev build script
* add clean script
* fix streamed token decoding / emoji
* remove deprecated nCtx
* update typings
* readme, mspell
* cuda/backend logic changes + name napi methods like their js counterparts
* convert llmodel example into a test, separate test suite that can run in ci
* update examples / naming
* update deps, remove the need for binding.ci.gyp, make node-gyp-build fallback easier testable
* make sure the assert-backend-sources.js script is published, but not the others
* build correctly on windows (regression on node-gyp-build)
* codespell
* make sure dlhandle.cpp gets linked correctly
* add include for check_cxx_compiler_flag call during aarch64 builds
* x86 > arm64 cross compilation of runtimes and bindings
* default to cpu instead of kompute on arm64
* formatting, more minimal example

Signed-off-by: limez <limez@protonmail.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: jacob <jacoobes@sern.dev>
This commit is contained in:
parent f001897a1a
commit a602f7fde7
@@ -570,7 +570,7 @@ jobs:
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 sudo apt-get update
-sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
+sudo apt-get install -y cmake build-essential g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
 - run:
 name: Build Libraries
 command: |
@@ -578,14 +578,19 @@ jobs:
 cd gpt4all-backend
 mkdir -p runtimes/build
 cd runtimes/build
-cmake ../..
-cmake --build . --parallel --config Release
+cmake ../.. -DCMAKE_BUILD_TYPE=Release
+cmake --build . --parallel
 mkdir ../linux-x64
 cp -L *.so ../linux-x64 # otherwise persist_to_workspace seems to mess symlinks
+cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
+cmake --build . --parallel
+mkdir ../linux-arm64
+cp -L *.so ../linux-arm64
 - persist_to_workspace:
 root: gpt4all-backend
 paths:
 - runtimes/linux-x64/*.so
+- runtimes/linux-arm64/*.so

 build-bindings-backend-macos:
 macos:
@@ -896,6 +901,11 @@ jobs:
 - checkout
 - attach_workspace:
 at: /tmp/gpt4all-backend
+- run:
+name: Install dependencies
+command: |
+sudo apt-get update
+sudo apt-get install -y g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu
 - node/install:
 install-yarn: true
 node-version: "18.16"
@@ -908,18 +918,24 @@ jobs:
 - run:
 command: |
 cd gpt4all-bindings/typescript
-yarn prebuildify -t 18.16.0 --napi
+yarn build:prebuilds
 - run:
 command: |
 mkdir -p gpt4all-backend/prebuilds/linux-x64
 mkdir -p gpt4all-backend/runtimes/linux-x64
 cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so gpt4all-backend/runtimes/linux-x64
 cp gpt4all-bindings/typescript/prebuilds/linux-x64/*.node gpt4all-backend/prebuilds/linux-x64
+mkdir -p gpt4all-backend/prebuilds/linux-arm64
+mkdir -p gpt4all-backend/runtimes/linux-arm64
+cp /tmp/gpt4all-backend/runtimes/linux-arm64/*-*.so gpt4all-backend/runtimes/linux-arm64
+cp gpt4all-bindings/typescript/prebuilds/linux-arm64/*.node gpt4all-backend/prebuilds/linux-arm64
 - persist_to_workspace:
 root: gpt4all-backend
 paths:
 - prebuilds/linux-x64/*.node
 - runtimes/linux-x64/*-*.so
+- prebuilds/linux-arm64/*.node
+- runtimes/linux-arm64/*-*.so
 build-nodejs-macos:
 macos:
 xcode: "14.0.0"
@@ -1030,12 +1046,10 @@ jobs:

 cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/

-# Fallback build if user is not on above prebuilds
-mv -f binding.ci.gyp binding.gyp
-
-mkdir gpt4all-backend
+# copy the backend source we depend on to make fallback builds work
+mkdir backend
 cd ../../gpt4all-backend
-mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
+mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/backend/

 # Test install
 - node/install-packages:
@@ -1045,7 +1059,7 @@ jobs:
 - run:
 command: |
 cd gpt4all-bindings/typescript
-yarn run test
+yarn run test:ci
 - run:
 command: |
 cd gpt4all-bindings/typescript
@@ -79,6 +79,7 @@ if (LLMODEL_ROCM)
 endif()

 set(CMAKE_VERBOSE_MAKEFILE ON)
+include(CheckCXXCompilerFlag)

 # Go through each build variant
 foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
gpt4all-backend/toolchains/linux-arm64-toolchain.cmake (new file, 11 lines)
@@ -0,0 +1,11 @@
+# Toolchain to crosscompile runtimes for arm64 on jammy x86_64
+# You may have to `sudo apt-get install g++-12-aarch64-linux-gnu gcc-12-aarch64-linux-gnu`
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-12)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-12)
+
+# Supported backends
+set(LLMODEL_CUDA off)
+set(LLMODEL_KOMPUTE off)
gpt4all-bindings/typescript/.gitignore (vendored, 1 line added)
@@ -8,4 +8,5 @@ prebuilds/
 !.yarn/sdks
 !.yarn/versions
 runtimes/
+backend/
 compile_flags.txt
@@ -1,4 +1,5 @@
 test/
 spec/
-scripts/
+scripts/*
+!scripts/assert-backend-sources.js
 build
@@ -188,6 +188,8 @@ model.dispose();
 * python 3
 * On Windows and Linux, building GPT4All requires the complete Vulkan SDK. You may download it from here: https://vulkan.lunarg.com/sdk/home
 * macOS users do not need Vulkan, as GPT4All will use Metal instead.
+* CUDA Toolkit >= 11.4 (you can bypass this with adding a custom flag to build step)
+- Windows: There is difficulty compiling with cuda if the Visual Studio IDE is NOT present.

 ### Build (from source)

@@ -196,23 +198,29 @@ git clone https://github.com/nomic-ai/gpt4all.git
 cd gpt4all-bindings/typescript
 ```

-* The below shell commands assume the current working directory is `typescript`.
-
-* To Build and Rebuild:
-
-```sh
-node scripts/prebuild.js
-```
-* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
+llama.cpp git submodule for gpt4all can be possibly absent or outdated. Make sure to run

 ```sh
 git submodule update --init --recursive
 ```

+The below shell commands assume the current working directory is `typescript`.
+
+Using yarn
+
 ```sh
-yarn build:backend
+yarn install
+yarn build
 ```
-This will build platform-dependent dynamic libraries, and will be located in runtimes/(platform)/native
+
+Using npm
+
+```sh
+npm install
+npm run build
+```
+
+The `build:runtimes` script will create runtime libraries for your platform in `runtimes` and `build:prebuilds` will create the bindings in `prebuilds`. `build` is a shortcut for both.

 ### Test
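For reference, a minimal end-to-end sketch once the build above succeeds. The model file name matches the one used in the spec files of this change and the import path is the in-repo source; both are illustrative rather than prescribed here:

```js
import { loadModel, createCompletion } from "../src/gpt4all.js";

// Download/load a quantized model; device "cpu" works everywhere.
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
    verbose: true,
    device: "cpu",
});

const completion = await createCompletion(model, "What is 1 + 1?", {
    verbose: true,
});
console.log(completion.choices[0].message.content);

// Release the native model when done.
model.dispose();
```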
@@ -259,7 +267,7 @@ yarn test

 This package has been stabilizing over time development, and breaking changes may happen until the api stabilizes. Here's what's the todo list:

-* \[ ] Purely offline. Per the gui, which can be run completely offline, the bindings should be as well.
+* \[x] [Purely offline](#Offline-usage). Per the gui, which can be run completely offline, the bindings should be as well.
 * \[ ] NPM bundle size reduction via optionalDependencies strategy (need help)
 * Should include prebuilds to avoid painful node-gyp errors
 * \[x] createChatSession ( the python equivalent to create\_chat\_session )
@@ -276,7 +284,7 @@
 This repository serves as the new bindings for nodejs users.
 - If you were a user of [these bindings](https://github.com/nomic-ai/gpt4all-ts), they are outdated.
 - Version 4 includes the follow breaking changes
-* `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a float32array.
+* `createEmbedding` & `EmbeddingModel.embed()` returns an object, `EmbeddingResult`, instead of a Float32Array.
 * Removed deprecated types `ModelType` and `ModelFile`
 * Removed deprecated initiation of model by string path only

@@ -1,62 +0,0 @@
-{
-"targets": [
-{
-"target_name": "gpt4all", # gpt4all-ts will cause compile error
-"include_dirs": [
-"<!@(node -p \"require('node-addon-api').include\")",
-"gpt4all-backend",
-],
-"sources": [
-# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
-#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
-#"../../gpt4all-backend/llama.cpp/ggml.c",
-#"../../gpt4all-backend/llama.cpp/llama.cpp",
-# "../../gpt4all-backend/utils.cpp",
-"gpt4all-backend/llmodel_c.cpp",
-"gpt4all-backend/llmodel.cpp",
-"prompt.cc",
-"index.cc",
-],
-"conditions": [
-['OS=="mac"', {
-'xcode_settings': {
-'GCC_ENABLE_CPP_EXCEPTIONS': 'YES'
-},
-'defines': [
-'LIB_FILE_EXT=".dylib"',
-'NAPI_CPP_EXCEPTIONS',
-],
-'cflags_cc': [
-"-fexceptions"
-]
-}],
-['OS=="win"', {
-'defines': [
-'LIB_FILE_EXT=".dll"',
-'NAPI_CPP_EXCEPTIONS',
-],
-"msvs_settings": {
-"VCCLCompilerTool": {
-"AdditionalOptions": [
-"/std:c++20",
-"/EHsc",
-],
-},
-},
-}],
-['OS=="linux"', {
-'defines': [
-'LIB_FILE_EXT=".so"',
-'NAPI_CPP_EXCEPTIONS',
-],
-'cflags_cc!': [
-'-fno-rtti',
-],
-'cflags_cc': [
-'-std=c++2a',
-'-fexceptions'
-]
-}]
-]
-}]
-}
@@ -1,19 +1,15 @@
 {
 "targets": [
 {
-"target_name": "gpt4all", # gpt4all-ts will cause compile error
+"target_name": "gpt4all",
 "include_dirs": [
 "<!@(node -p \"require('node-addon-api').include\")",
-"../../gpt4all-backend",
+"backend",
 ],
 "sources": [
-# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
-#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
-#"../../gpt4all-backend/llama.cpp/ggml.c",
-#"../../gpt4all-backend/llama.cpp/llama.cpp",
-# "../../gpt4all-backend/utils.cpp",
-"../../gpt4all-backend/llmodel_c.cpp",
-"../../gpt4all-backend/llmodel.cpp",
+"backend/llmodel_c.cpp",
+"backend/llmodel.cpp",
+"backend/dlhandle.cpp",
 "prompt.cc",
 "index.cc",
 ],
@@ -3,23 +3,24 @@

 Napi::Function NodeModelWrapper::GetClass(Napi::Env env)
 {
-Napi::Function self = DefineClass(env, "LLModel",
-{InstanceMethod("type", &NodeModelWrapper::GetType),
-InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
-InstanceMethod("name", &NodeModelWrapper::GetName),
-InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
-InstanceMethod("infer", &NodeModelWrapper::Infer),
-InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
-InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
-InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
-InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
-InstanceMethod("initGpuByString", &NodeModelWrapper::InitGpuByString),
-InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
-InstanceMethod("listGpu", &NodeModelWrapper::GetGpuDevices),
-InstanceMethod("memoryNeeded", &NodeModelWrapper::GetRequiredMemory),
-InstanceMethod("dispose", &NodeModelWrapper::Dispose)});
+Napi::Function self = DefineClass(
+env, "LLModel",
+{InstanceMethod("load", &NodeModelWrapper::Load),
+InstanceMethod("initGpu", &NodeModelWrapper::InitGpu),
+InstanceMethod("infer", &NodeModelWrapper::Infer),
+InstanceMethod("embed", &NodeModelWrapper::Embed),
+InstanceMethod("isModelLoaded", &NodeModelWrapper::IsModelLoaded),
+InstanceMethod("getType", &NodeModelWrapper::GetType),
+InstanceMethod("getName", &NodeModelWrapper::GetName),
+InstanceMethod("getStateSize", &NodeModelWrapper::GetStateSize),
+InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
+InstanceMethod("getThreadCount", &NodeModelWrapper::GetThreadCount),
+InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
+InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
+InstanceMethod("getGpuDevices", &NodeModelWrapper::GetGpuDevices),
+InstanceMethod("getRequiredMemory", &NodeModelWrapper::GetRequiredMemory),
+InstanceMethod("dispose", &NodeModelWrapper::Dispose)});
 // Keep a static reference to the constructor
-//
 Napi::FunctionReference *constructor = new Napi::FunctionReference();
 *constructor = Napi::Persistent(self);
 env.SetInstanceData(constructor);
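For orientation (not part of the diff): the napi methods are now named like their JavaScript counterparts. A rough sketch of inspecting the low-level wrapper after this rename, assuming a model loaded through the high-level `loadModel` API; old names from the removed example script are shown in comments, and the model file name is just the one used in this change's spec files:

```js
import { loadModel } from "../src/gpt4all.js";

const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });
const llm = model.llm; // the native LLModel / NodeModelWrapper instance

console.log("name:", llm.getName());                       // previously llm.name()
console.log("type:", llm.getType());                       // previously llm.type()
console.log("state size:", llm.getStateSize());            // previously llm.stateSize()
console.log("threads:", llm.getThreadCount());             // previously llm.threadCount()
console.log("has GPU device:", llm.hasGpuDevice());
console.log("GPU devices:", llm.getGpuDevices());          // previously llm.listGpu()
console.log("required memory:", llm.getRequiredMemory());  // previously llm.memoryNeeded()

model.dispose();
```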
@@ -29,13 +30,13 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo &info)
 {
 auto env = info.Env();
 return Napi::Number::New(
-env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers)));
+env, static_cast<uint32_t>(llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers)));
 }
 Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
 {
 auto env = info.Env();
 int num_devices = 0;
-auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str(), nCtx, nGpuLayers);
+auto mem_size = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
 llmodel_gpu_device *all_devices = llmodel_available_gpu_devices(mem_size, &num_devices);
 if (all_devices == nullptr)
 {
@@ -63,6 +64,7 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)
 js_gpu_device["heapSize"] = static_cast<uint32_t>(gpu_device.heapSize);
 js_gpu_device["name"] = gpu_device.name;
 js_gpu_device["vendor"] = gpu_device.vendor;
+js_gpu_device["backend"] = gpu_device.backend;

 js_array[i] = js_gpu_device;
 }
@@ -71,35 +73,13 @@ Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo &info)

 Napi::Value NodeModelWrapper::GetType(const Napi::CallbackInfo &info)
 {
-if (type.empty())
+if (model_type.empty())
 {
 return info.Env().Undefined();
 }
-return Napi::String::New(info.Env(), type);
+return Napi::String::New(info.Env(), model_type);
 }

-Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo &info)
-{
-auto env = info.Env();
-size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());
-
-std::string gpu_device_identifier = info[1].As<Napi::String>();
-
-size_t converted_value;
-if (memory_required <= std::numeric_limits<size_t>::max())
-{
-converted_value = static_cast<size_t>(memory_required);
-}
-else
-{
-Napi::Error::New(env, "invalid number for memory size. Exceeded bounds for memory.")
-.ThrowAsJavaScriptException();
-return env.Undefined();
-}
-
-auto result = llmodel_gpu_init_gpu_device_by_string(GetInference(), converted_value, gpu_device_identifier.c_str());
-return Napi::Boolean::New(env, result);
-}
 Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo &info)
 {
 return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference()));
@@ -110,82 +90,61 @@ NodeModelWrapper::NodeModelWrapper(const Napi::CallbackInfo &info) : Napi::Objec
 auto env = info.Env();
 auto config_object = info[0].As<Napi::Object>();

-// sets the directory where models (gguf files) are to be searched
-llmodel_set_implementation_search_path(
-config_object.Has("library_path") ? config_object.Get("library_path").As<Napi::String>().Utf8Value().c_str()
+// sets the directories where runtime libs are to be searched
+llmodel_set_implementation_search_path(config_object.Has("librariesPath")
+? config_object.Get("librariesPath").As<Napi::String>().Utf8Value().c_str()
 : ".");

-std::string model_name = config_object.Get("model_name").As<Napi::String>();
-fs::path model_path = config_object.Get("model_path").As<Napi::String>().Utf8Value();
-std::string full_weight_path = (model_path / fs::path(model_name)).string();
-
-name = model_name.empty() ? model_path.filename().string() : model_name;
-full_model_path = full_weight_path;
-nCtx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
-nGpuLayers = config_object.Get("ngl").As<Napi::Number>().Int32Value();
-
-const char *e;
-inference_ = llmodel_model_create2(full_weight_path.c_str(), "auto", &e);
+model_file = config_object.Get("modelFile").As<Napi::String>().Utf8Value();
+model_name = model_file.substr(model_file.find_last_of("/\\") + 1);
+backend = config_object.Get("backend").As<Napi::String>().Utf8Value();
+n_ctx = config_object.Get("nCtx").As<Napi::Number>().Int32Value();
+n_gpu_layers = config_object.Get("nGpuLayers").As<Napi::Number>().Int32Value();
+
+const char *err;
+inference_ = llmodel_model_create2(model_file.c_str(), backend.c_str(), &err);
 if (!inference_)
 {
-Napi::Error::New(env, e).ThrowAsJavaScriptException();
+Napi::Error::New(env, err).ThrowAsJavaScriptException();
 return;
 }
 if (GetInference() == nullptr)
 {
 std::cerr << "Tried searching libraries in \"" << llmodel_get_implementation_search_path() << "\"" << std::endl;
-std::cerr << "Tried searching for model weight in \"" << full_weight_path << "\"" << std::endl;
+std::cerr << "Tried using model weights in \"" << model_file << "\"" << std::endl;
 std::cerr << "Do you have runtime libraries installed?" << std::endl;
 Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
 return;
 }

-std::string device = config_object.Get("device").As<Napi::String>();
-if (device != "cpu")
-{
-size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
-
-auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
-if (!success)
-{
-// https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
-// Haven't implemented this but it is still open to contribution
-std::cout << "WARNING: Failed to init GPU\n";
-}
-}
-
-auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str(), nCtx, nGpuLayers);
-if (!success)
-{
-Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
-return;
-}
 // optional
-if (config_object.Has("model_type"))
+if (config_object.Has("modelType"))
 {
-type = config_object.Get("model_type").As<Napi::String>();
+model_type = config_object.Get("modelType").As<Napi::String>();
 }
 };

-// NodeModelWrapper::~NodeModelWrapper() {
-// if(GetInference() != nullptr) {
-// std::cout << "Debug: deleting model\n";
-// llmodel_model_destroy(inference_);
-// std::cout << (inference_ == nullptr);
-// }
-// }
-// void NodeModelWrapper::Finalize(Napi::Env env) {
-// if(inference_ != nullptr) {
-// std::cout << "Debug: deleting model\n";
-//
-// }
-// }
+Napi::Value NodeModelWrapper::Load(const Napi::CallbackInfo &info)
+{
+auto env = info.Env();
+auto success = llmodel_loadModel(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
+return Napi::Boolean::New(env, success);
+}
+
+Napi::Value NodeModelWrapper::InitGpu(const Napi::CallbackInfo &info)
+{
+auto env = info.Env();
+auto device = info[0].As<Napi::String>().Utf8Value();
+size_t mem_required = llmodel_required_mem(GetInference(), model_file.c_str(), n_ctx, n_gpu_layers);
+auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem_required, device.c_str());
+return Napi::Boolean::New(env, success);
+}

 Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo &info)
 {
 return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference()));
 }

-Napi::Value NodeModelWrapper::StateSize(const Napi::CallbackInfo &info)
+Napi::Value NodeModelWrapper::GetStateSize(const Napi::CallbackInfo &info)
 {
 // Implement the binding for the stateSize method
 return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
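For context (not part of the diff): weight loading and GPU initialization are now separate, boolean-returning napi calls instead of happening inside the constructor. A rough sketch of the resulting low-level flow, assuming `LLModel` is still exported as in the old example script and using illustrative option values; the high-level `loadModel` wraps all of this for you:

```js
import { LLModel } from "../src/gpt4all.js";

// Option names follow the new LLModelOptions shape from this change;
// the concrete path and numbers below are hypothetical.
const llm = new LLModel({
    modelFile: "/path/to/models/Phi-3-mini-4k-instruct.Q4_0.gguf",
    librariesPath: ".",   // where the runtime .so/.dylib/.dll implementations live
    backend: "auto",      // 'auto' | 'cpu' | 'metal' | 'kompute' | 'cuda'
    nCtx: 2048,
    nGpuLayers: 100,
});

// GPU init is optional and attempted before loading the weights.
if (!llm.initGpu("gpu")) {
    console.warn("GPU init failed; staying on CPU");
}
if (!llm.load()) {
    throw new Error("Failed to load model weights");
}
console.log("loaded:", llm.isModelLoaded());
```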
@@ -220,7 +179,7 @@ Napi::Array ChunkedFloatPtr(float *embedding_ptr, int embedding_size, int text_l
 return result;
 }

-Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
+Napi::Value NodeModelWrapper::Embed(const Napi::CallbackInfo &info)
 {
 auto env = info.Env();

@@ -256,7 +215,7 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
 str_ptrs.push_back(text_arr[i].c_str());
 str_ptrs.push_back(nullptr);
 const char *_err = nullptr;
 float *embeds = llmodel_embed(GetInference(), str_ptrs.data(), &embedding_size,
 prefix.IsUndefined() ? nullptr : prefix.As<Napi::String>().Utf8Value().c_str(),
 dimensionality, &token_count, do_mean, atlas, nullptr, &_err);
 if (!embeds)
@@ -271,9 +230,12 @@ Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo &info)
 llmodel_free_embedding(embeds);
 auto res = Napi::Object::New(env);
 res.Set("n_prompt_tokens", token_count);
-if(is_single_text) {
+if (is_single_text)
+{
 res.Set("embeddings", embedmat.Get(static_cast<uint32_t>(0)));
-} else {
+}
+else
+{
 res.Set("embeddings", embedmat);
 }

@@ -308,7 +270,7 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)
 llmodel_prompt_context promptContext = {.logits = nullptr,
 .tokens = nullptr,
 .n_past = 0,
-.n_ctx = nCtx,
+.n_ctx = n_ctx,
 .n_predict = 4096,
 .top_k = 40,
 .top_p = 0.9f,
@@ -323,6 +285,12 @@ Napi::Value NodeModelWrapper::Infer(const Napi::CallbackInfo &info)

 auto inputObject = info[1].As<Napi::Object>();

+if (!inputObject.Has("promptTemplate"))
+{
+Napi::Error::New(info.Env(), "Missing Prompt Template").ThrowAsJavaScriptException();
+return info.Env().Undefined();
+}
+
 if (inputObject.Has("logits") || inputObject.Has("tokens"))
 {
 Napi::Error::New(info.Env(), "Invalid input: 'logits' or 'tokens' properties are not allowed")
@@ -425,9 +393,9 @@ void NodeModelWrapper::SetThreadCount(const Napi::CallbackInfo &info)

 Napi::Value NodeModelWrapper::GetName(const Napi::CallbackInfo &info)
 {
-return Napi::String::New(info.Env(), name);
+return Napi::String::New(info.Env(), model_name);
 }
-Napi::Value NodeModelWrapper::ThreadCount(const Napi::CallbackInfo &info)
+Napi::Value NodeModelWrapper::GetThreadCount(const Napi::CallbackInfo &info)
 {
 return Napi::Number::New(info.Env(), llmodel_threadCount(GetInference()));
 }
@@ -16,30 +16,28 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>

 public:
 NodeModelWrapper(const Napi::CallbackInfo &);
-// virtual ~NodeModelWrapper();
-Napi::Value GetType(const Napi::CallbackInfo &info);
-Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
-Napi::Value StateSize(const Napi::CallbackInfo &info);
-// void Finalize(Napi::Env env) override;
+Napi::Value Load(const Napi::CallbackInfo &info);
+Napi::Value InitGpu(const Napi::CallbackInfo &info);
 /**
 * Prompting the model. This entails spawning a new thread and adding the response tokens
 * into a thread local string variable.
 */
 Napi::Value Infer(const Napi::CallbackInfo &info);
-void SetThreadCount(const Napi::CallbackInfo &info);
-void Dispose(const Napi::CallbackInfo &info);
+Napi::Value Embed(const Napi::CallbackInfo &info);
+Napi::Value IsModelLoaded(const Napi::CallbackInfo &info);
+Napi::Value GetType(const Napi::CallbackInfo &info);
 Napi::Value GetName(const Napi::CallbackInfo &info);
-Napi::Value ThreadCount(const Napi::CallbackInfo &info);
-Napi::Value GenerateEmbedding(const Napi::CallbackInfo &info);
-Napi::Value HasGpuDevice(const Napi::CallbackInfo &info);
-Napi::Value ListGpus(const Napi::CallbackInfo &info);
-Napi::Value InitGpuByString(const Napi::CallbackInfo &info);
-Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
-Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
+Napi::Value GetStateSize(const Napi::CallbackInfo &info);
+void SetThreadCount(const Napi::CallbackInfo &info);
+Napi::Value GetThreadCount(const Napi::CallbackInfo &info);
 /*
 * The path that is used to search for the dynamic libraries
 */
 Napi::Value GetLibraryPath(const Napi::CallbackInfo &info);
+Napi::Value HasGpuDevice(const Napi::CallbackInfo &info);
+Napi::Value GetGpuDevices(const Napi::CallbackInfo &info);
+Napi::Value GetRequiredMemory(const Napi::CallbackInfo &info);
+void Dispose(const Napi::CallbackInfo &info);
 /**
 * Creates the LLModel class
 */
@@ -54,10 +52,10 @@ class NodeModelWrapper : public Napi::ObjectWrap<NodeModelWrapper>

 std::mutex inference_mutex;

-std::string type;
-// corresponds to LLModel::name() in typescript
-std::string name;
-int nCtx{};
-int nGpuLayers{};
-std::string full_model_path;
+std::string model_type;
+std::string model_name;
+std::string model_file;
+std::string backend;
+int n_ctx{};
+int n_gpu_layers{};
 };
@@ -5,32 +5,38 @@
 "main": "src/gpt4all.js",
 "repository": "nomic-ai/gpt4all",
 "scripts": {
-"install": "node-gyp-build",
+"install": "node ./scripts/assert-backend-sources.js && node-gyp-build",
+"test:ci": "jest test/ci.test.js",
 "test": "jest",
-"build:backend": "node scripts/build.js",
-"build": "node-gyp-build",
+"clean": "rimraf build runtimes prebuilds backend",
+"prebuild": "npm run clean",
+"build": "npm run build:runtimes && npm run build:prebuilds",
+"build:runtimes": "node scripts/build.js",
+"build:prebuilds": "node scripts/assert-backend-sources.js && node scripts/prebuild.js",
 "docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md"
 },
 "files": [
+"binding.gyp",
 "src/**/*",
 "runtimes/**/*",
-"binding.gyp",
 "prebuilds/**/*",
+"backend/**/*",
+"scripts/assert-backend-sources.js",
 "*.h",
-"*.cc",
-"gpt4all-backend/**/*"
+"*.cc"
 ],
 "dependencies": {
 "md5-file": "^5.0.0",
-"node-addon-api": "^6.1.0",
-"node-gyp-build": "^4.6.0"
+"node-addon-api": "^8.0.0",
+"node-gyp-build": "~4.8.0"
 },
 "devDependencies": {
-"@types/node": "^20.1.5",
+"@types/node": "^20.12.12",
 "documentation": "^14.0.2",
-"jest": "^29.5.0",
-"prebuildify": "^5.0.1",
-"prettier": "^2.8.8"
+"jest": "^29.7.0",
+"prebuildify": "^6.0.1",
+"prettier": "^3.2.5",
+"rimraf": "^5.0.7"
 },
 "optionalDependencies": {
 "node-gyp": "9.x.x"
@@ -131,7 +131,8 @@ bool PromptWorker::ResponseCallback(int32_t token_id, const std::string token)
 // Transform native data into JS data, passing it to the provided
 // `jsCallback` -- the TSFN's JavaScript function.
 auto token_id = Napi::Number::New(env, value->tokenId);
-auto token = Napi::String::New(env, value->token);
+auto token = Napi::Uint8Array::New(env, value->token.size());
+memcpy(token.Data(), value->token.data(), value->token.size());
 auto jsResult = jsCallback.Call({token_id, token}).ToBoolean();
 promise.set_value(jsResult);
 }
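The raw token is now handed to JavaScript as a Uint8Array of bytes rather than a pre-converted string, because a single emoji (or any multi-byte UTF-8 character) can be split across tokens and per-token string conversion produces replacement characters. A minimal sketch of how such byte chunks can be decoded on the JS side with a streaming TextDecoder; this is illustrative, not the bindings' exact implementation:

```js
// TextDecoder is available globally in Node 18+.
const decoder = new TextDecoder("utf-8");
let text = "";

// Call this for every chunk of token bytes as it arrives from the native callback.
function onTokenBytes(bytes /* Uint8Array */) {
    // stream: true keeps incomplete multi-byte sequences buffered until the next chunk
    text += decoder.decode(bytes, { stream: true });
}

// After generation finishes, flush whatever is still buffered.
function finish() {
    text += decoder.decode();
    return text;
}
```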
@@ -0,0 +1,47 @@
+const fs = require("fs");
+const path = require("path");
+
+// Copies the shared llmodel sources from gpt4all-backend into the backend folder.
+// These are dependencies of the bindings and will be required in case node-gyp-build
+// cannot find a prebuild. This script is used in the package install hook and will
+// be executed BOTH when `yarn install` is run in the root folder AND when the package
+// is installed as a dependency in another project.
+
+const backendDeps = [
+"llmodel.h",
+"llmodel.cpp",
+"llmodel_c.cpp",
+"llmodel_c.h",
+"sysinfo.h",
+"dlhandle.h",
+"dlhandle.cpp",
+];
+
+const sourcePath = path.resolve(__dirname, "../../../gpt4all-backend");
+const destPath = path.resolve(__dirname, "../backend");
+
+// Silently ignore if the backend sources are not available.
+// When the package is installed as a dependency, gpt4all-backend will not be present.
+if (fs.existsSync(sourcePath)) {
+if (!fs.existsSync(destPath)) {
+fs.mkdirSync(destPath);
+}
+for (const file of backendDeps) {
+const sourceFile = path.join(sourcePath, file);
+const destFile = path.join(destPath, file);
+if (fs.existsSync(sourceFile)) {
+console.info(`Copying ${sourceFile} to ${destFile}`);
+fs.copyFileSync(sourceFile, destFile); // overwrite
+} else {
+throw new Error(`File ${sourceFile} does not exist`);
+}
+}
+}
+
+// assert that the backend sources are present
+for (const file of backendDeps) {
+const destFile = path.join(destPath, file);
+if (!fs.existsSync(destFile)) {
+throw new Error(`File ${destFile} does not exist`);
+}
+}
@@ -1,12 +1,42 @@
 #!/bin/sh
+# Build script for Unix-like systems (Linux, macOS).
+# Script assumes the current working directory is the bindings project root.
+
 SYSNAME=$(uname -s)
+PLATFORM=$(uname -m)
+
+# Allows overriding target sysname and platform via args
+# If not provided, the current system's sysname and platform will be used
+
+while [ $# -gt 0 ]; do
+case "$1" in
+--sysname=*)
+SYSNAME="${1#*=}"
+shift
+;;
+--platform=*)
+PLATFORM="${1#*=}"
+shift
+;;
+*)
+echo "Unknown argument: $1" >&2
+exit 1
+;;
+esac
+done
+
 if [ "$SYSNAME" = "Linux" ]; then
-BASE_DIR="runtimes/linux-x64"
+if [ "$PLATFORM" = "x86_64" ]; then
+BASE_DIR="runtimes/linux-x64"
+elif [ "$PLATFORM" = "aarch64" ]; then
+BASE_DIR="runtimes/linux-arm64"
+else
+echo "Unsupported platform: $PLATFORM" >&2
+exit 1
+fi
 LIB_EXT="so"
 elif [ "$SYSNAME" = "Darwin" ]; then
-BASE_DIR="runtimes/osx"
+BASE_DIR="runtimes/darwin"
 LIB_EXT="dylib"
 elif [ -n "$SYSNAME" ]; then
 echo "Unsupported system: $SYSNAME" >&2
@@ -22,8 +52,24 @@ BUILD_DIR="$BASE_DIR/build"
 rm -rf "$BASE_DIR"
 mkdir -p "$NATIVE_DIR" "$BUILD_DIR"

-cmake -S ../../gpt4all-backend -B "$BUILD_DIR" &&
-cmake --build "$BUILD_DIR" -j --config Release && {
+if [ "$PLATFORM" = "x86_64" ]; then
+echo "Building for x86_64"
+cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+fi
+
+if [ "$PLATFORM" = "aarch64" ]; then
+if [ "$(uname -m)" != "aarch64" ]; then
+echo "Cross-compiling for aarch64"
+cmake -S ../../gpt4all-backend \
+-B "$BUILD_DIR" \
+-DCMAKE_BUILD_TYPE=RelWithDebInfo \
+-DCMAKE_TOOLCHAIN_FILE="./toolchains/linux-arm64-toolchain.cmake"
+else
+cmake -S ../../gpt4all-backend -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+fi
+fi
+
+cmake --build "$BUILD_DIR" --parallel && {
 cp "$BUILD_DIR"/libgptj*.$LIB_EXT "$NATIVE_DIR"/
 cp "$BUILD_DIR"/libllama*.$LIB_EXT "$NATIVE_DIR"/
 }
@@ -1,22 +1,21 @@
 const prebuildify = require("prebuildify");

-async function createPrebuilds(combinations) {
-for (const { platform, arch } of combinations) {
+async function createPrebuilds(configs) {
+for (const config of configs) {
 const opts = {
-platform,
-arch,
 napi: true,
-targets: ["18.16.0"]
+targets: ["18.16.0"],
+...config,
 };
 try {
 await createPrebuild(opts);
 console.log(
-`Build succeeded for platform ${opts.platform} and architecture ${opts.arch}`
+`Build succeeded for platform ${opts.platform} and architecture ${opts.arch}`,
 );
 } catch (err) {
 console.error(
 `Error building for platform ${opts.platform} and architecture ${opts.arch}:`,
-err
+err,
 );
 }
 }
@@ -24,6 +23,17 @@ async function createPrebuilds(combinations) {

 function createPrebuild(opts) {
 return new Promise((resolve, reject) => {
+// if this prebuild is cross-compiling for arm64 on a non-arm64 machine,
+// set the CXX and CC environment variables to the cross-compilers
+if (
+opts.arch === "arm64" &&
+process.arch !== "arm64" &&
+process.platform === "linux"
+) {
+process.env.CXX = "aarch64-linux-gnu-g++-12";
+process.env.CC = "aarch64-linux-gnu-gcc-12";
+}
+
 prebuildify(opts, (err) => {
 if (err) {
 reject(err);
@@ -35,22 +45,18 @@ function createPrebuild(opts) {
 }

 let prebuildConfigs;
-if(process.platform === 'win32') {
-prebuildConfigs = [
-{ platform: "win32", arch: "x64" }
-];
-} else if(process.platform === 'linux') {
-//Unsure if darwin works, need mac tester!
-prebuildConfigs = [
-{ platform: "linux", arch: "x64" },
-//{ platform: "linux", arch: "arm64" },
-//{ platform: "linux", arch: "armv7" },
-]
-} else if(process.platform === 'darwin') {
-prebuildConfigs = [
-{ platform: "darwin", arch: "x64" },
-{ platform: "darwin", arch: "arm64" },
-]
+if (process.platform === "win32") {
+prebuildConfigs = [{ platform: "win32", arch: "x64" }];
+} else if (process.platform === "linux") {
+prebuildConfigs = [
+{ platform: "linux", arch: "x64" },
+{ platform: "linux", arch: "arm64" },
+];
+} else if (process.platform === "darwin") {
+prebuildConfigs = [
+{ platform: "darwin", arch: "x64" },
+{ platform: "darwin", arch: "arm64" },
+];
 }

 createPrebuilds(prebuildConfigs)
@@ -2,7 +2,6 @@ import { loadModel, createCompletion } from "../src/gpt4all.js";

 const model = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
 verbose: true,
-device: "gpu",
 });

 const chat = await model.createChatSession();
@@ -12,8 +11,6 @@ await createCompletion(
 "Why are bananas rather blue than bread at night sometimes?",
 {
 verbose: true,
+nPredict: 10,
 }
 );
-await createCompletion(chat, "Are you sure?", {
-verbose: true,
-});
@@ -7,12 +7,12 @@ const modelOptions = {
 verbose: true,
 };

-const model1 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
+const model1 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
 ...modelOptions,
 device: "gpu", // only one model can be on gpu
 });
-const model2 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
-const model3 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
+const model2 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
+const model3 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);

 const promptContext = {
 verbose: true,
@@ -27,3 +27,6 @@ const responses = await Promise.all([
 createCompletion(model3, "What is 1 + 3?", promptContext),
 ]);
 console.log(responses.map((res) => res.choices[0].message));
+model1.dispose();
+model2.dispose();
+model3.dispose();
@@ -1,61 +0,0 @@
-import {
-LLModel,
-createCompletion,
-DEFAULT_DIRECTORY,
-DEFAULT_LIBRARIES_DIRECTORY,
-loadModel,
-} from "../src/gpt4all.js";
-
-const model = await loadModel("mistral-7b-openorca.gguf2.Q4_0.gguf", {
-verbose: true,
-device: "gpu",
-});
-const ll = model.llm;
-
-try {
-class Extended extends LLModel {}
-} catch (e) {
-console.log("Extending from native class gone wrong " + e);
-}
-
-console.log("state size " + ll.stateSize());
-
-console.log("thread count " + ll.threadCount());
-ll.setThreadCount(5);
-
-console.log("thread count " + ll.threadCount());
-ll.setThreadCount(4);
-console.log("thread count " + ll.threadCount());
-console.log("name " + ll.name());
-console.log("type: " + ll.type());
-console.log("Default directory for models", DEFAULT_DIRECTORY);
-console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
-console.log("Has GPU", ll.hasGpuDevice());
-console.log("gpu devices", ll.listGpu());
-console.log("Required Mem in bytes", ll.memoryNeeded());
-
-// to ingest a custom system prompt without using a chat session.
-await createCompletion(
-model,
-"<|im_start|>system\nYou are an advanced mathematician.\n<|im_end|>\n",
-{
-promptTemplate: "%1",
-nPredict: 0,
-special: true,
-}
-);
-const completion1 = await createCompletion(model, "What is 1 + 1?", {
-verbose: true,
-});
-console.log(`🤖 > ${completion1.choices[0].message.content}`);
-//Very specific:
-// tested on Ubuntu 22.0, Linux Mint, if I set nPast to 100, the app hangs.
-const completion2 = await createCompletion(model, "And if we add two?", {
-verbose: true,
-});
-console.log(`🤖 > ${completion2.choices[0].message.content}`);
-
-//CALLING DISPOSE WILL INVALID THE NATIVE MODEL. USE THIS TO CLEANUP
-model.dispose();
-
-console.log("model disposed, exiting...");
@@ -1,7 +1,6 @@
-import { promises as fs } from "node:fs";
 import { loadModel, createCompletion } from "../src/gpt4all.js";

-const model = await loadModel("Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf", {
+const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
 verbose: true,
 device: "gpu",
 });
@@ -12,14 +11,15 @@ const res = await createCompletion(
 {
 onPromptToken: (tokenId) => {
 console.debug("onPromptToken", { tokenId });
-// throwing an error will cancel
+// errors within the callback will cancel ingestion, inference will still run
 throw new Error("This is an error");
 // const foo = thisMethodDoesNotExist();
 // returning false will cancel as well
 // return false;
 },
-onResponseToken: (tokenId, token) => {
-console.debug("onResponseToken", { tokenId, token });
+onResponseTokens: ({ tokenIds, text }) => {
+// console.debug("onResponseToken", { tokenIds, text });
+process.stdout.write(text);
 // same applies here
 },
 }
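The per-token callback on the high-level completion API is now `onResponseTokens` and receives an object with already-decoded text, as the hunk above shows. A compact usage sketch of the two callbacks; the prompt string is illustrative:

```js
// assuming `model` was obtained via loadModel(...)
await createCompletion(model, "Summarize why bananas look blue at night.", {
    onPromptToken: (tokenId) => {
        // returning false (or throwing) cancels prompt ingestion
        return true;
    },
    onResponseTokens: ({ tokenIds, text }) => {
        // `text` is already decoded from the raw token bytes
        process.stdout.write(text);
    },
});
```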
gpt4all-bindings/typescript/spec/token-streaming-emoji.mjs (new file, 37 lines)
@@ -0,0 +1,37 @@
+import {
+loadModel,
+createCompletion,
+createCompletionStream,
+createCompletionGenerator,
+} from "../src/gpt4all.js";
+
+const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
+device: "cpu",
+});
+
+const prompt = "Tell a short story but only use emojis. Three sentences max.";
+
+const result = await createCompletion(model, prompt, {
+onResponseToken: (tokens) => {
+console.debug(tokens)
+},
+});
+
+console.debug(result.choices[0].message);
+
+process.stdout.write("### Stream:");
+const stream = createCompletionStream(model, prompt);
+stream.tokens.on("data", (data) => {
+process.stdout.write(data);
+});
+await stream.result;
+process.stdout.write("\n");
+
+process.stdout.write("### Generator:");
+const gen = createCompletionGenerator(model, prompt);
+for await (const chunk of gen) {
+process.stdout.write(chunk);
+}
+
+model.dispose();
@@ -38,8 +38,8 @@ process.stdout.write("\n");

 process.stdout.write("### Callback:");
 await createCompletion(model, "Why not just callbacks?", {
-onResponseToken: (tokenId, token) => {
-process.stdout.write(token);
+onResponseTokens: ({ text }) => {
+process.stdout.write(text);
 },
 });
 process.stdout.write("\n");
@@ -25,7 +25,7 @@ class ChatSession {
 const { messages, systemPrompt, ...sessionDefaultPromptContext } =
 chatSessionOpts;
 this.model = model;
-this.modelName = model.llm.name();
+this.modelName = model.llm.getName();
 this.messages = messages ?? [];
 this.systemPrompt = systemPrompt ?? model.config.systemPrompt;
 this.initialized = false;

gpt4all-bindings/typescript/src/gpt4all.d.ts (vendored, 112 lines changed)
@@ -5,10 +5,27 @@ interface LLModelOptions {
     /**
      * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
      */
-    type?: string;
-    model_name: string;
-    model_path: string;
-    library_path?: string;
+    modelType?: string;
+    /**
+     * Absolute path to the model file.
+     */
+    modelFile: string;
+    /**
+     * Path to the llmodel implementation shared objects. This can be a single path or a list of paths separated by ';' delimiter.
+     */
+    librariesPath?: string;
+    /**
+     * A string representing the implementation to use. One of 'auto', 'cpu', 'metal', 'kompute', or 'cuda'.
+     */
+    backend: string;
+    /**
+     * The maximum window size of this model.
+     */
+    nCtx: number;
+    /**
+     * Number of GPU layers to use (Vulkan)
+     */
+    nGpuLayers: number;
 }
 
 interface ModelConfig {
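
For orientation, an options object matching the reshaped LLModelOptions interface might look like the sketch below; every value is a placeholder, not something taken from this commit:

    const llmOptions = {
        modelType: "llama",                        // descriptive only
        modelFile: "/absolute/path/to/model.gguf", // placeholder path
        librariesPath: "/path/one;/path/two",      // ';'-separated runtime locations
        backend: "kompute",                        // 'auto' | 'cpu' | 'metal' | 'kompute' | 'cuda'
        nCtx: 2048,
        nGpuLayers: 100,
    };
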
@@ -263,10 +280,10 @@ interface LLModelInferenceResult {
 interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
     /** Callback for response tokens, called for each generated token.
      * @param {number} tokenId The token id.
-     * @param {string} token The token.
+     * @param {Uint8Array} bytes The token bytes.
      * @returns {boolean | undefined} Whether to continue generating tokens.
      * */
-    onResponseToken?: (tokenId: number, token: string) => boolean | void;
+    onResponseToken?: (tokenId: number, bytes: Uint8Array) => boolean | void;
     /** Callback for prompt tokens, called for each input token in the prompt.
      * @param {number} tokenId The token id.
      * @returns {boolean | undefined} Whether to continue ingesting the prompt.
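
Because the low-level callback now hands over raw token bytes instead of a decoded string, a consumer that wants text has to decode incrementally. A sketch using the standard TextDecoder in streaming mode (the bindings' own TokenDecoder further down does the equivalent job internally):

    const utf8 = new TextDecoder("utf-8");
    let text = "";

    const onResponseToken = (tokenId, bytes) => {
        // { stream: true } keeps incomplete multi-byte sequences (e.g. emoji) buffered
        text += utf8.decode(bytes, { stream: true });
        return true; // keep generating
    };
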
@@ -281,30 +298,42 @@ interface LLModelInferenceOptions extends Partial<LLModelPromptContext> {
 declare class LLModel {
     /**
      * Initialize a new LLModel.
-     * @param {string} path Absolute path to the model file.
-     * @throws {Error} If the model file does not exist.
+     * @param {LLModelOptions} options LLModel options.
+     * @throws {Error} If the model can't be loaded or necessary runtimes are not found.
      */
     constructor(options: LLModelOptions);
+    /**
+     * Loads the LLModel.
+     * @return {boolean} true if the model was loaded successfully, false otherwise.
+     */
+    load(): boolean;
+
+    /**
+     * Initiate a GPU by a string identifier. See LoadModelOptions.device for more information
+     * @param {string} device 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
+     * @return {boolean} true if the GPU was initialized successfully, false otherwise.
+     */
+    initGpu(device: string): boolean;
+
     /** undefined or user supplied */
-    type(): string | undefined;
+    getType(): string | undefined;
 
     /** The name of the model. */
-    name(): string;
+    getName(): string;
 
     /**
      * Get the size of the internal state of the model.
      * NOTE: This state data is specific to the type of model you have created.
      * @return the size in bytes of the internal state of the model
      */
-    stateSize(): number;
+    getStateSize(): number;
 
     /**
      * Get the number of threads used for model inference.
      * The default is the number of physical cores your computer has.
      * @returns The number of threads used for model inference.
      */
-    threadCount(): number;
+    getThreadCount(): number;
 
     /**
      * Set the number of threads used for model inference.
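
A sketch of the low-level lifecycle with the renamed methods, mirroring what loadModel does internally further down; all option values are placeholders:

    const llm = new LLModel({
        modelFile: "/absolute/path/to/model.gguf",
        librariesPath: "/path/to/runtimes",
        backend: "kompute",
        nCtx: 2048,
        nGpuLayers: 100,
    });

    if (!llm.initGpu("nvidia")) {
        console.warn("GPU init failed; continuing on the configured backend");
    }
    if (!llm.load()) {
        throw new Error("model could not be loaded");
    }
    console.log(llm.getName(), llm.getStateSize(), llm.getThreadCount());
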
@@ -375,14 +404,6 @@ declare class LLModel {
      */
     getLibraryPath(): string;
 
-    /**
-     * Initiate a GPU by a string identifier.
-     * @param {number} memory_required Should be in the range size_t or will throw
-     * @param {string} device_name 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
-     * read LoadModelOptions.device for more information
-     */
-    initGpuByString(memory_required: number, device_name: string): boolean;
-
     /**
      * From C documentation
      * @returns True if a GPU device is successfully initialized, false otherwise.
@@ -391,11 +412,10 @@ declare class LLModel {
 
     /**
      * GPUs that are usable for this LLModel
-     * @param {number} nCtx Maximum size of context window
-     * @throws if hasGpuDevice returns false (i think)
-     * @returns
+     * @throws if gpu device list is not available
+     * @returns an array of GpuDevice objects
      */
-    listGpu(nCtx: number): GpuDevice[];
+    getGpuDevices(): GpuDevice[];
 
     /**
      * delete and cleanup the native model
@@ -414,6 +434,7 @@ interface GpuDevice {
     heapSize: number;
     name: string;
     vendor: string;
+    backend: string;
 }
 
 /**
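
With the added backend field, enumerating usable devices might look like this sketch (continuing the llm instance from the previous sketch):

    for (const device of llm.getGpuDevices()) {
        // prints the device name, its reported backend, and heap size
        console.log(device.name, device.backend, device.heapSize);
    }
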
@@ -443,13 +464,15 @@ interface LoadModelOptions {
     /**
      * The processing unit on which the model will run. It can be set to
      * - "cpu": Model will run on the central processing unit.
-     * - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
-     * - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
+     * - "kompute": Model will run using the kompute (vulkan) gpu backend
+     * - "cuda": Model will run using the cuda gpu backend
+     * - "gpu": Use Metal on ARM64 macOS, otherwise the same as "kompute"
+     * - "amd", "nvidia": Use the best GPU provided by the Kompute backend from this vendor.
      * - "gpu name": Model will run on the GPU that matches the name if it's available.
      * Note: If a GPU device lacks sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All
      * instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the
      * model.
-     * @default "cpu"
+     * @default Metal on ARM64 macOS, "cpu" otherwise.
      */
     device?: string;
     /**
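
Put differently, the device option now selects a backend roughly as follows; modelName is a placeholder and the comments only restate the documentation above:

    await loadModel(modelName, { device: "cpu" });     // CPU only
    await loadModel(modelName, { device: "kompute" }); // Vulkan backend, best available GPU
    await loadModel(modelName, { device: "cuda" });    // CUDA backend
    await loadModel(modelName, { device: "gpu" });     // Metal on ARM64 macOS, otherwise kompute
    await loadModel(modelName, { device: "nvidia" });  // best NVIDIA GPU via the kompute backend
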
@@ -458,10 +481,16 @@ interface LoadModelOptions {
      */
     nCtx?: number;
     /**
-     * Number of gpu layers needed
+     * Number of GPU layers to use (Vulkan)
      * @default 100
+     * @alias ngl
      */
+    nGpuLayers?: number;
     ngl?: number;
+    /**
+     * Number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
+     */
+    nThreads?: number;
 }
 
 interface InferenceModelOptions extends LoadModelOptions {
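
The new options compose like this in a load call (model name and values are illustrative):

    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
        device: "kompute",
        nCtx: 4096,
        ngl: 80,     // alias for nGpuLayers
        nThreads: 8, // omit to let the binding pick a thread count
    });
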
@@ -507,15 +536,33 @@ interface CompletionProvider {
     ): Promise<InferenceResult>;
 }
 
+interface CompletionTokens {
+    /** The token ids. */
+    tokenIds: number[];
+    /** The token text. May be an empty string. */
+    text: string;
+}
+
 /**
  * Options for creating a completion.
  */
-interface CompletionOptions extends LLModelInferenceOptions {
+interface CompletionOptions extends Partial<LLModelPromptContext> {
     /**
      * Indicates if verbose logging is enabled.
      * @default false
     */
     verbose?: boolean;
+
+    /** Called every time new tokens can be decoded to text.
+     * @param {CompletionTokens} tokens The token ids and decoded text.
+     * @returns {boolean | undefined} Whether to continue generating tokens.
+     * */
+    onResponseTokens?: (tokens: CompletionTokens) => boolean | void;
+    /** Callback for prompt tokens, called for each input token in the prompt.
+     * @param {number} tokenId The token id.
+     * @returns {boolean | undefined} Whether to continue ingesting the prompt.
+     * */
+    onPromptToken?: (tokenId: number) => boolean | void;
 }
 
 /**
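
A completion call using the new CompletionOptions shape might read as follows (the prompt text is illustrative):

    const res = await createCompletion(model, "Summarize UTF-8 in one sentence.", {
        verbose: false,
        onPromptToken: (tokenId) => true, // return false to stop ingesting the prompt
        onResponseTokens: ({ tokenIds, text }) => {
            process.stdout.write(text); // text can be empty while a multi-byte character is still pending
            return true;                // return false to stop generation
        },
    });
    console.log(res.choices[0].message);
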
@@ -639,13 +686,6 @@ interface LLModelPromptContext {
      */
     promptTemplate?: string;
 
-    /** The context window size. Do not use, it has no effect. See loadModel options.
-     * THIS IS DEPRECATED!!!
-     * Use loadModel's nCtx option instead.
-     * @default 2048
-     */
-    nCtx: number;
-
     /** The top-k logits to sample from.
      * Top-K sampling selects the next token only from the top K most likely tokens predicted by the model.
      * It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit
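
In other words, the context window is now fixed when the model is loaded rather than per prompt; a short sketch:

    // nCtx is a loadModel option now, not a prompt-context field
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { nCtx: 4096 });
    await createCompletion(model, "Hello!", { nPredict: 128 }); // no nCtx in here anymore
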
@@ -37,9 +37,8 @@ async function loadModel(modelName, options = {}) {
         type: "inference",
         allowDownload: true,
         verbose: false,
-        device: "cpu",
         nCtx: 2048,
-        ngl: 100,
+        nGpuLayers: options.ngl ?? 100,
         ...options,
     };
 
|
|||||||
typeof loadOptions.librariesPath === "string",
|
typeof loadOptions.librariesPath === "string",
|
||||||
"Libraries path should be a string"
|
"Libraries path should be a string"
|
||||||
);
|
);
|
||||||
const existingPaths = loadOptions.librariesPath
|
const existingLibPaths = loadOptions.librariesPath
|
||||||
.split(";")
|
.split(";")
|
||||||
.filter(existsSync)
|
.filter(existsSync)
|
||||||
.join(";");
|
.join(";");
|
||||||
|
|
||||||
const llmOptions = {
|
const llmOptions = {
|
||||||
model_name: appendBinSuffixIfMissing(modelName),
|
modelFile: modelConfig.path,
|
||||||
model_path: loadOptions.modelPath,
|
librariesPath: existingLibPaths,
|
||||||
library_path: existingPaths,
|
|
||||||
device: loadOptions.device,
|
|
||||||
nCtx: loadOptions.nCtx,
|
nCtx: loadOptions.nCtx,
|
||||||
ngl: loadOptions.ngl,
|
nGpuLayers: loadOptions.nGpuLayers,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let initDevice;
|
||||||
|
if (process.platform === "darwin") {
|
||||||
|
if (!loadOptions.device) {
|
||||||
|
llmOptions.backend = "auto"; // 'auto' is effectively 'metal' due to currently non-functional fallback
|
||||||
|
} else if (loadOptions.device === "cpu") {
|
||||||
|
llmOptions.backend = "cpu";
|
||||||
|
} else {
|
||||||
|
if (process.arch !== "arm64" || loadOptions.device !== "gpu") {
|
||||||
|
throw new Error(
|
||||||
|
`Unknown device for this platform: ${loadOptions.device}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
llmOptions.backend = "metal";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// default to kompute. use cpu for arm64 because we currently dont build kompute runtimes for arm64
|
||||||
|
llmOptions.backend = process.arch === "arm64" ? "cpu" : "kompute";
|
||||||
|
if (!loadOptions.device || loadOptions.device === "cpu") {
|
||||||
|
// use the default backend
|
||||||
|
} else if (
|
||||||
|
loadOptions.device === "cuda" ||
|
||||||
|
loadOptions.device === "kompute"
|
||||||
|
) {
|
||||||
|
llmOptions.backend = loadOptions.device;
|
||||||
|
initDevice = "gpu";
|
||||||
|
} else if (loadOptions.device.startsWith("cuda:")) {
|
||||||
|
llmOptions.backend = "cuda";
|
||||||
|
initDevice = loadOptions.device.replace(/^cuda:/, "");
|
||||||
|
} else {
|
||||||
|
initDevice = loadOptions.device.replace(/^kompute:/, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (loadOptions.verbose) {
|
if (loadOptions.verbose) {
|
||||||
console.debug("Creating LLModel:", {
|
console.debug("Creating LLModel:", {
|
||||||
|
initDevice,
|
||||||
llmOptions,
|
llmOptions,
|
||||||
modelConfig,
|
modelConfig,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const llmodel = new LLModel(llmOptions);
|
const llmodel = new LLModel(llmOptions);
|
||||||
|
if (initDevice) {
|
||||||
|
const gpuInitSuccess = llmodel.initGpu(initDevice);
|
||||||
|
if (!gpuInitSuccess) {
|
||||||
|
const availableDevices = llmodel.getGpuDevices();
|
||||||
|
const deviceNames = availableDevices
|
||||||
|
.map((device) => device.name)
|
||||||
|
.join(", ");
|
||||||
|
console.warn(
|
||||||
|
`Failed to initialize GPU device "${initDevice}" - Available devices: ${deviceNames}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llmodel.load();
|
||||||
|
|
||||||
|
if (loadOptions.nThreads) {
|
||||||
|
llmodel.setThreadCount(loadOptions.nThreads);
|
||||||
|
}
|
||||||
|
|
||||||
if (loadOptions.type === "embedding") {
|
if (loadOptions.type === "embedding") {
|
||||||
return new EmbeddingModel(llmodel, modelConfig);
|
return new EmbeddingModel(llmodel, modelConfig);
|
||||||
} else if (loadOptions.type === "inference") {
|
} else if (loadOptions.type === "inference") {
|
||||||
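
For reference, the branch above resolves to roughly the following device to (backend, initDevice) combinations; this is only a summary of the code above, written as comments:

    // device option      platform                   llmOptions.backend        initDevice for initGpu()
    // (unset)            macOS                      "auto"                    (none)
    // "cpu"              macOS                      "cpu"                     (none)
    // "gpu"              macOS on arm64             "metal"                   (none)
    // (unset) or "cpu"   Linux/Windows on x86_64    "kompute"                 (none)
    // (unset) or "cpu"   Linux/Windows on arm64     "cpu"                     (none)
    // "kompute", "cuda"  Linux/Windows              same as device            "gpu"
    // "cuda:<device>"    Linux/Windows              "cuda"                    "<device>"
    // anything else      Linux/Windows              platform default above    device name, "kompute:" prefix stripped
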
@@ -84,7 +133,7 @@ async function loadModel(modelName, options = {}) {
     }
 }
 
-function createEmbedding(model, text, options={}) {
+function createEmbedding(model, text, options = {}) {
     let {
         dimensionality = undefined,
         longTextMode = "mean",
@@ -138,10 +187,7 @@ async function createCompletion(
         ...options,
     };
 
-    const result = await provider.generate(
-        input,
-        completionOptions,
-    );
+    const result = await provider.generate(input, completionOptions);
 
     return {
         model: provider.modelName,
@@ -174,10 +220,10 @@ function createCompletionStream(
 
     const completionPromise = createCompletion(provider, input, {
         ...options,
-        onResponseToken: (tokenId, token) => {
-            completionStream.push(token);
-            if (options.onResponseToken) {
-                return options.onResponseToken(tokenId, token);
+        onResponseTokens: (tokens) => {
+            completionStream.push(tokens.text);
+            if (options.onResponseTokens) {
+                return options.onResponseTokens(tokens);
             }
         },
     }).then((result) => {
@@ -11,7 +11,7 @@ class InferenceModel {
    constructor(llmodel, config) {
         this.llm = llmodel;
         this.config = config;
-        this.modelName = this.llm.name();
+        this.modelName = this.llm.getName();
     }
 
     async createChatSession(options) {
@@ -90,6 +90,25 @@ class InferenceModel {
 
         let tokensGenerated = 0;
 
+        const decoder = new TokenDecoder((tokenIds, text) => {
+            let continueGeneration = true;
+            tokensGenerated += tokenIds.length;
+
+            if (options.onResponseTokens) {
+                // catch here because if errors bubble through cpp they will lose stacktraces
+                try {
+                    // don't cancel the generation unless user explicitly returns false
+                    continueGeneration =
+                        options.onResponseTokens({ tokenIds, text }) !== false;
+                } catch (err) {
+                    console.error("Error in onResponseToken callback", err);
+                    continueGeneration = false;
+                }
+            }
+            return continueGeneration;
+
+        });
+
         const result = await this.llm.infer(prompt, {
             ...promptContext,
             nPast,
|
|||||||
let continueIngestion = true;
|
let continueIngestion = true;
|
||||||
tokensIngested++;
|
tokensIngested++;
|
||||||
if (options.onPromptToken) {
|
if (options.onPromptToken) {
|
||||||
// catch errors because if they go through cpp they will loose stacktraces
|
// catch here because if errors bubble through cpp they will looe stacktraces
|
||||||
try {
|
try {
|
||||||
// don't cancel ingestion unless user explicitly returns false
|
// don't cancel ingestion unless user explicitly returns false
|
||||||
continueIngestion =
|
continueIngestion =
|
||||||
@ -109,20 +128,8 @@ class InferenceModel {
|
|||||||
}
|
}
|
||||||
return continueIngestion;
|
return continueIngestion;
|
||||||
},
|
},
|
||||||
onResponseToken: (tokenId, token) => {
|
onResponseToken: (tokenId, bytes) => {
|
||||||
let continueGeneration = true;
|
return decoder.decode(tokenId, bytes);
|
||||||
tokensGenerated++;
|
|
||||||
if (options.onResponseToken) {
|
|
||||||
try {
|
|
||||||
// don't cancel the generation unless user explicitly returns false
|
|
||||||
continueGeneration =
|
|
||||||
options.onResponseToken(tokenId, token) !== false;
|
|
||||||
} catch (err) {
|
|
||||||
console.error("Error in onResponseToken callback", err);
|
|
||||||
continueGeneration = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return continueGeneration;
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -141,6 +148,63 @@ class InferenceModel {
     }
 }
 
+// see https://github.com/nomic-ai/gpt4all/pull/1281
+class TokenDecoder {
+
+    constructor(callback) {
+        this.callback = callback;
+        this.buffer = [];
+        this.tokenIds = [];
+        this.buffExpectingContBytes = 0;
+        this.textDecoder = new TextDecoder();
+    }
+
+    decode(tokenId, bytes) {
+        const decoded = [];
+        this.tokenIds.push(tokenId);
+
+        for (let i = 0; i < bytes.length; i++) {
+            const byte = bytes[i];
+            const bits = byte.toString(2).padStart(8, '0');
+            const highOnes = bits.split('0')[0];
+
+            if (highOnes.length === 1) {
+                // Continuation byte
+                this.buffer.push(byte);
+                this.buffExpectingContBytes -= 1;
+            } else {
+                // Beginning of a byte sequence
+                if (this.buffer.length > 0) {
+                    decoded.push(this._decodeBuffer());
+                    this.buffer = [];
+                }
+
+                this.buffer.push(byte);
+                this.buffExpectingContBytes = Math.max(0, highOnes.length - 1);
+            }
+
+            if (this.buffExpectingContBytes <= 0) {
+                // Received the whole sequence or an out-of-place continuation byte
+                decoded.push(this._decodeBuffer());
+                this.buffer = [];
+                this.buffExpectingContBytes = 0;
+            }
+        }
+
+        if (decoded.length === 0 && this.buffExpectingContBytes > 0) {
+            // Wait for more continuation bytes
+            return true;
+        }
+        const tokenIds = this.tokenIds;
+        this.tokenIds = [];
+        return this.callback(tokenIds, decoded.join(''));
+    }
+
+    _decodeBuffer() {
+        return this.textDecoder.decode(new Uint8Array(this.buffer));
+    }
+}
+
 class EmbeddingModel {
     llm;
     config;
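
The decoder above buffers incomplete UTF-8 sequences across calls, which is what fixes emoji in streamed output. A small illustration with hypothetical token ids, feeding the four bytes of "😀" split across two callbacks (the import path is illustrative):

    const { TokenDecoder } = require("./models.js");

    const chunks = [];
    const decoder = new TokenDecoder((tokenIds, text) => {
        chunks.push({ tokenIds, text });
        return true;
    });

    decoder.decode(101, new Uint8Array([0xf0, 0x9f])); // incomplete sequence, no callback yet
    decoder.decode(102, new Uint8Array([0x98, 0x80])); // completes it
    // chunks is now [{ tokenIds: [101, 102], text: "😀" }]
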
@@ -160,6 +224,7 @@ class EmbeddingModel {
 }
 
 module.exports = {
+    TokenDecoder,
     InferenceModel,
     EmbeddingModel,
 };

gpt4all-bindings/typescript/test/bindings.test.js (new file, 73 lines)
@@ -0,0 +1,73 @@
+const { loadModel } = require("../src/gpt4all.js");
+
+// these tests require an internet connection / a real model
+const testModel = "Phi-3-mini-4k-instruct.Q4_0.gguf";
+
+describe("llmodel", () => {
+    let model;
+
+    test("load on cpu", async () => {
+        model = await loadModel(testModel, {
+            device: "cpu",
+        });
+    });
+
+    test("getter working", async () => {
+        const stateSize = model.llm.getStateSize();
+        expect(stateSize).toBeGreaterThan(0);
+        const name = model.llm.getName();
+        expect(name).toBe(testModel);
+        const type = model.llm.getType();
+        expect(type).toBeUndefined();
+        const devices = model.llm.getGpuDevices();
+        expect(Array.isArray(devices)).toBe(true);
+        const gpuEnabled = model.llm.hasGpuDevice();
+        expect(gpuEnabled).toBe(false);
+        const requiredMem = model.llm.getRequiredMemory();
+        expect(typeof requiredMem).toBe('number');
+        const threadCount = model.llm.getThreadCount();
+        expect(threadCount).toBe(4);
+    });
+
+    test("setting thread count", () => {
+        model.llm.setThreadCount(5);
+        expect(model.llm.getThreadCount()).toBe(5);
+    });
+
+    test("cpu inference", async () => {
+        const res = await model.llm.infer("what is the capital of france?", {
+            temp: 0,
+            promptTemplate: model.config.promptTemplate,
+            nPredict: 10,
+            onResponseToken: () => {
+                return true;
+            },
+        });
+        expect(res.text).toMatch(/paris/i);
+    }, 10000);
+
+    test("dispose and load model on gpu", async () => {
+        model.dispose();
+        model = await loadModel(testModel, {
+            device: "gpu",
+        });
+        const gpuEnabled = model.llm.hasGpuDevice();
+        expect(gpuEnabled).toBe(true);
+    });
+
+    test("gpu inference", async () => {
+        const res = await model.llm.infer("what is the capital of france?", {
+            temp: 0,
+            promptTemplate: model.config.promptTemplate,
+            nPredict: 10,
+            onResponseToken: () => {
+                return true;
+            },
+        });
+        expect(res.text).toMatch(/paris/i);
+    }, 10000);
+
+    afterAll(() => {
+        model.dispose();
+    });
+});
@@ -2,7 +2,6 @@ const path = require("node:path");
 const os = require("node:os");
 const fsp = require("node:fs/promises");
 const { existsSync } = require('node:fs');
-const { LLModel } = require("node-gyp-build")(path.resolve(__dirname, ".."));
 const {
     listModels,
     downloadModel,
@@ -13,11 +12,8 @@ const {
     DEFAULT_LIBRARIES_DIRECTORY,
     DEFAULT_MODEL_LIST_URL,
 } = require("../src/config.js");
-const {
-    loadModel,
-    createPrompt,
-    createCompletion,
-} = require("../src/gpt4all.js");
+// these tests do not require an internet connection or an actual model
 
 describe("config", () => {
     test("default paths constants are available and correct", () => {
(One further file's diff is suppressed because it is too large.)