typescript bindings maintenance (#2363)

* remove outdated comments

Signed-off-by: limez <limez@protonmail.com>

* simpler build from source

Signed-off-by: limez <limez@protonmail.com>

* update unix build script to create .so runtimes correctly

Signed-off-by: limez <limez@protonmail.com>
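
For context, a script along these lines could handle the copy step; the directory layout and library naming here are assumptions for illustration, not taken from this PR:

// copy-runtimes.mjs — hypothetical helper; paths and naming are assumptions
import { copyFileSync, mkdirSync, readdirSync } from "node:fs";
import path from "node:path";

const buildDir = "build"; // assumed native build output directory
const target = path.join("runtimes", `${process.platform}-${process.arch}`, "native");
mkdirSync(target, { recursive: true });

// on Linux the backend runtimes are shared objects (lib*.so)
for (const file of readdirSync(buildDir)) {
    if (file.endsWith(".so")) {
        copyFileSync(path.join(buildDir, file), path.join(target, file));
    }
}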

* configure ci build type, use RelWithDebInfo for dev build script

Signed-off-by: limez <limez@protonmail.com>

* add clean script

Signed-off-by: limez <limez@protonmail.com>
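
A clean script for Node bindings is usually only a few lines; a minimal sketch, assuming the artifact directories are build/, prebuilds/, and runtimes/:

// clean.mjs — illustrative only; the removed directories are assumptions
import { rmSync } from "node:fs";

for (const dir of ["build", "prebuilds", "runtimes"]) {
    rmSync(dir, { recursive: true, force: true });
}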

* fix streamed token decoding / emoji

Signed-off-by: limez <limez@protonmail.com>
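
Background: streamed tokens decode to raw bytes, and a multi-byte glyph such as an emoji can be split across several tokens, so decoding each token in isolation produces replacement characters. A minimal sketch of the usual remedy (not this PR's literal code) is incremental decoding:

// sketch of incremental UTF-8 decoding; not the bindings' actual implementation
const decoder = new TextDecoder("utf-8");

function decodeTokenBytes(bytes) {
    // stream: true buffers incomplete byte sequences until
    // a following token completes them
    return decoder.decode(bytes, { stream: true });
}

// "🙂" is f0 9f 99 82; split across two tokens it still decodes cleanly:
console.log(decodeTokenBytes(new Uint8Array([0xf0, 0x9f]))); // ""
console.log(decodeTokenBytes(new Uint8Array([0x99, 0x82]))); // "🙂"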

* remove deprecated nCtx

Signed-off-by: limez <limez@protonmail.com>
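
If the removal works the way the deprecation suggests, the context window is now set once at load time rather than per prompt; a sketch, with the option name assumed:

import { loadModel } from "gpt4all";

// nCtx as a load option; assumed replacement for the deprecated per-prompt field
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
    nCtx: 2048,
});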

* update typings

Signed-off-by: jacob <jacoobes@sern.dev>

* readme, spelling fixes

Signed-off-by: jacob <jacoobes@sern.dev>

* cuda/backend logic changes + name napi methods like their js counterparts

Signed-off-by: limez <limez@protonmail.com>
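
Presumably the device string now maps onto the backend names this commit touches; a hedged sketch (the "cuda" value is an assumption, not confirmed by the diff):

import { loadModel } from "gpt4all";

// device value assumed; mirrors the CUDA backend named in the commit message
const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
    device: "cuda",
});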

* convert llmodel example into a test, separate test suite that can run in ci

Signed-off-by: limez <limez@protonmail.com>
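
A CI-runnable test in this style might look roughly like the following, using Node's built-in runner; the file layout and assertions are assumptions, not the suite this PR adds:

// llmodel.test.mjs — hypothetical shape of such a test
import test from "node:test";
import assert from "node:assert";
import { loadModel, createCompletion } from "../src/gpt4all.js";

test("basic completion", async () => {
    const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });
    try {
        const res = await createCompletion(model, "What is 1 + 1?");
        assert.ok(res.choices[0].message.content.length > 0);
    } finally {
        model.dispose();
    }
});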

* update examples / naming

Signed-off-by: limez <limez@protonmail.com>

* update deps, remove the need for binding.ci.gyp, make node-gyp-build fallback easier to test

Signed-off-by: limez <limez@protonmail.com>
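
For reference, node-gyp-build's standard resolution is what makes the fallback testable: it prefers a locally compiled addon and otherwise picks a matching prebuild. Standard usage, not PR-specific code:

// loads build/Release if present, else a prebuild from prebuilds/<platform>-<arch>/
import nodeGypBuild from "node-gyp-build";
import { dirname } from "node:path";
import { fileURLToPath } from "node:url";

const native = nodeGypBuild(dirname(fileURLToPath(import.meta.url)));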

* make sure the assert-backend-sources.js script is published, but not the others

Signed-off-by: limez <limez@protonmail.com>

* build correctly on windows (regression on node-gyp-build)

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* codespell

Signed-off-by: limez <limez@protonmail.com>

* make sure dlhandle.cpp gets linked correctly

Signed-off-by: limez <limez@protonmail.com>

* add include for check_cxx_compiler_flag call during aarch64 builds

Signed-off-by: limez <limez@protonmail.com>

* x86 > arm64 cross compilation of runtimes and bindings

Signed-off-by: limez <limez@protonmail.com>

* default to cpu instead of kompute on arm64

Signed-off-by: limez <limez@protonmail.com>
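
The selection logic is presumably along these lines (illustrative only, not the bindings' literal code):

// default-device sketch: no Kompute (Vulkan) runtime is built for arm64 here,
// so arm64 falls back to cpu
function defaultDevice() {
    return process.arch === "arm64" ? "cpu" : "gpu";
}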

* formatting, more minimal example

Signed-off-by: limez <limez@protonmail.com>

---------

Signed-off-by: limez <limez@protonmail.com>
Signed-off-by: jacob <jacoobes@sern.dev>
Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: jacob <jacoobes@sern.dev>
commit a602f7fde7 (parent f001897a1a)
Author: Andreas Obersteiner
Date: 2024-06-03 18:12:55 +02:00 (committed by GitHub)
30 changed files with 1112 additions and 873 deletions


@@ -2,7 +2,6 @@ import { loadModel, createCompletion } from "../src/gpt4all.js";
 const model = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
     verbose: true,
-    device: "gpu",
 });
 const chat = await model.createChatSession();
@@ -12,8 +11,6 @@ await createCompletion(
     "Why are bananas rather blue than bread at night sometimes?",
     {
         verbose: true,
-        nPredict: 10,
     }
 );
 await createCompletion(chat, "Are you sure?", {
     verbose: true,
 });
-);


@@ -7,12 +7,12 @@ const modelOptions = {
     verbose: true,
 };
-const model1 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", {
+const model1 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
     ...modelOptions,
     device: "gpu", // only one model can be on gpu
 });
-const model2 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
-const model3 = await loadModel("orca-mini-3b-gguf2-q4_0.gguf", modelOptions);
+const model2 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
+const model3 = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", modelOptions);
 const promptContext = {
     verbose: true,
@@ -27,3 +27,6 @@ const responses = await Promise.all([
     createCompletion(model3, "What is 1 + 3?", promptContext),
 ]);
 console.log(responses.map((res) => res.choices[0].message));
+model1.dispose();
+model2.dispose();
+model3.dispose();
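
The dispose() calls added above release the native model handles once all completions have settled. An illustrative way to make that cleanup unconditional (a common pattern, not part of this diff):

import { loadModel, createCompletion } from "../src/gpt4all.js";

const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", { device: "cpu" });
try {
    await createCompletion(model, "What is 1 + 1?");
} finally {
    model.dispose(); // frees native memory even if the completion throws
}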


@@ -1,61 +0,0 @@
import {
LLModel,
createCompletion,
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,
loadModel,
} from "../src/gpt4all.js";
const model = await loadModel("mistral-7b-openorca.gguf2.Q4_0.gguf", {
verbose: true,
device: "gpu",
});
const ll = model.llm;
try {
class Extended extends LLModel {}
} catch (e) {
console.log("Extending from native class gone wrong " + e);
}
console.log("state size " + ll.stateSize());
console.log("thread count " + ll.threadCount());
ll.setThreadCount(5);
console.log("thread count " + ll.threadCount());
ll.setThreadCount(4);
console.log("thread count " + ll.threadCount());
console.log("name " + ll.name());
console.log("type: " + ll.type());
console.log("Default directory for models", DEFAULT_DIRECTORY);
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
console.log("Has GPU", ll.hasGpuDevice());
console.log("gpu devices", ll.listGpu());
console.log("Required Mem in bytes", ll.memoryNeeded());
// to ingest a custom system prompt without using a chat session.
await createCompletion(
model,
"<|im_start|>system\nYou are an advanced mathematician.\n<|im_end|>\n",
{
promptTemplate: "%1",
nPredict: 0,
special: true,
}
);
const completion1 = await createCompletion(model, "What is 1 + 1?", {
verbose: true,
});
console.log(`🤖 > ${completion1.choices[0].message.content}`);
//Very specific:
// tested on Ubuntu 22.0, Linux Mint, if I set nPast to 100, the app hangs.
const completion2 = await createCompletion(model, "And if we add two?", {
verbose: true,
});
console.log(`🤖 > ${completion2.choices[0].message.content}`);
//CALLING DISPOSE WILL INVALID THE NATIVE MODEL. USE THIS TO CLEANUP
model.dispose();
console.log("model disposed, exiting...");


@@ -1,7 +1,6 @@
 import { promises as fs } from "node:fs";
 import { loadModel, createCompletion } from "../src/gpt4all.js";
-const model = await loadModel("Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf", {
+const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
     verbose: true,
     device: "gpu",
 });
@@ -12,14 +11,15 @@ const res = await createCompletion(
     {
         onPromptToken: (tokenId) => {
             console.debug("onPromptToken", { tokenId });
-            // throwing an error will cancel
+            // errors within the callback will cancel ingestion, inference will still run
             throw new Error("This is an error");
+            // const foo = thisMethodDoesNotExist();
             // returning false will cancel as well
             // return false;
         },
-        onResponseToken: (tokenId, token) => {
-            console.debug("onResponseToken", { tokenId, token });
+        onResponseTokens: ({ tokenIds, text }) => {
+            // console.debug("onResponseToken", { tokenIds, text });
+            process.stdout.write(text);
             // same applies here
         },
     }
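
The hunk above replaces the per-token callback with a batched one. Inferred from the usage shown (the cancellation contract is assumed to match the old callback, per the "same applies here" comment):

// rough shape of the new callback, inferred from the diff above
/**
 * @param {{ tokenIds: number[], text: string }} chunk - token ids plus the text they decode to
 * @returns {boolean | void} returning false cancels, as with the old callback
 */
const onResponseTokens = ({ tokenIds, text }) => {
    process.stdout.write(text);
};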


@@ -0,0 +1,37 @@
+import {
+    loadModel,
+    createCompletion,
+    createCompletionStream,
+    createCompletionGenerator,
+} from "../src/gpt4all.js";
+const model = await loadModel("Phi-3-mini-4k-instruct.Q4_0.gguf", {
+    device: "cpu",
+});
+const prompt = "Tell a short story but only use emojis. Three sentences max.";
+const result = await createCompletion(model, prompt, {
+    onResponseToken: (tokens) => {
+        console.debug(tokens)
+    },
+});
+console.debug(result.choices[0].message);
+process.stdout.write("### Stream:");
+const stream = createCompletionStream(model, prompt);
+stream.tokens.on("data", (data) => {
+    process.stdout.write(data);
+});
+await stream.result;
+process.stdout.write("\n");
+process.stdout.write("### Generator:");
+const gen = createCompletionGenerator(model, prompt);
+for await (const chunk of gen) {
+    process.stdout.write(chunk);
+}
+model.dispose();


@@ -38,8 +38,8 @@ process.stdout.write("\n");
 process.stdout.write("### Callback:");
 await createCompletion(model, "Why not just callbacks?", {
-    onResponseToken: (tokenId, token) => {
-        process.stdout.write(token);
+    onResponseTokens: ({ text }) => {
+        process.stdout.write(text);
     },
 });
 process.stdout.write("\n");