Embed4All: optionally count tokens, misc fixes (#2145)

Key changes:
* python: optionally return token count in Embed4All.embed
* python and docs: models2.json -> models3.json
* Embed4All: require explicit prefix for unknown models
* llamamodel: fix shouldAddBOS for Bert and Nomic Bert

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-03-20 11:24:02 -04:00
Committed by: GitHub
Parent commit: 271e6a529c
Commit: 0455b80b7f
11 changed files with 105 additions and 52 deletions


@@ -193,6 +193,7 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
  * @param prefix The model-specific prefix representing the embedding task, without the trailing colon. NULL for no
  *        prefix.
  * @param dimensionality The embedding dimension, for use with Matryoshka-capable models. Set to -1 for full-size.
+ * @param token_count Return location for the number of prompt tokens processed, or NULL.
  * @param do_mean True to average multiple embeddings if the text is longer than the model can accept, False to
  *        truncate.
  * @param atlas Try to be fully compatible with the Atlas API. Currently, this means texts longer than 8192 tokens with
@@ -202,7 +203,7 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
  *         be responsible for lifetime of this memory. NULL if an error occurred.
  */
 float *llmodel_embed(llmodel_model model, const char **texts, size_t *embedding_size, const char *prefix,
-                     int dimensionality, bool do_mean, bool atlas, const char **error);
+                     int dimensionality, size_t *token_count, bool do_mean, bool atlas, const char **error);
 /**
  * Frees the memory allocated by the llmodel_embedding function.
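For reference, a minimal sketch of a caller using the updated signature. The model handle, the "search_document" prefix, and the NULL-terminated `texts` array are illustrative assumptions, not part of this diff; only the `llmodel_embed` signature and the deallocator documented above are taken from the header.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include "llmodel_c.h"  /* adjust the include path to wherever the header lives */

/* Sketch only: `model` is assumed to be an already-loaded llmodel_model
 * handle; the model-loading calls are omitted here. */
static void embed_example(llmodel_model model)
{
    const char *texts[] = {"first document", "second document", NULL};  /* assumed NULL-terminated */
    size_t embedding_size = 0;
    size_t token_count = 0;
    const char *error = NULL;

    float *embeddings = llmodel_embed(
        model, texts, &embedding_size,
        "search_document",  /* task prefix without the trailing colon; NULL for no prefix */
        -1,                 /* dimensionality: -1 for the full-size embedding */
        &token_count,       /* new out-parameter: number of prompt tokens processed, or pass NULL */
        true,               /* do_mean: average embeddings of texts longer than the model accepts */
        false,              /* atlas compatibility off */
        &error);

    if (embeddings == NULL) {
        fprintf(stderr, "embedding failed: %s\n", error ? error : "(unknown error)");
        return;
    }

    printf("got %zu floats from %zu prompt tokens\n", embedding_size, token_count);
    llmodel_free_embedding(embeddings);  /* free via the deallocator documented above */
}
```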