Embed4All: optionally count tokens, misc fixes (#2145)

Key changes: * python: optionally return token count in Embed4All.embed * python and docs: models2.json -> models3.json * Embed4All: require explicit prefix for unknown models * llamamodel: fix shouldAddBOS for Bert and Nomic Bert Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-06 02:50:36 +00:00 · 2024-03-20 11:24:02 -04:00
parent 271e6a529c
commit 0455b80b7f
11 changed files with 105 additions and 52 deletions
--- a/gpt4all-backend/llmodel_shared.cpp
+++ b/gpt4all-backend/llmodel_shared.cpp
@@ -270,25 +270,27 @@ void LLModel::generateResponse(std::function<bool(int32_t, const std::string&)>

 void LLModel::embed(
    const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
-    bool doMean, bool atlas
+    size_t *tokenCount, bool doMean, bool atlas
 ) {
    (void)texts;
    (void)embeddings;
    (void)prefix;
    (void)dimensionality;
+    (void)tokenCount;
    (void)doMean;
    (void)atlas;
    throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
 }

 void LLModel::embed(
-    const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality, bool doMean,
-    bool atlas
+    const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality, size_t *tokenCount,
+    bool doMean, bool atlas
 ) {
    (void)texts;
    (void)embeddings;
    (void)isRetrieval;
    (void)dimensionality;
+    (void)tokenCount;
    (void)doMean;
    (void)atlas;
    throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");