Embed4All: optionally count tokens, misc fixes (#2145)

Key changes:
* python: optionally return token count in Embed4All.embed
* python and docs: models2.json -> models3.json
* Embed4All: require explicit prefix for unknown models
* llamamodel: fix shouldAddBOS for Bert and Nomic Bert

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-08 20:09:12 +00:00 · 2024-03-20 11:24:02 -04:00
parent 271e6a529c
commit 0455b80b7f
11 changed files with 105 additions and 52 deletions
--- a/gpt4all-backend/llamamodel_impl.h
+++ b/gpt4all-backend/llamamodel_impl.h
@@ -39,10 +39,10 @@ public:
    size_t embeddingSize() const override;
    // user-specified prefix
    void embed(const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix,
-               int dimensionality = -1, bool doMean = true, bool atlas = false) override;
+               int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override;
    // automatic prefix
    void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality = -1,
-               bool doMean = true, bool atlas = false) override;
+               size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override;

 private:
    std::unique_ptr<LLamaPrivate> d_ptr;
@@ -61,7 +61,7 @@ protected:
    int32_t layerCount(std::string const &modelPath) const override;

    void embedInternal(const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
-                       bool doMean, bool atlas, const EmbModelSpec *spec);
+                       size_t *tokenCount, bool doMean, bool atlas, const EmbModelSpec *spec);
 };

 #endif // LLAMAMODEL_H