Embed4All: optionally count tokens, misc fixes (#2145)

Key changes:
* python: optionally return token count in Embed4All.embed
* python and docs: models2.json -> models3.json
* Embed4All: require explicit prefix for unknown models
* llamamodel: fix shouldAddBOS for Bert and Nomic Bert

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-03-20 11:24:02 -04:00
committed by GitHub
parent 271e6a529c
commit 0455b80b7f
11 changed files with 105 additions and 52 deletions

View File

@@ -110,10 +110,10 @@ public:
}
// user-specified prefix
// Embeds `texts` into the caller-provided `embeddings` buffer using an explicitly supplied, optional prefix.
//   texts          - the input strings to embed.
//   embeddings     - output buffer owned by the caller (required size/layout is not visible here — TODO confirm).
//   prefix         - optional prefix string; behavior when it is std::nullopt is not visible here — verify
//                    against the implementation.
//   dimensionality - defaults to -1 (presumably "use the model's native size" — TODO confirm).
//   tokenCount     - optional out-pointer, defaults to nullptr; per the commit message it is used to optionally
//                    report the token count.
//   doMean, atlas  - default to true and false respectively; their semantics are not visible here.
// NOTE(review): two parameter-list continuations follow because this appears to be a diff rendering with the
// +/- markers stripped: the line without tokenCount looks like the old signature, the line with it the new one.
// NOTE(review): default arguments on a virtual function are bound by the static type at the call site — keep
// any overrides' defaults in sync.
virtual void embed(const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix,
int dimensionality = -1, bool doMean = true, bool atlas = false);
int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false);
// automatic prefix
// Embeds `texts` into the caller-provided `embeddings` buffer; instead of an explicit prefix the caller passes
// `isRetrieval`, from which the prefix is presumably chosen automatically (selection logic not visible here —
// TODO confirm).
//   texts          - the input strings to embed.
//   embeddings     - output buffer owned by the caller (required size/layout is not visible here — TODO confirm).
//   isRetrieval    - flag used for the automatic prefix choice; exact meaning to be verified against the
//                    implementation.
//   dimensionality - defaults to -1 (presumably "use the model's native size" — TODO confirm).
//   tokenCount     - optional out-pointer, defaults to nullptr; per the commit message it is used to optionally
//                    report the token count.
//   doMean, atlas  - default to true and false respectively; their semantics are not visible here.
// NOTE(review): two parameter-list continuations follow because this appears to be a diff rendering with the
// +/- markers stripped: the line without tokenCount looks like the old signature, the line with it the new one.
virtual void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval,
int dimensionality = -1, bool doMean = true, bool atlas = false);
int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false);
// Requests a thread count. This default implementation ignores the request: the (void) cast only marks
// n_threads as intentionally unused. Being virtual, it is presumably meant to be overridden — verify.
virtual void setThreadCount(int32_t n_threads) { (void)n_threads; }
// Reports the thread count; this default implementation always returns 1.
virtual int32_t threadCount() const { return 1; }