mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-06 02:50:36 +00:00
Embed4All: optionally count tokens, misc fixes (#2145)
Key changes: * python: optionally return token count in Embed4All.embed * python and docs: models2.json -> models3.json * Embed4All: require explicit prefix for unknown models * llamamodel: fix shouldAddBOS for Bert and Nomic Bert Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
@@ -110,10 +110,10 @@ public:
|
||||
}
|
||||
// user-specified prefix
|
||||
virtual void embed(const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix,
|
||||
int dimensionality = -1, bool doMean = true, bool atlas = false);
|
||||
int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false);
|
||||
// automatic prefix
|
||||
virtual void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval,
|
||||
int dimensionality = -1, bool doMean = true, bool atlas = false);
|
||||
int dimensionality = -1, size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false);
|
||||
|
||||
virtual void setThreadCount(int32_t n_threads) { (void)n_threads; }
|
||||
virtual int32_t threadCount() const { return 1; }
|
||||
|
Reference in New Issue
Block a user