backend: fix extra spaces in tokenization and a CUDA crash (#2778)

Also potentially improves accuracy of BOS insertion, token cache, and logit indexing.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-08-01 10:46:36 -04:00
committed by GitHub
parent da59c9f5ea
commit 51bd01ae05
10 changed files with 46 additions and 36 deletions

View File

@@ -53,7 +53,7 @@ private:
bool m_supportsCompletion = false;
protected:
- std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) const override;
+ std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
std::string tokenToString(Token id) const override;
Token sampleToken(PromptContext &ctx) const override;
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;