Handle edge cases when generating embeddings (#1215)

* Handle edge cases when generating embeddings
* Improve Python handling & add llmodel_c.h note
- In the Python bindings, fail fast with a ValueError when text is empty
- Advise other bindings authors to do likewise in llmodel_c.h
This commit is contained in:
cosmic-snow
2023-07-17 22:21:03 +02:00
committed by GitHub
parent 1e74171a7b
commit 2d02c65177
4 changed files with 16 additions and 1 deletions

View File

@@ -168,10 +168,14 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
{
if (model == nullptr || text == nullptr || !strlen(text)) {
*embedding_size = 0;
return nullptr;
}
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
std::vector<float> embeddingVector = wrapper->llModel->embedding(text);
float *embedding = (float *)malloc(embeddingVector.size() * sizeof(float));
if(embedding == nullptr) {
if (embedding == nullptr) {
*embedding_size = 0;
return nullptr;
}

View File

@@ -173,6 +173,8 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
/**
* Generate an embedding using the model.
* NOTE: If given NULL pointers for the model or text, or an empty text, a NULL pointer will be
* returned. Bindings should signal an error when NULL is the return value.
* @param model A pointer to the llmodel_model instance.
* @param text A string representing the text to generate an embedding for.
* @param embedding_size A pointer to a size_t type that will be set by the call indicating the length