Handle edge cases when generating embeddings (#1215)
* Handle edge cases when generating embeddings
* Improve Python handling & add llmodel_c.h note
  - In the Python bindings, fail fast with a ValueError when the text is empty
  - Advise other bindings authors to do likewise in llmodel_c.h
@@ -251,6 +251,8 @@ class LLModel:
        self,
        text: str
    ) -> list[float]:
        if not text:
            raise ValueError("Text must not be None or empty")
        embedding_size = ctypes.c_size_t()
        c_text = ctypes.c_char_p(text.encode('utf-8'))
        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
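The hunk above ends at the llmodel_embedding call, before the returned buffer is turned into the list[float] the signature promises. A minimal sketch of that remaining step, assuming llmodel_embedding's restype is declared as POINTER(c_float) and that embedding_size receives the number of returned floats (both are assumptions; neither appears in this hunk):

    # Hypothetical continuation of the method above, not part of the diff:
    # copy the C floats into a plain Python list before the native buffer goes away.
    embedding = [embedding_ptr[i] for i in range(embedding_size.value)]
    return embedding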
@@ -3,6 +3,7 @@ from io import StringIO

from gpt4all import GPT4All, Embed4All
import time
import pytest


def test_inference():
    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@@ -107,3 +108,9 @@ def test_embedding():
    #for i, value in enumerate(output):
    #print(f'Value at index {i}: {value}')
    assert len(output) == 384

def test_empty_embedding():
    text = ''
    embedder = Embed4All()
    with pytest.raises(ValueError):
        output = embedder.embed(text)
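Together with the binding change above, the new test pins down the caller-visible behavior: a non-empty string still yields a 384-dimensional embedding with the default Embed4All model, while an empty string now raises ValueError in Python instead of reaching the C API. A small usage sketch (the input string here is made up; the 384 figure comes from test_embedding above):

    from gpt4all import Embed4All

    embedder = Embed4All()
    vector = embedder.embed('The quick brown fox')  # list[float]; 384 values for the default model
    try:
        embedder.embed('')  # empty input now fails fast with ValueError
    except ValueError as err:
        print(f'rejected empty input: {err}')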