Handle edge cases when generating embeddings (#1215)

* Handle edge cases when generating embeddings * Improve Python handling & add llmodel_c.h note - In the Python bindings, fail fast with a ValueError when text is empty - Advise other bindings authors to do likewise in llmodel_c.h
2025-09-08 11:58:53 +00:00 · 2023-07-17 22:21:03 +02:00
parent 1e74171a7b
commit 2d02c65177
4 changed files with 16 additions and 1 deletions
--- a/gpt4all-bindings/python/gpt4all/pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/pyllmodel.py
@@ -251,6 +251,8 @@ class LLModel:
        self,
        text: str
    ) -> list[float]:
+        if not text:
+            raise ValueError("Text must not be None or empty")
        embedding_size = ctypes.c_size_t()
        c_text = ctypes.c_char_p(text.encode('utf-8'))
        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
--- a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
@@ -3,6 +3,7 @@ from io import StringIO

 from gpt4all import GPT4All, Embed4All
 import time
+import pytest

 def test_inference():
    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@@ -107,3 +108,9 @@ def test_embedding():
    #for i, value in enumerate(output):
        #print(f'Value at index {i}: {value}')
    assert len(output) == 384
+
+def test_empty_embedding():
+    text = ''
+    embedder = Embed4All()
+    with pytest.raises(ValueError):
+        output = embedder.embed(text)