Fixup bert python bindings.

2025-08-15 14:43:19 +00:00 · 2023-07-13 17:57:48 -04:00 · 2023-07-13 17:57:48 -04:00 · ee4186d579
commit ee4186d579
parent 6200900677
5 changed files with 37 additions and 23 deletions
--- a/gpt4all-backend/bert.cpp
+++ b/gpt4all-backend/bert.cpp
@@ -14,6 +14,7 @@
 #include <regex>
 #include <thread>
 #include <algorithm>
+#include <numeric>

 //#define DEBUG_BERT

@@ -462,11 +463,6 @@ void bert_eval(
    ggml_set_f32(sum, 1.0f / N);
    inpL = ggml_mul_mat(ctx0, inpL, sum);

-    // normalizer
-    ggml_tensor *length = ggml_sqrt(ctx0,
-                                    ggml_sum(ctx0, ggml_sqr(ctx0, inpL)));
-    inpL = ggml_scale(ctx0, inpL, ggml_div(ctx0, ggml_new_f32(ctx0, 1.0f), length));
-
    ggml_tensor *output = inpL;
    // run the computation
    ggml_build_forward_expand(&gf, output);
@@ -987,6 +983,9 @@ std::vector<float> Bert::embedding(const std::string &text)
    }

    std::transform(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), [embeddingsSumTotal](float num){ return num / embeddingsSumTotal; });
+    double magnitude = std::sqrt(std::inner_product(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), 0.0));
+    for (auto &value : embeddingsSum)
+        value /= magnitude;
    std::vector<float> finalEmbeddings(embeddingsSum.begin(), embeddingsSum.end());
    return finalEmbeddings;
 }
--- a/gpt4all-bindings/python/gpt4all/__init__.py
+++ b/gpt4all-bindings/python/gpt4all/__init__.py
@@ -1,2 +1,2 @@
-from .gpt4all import GPT4All, embed  # noqa
+from .gpt4all import GPT4All, Embedder  # noqa
 from .pyllmodel import LLModel  # noqa
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -15,20 +15,26 @@ from . import pyllmodel
 # TODO: move to config
 DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")

-def embed(
-    text: str
-) -> list[float]:
-    """
-    Generate an embedding for all GPT4All.
+class Embedder:
+    def __init__(
+        self
+    ):
+        self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=8)

-    Args:
-        text: The text document to generate an embedding for.
+    def embed(
+        self,
+        text: str
+    ) -> list[float]:
+        """
+        Generate an embedding for all GPT4All.

-    Returns:
-        An embedding of your document of text.
-    """
-    model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
-    return model.model.generate_embedding(text)
+        Args:
+            text: The text document to generate an embedding for.
+
+        Returns:
+            An embedding of your document of text.
+        """
+        return self.gpt4all.model.generate_embedding(text)

 class GPT4All:
    """
--- a/gpt4all-bindings/python/gpt4all/pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/pyllmodel.py
@@ -253,7 +253,7 @@ class LLModel:
        embedding_size = ctypes.c_size_t()
        c_text = ctypes.c_char_p(text.encode('utf-8'))
        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
-        embedding_array = ctypes.cast(embedding_ptr, ctypes.POINTER(ctypes.c_float * embedding_size.value)).contents
+        embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
        llmodel.llmodel_free_embedding(embedding_ptr)
        return list(embedding_array)

--- a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py