implement local Nomic Embed via llama.cpp (#2086)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel
2024-03-13 18:09:24 -04:00
committed by GitHub
parent 171f4e488e
commit 406e88b59a
23 changed files with 799 additions and 1198 deletions

View File

@@ -10,7 +10,7 @@ import sys
 import threading
 from enum import Enum
 from queue import Queue
-from typing import Callable, Iterable, List
+from typing import Callable, Iterable, overload

 if sys.version_info >= (3, 9):
     import importlib.resources as importlib_resources
@@ -105,13 +105,18 @@ llmodel.llmodel_prompt.argtypes = [
 llmodel.llmodel_prompt.restype = None

-llmodel.llmodel_embedding.argtypes = [
+llmodel.llmodel_embed.argtypes = [
     ctypes.c_void_p,
-    ctypes.c_char_p,
+    ctypes.POINTER(ctypes.c_char_p),
     ctypes.POINTER(ctypes.c_size_t),
+    ctypes.c_char_p,
+    ctypes.c_int,
+    ctypes.c_bool,
+    ctypes.c_bool,
+    ctypes.POINTER(ctypes.c_char_p),
 ]

-llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
+llmodel.llmodel_embed.restype = ctypes.POINTER(ctypes.c_float)

 llmodel.llmodel_free_embedding.argtypes = [ctypes.POINTER(ctypes.c_float)]
 llmodel.llmodel_free_embedding.restype = None
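Reading the new argtypes together with the call site in the next hunk, llmodel_embed takes the model handle, an array of C strings, an out-parameter for the total float count, an optional task prefix, the requested dimensionality, the do_mean and atlas flags, and an error out-parameter. A minimal illustrative sketch (not part of this commit) of the NULL-terminated string-array convention the binding builds below with `len(text) + 1` slots:

import ctypes

texts = ["first document", "second document"]
# One extra slot: ctypes zero-initializes the array, so the final
# entry stays NULL and terminates the list for the C side.
c_texts = (ctypes.c_char_p * (len(texts) + 1))()
for i, t in enumerate(texts):
    c_texts[i] = t.encode()
assert c_texts[len(texts)] is None  # trailing NULL terminator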
@@ -287,16 +292,50 @@ class LLModel:
         self.context.repeat_last_n = repeat_last_n
         self.context.context_erase = context_erase

-    def generate_embedding(self, text: str) -> List[float]:
-        if not text:
-            raise ValueError("Text must not be None or empty")
+    @overload
+    def generate_embeddings(
+        self, text: str, prefix: str, dimensionality: int, do_mean: bool, atlas: bool,
+    ) -> list[float]: ...
+    @overload
+    def generate_embeddings(
+        self, text: list[str], prefix: str, dimensionality: int, do_mean: bool, atlas: bool,
+    ) -> list[list[float]]: ...
+
+    def generate_embeddings(self, text, prefix, dimensionality, do_mean, atlas):
+        if not text:
+            raise ValueError("text must not be None or empty")
+
+        single_text = isinstance(text, str)
+        if single_text:
+            text = [text]
+
+        # prepare input
         embedding_size = ctypes.c_size_t()
-        c_text = ctypes.c_char_p(text.encode())
-        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
-        embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
+        error = ctypes.c_char_p()
+        c_prefix = ctypes.c_char_p() if prefix is None else prefix.encode()
+        c_texts = (ctypes.c_char_p * (len(text) + 1))()
+        for i, t in enumerate(text):
+            c_texts[i] = t.encode()
+
+        # generate the embeddings
+        embedding_ptr = llmodel.llmodel_embed(
+            self.model, c_texts, ctypes.byref(embedding_size), c_prefix, dimensionality, do_mean, atlas,
+            ctypes.byref(error),
+        )
+
+        if not embedding_ptr:
+            msg = "(unknown error)" if error.value is None else error.value.decode()
+            raise RuntimeError(f'Failed to generate embeddings: {msg}')
+
+        # extract output
+        n_embd = embedding_size.value // len(text)
+        embedding_array = [
+            embedding_ptr[i:i + n_embd]
+            for i in range(0, embedding_size.value, n_embd)
+        ]
         llmodel.llmodel_free_embedding(embedding_ptr)
-        return list(embedding_array)
+
+        return embedding_array[0] if single_text else embedding_array

     def prompt_model(
         self,
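A brief usage sketch of the reworked low-level API, assuming `model` is an already-loaded LLModel instance; the argument values are illustrative, not taken from this commit:

# Single text -> list[float]; prefix=None skips the task prefix,
# dimensionality=-1 keeps the model's full output size.
vec = model.generate_embeddings("hello world", None, -1, False, False)

# Batch -> list[list[float]]; rows are sliced out of one flat C buffer,
# each embedding_size // len(texts) floats long.
vecs = model.generate_embeddings(["doc one", "doc two"], "search_document", -1, True, False)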

View File

@@ -10,7 +10,7 @@ import time
 import warnings
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union, overload

 import requests
 from requests.exceptions import ChunkedEncodingError
@@ -36,6 +36,8 @@ class Embed4All:
     Python class that handles embeddings for GPT4All.
     """

+    MIN_DIMENSIONALITY = 64
+
     def __init__(self, model_name: Optional[str] = None, n_threads: Optional[int] = None, **kwargs):
         """
         Constructor
@@ -45,17 +47,48 @@
         """
         self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)

-    def embed(self, text: str) -> List[float]:
+    @overload
+    def embed(
+        self, text: str, prefix: str | None = ..., dimensionality: int | None = ..., long_text_mode: str = ...,
+        atlas: bool = ...,
+    ) -> list[float]: ...
+    @overload
+    def embed(
+        self, text: list[str], prefix: str | None = ..., dimensionality: int | None = ..., long_text_mode: str = ...,
+        atlas: bool = ...,
+    ) -> list[list[float]]: ...
+
+    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="truncate", atlas=False):
         """
-        Generate an embedding.
+        Generate one or more embeddings.

         Args:
-            text: The text document to generate an embedding for.
+            text: A text or list of texts to generate embeddings for.
+            prefix: The model-specific prefix representing the embedding task, without the trailing colon. For Nomic
+                Embed this can be `search_query`, `search_document`, `classification`, or `clustering`.
+            dimensionality: The embedding dimension, for use with Matryoshka-capable models. Defaults to full-size.
+            long_text_mode: How to handle texts longer than the model can accept. One of `mean` or `truncate`.
+            atlas: Try to be fully compatible with the Atlas API. Currently, this means texts longer than 8192 tokens
+                with long_text_mode="mean" will raise an error. Disabled by default.

         Returns:
-            An embedding of your document of text.
+            An embedding or list of embeddings of your text(s).
         """
-        return self.gpt4all.model.generate_embedding(text)
+        if dimensionality is None:
+            dimensionality = -1
+        else:
+            if dimensionality <= 0:
+                raise ValueError(f'Dimensionality must be None or a positive integer, got {dimensionality}')
+            if dimensionality < self.MIN_DIMENSIONALITY:
+                warnings.warn(
+                    f'Dimensionality {dimensionality} is less than the suggested minimum of {self.MIN_DIMENSIONALITY}.'
+                    ' Performance may be degraded.'
+                )
+        try:
+            do_mean = {"mean": True, "truncate": False}[long_text_mode]
+        except KeyError:
+            raise ValueError(f"Long text mode must be one of 'mean' or 'truncate', got {long_text_mode!r}")
+        return self.gpt4all.model.generate_embeddings(text, prefix, dimensionality, do_mean, atlas)
class GPT4All:
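Putting the two files together, a hedged end-to-end sketch of the new public API. The model filename below is an assumption for illustration (any embedding GGUF the bindings support would do), and task prefixes apply to Nomic Embed models:

from gpt4all import Embed4All

embedder = Embed4All('nomic-embed-text-v1.f16.gguf')  # filename assumed for illustration

# Single query: Nomic Embed task prefix plus Matryoshka truncation to 256 dims.
query_vec = embedder.embed('what is gpt4all?', prefix='search_query', dimensionality=256)

# Batch of documents; over-long texts are mean-pooled chunk by chunk.
doc_vecs = embedder.embed(['doc one', 'doc two'], prefix='search_document', long_text_mode='mean')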