mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-06 19:10:15 +00:00
implement local Nomic Embed via llama.cpp (#2086)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
@@ -10,7 +10,7 @@ import sys
|
||||
import threading
|
||||
from enum import Enum
|
||||
from queue import Queue
|
||||
from typing import Callable, Iterable, List
|
||||
from typing import Callable, Iterable, overload
|
||||
|
||||
if sys.version_info >= (3, 9):
|
||||
import importlib.resources as importlib_resources
|
||||
@@ -105,13 +105,18 @@ llmodel.llmodel_prompt.argtypes = [
|
||||
|
||||
llmodel.llmodel_prompt.restype = None
|
||||
|
||||
# C signature for llmodel_embed:
#   (model, texts, out_token_count, prefix, dimensionality, do_mean, atlas,
#    out_error) -> float*
# Argument meanings are grounded in the Python caller (generate_embeddings),
# which passes them in exactly this order.
llmodel.llmodel_embed.argtypes = [
    ctypes.c_void_p,                  # model handle
    ctypes.POINTER(ctypes.c_char_p),  # NULL-terminated array of input texts
    ctypes.POINTER(ctypes.c_size_t),  # out: total number of floats returned
    ctypes.c_char_p,                  # task prefix, or NULL for none
    ctypes.c_int,                     # dimensionality (-1 = model's full size)
    ctypes.c_bool,                    # do_mean: mean-pool long texts
    ctypes.c_bool,                    # atlas: strict Atlas API compatibility
    ctypes.POINTER(ctypes.c_char_p),  # out: error message on failure
]
llmodel.llmodel_embed.restype = ctypes.POINTER(ctypes.c_float)

# The embedding buffer is allocated on the C side; callers copy the floats
# out and then release it with llmodel_free_embedding.
llmodel.llmodel_free_embedding.argtypes = [ctypes.POINTER(ctypes.c_float)]
llmodel.llmodel_free_embedding.restype = None
@@ -287,16 +292,50 @@ class LLModel:
|
||||
self.context.repeat_last_n = repeat_last_n
|
||||
self.context.context_erase = context_erase
|
||||
|
||||
@overload
def generate_embeddings(
    self, text: str, prefix: str, dimensionality: int, do_mean: bool, atlas: bool,
) -> list[float]: ...
@overload
def generate_embeddings(
    self, text: list[str], prefix: str, dimensionality: int, do_mean: bool, atlas: bool,
) -> list[list[float]]: ...

def generate_embeddings(self, text, prefix, dimensionality, do_mean, atlas):
    """
    Generate one embedding per input text via the C-side llmodel_embed.

    Args:
        text: A single text or a list of texts to embed.
        prefix: Model-specific task prefix, or None for no prefix.
        dimensionality: Output embedding size, or -1 for the model's full size.
        do_mean: Mean-pool chunks of long texts instead of truncating.
        atlas: Try to be fully compatible with the Atlas API.

    Returns:
        A single embedding (list of floats) if `text` was a str, otherwise a
        list of embeddings, one per input text.

    Raises:
        ValueError: If `text` is None or empty.
        RuntimeError: If the backend reports an embedding failure.
    """
    if not text:
        raise ValueError("text must not be None or empty")

    # Normalize to a list so a single str and a batch share one code path.
    single_text = isinstance(text, str)
    if single_text:
        text = [text]

    # prepare input: out-params plus a NULL-terminated array of C strings
    embedding_size = ctypes.c_size_t()
    error = ctypes.c_char_p()
    c_prefix = ctypes.c_char_p() if prefix is None else prefix.encode()
    c_texts = (ctypes.c_char_p * (len(text) + 1))()  # final slot stays NULL
    for i, t in enumerate(text):
        c_texts[i] = t.encode()

    # generate the embeddings
    embedding_ptr = llmodel.llmodel_embed(
        self.model, c_texts, ctypes.byref(embedding_size), c_prefix, dimensionality, do_mean, atlas,
        ctypes.byref(error),
    )

    # NULL check: a POINTER(c_float) result has no `.value` attribute, so
    # test pointer truthiness (False for NULL) instead.
    if not embedding_ptr:
        msg = "(unknown error)" if error.value is None else error.value.decode()
        raise RuntimeError(f'Failed to generate embeddings: {msg}')

    # extract output: the C buffer holds len(text) embeddings of n_embd
    # floats back-to-back; copy them into Python lists before freeing.
    n_embd = embedding_size.value // len(text)
    embedding_array = [
        embedding_ptr[i:i + n_embd]
        for i in range(0, embedding_size.value, n_embd)
    ]
    llmodel.llmodel_free_embedding(embedding_ptr)

    return embedding_array[0] if single_text else embedding_array
|
||||
|
||||
def prompt_model(
|
||||
self,
|
||||
|
@@ -10,7 +10,7 @@ import time
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||
from typing import Any, Dict, Iterable, List, Optional, Union, overload
|
||||
|
||||
import requests
|
||||
from requests.exceptions import ChunkedEncodingError
|
||||
@@ -36,6 +36,8 @@ class Embed4All:
|
||||
Python class that handles embeddings for GPT4All.
|
||||
"""
|
||||
|
||||
MIN_DIMENSIONALITY = 64
|
||||
|
||||
def __init__(self, model_name: Optional[str] = None, n_threads: Optional[int] = None, **kwargs):
|
||||
"""
|
||||
Constructor
|
||||
@@ -45,17 +47,48 @@ class Embed4All:
|
||||
"""
|
||||
self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)
|
||||
|
||||
@overload
def embed(
    self, text: str, prefix: str | None = ..., dimensionality: int | None = ..., long_text_mode: str = ...,
    atlas: bool = ...,
) -> list[float]: ...
@overload
def embed(
    self, text: list[str], prefix: str | None = ..., dimensionality: int | None = ..., long_text_mode: str = ...,
    atlas: bool = ...,
) -> list[list[float]]: ...

def embed(self, text, prefix=None, dimensionality=None, long_text_mode="truncate", atlas=False):
    """
    Generate one or more embeddings.

    Args:
        text: A text or list of texts to generate embeddings for.
        prefix: The model-specific prefix representing the embedding task, without the trailing colon. For Nomic
            Embed this can be `search_query`, `search_document`, `classification`, or `clustering`.
        dimensionality: The embedding dimension, for use with Matryoshka-capable models. Defaults to full-size.
        long_text_mode: How to handle texts longer than the model can accept. One of `mean` or `truncate`.
        atlas: Try to be fully compatible with the Atlas API. Currently, this means texts longer than 8192 tokens
            with long_text_mode="mean" will raise an error. Disabled by default.

    Returns:
        An embedding or list of embeddings of your text(s).

    Raises:
        ValueError: If `dimensionality` is not None or a positive integer, or
            `long_text_mode` is not one of 'mean' or 'truncate'.
    """
    if dimensionality is None:
        # -1 tells the backend to use the model's full embedding size.
        dimensionality = -1
    else:
        if dimensionality <= 0:
            raise ValueError(f'Dimensionality must be None or a positive integer, got {dimensionality}')
        if dimensionality < self.MIN_DIMENSIONALITY:
            # Allowed, but Matryoshka truncation below this point degrades
            # embedding quality noticeably.
            warnings.warn(
                f'Dimensionality {dimensionality} is less than the suggested minimum of {self.MIN_DIMENSIONALITY}.'
                ' Performance may be degraded.'
            )
    try:
        do_mean = {"mean": True, "truncate": False}[long_text_mode]
    except KeyError:
        raise ValueError(f"Long text mode must be one of 'mean' or 'truncate', got {long_text_mode!r}")
    return self.gpt4all.model.generate_embeddings(text, prefix, dimensionality, do_mean, atlas)
|
||||
|
||||
|
||||
class GPT4All:
|
||||
|
Reference in New Issue
Block a user