python: various fixes for GPT4All and Embed4All (#2130)

Key changes:
* honor empty system prompt argument
* current_chat_session is now read-only and defaults to None
* deprecate fallback prompt template for unknown models
* fix mistakes from #2086

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel 2024-03-15 11:49:58 -04:00 committed by GitHub
parent 53f109f519
commit 255568fb9a
7 changed files with 132 additions and 148 deletions
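
To make the key changes above concrete, here is a minimal usage sketch of the revised Python API. It is illustrative only (not part of the diff) and assumes `mistral-7b-instruct-v0.1.Q4_0.gguf` can be downloaded to the default model directory:

    from gpt4all import GPT4All

    model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")

    # current_chat_session is now a read-only property; outside a chat session it is None.
    assert model.current_chat_session is None

    # An explicitly empty system prompt ("") is now honored instead of falling back
    # to the model's configured default system prompt.
    with model.chat_session(system_prompt="", prompt_template="### Human:\n{0}\n\n### Assistant:\n"):
        reply = model.generate("Say hello in one word.", max_tokens=16)
        history = model.current_chat_session  # list of {"role": ..., "content": ...} dicts

    # Back outside the session, the history is cleared again.
    assert model.current_chat_session is None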

======== File 1 of 7 ========

@@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <numeric>
 #include <random>
 #include <sstream>
 #include <stdexcept>
@@ -345,7 +346,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->ctx_params.n_threads       = d_ptr->n_threads;
     d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;
-    if (m_supportsEmbedding)
+    if (isEmbedding)
         d_ptr->ctx_params.embeddings = true;
     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
@@ -612,22 +613,22 @@ struct EmbModelGroup {
     std::vector<const char *> names;
 };
-static const EmbModelSpec NOPREFIX_SPEC {nullptr, nullptr};
+static const EmbModelSpec NOPREFIX_SPEC {"", ""};
 static const EmbModelSpec NOMIC_SPEC {"search_document", "search_query", {"clustering", "classification"}};
 static const EmbModelSpec E5_SPEC {"passage", "query"};
 static const EmbModelSpec NOMIC_1_5_SPEC {
-    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]"
+    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]",
 };
 static const EmbModelSpec LLM_EMBEDDER_SPEC {
     "Represent this document for retrieval",
     "Represent this query for retrieving relevant documents",
 };
 static const EmbModelSpec BGE_SPEC {
-    nullptr, "Represent this sentence for searching relevant passages",
+    "", "Represent this sentence for searching relevant passages",
 };
 static const EmbModelSpec E5_MISTRAL_SPEC {
-    nullptr, "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
+    "", "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
 };
 static const EmbModelGroup EMBEDDING_MODEL_SPECS[] {
@@ -738,18 +739,20 @@ void LLamaModel::embedInternal(
     const llama_token bos_token = llama_token_bos(d_ptr->model);
     const llama_token eos_token = llama_token_eos(d_ptr->model);
-    assert(shouldAddBOS());
-    bool addEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
+    bool useBOS = shouldAddBOS();
+    bool useEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
     // no EOS, optional BOS
-    auto tokenize = [this, addEOS](std::string text, TokenString &tokens, bool addBOS) {
-        if (!text.empty() && text[0] != ' ')
+    auto tokenize = [this, useBOS, useEOS, eos_token](std::string text, TokenString &tokens, bool wantBOS) {
+        if (!text.empty() && text[0] != ' ') {
             text = ' ' + text; // normalize for SPM - our fork of llama.cpp doesn't add a space prefix
+        }
+        wantBOS &= useBOS;
         tokens.resize(text.length()+4);
-        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), addBOS, false);
-        assert(addEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-        tokens.resize(n_tokens - addEOS); // erase EOS/SEP
+        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
+        assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+        tokens.resize(n_tokens - useEOS); // erase EOS/SEP
     };
     // tokenize the texts
@@ -784,7 +787,7 @@ void LLamaModel::embedInternal(
     }
     const uint32_t n_batch = llama_n_batch(d_ptr->ctx);
-    const uint32_t max_len = n_batch - (prefixTokens.size() + addEOS); // minus BOS/CLS and EOS/SEP
+    const uint32_t max_len = n_batch - (prefixTokens.size() + useEOS); // minus BOS/CLS and EOS/SEP
     if (chunkOverlap >= max_len) {
         throw std::logic_error("max chunk length of " + std::to_string(max_len) + " is smaller than overlap of " +
                                std::to_string(chunkOverlap) + " tokens");

======== File 2 of 7 ========

@@ -317,10 +317,10 @@ are used instead of model-specific system and prompt templates:
 === "Output"
     ```
     default system template: ''
-    default prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    default prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     session system template: ''
-    session prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    session prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     ```

======== File 3 of 7 ========

@@ -1,7 +1,6 @@
 from __future__ import annotations
 import ctypes
-import logging
 import os
 import platform
 import re
@@ -17,8 +16,6 @@ if sys.version_info >= (3, 9):
 else:
     import importlib_resources
-logger: logging.Logger = logging.getLogger(__name__)
 # TODO: provide a config file to make this more robust
 MODEL_LIB_PATH = importlib_resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build"
@@ -130,7 +127,7 @@ llmodel.llmodel_set_implementation_search_path.restype = None
 llmodel.llmodel_threadCount.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_threadCount.restype = ctypes.c_int32
-llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).replace("\\", r"\\").encode())
+llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).encode())
 llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
 llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
@@ -323,7 +320,7 @@ class LLModel:
             ctypes.byref(error),
         )
-        if embedding_ptr.value is None:
+        if not embedding_ptr:
             msg = "(unknown error)" if error.value is None else error.value.decode()
             raise RuntimeError(f'Failed to generate embeddings: {msg}')
@@ -372,13 +369,6 @@ class LLModel:
         self.buffer.clear()
         self.buff_expecting_cont_bytes = 0
-        logger.info(
-            "LLModel.prompt_model -- prompt:\n"
-            + "%s\n"
-            + "===/LLModel.prompt_model -- prompt/===",
-            prompt,
-        )
         self._set_context(
             n_predict=n_predict,
             top_k=top_k,

======== File 4 of 7 ========

@@ -20,12 +20,9 @@ from urllib3.exceptions import IncompleteRead, ProtocolError
 from . import _pyllmodel
 # TODO: move to config
-DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
-DEFAULT_MODEL_CONFIG = {
-    "systemPrompt": "",
-    "promptTemplate": "### Human: \n{0}\n\n### Assistant:\n",
-}
+DEFAULT_MODEL_DIRECTORY = Path.home() / ".cache" / "gpt4all"
+DEFAULT_PROMPT_TEMPLATE = "### Human:\n{0}\n\n### Assistant:\n"
 ConfigType = Dict[str, str]
 MessageType = Dict[str, str]
@@ -34,18 +31,19 @@ MessageType = Dict[str, str]
 class Embed4All:
     """
     Python class that handles embeddings for GPT4All.
+    Args:
+        model_name: The name of the embedding model to use. Defaults to `all-MiniLM-L6-v2.gguf2.f16.gguf`.
+    All other arguments are passed to the GPT4All constructor. See its documentation for more info.
     """
     MIN_DIMENSIONALITY = 64
-    def __init__(self, model_name: Optional[str] = None, n_threads: Optional[int] = None, **kwargs):
-        """
-        Constructor
-        Args:
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-        """
-        self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)
+    def __init__(self, model_name: Optional[str] = None, **kwargs):
+        if model_name is None:
+            model_name = 'all-MiniLM-L6-v2.gguf2.f16.gguf'
+        self.gpt4all = GPT4All(model_name, **kwargs)
     @overload
     def embed(
@@ -58,7 +56,7 @@ class Embed4All:
         atlas: bool = ...,
     ) -> list[list[float]]: ...
-    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="truncate", atlas=False):
+    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="mean", atlas=False):
         """
         Generate one or more embeddings.
@@ -94,22 +92,6 @@ class Embed4All:
 class GPT4All:
     """
     Python class that handles instantiation, downloading, generation and chat with GPT4All models.
-    """
-    def __init__(
-        self,
-        model_name: str,
-        model_path: Optional[Union[str, os.PathLike[str]]] = None,
-        model_type: Optional[str] = None,
-        allow_download: bool = True,
-        n_threads: Optional[int] = None,
-        device: Optional[str] = "cpu",
-        n_ctx: int = 2048,
-        ngl: int = 100,
-        verbose: bool = False,
-    ):
-        """
-        Constructor
     Args:
         model_name: Name of GPT4All or custom model. Including ".gguf" file extension is optional but encouraged.
@@ -131,6 +113,19 @@ class GPT4All:
         ngl: Number of GPU layers to use (Vulkan)
         verbose: If True, print debug messages.
     """
+    def __init__(
+        self,
+        model_name: str,
+        model_path: Optional[Union[str, os.PathLike[str]]] = None,
+        model_type: Optional[str] = None,
+        allow_download: bool = True,
+        n_threads: Optional[int] = None,
+        device: Optional[str] = "cpu",
+        n_ctx: int = 2048,
+        ngl: int = 100,
+        verbose: bool = False,
+    ):
         self.model_type = model_type
         # Retrieve model and download if allowed
         self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
@@ -142,10 +137,13 @@ class GPT4All:
         if n_threads is not None:
             self.model.set_thread_count(n_threads)
-        self._is_chat_session_activated: bool = False
-        self.current_chat_session: List[MessageType] = empty_chat_session()
+        self._history: list[MessageType] | None = None
         self._current_prompt_template: str = "{0}"
+    @property
+    def current_chat_session(self) -> list[MessageType] | None:
+        return self._history
     @staticmethod
     def list_models() -> List[ConfigType]:
         """
@@ -159,8 +157,9 @@ class GPT4All:
             raise ValueError(f'Request failed: HTTP {resp.status_code} {resp.reason}')
         return resp.json()
-    @staticmethod
+    @classmethod
     def retrieve_model(
+        cls,
         model_name: str,
         model_path: Optional[Union[str, os.PathLike[str]]] = None,
         allow_download: bool = True,
@@ -183,58 +182,51 @@ class GPT4All:
         model_filename = append_extension_if_missing(model_name)
         # get the config for the model
-        config: ConfigType = DEFAULT_MODEL_CONFIG
+        config: ConfigType = {}
         if allow_download:
-            available_models = GPT4All.list_models()
+            available_models = cls.list_models()
             for m in available_models:
                 if model_filename == m["filename"]:
-                    config.update(m)
-                    config["systemPrompt"] = config["systemPrompt"].strip()
+                    tmpl = m.get("promptTemplate", DEFAULT_PROMPT_TEMPLATE)
                     # change to Python-style formatting
-                    config["promptTemplate"] = config["promptTemplate"].replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    m["promptTemplate"] = tmpl.replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    config.update(m)
                     break
         # Validate download directory
         if model_path is None:
             try:
                 os.makedirs(DEFAULT_MODEL_DIRECTORY, exist_ok=True)
-            except OSError as exc:
-                raise ValueError(
-                    f"Failed to create model download directory at {DEFAULT_MODEL_DIRECTORY}: {exc}. "
-                    "Please specify model_path."
-                )
+            except OSError as e:
+                raise RuntimeError("Failed to create model download directory") from e
             model_path = DEFAULT_MODEL_DIRECTORY
         else:
-            model_path = str(model_path).replace("\\", "\\\\")
+            model_path = Path(model_path)
-        if not os.path.exists(model_path):
-            raise ValueError(f"Invalid model directory: {model_path}")
+        if not model_path.exists():
+            raise FileNotFoundError(f"Model directory does not exist: {model_path!r}")
-        model_dest = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        if os.path.exists(model_dest):
-            config.pop("url", None)
-            config["path"] = model_dest
+        model_dest = model_path / model_filename
+        if model_dest.exists():
+            config["path"] = str(model_dest)
             if verbose:
-                print("Found model file at", model_dest, file=sys.stderr)
+                print(f"Found model file at {str(model_dest)!r}", file=sys.stderr)
-        # If model file does not exist, download
         elif allow_download:
-            url = config.pop("url", None)
-            config["path"] = GPT4All.download_model(model_filename, model_path, verbose=verbose, url=url)
+            # If model file does not exist, download
+            config["path"] = str(cls.download_model(model_filename, model_path, verbose=verbose, url=config.get("url")))
         else:
-            raise ValueError("Failed to retrieve model")
+            raise FileNotFoundError(f"Model file does not exist: {model_dest!r}")
         return config
     @staticmethod
     def download_model(
         model_filename: str,
-        model_path: Union[str, os.PathLike[str]],
+        model_path: str | os.PathLike[str],
         verbose: bool = True,
         url: Optional[str] = None,
-    ) -> str:
+    ) -> str | os.PathLike[str]:
         """
         Download model from https://gpt4all.io.
@@ -248,21 +240,17 @@ class GPT4All:
             Model file destination.
         """
-        def get_download_url(model_filename):
-            if url:
-                return url
-            return f"https://gpt4all.io/models/gguf/{model_filename}"
         # Download model
-        download_path = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        download_url = get_download_url(model_filename)
+        download_path = Path(model_path) / model_filename
+        if url is None:
+            url = f"https://gpt4all.io/models/gguf/{model_filename}"
         def make_request(offset=None):
             headers = {}
             if offset:
                 print(f"\nDownload interrupted, resuming from byte position {offset}", file=sys.stderr)
                 headers['Range'] = f'bytes={offset}-'  # resume incomplete response
-            response = requests.get(download_url, stream=True, headers=headers)
+            response = requests.get(url, stream=True, headers=headers)
             if response.status_code not in (200, 206):
                 raise ValueError(f'Request failed: HTTP {response.status_code} {response.reason}')
             if offset and (response.status_code != 206 or str(offset) not in response.headers.get('Content-Range', '')):
@@ -311,7 +299,7 @@ class GPT4All:
                 time.sleep(2)  # Sleep for a little bit so Windows can remove file lock
         if verbose:
-            print("Model downloaded at:", download_path, file=sys.stderr)
+            print(f"Model downloaded to {str(download_path)!r}", file=sys.stderr)
         return download_path
     def generate(
@@ -350,10 +338,6 @@ class GPT4All:
             Either the entire completion or a generator that yields the completion token by token.
         """
-        if re.search(r"%1(?![0-9])", self._current_prompt_template):
-            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
-                             "placeholder, please use '{0}' instead.")
         # Preparing the model request
         generate_kwargs: Dict[str, Any] = dict(
             temp=temp,
@@ -366,17 +350,17 @@ class GPT4All:
             n_predict=n_predict if n_predict is not None else max_tokens,
         )
-        if self._is_chat_session_activated:
+        if self._history is not None:
             # check if there is only one message, i.e. system prompt:
-            reset = len(self.current_chat_session) == 1
+            reset = len(self._history) == 1
             generate_kwargs["reset_context"] = reset
-            self.current_chat_session.append({"role": "user", "content": prompt})
+            self._history.append({"role": "user", "content": prompt})
             fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
             if fct_func is GPT4All._format_chat_prompt_template:
                 if reset:
                     # ingest system prompt
-                    self.model.prompt_model(self.current_chat_session[0]["content"], "%1",
+                    self.model.prompt_model(self._history[0]["content"], "%1",
                                             _pyllmodel.empty_response_callback,
                                             n_batch=n_batch, n_predict=0, special=True)
                 prompt_template = self._current_prompt_template.format("%1", "%2")
@@ -387,8 +371,8 @@ class GPT4All:
                 )
                 # special tokens won't be processed
                 prompt = self._format_chat_prompt_template(
-                    self.current_chat_session[-1:],
-                    self.current_chat_session[0]["content"] if reset else "",
+                    self._history[-1:],
+                    self._history[0]["content"] if reset else "",
                 )
                 prompt_template = "%1"
         else:
@@ -399,11 +383,11 @@ class GPT4All:
         output_collector: List[MessageType]
         output_collector = [
             {"content": ""}
-        ]  # placeholder for the self.current_chat_session if chat session is not activated
+        ]  # placeholder for the self._history if chat session is not activated
-        if self._is_chat_session_activated:
-            self.current_chat_session.append({"role": "assistant", "content": ""})
-            output_collector = self.current_chat_session
+        if self._history is not None:
+            self._history.append({"role": "assistant", "content": ""})
+            output_collector = self._history
         def _callback_wrapper(
             callback: _pyllmodel.ResponseCallbackType,
@@ -439,8 +423,8 @@ class GPT4All:
     @contextmanager
     def chat_session(
         self,
-        system_prompt: str = "",
-        prompt_template: str = "",
+        system_prompt: str | None = None,
+        prompt_template: str | None = None,
     ):
         """
         Context manager to hold an inference optimized chat session with a GPT4All model.
@@ -449,16 +433,27 @@ class GPT4All:
             system_prompt: An initial instruction for the model.
             prompt_template: Template for the prompts with {0} being replaced by the user message.
         """
-        # Code to acquire resource, e.g.:
-        self._is_chat_session_activated = True
-        self.current_chat_session = empty_chat_session(system_prompt or self.config["systemPrompt"])
-        self._current_prompt_template = prompt_template or self.config["promptTemplate"]
+        if system_prompt is None:
+            system_prompt = self.config.get("systemPrompt", "")
+        if prompt_template is None:
+            if (tmpl := self.config.get("promptTemplate")) is None:
+                warnings.warn("Use of a sideloaded model or allow_download=False without specifying a prompt template "
+                              "is deprecated. Defaulting to Alpaca.", DeprecationWarning)
+                tmpl = DEFAULT_PROMPT_TEMPLATE
+            prompt_template = tmpl
+        if re.search(r"%1(?![0-9])", prompt_template):
+            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
+                             "placeholder, please use '{0}' instead.")
+        self._history = [{"role": "system", "content": system_prompt}]
+        self._current_prompt_template = prompt_template
         try:
             yield self
         finally:
-            # Code to release resource, e.g.:
-            self._is_chat_session_activated = False
-            self.current_chat_session = empty_chat_session()
+            self._history = None
             self._current_prompt_template = "{0}"
@@ -496,10 +491,6 @@ class GPT4All:
         return full_prompt
-def empty_chat_session(system_prompt: str = "") -> List[MessageType]:
-    return [{"role": "system", "content": system_prompt}]
 def append_extension_if_missing(model_name):
     if not model_name.endswith((".bin", ".gguf")):
         model_name += ".gguf"
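
As an aside on the chat_session hunk above: with the fallback template now deprecated, opening a session for a model whose promptTemplate is unknown emits a DeprecationWarning and falls back to the Alpaca-style template. A rough sketch of that path, assuming a hypothetical sideloaded file `my-custom-model.gguf` already present in the model directory:

    import warnings
    from gpt4all import GPT4All

    model = GPT4All("my-custom-model.gguf", allow_download=False)  # hypothetical sideloaded model

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        with model.chat_session():  # no prompt_template given and none known for this model
            pass
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Passing an explicit template avoids the warning:
    with model.chat_session(prompt_template="### Human:\n{0}\n\n### Assistant:\n"):
        pass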

======== File 5 of 7 ========

@@ -115,13 +115,13 @@ def test_empty_embedding():
     output = embedder.embed(text)
 def test_download_model(tmp_path: Path):
-    import gpt4all.gpt4all
-    old_default_dir = gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY
-    gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = str(tmp_path)  # temporary pytest directory to ensure a download happens
+    from gpt4all import gpt4all
+    old_default_dir = gpt4all.DEFAULT_MODEL_DIRECTORY
+    gpt4all.DEFAULT_MODEL_DIRECTORY = tmp_path  # temporary pytest directory to ensure a download happens
     try:
         model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
         model_path = tmp_path / model.config['filename']
         assert model_path.absolute() == Path(model.config['path']).absolute()
         assert model_path.stat().st_size == int(model.config['filesize'])
     finally:
-        gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir
+        gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir

======== File 6 of 7 ========

@@ -24,7 +24,7 @@ const DEFAULT_LIBRARIES_DIRECTORY = librarySearchPaths.join(";");
 const DEFAULT_MODEL_CONFIG = {
     systemPrompt: "",
-    promptTemplate: "### Human: \n%1\n### Assistant:\n",
+    promptTemplate: "### Human:\n%1\n\n### Assistant:\n",
 }
 const DEFAULT_MODEL_LIST_URL = "https://gpt4all.io/models/models2.json";

======== File 7 of 7 ========

@@ -29,7 +29,7 @@
     "description": "<strong>Strong overall fast chat model</strong><br><ul><li>Fast responses</li><li>Chat based model</li><li>Trained by Mistral AI<li>Finetuned on OpenOrca dataset curated via <a href=\"https://atlas.nomic.ai/\">Nomic Atlas</a><li>Licensed for commercial use</ul>",
     "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf",
     "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-    "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
+    "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>\n"
 },
 {
     "order": "c",
@@ -42,7 +42,7 @@
     "parameters": "7 billion",
     "quant": "q4_0",
     "type": "Mistral",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>Strong overall fast instruction following model</strong><br><ul><li>Fast responses</li><li>Trained by Mistral AI<li>Uncensored</li><li>Licensed for commercial use</li></ul>",
     "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf",
     "promptTemplate": "[INST] %1 [/INST]"
@@ -58,7 +58,7 @@
     "parameters": "7 billion",
     "quant": "q4_0",
     "type": "Falcon",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>Very fast model with good quality</strong><br><ul><li>Fastest responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
     "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf",
     "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"
@@ -74,7 +74,7 @@
     "parameters": "7 billion",
     "quant": "q4_0",
     "type": "LLaMA2",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/orca-2-7b.Q4_0.gguf"
 },
@@ -89,7 +89,7 @@
     "parameters": "13 billion",
     "quant": "q4_0",
     "type": "LLaMA2",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/orca-2-13b.Q4_0.gguf"
 },
@@ -104,7 +104,7 @@
     "parameters": "13 billion",
     "quant": "q4_0",
     "type": "LLaMA2",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>Strong overall larger model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf"
 },
@@ -119,7 +119,7 @@
     "parameters": "13 billion",
     "quant": "q4_0",
     "type": "LLaMA2",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/nous-hermes-llama2-13b.Q4_0.gguf",
     "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"
@@ -135,7 +135,7 @@
     "parameters": "13 billion",
     "quant": "q4_0",
     "type": "LLaMA",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf"
 },
@@ -154,7 +154,7 @@
     "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/mpt-7b-chat-newbpe-q4_0.gguf",
     "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-    "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+    "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
 },
 {
     "order": "j",
@@ -170,7 +170,7 @@
     "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
     "url": "https://gpt4all.io/models/gguf/mpt-7b-chat.gguf4.Q4_0.gguf",
     "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-    "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+    "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
 },
 {
     "order": "k",
@@ -200,7 +200,7 @@
     "parameters": "3 billion",
     "quant": "q4_0",
     "type": "Replit",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "promptTemplate": "%1",
     "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use<li>WARNING: Not available for chat GUI</ul>",
     "url": "https://gpt4all.io/models/gguf/replit-code-v1_5-3b-newbpe-q4_0.gguf"
@@ -217,7 +217,7 @@
     "parameters": "7 billion",
     "quant": "q4_0",
     "type": "Starcoder",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "promptTemplate": "%1",
     "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</ul>",
     "url": "https://gpt4all.io/models/gguf/starcoder-newbpe-q4_0.gguf"
@@ -234,7 +234,7 @@
     "parameters": "7 billion",
     "quant": "q4_0",
     "type": "LLaMA",
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "promptTemplate": "%1",
     "description": "<strong>Trained on collection of Python and TypeScript</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</li>",
     "url": "https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf"
@@ -253,7 +253,7 @@
     "quant": "f16",
     "type": "Bert",
     "embeddingModel": true,
-    "systemPrompt": " ",
+    "systemPrompt": "",
     "description": "<strong>LocalDocs text embeddings model</strong><br><ul><li>For use with LocalDocs feature<li>Used for retrieval augmented generation (RAG)",
     "url": "https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf"
 },