python: various fixes for GPT4All and Embed4All (#2130)

Key changes:
* honor empty system prompt argument
* current_chat_session is now read-only and defaults to None
* deprecate fallback prompt template for unknown models
* fix mistakes from #2086
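
A minimal sketch of the resulting Python API behavior (the model filename is illustrative):

    from gpt4all import GPT4All

    model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")
    assert model.current_chat_session is None   # read-only property; None outside a session

    with model.chat_session(system_prompt=""):  # an empty system prompt is now honored as-is
        model.generate("Hello", max_tokens=16)
        print(model.current_chat_session)       # system, user, and assistant messages

    # Sideloaded models (or allow_download=False) without an explicit prompt_template now
    # emit a DeprecationWarning and fall back to the Alpaca-style DEFAULT_PROMPT_TEMPLATE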

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel authored on 2024-03-15 11:49:58 -04:00, committed by GitHub
parent 53f109f519
commit 255568fb9a
7 changed files with 132 additions and 148 deletions


@@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <numeric>
 #include <random>
 #include <sstream>
 #include <stdexcept>
@@ -345,7 +346,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->ctx_params.n_threads = d_ptr->n_threads;
     d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;
-    if (m_supportsEmbedding)
+    if (isEmbedding)
         d_ptr->ctx_params.embeddings = true;
     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
@@ -612,22 +613,22 @@ struct EmbModelGroup {
     std::vector<const char *> names;
 };
-static const EmbModelSpec NOPREFIX_SPEC {nullptr, nullptr};
+static const EmbModelSpec NOPREFIX_SPEC {"", ""};
 static const EmbModelSpec NOMIC_SPEC {"search_document", "search_query", {"clustering", "classification"}};
 static const EmbModelSpec E5_SPEC {"passage", "query"};
 static const EmbModelSpec NOMIC_1_5_SPEC {
-    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]"
+    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]",
 };
 static const EmbModelSpec LLM_EMBEDDER_SPEC {
     "Represent this document for retrieval",
     "Represent this query for retrieving relevant documents",
 };
 static const EmbModelSpec BGE_SPEC {
-    nullptr, "Represent this sentence for searching relevant passages",
+    "", "Represent this sentence for searching relevant passages",
 };
 static const EmbModelSpec E5_MISTRAL_SPEC {
-    nullptr, "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
+    "", "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
 };
 static const EmbModelGroup EMBEDDING_MODEL_SPECS[] {
@@ -738,18 +739,20 @@ void LLamaModel::embedInternal(
     const llama_token bos_token = llama_token_bos(d_ptr->model);
     const llama_token eos_token = llama_token_eos(d_ptr->model);
-    assert(shouldAddBOS());
-    bool addEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
+    bool useBOS = shouldAddBOS();
+    bool useEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
     // no EOS, optional BOS
-    auto tokenize = [this, addEOS](std::string text, TokenString &tokens, bool addBOS) {
-        if (!text.empty() && text[0] != ' ')
+    auto tokenize = [this, useBOS, useEOS, eos_token](std::string text, TokenString &tokens, bool wantBOS) {
+        if (!text.empty() && text[0] != ' ') {
             text = ' ' + text; // normalize for SPM - our fork of llama.cpp doesn't add a space prefix
+        }
+        wantBOS &= useBOS;
         tokens.resize(text.length()+4);
-        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), addBOS, false);
-        assert(addEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-        tokens.resize(n_tokens - addEOS); // erase EOS/SEP
+        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
+        assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+        tokens.resize(n_tokens - useEOS); // erase EOS/SEP
     };
     // tokenize the texts
@@ -784,7 +787,7 @@
     }
     const uint32_t n_batch = llama_n_batch(d_ptr->ctx);
-    const uint32_t max_len = n_batch - (prefixTokens.size() + addEOS); // minus BOS/CLS and EOS/SEP
+    const uint32_t max_len = n_batch - (prefixTokens.size() + useEOS); // minus BOS/CLS and EOS/SEP
     if (chunkOverlap >= max_len) {
         throw std::logic_error("max chunk length of " + std::to_string(max_len) + " is smaller than overlap of " +
                                std::to_string(chunkOverlap) + " tokens");


@@ -317,10 +317,10 @@ are used instead of model-specific system and prompt templates:
 === "Output"
     ```
     default system template: ''
-    default prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    default prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     session system template: ''
-    session prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    session prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     ```


@@ -1,7 +1,6 @@
 from __future__ import annotations
 import ctypes
-import logging
 import os
 import platform
 import re
@@ -17,8 +16,6 @@ if sys.version_info >= (3, 9):
 else:
     import importlib_resources
-logger: logging.Logger = logging.getLogger(__name__)
 # TODO: provide a config file to make this more robust
 MODEL_LIB_PATH = importlib_resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build"
@@ -130,7 +127,7 @@ llmodel.llmodel_set_implementation_search_path.restype = None
 llmodel.llmodel_threadCount.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_threadCount.restype = ctypes.c_int32
-llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).replace("\\", r"\\").encode())
+llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).encode())
 llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
 llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
@@ -323,7 +320,7 @@ class LLModel:
             ctypes.byref(error),
         )
-        if embedding_ptr.value is None:
+        if not embedding_ptr:
             msg = "(unknown error)" if error.value is None else error.value.decode()
             raise RuntimeError(f'Failed to generate embeddings: {msg}')
@@ -372,13 +369,6 @@ class LLModel:
         self.buffer.clear()
         self.buff_expecting_cont_bytes = 0
-        logger.info(
-            "LLModel.prompt_model -- prompt:\n"
-            + "%s\n"
-            + "===/LLModel.prompt_model -- prompt/===",
-            prompt,
-        )
         self._set_context(
             n_predict=n_predict,
             top_k=top_k,


@@ -20,12 +20,9 @@ from urllib3.exceptions import IncompleteRead, ProtocolError
 from . import _pyllmodel
 # TODO: move to config
-DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
-DEFAULT_MODEL_CONFIG = {
-    "systemPrompt": "",
-    "promptTemplate": "### Human: \n{0}\n\n### Assistant:\n",
-}
+DEFAULT_MODEL_DIRECTORY = Path.home() / ".cache" / "gpt4all"
+DEFAULT_PROMPT_TEMPLATE = "### Human:\n{0}\n\n### Assistant:\n"
 ConfigType = Dict[str, str]
 MessageType = Dict[str, str]
@@ -34,18 +31,19 @@ MessageType = Dict[str, str]
 class Embed4All:
     """
     Python class that handles embeddings for GPT4All.
+    Args:
+        model_name: The name of the embedding model to use. Defaults to `all-MiniLM-L6-v2.gguf2.f16.gguf`.
+    All other arguments are passed to the GPT4All constructor. See its documentation for more info.
     """
     MIN_DIMENSIONALITY = 64
-    def __init__(self, model_name: Optional[str] = None, n_threads: Optional[int] = None, **kwargs):
-        """
-        Constructor
-        Args:
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-        """
-        self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)
+    def __init__(self, model_name: Optional[str] = None, **kwargs):
+        if model_name is None:
+            model_name = 'all-MiniLM-L6-v2.gguf2.f16.gguf'
+        self.gpt4all = GPT4All(model_name, **kwargs)
     @overload
     def embed(
@@ -58,7 +56,7 @@ class Embed4All:
         atlas: bool = ...,
     ) -> list[list[float]]: ...
-    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="truncate", atlas=False):
+    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="mean", atlas=False):
         """
         Generate one or more embeddings.
@@ -94,6 +92,26 @@ class Embed4All:
 class GPT4All:
     """
     Python class that handles instantiation, downloading, generation and chat with GPT4All models.
+    Args:
+        model_name: Name of GPT4All or custom model. Including ".gguf" file extension is optional but encouraged.
+        model_path: Path to directory containing model file or, if file does not exist, where to download model.
+            Default is None, in which case models will be stored in `~/.cache/gpt4all/`.
+        model_type: Model architecture. This argument currently does not have any functionality and is just used as
+            descriptive identifier for user. Default is None.
+        allow_download: Allow API to download models from gpt4all.io. Default is True.
+        n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
+        device: The processing unit on which the GPT4All model will run. It can be set to:
+            - "cpu": Model will run on the central processing unit.
+            - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
+            - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
+            Alternatively, a specific GPU name can also be provided, and the model will run on the GPU that matches the name if it's available.
+            Default is "cpu".
+            Note: If a selected GPU device does not have sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the model.
+        n_ctx: Maximum size of context window
+        ngl: Number of GPU layers to use (Vulkan)
+        verbose: If True, print debug messages.
     """
     def __init__(
@@ -108,29 +126,6 @@ class GPT4All:
         ngl: int = 100,
         verbose: bool = False,
     ):
-        """
-        Constructor
-        Args:
-            model_name: Name of GPT4All or custom model. Including ".gguf" file extension is optional but encouraged.
-            model_path: Path to directory containing model file or, if file does not exist, where to download model.
-                Default is None, in which case models will be stored in `~/.cache/gpt4all/`.
-            model_type: Model architecture. This argument currently does not have any functionality and is just used as
-                descriptive identifier for user. Default is None.
-            allow_download: Allow API to download models from gpt4all.io. Default is True.
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-            device: The processing unit on which the GPT4All model will run. It can be set to:
-                - "cpu": Model will run on the central processing unit.
-                - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
-                - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
-                Alternatively, a specific GPU name can also be provided, and the model will run on the GPU that matches the name if it's available.
-                Default is "cpu".
-                Note: If a selected GPU device does not have sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All instance will be rendered invalid. It's advised to ensure the device has enough memory before initiating the model.
-            n_ctx: Maximum size of context window
-            ngl: Number of GPU layers to use (Vulkan)
-            verbose: If True, print debug messages.
-        """
         self.model_type = model_type
         # Retrieve model and download if allowed
         self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
@@ -142,10 +137,13 @@ class GPT4All:
         if n_threads is not None:
             self.model.set_thread_count(n_threads)
-        self._is_chat_session_activated: bool = False
-        self.current_chat_session: List[MessageType] = empty_chat_session()
+        self._history: list[MessageType] | None = None
         self._current_prompt_template: str = "{0}"
+    @property
+    def current_chat_session(self) -> list[MessageType] | None:
+        return self._history
     @staticmethod
     def list_models() -> List[ConfigType]:
         """
@@ -159,8 +157,9 @@ class GPT4All:
            raise ValueError(f'Request failed: HTTP {resp.status_code} {resp.reason}')
        return resp.json()
-    @staticmethod
+    @classmethod
     def retrieve_model(
+        cls,
         model_name: str,
         model_path: Optional[Union[str, os.PathLike[str]]] = None,
         allow_download: bool = True,
@@ -183,58 +182,51 @@ class GPT4All:
         model_filename = append_extension_if_missing(model_name)
         # get the config for the model
-        config: ConfigType = DEFAULT_MODEL_CONFIG
+        config: ConfigType = {}
         if allow_download:
-            available_models = GPT4All.list_models()
+            available_models = cls.list_models()
             for m in available_models:
                 if model_filename == m["filename"]:
-                    config.update(m)
-                    config["systemPrompt"] = config["systemPrompt"].strip()
+                    tmpl = m.get("promptTemplate", DEFAULT_PROMPT_TEMPLATE)
                     # change to Python-style formatting
-                    config["promptTemplate"] = config["promptTemplate"].replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    m["promptTemplate"] = tmpl.replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    config.update(m)
                     break
         # Validate download directory
         if model_path is None:
             try:
                 os.makedirs(DEFAULT_MODEL_DIRECTORY, exist_ok=True)
-            except OSError as exc:
-                raise ValueError(
-                    f"Failed to create model download directory at {DEFAULT_MODEL_DIRECTORY}: {exc}. "
-                    "Please specify model_path."
-                )
+            except OSError as e:
+                raise RuntimeError("Failed to create model download directory") from e
             model_path = DEFAULT_MODEL_DIRECTORY
         else:
-            model_path = str(model_path).replace("\\", "\\\\")
-        if not os.path.exists(model_path):
-            raise ValueError(f"Invalid model directory: {model_path}")
-        model_dest = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        if os.path.exists(model_dest):
-            config.pop("url", None)
-            config["path"] = model_dest
+            model_path = Path(model_path)
+        if not model_path.exists():
+            raise FileNotFoundError(f"Model directory does not exist: {model_path!r}")
+        model_dest = model_path / model_filename
+        if model_dest.exists():
+            config["path"] = str(model_dest)
             if verbose:
-                print("Found model file at", model_dest, file=sys.stderr)
+                print(f"Found model file at {str(model_dest)!r}", file=sys.stderr)
-        # If model file does not exist, download
         elif allow_download:
-            url = config.pop("url", None)
-            config["path"] = GPT4All.download_model(model_filename, model_path, verbose=verbose, url=url)
+            # If model file does not exist, download
+            config["path"] = str(cls.download_model(model_filename, model_path, verbose=verbose, url=config.get("url")))
         else:
-            raise ValueError("Failed to retrieve model")
+            raise FileNotFoundError(f"Model file does not exist: {model_dest!r}")
         return config
     @staticmethod
     def download_model(
         model_filename: str,
-        model_path: Union[str, os.PathLike[str]],
+        model_path: str | os.PathLike[str],
         verbose: bool = True,
         url: Optional[str] = None,
-    ) -> str:
+    ) -> str | os.PathLike[str]:
         """
         Download model from https://gpt4all.io.
@@ -248,21 +240,17 @@ class GPT4All:
             Model file destination.
         """
-        def get_download_url(model_filename):
-            if url:
-                return url
-            return f"https://gpt4all.io/models/gguf/{model_filename}"
         # Download model
-        download_path = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        download_url = get_download_url(model_filename)
+        download_path = Path(model_path) / model_filename
+        if url is None:
+            url = f"https://gpt4all.io/models/gguf/{model_filename}"
         def make_request(offset=None):
             headers = {}
             if offset:
                 print(f"\nDownload interrupted, resuming from byte position {offset}", file=sys.stderr)
                 headers['Range'] = f'bytes={offset}-'  # resume incomplete response
-            response = requests.get(download_url, stream=True, headers=headers)
+            response = requests.get(url, stream=True, headers=headers)
             if response.status_code not in (200, 206):
                 raise ValueError(f'Request failed: HTTP {response.status_code} {response.reason}')
             if offset and (response.status_code != 206 or str(offset) not in response.headers.get('Content-Range', '')):
@@ -311,7 +299,7 @@ class GPT4All:
                 time.sleep(2)  # Sleep for a little bit so Windows can remove file lock
         if verbose:
-            print("Model downloaded at:", download_path, file=sys.stderr)
+            print(f"Model downloaded to {str(download_path)!r}", file=sys.stderr)
         return download_path
     def generate(
@@ -350,10 +338,6 @@ class GPT4All:
             Either the entire completion or a generator that yields the completion token by token.
         """
-        if re.search(r"%1(?![0-9])", self._current_prompt_template):
-            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
-                             "placeholder, please use '{0}' instead.")
         # Preparing the model request
         generate_kwargs: Dict[str, Any] = dict(
             temp=temp,
@@ -366,17 +350,17 @@ class GPT4All:
             n_predict=n_predict if n_predict is not None else max_tokens,
         )
-        if self._is_chat_session_activated:
+        if self._history is not None:
             # check if there is only one message, i.e. system prompt:
-            reset = len(self.current_chat_session) == 1
+            reset = len(self._history) == 1
             generate_kwargs["reset_context"] = reset
-            self.current_chat_session.append({"role": "user", "content": prompt})
+            self._history.append({"role": "user", "content": prompt})
             fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
             if fct_func is GPT4All._format_chat_prompt_template:
                 if reset:
                     # ingest system prompt
-                    self.model.prompt_model(self.current_chat_session[0]["content"], "%1",
+                    self.model.prompt_model(self._history[0]["content"], "%1",
                                             _pyllmodel.empty_response_callback,
                                             n_batch=n_batch, n_predict=0, special=True)
                 prompt_template = self._current_prompt_template.format("%1", "%2")
@@ -387,8 +371,8 @@ class GPT4All:
                 )
                 # special tokens won't be processed
                 prompt = self._format_chat_prompt_template(
-                    self.current_chat_session[-1:],
-                    self.current_chat_session[0]["content"] if reset else "",
+                    self._history[-1:],
+                    self._history[0]["content"] if reset else "",
                 )
                 prompt_template = "%1"
         else:
@@ -399,11 +383,11 @@ class GPT4All:
         output_collector: List[MessageType]
         output_collector = [
             {"content": ""}
-        ]  # placeholder for the self.current_chat_session if chat session is not activated
+        ]  # placeholder for the self._history if chat session is not activated
-        if self._is_chat_session_activated:
-            self.current_chat_session.append({"role": "assistant", "content": ""})
-            output_collector = self.current_chat_session
+        if self._history is not None:
+            self._history.append({"role": "assistant", "content": ""})
+            output_collector = self._history
         def _callback_wrapper(
             callback: _pyllmodel.ResponseCallbackType,
@@ -439,8 +423,8 @@ class GPT4All:
     @contextmanager
     def chat_session(
         self,
-        system_prompt: str = "",
-        prompt_template: str = "",
+        system_prompt: str | None = None,
+        prompt_template: str | None = None,
     ):
         """
         Context manager to hold an inference optimized chat session with a GPT4All model.
@@ -449,16 +433,27 @@ class GPT4All:
            system_prompt: An initial instruction for the model.
            prompt_template: Template for the prompts with {0} being replaced by the user message.
         """
-        # Code to acquire resource, e.g.:
-        self._is_chat_session_activated = True
-        self.current_chat_session = empty_chat_session(system_prompt or self.config["systemPrompt"])
-        self._current_prompt_template = prompt_template or self.config["promptTemplate"]
+        if system_prompt is None:
+            system_prompt = self.config.get("systemPrompt", "")
+        if prompt_template is None:
+            if (tmpl := self.config.get("promptTemplate")) is None:
+                warnings.warn("Use of a sideloaded model or allow_download=False without specifying a prompt template "
+                              "is deprecated. Defaulting to Alpaca.", DeprecationWarning)
+                tmpl = DEFAULT_PROMPT_TEMPLATE
+            prompt_template = tmpl
+        if re.search(r"%1(?![0-9])", prompt_template):
+            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
+                             "placeholder, please use '{0}' instead.")
+        self._history = [{"role": "system", "content": system_prompt}]
+        self._current_prompt_template = prompt_template
         try:
             yield self
         finally:
-            # Code to release resource, e.g.:
-            self._is_chat_session_activated = False
-            self.current_chat_session = empty_chat_session()
+            self._history = None
            self._current_prompt_template = "{0}"
@@ -496,10 +491,6 @@ class GPT4All:
        return full_prompt
-def empty_chat_session(system_prompt: str = "") -> List[MessageType]:
-    return [{"role": "system", "content": system_prompt}]
 def append_extension_if_missing(model_name):
     if not model_name.endswith((".bin", ".gguf")):
         model_name += ".gguf"
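
The constructor arguments documented in the class-level docstring above are unchanged in behavior; a brief usage sketch (the model names and thread count below are illustrative, not introduced by this commit):

    from gpt4all import GPT4All, Embed4All

    # Run a chat model on the best available GPU with a larger context window.
    model = GPT4All("mistral-7b-openorca.gguf2.Q4_0.gguf", device="gpu", n_ctx=4096, ngl=100)

    # Embed4All now defaults to all-MiniLM-L6-v2.gguf2.f16.gguf and forwards extra
    # keyword arguments (e.g. n_threads) to the GPT4All constructor.
    embedder = Embed4All(n_threads=4)
    vector = embedder.embed("The quick brown fox jumps over the lazy dog", long_text_mode="mean")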


@@ -115,13 +115,13 @@ def test_empty_embedding():
     output = embedder.embed(text)
 def test_download_model(tmp_path: Path):
-    import gpt4all.gpt4all
-    old_default_dir = gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY
-    gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = str(tmp_path)  # temporary pytest directory to ensure a download happens
+    from gpt4all import gpt4all
+    old_default_dir = gpt4all.DEFAULT_MODEL_DIRECTORY
+    gpt4all.DEFAULT_MODEL_DIRECTORY = tmp_path  # temporary pytest directory to ensure a download happens
     try:
         model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
         model_path = tmp_path / model.config['filename']
         assert model_path.absolute() == Path(model.config['path']).absolute()
         assert model_path.stat().st_size == int(model.config['filesize'])
     finally:
-        gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir
+        gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir


@@ -24,7 +24,7 @@ const DEFAULT_LIBRARIES_DIRECTORY = librarySearchPaths.join(";");
 const DEFAULT_MODEL_CONFIG = {
     systemPrompt: "",
-    promptTemplate: "### Human: \n%1\n### Assistant:\n",
+    promptTemplate: "### Human:\n%1\n\n### Assistant:\n",
 }
 const DEFAULT_MODEL_LIST_URL = "https://gpt4all.io/models/models2.json";


@@ -29,7 +29,7 @@
         "description": "<strong>Strong overall fast chat model</strong><br><ul><li>Fast responses</li><li>Chat based model</li><li>Trained by Mistral AI<li>Finetuned on OpenOrca dataset curated via <a href=\"https://atlas.nomic.ai/\">Nomic Atlas</a><li>Licensed for commercial use</ul>",
         "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
+        "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>\n"
     },
     {
         "order": "c",
@@ -42,7 +42,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Mistral",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Strong overall fast instruction following model</strong><br><ul><li>Fast responses</li><li>Trained by Mistral AI<li>Uncensored</li><li>Licensed for commercial use</li></ul>",
         "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf",
         "promptTemplate": "[INST] %1 [/INST]"
@@ -58,7 +58,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Falcon",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Very fast model with good quality</strong><br><ul><li>Fastest responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
         "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf",
         "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"
@@ -74,7 +74,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/orca-2-7b.Q4_0.gguf"
     },
@@ -89,7 +89,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/orca-2-13b.Q4_0.gguf"
     },
@@ -104,7 +104,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Strong overall larger model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf"
     },
@@ -119,7 +119,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/nous-hermes-llama2-13b.Q4_0.gguf",
         "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"
@@ -135,7 +135,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf"
     },
@@ -154,7 +154,7 @@
         "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/mpt-7b-chat-newbpe-q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
     },
     {
         "order": "j",
@@ -170,7 +170,7 @@
         "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/mpt-7b-chat.gguf4.Q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
     },
     {
         "order": "k",
@@ -200,7 +200,7 @@
         "parameters": "3 billion",
         "quant": "q4_0",
         "type": "Replit",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use<li>WARNING: Not available for chat GUI</ul>",
         "url": "https://gpt4all.io/models/gguf/replit-code-v1_5-3b-newbpe-q4_0.gguf"
@@ -217,7 +217,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Starcoder",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</ul>",
         "url": "https://gpt4all.io/models/gguf/starcoder-newbpe-q4_0.gguf"
@@ -234,7 +234,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "LLaMA",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on collection of Python and TypeScript</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</li>",
         "url": "https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf"
@@ -253,7 +253,7 @@
         "quant": "f16",
         "type": "Bert",
         "embeddingModel": true,
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>LocalDocs text embeddings model</strong><br><ul><li>For use with LocalDocs feature<li>Used for retrieval augmented generation (RAG)",
         "url": "https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf"
     },