Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-08-08 03:23:39 +00:00)
python: various fixes for GPT4All and Embed4All (#2130)
Key changes:

* honor empty system prompt argument
* current_chat_session is now read-only and defaults to None
* deprecate fallback prompt template for unknown models
* fix mistakes from #2086

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent 53f109f519
commit 255568fb9a
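The user-visible effect of the first two bullets, as a minimal sketch assuming the post-#2130 Python bindings (the model filename is borrowed from the models list touched below and is purely illustrative):

```python
from gpt4all import GPT4All

# Illustrative model choice; any model from the models list behaves the same way.
model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")

# current_chat_session is now a read-only property and is None outside a session.
assert model.current_chat_session is None

# An explicitly empty system prompt is now honored instead of being replaced
# by the model's configured default.
with model.chat_session(system_prompt=""):
    reply = model.generate("Hello!", max_tokens=32)
    print(model.current_chat_session)  # read-only view of the session history
```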
@@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <numeric>
 #include <random>
 #include <sstream>
 #include <stdexcept>

@@ -345,7 +346,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->ctx_params.n_threads = d_ptr->n_threads;
     d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;

-    if (m_supportsEmbedding)
+    if (isEmbedding)
         d_ptr->ctx_params.embeddings = true;

     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);

@@ -612,22 +613,22 @@ struct EmbModelGroup {
     std::vector<const char *> names;
 };

-static const EmbModelSpec NOPREFIX_SPEC {nullptr, nullptr};
+static const EmbModelSpec NOPREFIX_SPEC {"", ""};
 static const EmbModelSpec NOMIC_SPEC {"search_document", "search_query", {"clustering", "classification"}};
 static const EmbModelSpec E5_SPEC {"passage", "query"};

 static const EmbModelSpec NOMIC_1_5_SPEC {
-    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]"
+    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]",
 };
 static const EmbModelSpec LLM_EMBEDDER_SPEC {
     "Represent this document for retrieval",
     "Represent this query for retrieving relevant documents",
 };
 static const EmbModelSpec BGE_SPEC {
-    nullptr, "Represent this sentence for searching relevant passages",
+    "", "Represent this sentence for searching relevant passages",
 };
 static const EmbModelSpec E5_MISTRAL_SPEC {
-    nullptr, "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
+    "", "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
 };

 static const EmbModelGroup EMBEDDING_MODEL_SPECS[] {

@@ -738,18 +739,20 @@ void LLamaModel::embedInternal(
     const llama_token bos_token = llama_token_bos(d_ptr->model);
     const llama_token eos_token = llama_token_eos(d_ptr->model);

-    assert(shouldAddBOS());
-    bool addEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
+    bool useBOS = shouldAddBOS();
+    bool useEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;

     // no EOS, optional BOS
-    auto tokenize = [this, addEOS](std::string text, TokenString &tokens, bool addBOS) {
-        if (!text.empty() && text[0] != ' ')
+    auto tokenize = [this, useBOS, useEOS, eos_token](std::string text, TokenString &tokens, bool wantBOS) {
+        if (!text.empty() && text[0] != ' ') {
             text = ' ' + text; // normalize for SPM - our fork of llama.cpp doesn't add a space prefix
+        }
+        wantBOS &= useBOS;

         tokens.resize(text.length()+4);
-        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), addBOS, false);
-        assert(addEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-        tokens.resize(n_tokens - addEOS); // erase EOS/SEP
+        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
+        assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+        tokens.resize(n_tokens - useEOS); // erase EOS/SEP
     };

     // tokenize the texts

@@ -784,7 +787,7 @@ void LLamaModel::embedInternal(
     }

     const uint32_t n_batch = llama_n_batch(d_ptr->ctx);
-    const uint32_t max_len = n_batch - (prefixTokens.size() + addEOS); // minus BOS/CLS and EOS/SEP
+    const uint32_t max_len = n_batch - (prefixTokens.size() + useEOS); // minus BOS/CLS and EOS/SEP
     if (chunkOverlap >= max_len) {
         throw std::logic_error("max chunk length of " + std::to_string(max_len) + " is smaller than overlap of " +
                                std::to_string(chunkOverlap) + " tokens");
@@ -317,10 +317,10 @@ are used instead of model-specific system and prompt templates:
 === "Output"
     ```
     default system template: ''
-    default prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    default prompt template: '### Human:\n{0}\n\n### Assistant:\n'

     session system template: ''
-    session prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    session prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     ```

@@ -1,7 +1,6 @@
 from __future__ import annotations

 import ctypes
-import logging
 import os
 import platform
 import re

@@ -17,8 +16,6 @@ if sys.version_info >= (3, 9):
 else:
     import importlib_resources

-logger: logging.Logger = logging.getLogger(__name__)
-

 # TODO: provide a config file to make this more robust
 MODEL_LIB_PATH = importlib_resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build"

@@ -130,7 +127,7 @@ llmodel.llmodel_set_implementation_search_path.restype = None
 llmodel.llmodel_threadCount.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_threadCount.restype = ctypes.c_int32

-llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).replace("\\", r"\\").encode())
+llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).encode())

 llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
 llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)

@@ -323,7 +320,7 @@ class LLModel:
             ctypes.byref(error),
         )

-        if embedding_ptr.value is None:
+        if not embedding_ptr:
             msg = "(unknown error)" if error.value is None else error.value.decode()
             raise RuntimeError(f'Failed to generate embeddings: {msg}')

@@ -372,13 +369,6 @@ class LLModel:
         self.buffer.clear()
         self.buff_expecting_cont_bytes = 0

-        logger.info(
-            "LLModel.prompt_model -- prompt:\n"
-            + "%s\n"
-            + "===/LLModel.prompt_model -- prompt/===",
-            prompt,
-        )
-
         self._set_context(
             n_predict=n_predict,
             top_k=top_k,
@@ -20,12 +20,9 @@ from urllib3.exceptions import IncompleteRead, ProtocolError
 from . import _pyllmodel

 # TODO: move to config
-DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
+DEFAULT_MODEL_DIRECTORY = Path.home() / ".cache" / "gpt4all"

-DEFAULT_MODEL_CONFIG = {
-    "systemPrompt": "",
-    "promptTemplate": "### Human: \n{0}\n\n### Assistant:\n",
-}
+DEFAULT_PROMPT_TEMPLATE = "### Human:\n{0}\n\n### Assistant:\n"

 ConfigType = Dict[str, str]
 MessageType = Dict[str, str]

@@ -34,18 +31,19 @@ MessageType = Dict[str, str]
 class Embed4All:
     """
     Python class that handles embeddings for GPT4All.

+    Args:
+        model_name: The name of the embedding model to use. Defaults to `all-MiniLM-L6-v2.gguf2.f16.gguf`.
+
+        All other arguments are passed to the GPT4All constructor. See its documentation for more info.
     """

     MIN_DIMENSIONALITY = 64

-    def __init__(self, model_name: Optional[str] = None, n_threads: Optional[int] = None, **kwargs):
-        """
-        Constructor
-
-        Args:
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-        """
-        self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)
+    def __init__(self, model_name: Optional[str] = None, **kwargs):
+        if model_name is None:
+            model_name = 'all-MiniLM-L6-v2.gguf2.f16.gguf'
+        self.gpt4all = GPT4All(model_name, **kwargs)

     @overload
     def embed(

@@ -58,7 +56,7 @@ class Embed4All:
         atlas: bool = ...,
     ) -> list[list[float]]: ...

-    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="truncate", atlas=False):
+    def embed(self, text, prefix=None, dimensionality=None, long_text_mode="mean", atlas=False):
         """
         Generate one or more embeddings.

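A small usage sketch of the reshaped Embed4All API above (hedged; the input texts are arbitrary). The constructor now defaults to all-MiniLM-L6-v2.gguf2.f16.gguf and forwards extra keyword arguments such as n_threads to GPT4All, and embed() now defaults long_text_mode to "mean" rather than "truncate":

```python
from gpt4all import Embed4All

# n_threads is no longer a named Embed4All parameter; it is forwarded to GPT4All.
embedder = Embed4All(n_threads=4)

# Single text in, single embedding out; long inputs are now mean-pooled by default.
vector = embedder.embed("The quick brown fox jumps over the lazy dog")
print(len(vector))

# A list of texts returns a list of embeddings.
vectors = embedder.embed(["first document", "second document"])
print(len(vectors))
```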
@@ -94,22 +92,6 @@ class Embed4All:
 class GPT4All:
     """
     Python class that handles instantiation, downloading, generation and chat with GPT4All models.
-    """

-    def __init__(
-        self,
-        model_name: str,
-        model_path: Optional[Union[str, os.PathLike[str]]] = None,
-        model_type: Optional[str] = None,
-        allow_download: bool = True,
-        n_threads: Optional[int] = None,
-        device: Optional[str] = "cpu",
-        n_ctx: int = 2048,
-        ngl: int = 100,
-        verbose: bool = False,
-    ):
-        """
-        Constructor
-
     Args:
         model_name: Name of GPT4All or custom model. Including ".gguf" file extension is optional but encouraged.

@@ -131,6 +113,19 @@ class GPT4All:
         ngl: Number of GPU layers to use (Vulkan)
         verbose: If True, print debug messages.
     """

+    def __init__(
+        self,
+        model_name: str,
+        model_path: Optional[Union[str, os.PathLike[str]]] = None,
+        model_type: Optional[str] = None,
+        allow_download: bool = True,
+        n_threads: Optional[int] = None,
+        device: Optional[str] = "cpu",
+        n_ctx: int = 2048,
+        ngl: int = 100,
+        verbose: bool = False,
+    ):
         self.model_type = model_type
         # Retrieve model and download if allowed
         self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)

@@ -142,10 +137,13 @@ class GPT4All:
         if n_threads is not None:
             self.model.set_thread_count(n_threads)

-        self._is_chat_session_activated: bool = False
-        self.current_chat_session: List[MessageType] = empty_chat_session()
+        self._history: list[MessageType] | None = None
         self._current_prompt_template: str = "{0}"

+    @property
+    def current_chat_session(self) -> list[MessageType] | None:
+        return self._history
+
     @staticmethod
     def list_models() -> List[ConfigType]:
         """

@@ -159,8 +157,9 @@ class GPT4All:
             raise ValueError(f'Request failed: HTTP {resp.status_code} {resp.reason}')
         return resp.json()

-    @staticmethod
+    @classmethod
     def retrieve_model(
+        cls,
         model_name: str,
         model_path: Optional[Union[str, os.PathLike[str]]] = None,
         allow_download: bool = True,

@@ -183,58 +182,51 @@ class GPT4All:
         model_filename = append_extension_if_missing(model_name)

         # get the config for the model
-        config: ConfigType = DEFAULT_MODEL_CONFIG
+        config: ConfigType = {}
         if allow_download:
-            available_models = GPT4All.list_models()
+            available_models = cls.list_models()

             for m in available_models:
                 if model_filename == m["filename"]:
-                    config.update(m)
-                    config["systemPrompt"] = config["systemPrompt"].strip()
+                    tmpl = m.get("promptTemplate", DEFAULT_PROMPT_TEMPLATE)
                     # change to Python-style formatting
-                    config["promptTemplate"] = config["promptTemplate"].replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    m["promptTemplate"] = tmpl.replace("%1", "{0}", 1).replace("%2", "{1}", 1)
+                    config.update(m)
                     break

         # Validate download directory
         if model_path is None:
             try:
                 os.makedirs(DEFAULT_MODEL_DIRECTORY, exist_ok=True)
-            except OSError as exc:
-                raise ValueError(
-                    f"Failed to create model download directory at {DEFAULT_MODEL_DIRECTORY}: {exc}. "
-                    "Please specify model_path."
-                )
+            except OSError as e:
+                raise RuntimeError("Failed to create model download directory") from e
             model_path = DEFAULT_MODEL_DIRECTORY
         else:
-            model_path = str(model_path).replace("\\", "\\\\")
+            model_path = Path(model_path)

-        if not os.path.exists(model_path):
-            raise ValueError(f"Invalid model directory: {model_path}")
+        if not model_path.exists():
+            raise FileNotFoundError(f"Model directory does not exist: {model_path!r}")

-        model_dest = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        if os.path.exists(model_dest):
-            config.pop("url", None)
-            config["path"] = model_dest
+        model_dest = model_path / model_filename
+        if model_dest.exists():
+            config["path"] = str(model_dest)
             if verbose:
-                print("Found model file at", model_dest, file=sys.stderr)
+                print(f"Found model file at {str(model_dest)!r}", file=sys.stderr)

-        # If model file does not exist, download
         elif allow_download:
-            url = config.pop("url", None)
-            config["path"] = GPT4All.download_model(model_filename, model_path, verbose=verbose, url=url)
+            # If model file does not exist, download
+            config["path"] = str(cls.download_model(model_filename, model_path, verbose=verbose, url=config.get("url")))
         else:
-            raise ValueError("Failed to retrieve model")
+            raise FileNotFoundError(f"Model file does not exist: {model_dest!r}")

         return config

     @staticmethod
     def download_model(
         model_filename: str,
-        model_path: Union[str, os.PathLike[str]],
+        model_path: str | os.PathLike[str],
         verbose: bool = True,
         url: Optional[str] = None,
-    ) -> str:
+    ) -> str | os.PathLike[str]:
         """
         Download model from https://gpt4all.io.

@@ -248,21 +240,17 @@ class GPT4All:
             Model file destination.
         """

-        def get_download_url(model_filename):
-            if url:
-                return url
-            return f"https://gpt4all.io/models/gguf/{model_filename}"
-
         # Download model
-        download_path = os.path.join(model_path, model_filename).replace("\\", "\\\\")
-        download_url = get_download_url(model_filename)
+        download_path = Path(model_path) / model_filename
+        if url is None:
+            url = f"https://gpt4all.io/models/gguf/{model_filename}"

         def make_request(offset=None):
             headers = {}
             if offset:
                 print(f"\nDownload interrupted, resuming from byte position {offset}", file=sys.stderr)
                 headers['Range'] = f'bytes={offset}-'  # resume incomplete response
-            response = requests.get(download_url, stream=True, headers=headers)
+            response = requests.get(url, stream=True, headers=headers)
             if response.status_code not in (200, 206):
                 raise ValueError(f'Request failed: HTTP {response.status_code} {response.reason}')
             if offset and (response.status_code != 206 or str(offset) not in response.headers.get('Content-Range', '')):

@@ -311,7 +299,7 @@ class GPT4All:
                 time.sleep(2)  # Sleep for a little bit so Windows can remove file lock

         if verbose:
-            print("Model downloaded at:", download_path, file=sys.stderr)
+            print(f"Model downloaded to {str(download_path)!r}", file=sys.stderr)
         return download_path

     def generate(

@@ -350,10 +338,6 @@ class GPT4All:
             Either the entire completion or a generator that yields the completion token by token.
         """

-        if re.search(r"%1(?![0-9])", self._current_prompt_template):
-            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
-                             "placeholder, please use '{0}' instead.")
-
         # Preparing the model request
         generate_kwargs: Dict[str, Any] = dict(
             temp=temp,

@@ -366,17 +350,17 @@ class GPT4All:
             n_predict=n_predict if n_predict is not None else max_tokens,
         )

-        if self._is_chat_session_activated:
+        if self._history is not None:
             # check if there is only one message, i.e. system prompt:
-            reset = len(self.current_chat_session) == 1
+            reset = len(self._history) == 1
             generate_kwargs["reset_context"] = reset
-            self.current_chat_session.append({"role": "user", "content": prompt})
+            self._history.append({"role": "user", "content": prompt})

             fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
             if fct_func is GPT4All._format_chat_prompt_template:
                 if reset:
                     # ingest system prompt
-                    self.model.prompt_model(self.current_chat_session[0]["content"], "%1",
+                    self.model.prompt_model(self._history[0]["content"], "%1",
                                             _pyllmodel.empty_response_callback,
                                             n_batch=n_batch, n_predict=0, special=True)
                 prompt_template = self._current_prompt_template.format("%1", "%2")

@@ -387,8 +371,8 @@ class GPT4All:
                 )
                 # special tokens won't be processed
                 prompt = self._format_chat_prompt_template(
-                    self.current_chat_session[-1:],
-                    self.current_chat_session[0]["content"] if reset else "",
+                    self._history[-1:],
+                    self._history[0]["content"] if reset else "",
                 )
                 prompt_template = "%1"
         else:

@@ -399,11 +383,11 @@ class GPT4All:
         output_collector: List[MessageType]
         output_collector = [
             {"content": ""}
-        ]  # placeholder for the self.current_chat_session if chat session is not activated
+        ]  # placeholder for the self._history if chat session is not activated

-        if self._is_chat_session_activated:
-            self.current_chat_session.append({"role": "assistant", "content": ""})
-            output_collector = self.current_chat_session
+        if self._history is not None:
+            self._history.append({"role": "assistant", "content": ""})
+            output_collector = self._history

         def _callback_wrapper(
             callback: _pyllmodel.ResponseCallbackType,

@@ -439,8 +423,8 @@ class GPT4All:
     @contextmanager
     def chat_session(
         self,
-        system_prompt: str = "",
-        prompt_template: str = "",
+        system_prompt: str | None = None,
+        prompt_template: str | None = None,
     ):
         """
         Context manager to hold an inference optimized chat session with a GPT4All model.

@@ -449,16 +433,27 @@ class GPT4All:
             system_prompt: An initial instruction for the model.
             prompt_template: Template for the prompts with {0} being replaced by the user message.
         """
-        # Code to acquire resource, e.g.:
-        self._is_chat_session_activated = True
-        self.current_chat_session = empty_chat_session(system_prompt or self.config["systemPrompt"])
-        self._current_prompt_template = prompt_template or self.config["promptTemplate"]
+        if system_prompt is None:
+            system_prompt = self.config.get("systemPrompt", "")
+
+        if prompt_template is None:
+            if (tmpl := self.config.get("promptTemplate")) is None:
+                warnings.warn("Use of a sideloaded model or allow_download=False without specifying a prompt template "
+                              "is deprecated. Defaulting to Alpaca.", DeprecationWarning)
+                tmpl = DEFAULT_PROMPT_TEMPLATE
+            prompt_template = tmpl
+
+        if re.search(r"%1(?![0-9])", prompt_template):
+            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
+                             "placeholder, please use '{0}' instead.")
+
+        self._history = [{"role": "system", "content": system_prompt}]
+        self._current_prompt_template = prompt_template
         try:
             yield self
         finally:
-            # Code to release resource, e.g.:
-            self._is_chat_session_activated = False
-            self.current_chat_session = empty_chat_session()
+            self._history = None
             self._current_prompt_template = "{0}"

     def _format_chat_prompt_template(
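Given the deprecation introduced above, a sideloaded model (or one loaded with allow_download=False) should now be given an explicit prompt template. A hedged sketch with a hypothetical local file name and an Alpaca-style template, written with the "{0}" placeholder since a literal "%1" now raises ValueError:

```python
from gpt4all import GPT4All

# Hypothetical local file; with allow_download=False there is no models.json
# entry to supply a promptTemplate, so passing one avoids the new
# DeprecationWarning and the silent Alpaca fallback.
model = GPT4All("my-sideloaded-model.Q4_0.gguf", allow_download=False)

with model.chat_session(
    system_prompt="You are a helpful assistant.",
    prompt_template="### Instruction:\n{0}\n\n### Response:\n",
):
    print(model.generate("Say hello.", max_tokens=32))
```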
@@ -496,10 +491,6 @@ class GPT4All:
         return full_prompt


-def empty_chat_session(system_prompt: str = "") -> List[MessageType]:
-    return [{"role": "system", "content": system_prompt}]
-
-
 def append_extension_if_missing(model_name):
     if not model_name.endswith((".bin", ".gguf")):
         model_name += ".gguf"
@@ -115,13 +115,13 @@ def test_empty_embedding():
     output = embedder.embed(text)

 def test_download_model(tmp_path: Path):
-    import gpt4all.gpt4all
-    old_default_dir = gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY
-    gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = str(tmp_path)  # temporary pytest directory to ensure a download happens
+    from gpt4all import gpt4all
+    old_default_dir = gpt4all.DEFAULT_MODEL_DIRECTORY
+    gpt4all.DEFAULT_MODEL_DIRECTORY = tmp_path  # temporary pytest directory to ensure a download happens
     try:
         model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
         model_path = tmp_path / model.config['filename']
         assert model_path.absolute() == Path(model.config['path']).absolute()
         assert model_path.stat().st_size == int(model.config['filesize'])
     finally:
-        gpt4all.gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir
+        gpt4all.DEFAULT_MODEL_DIRECTORY = old_default_dir
@@ -24,7 +24,7 @@ const DEFAULT_LIBRARIES_DIRECTORY = librarySearchPaths.join(";");

 const DEFAULT_MODEL_CONFIG = {
     systemPrompt: "",
-    promptTemplate: "### Human: \n%1\n### Assistant:\n",
+    promptTemplate: "### Human:\n%1\n\n### Assistant:\n",
 }

 const DEFAULT_MODEL_LIST_URL = "https://gpt4all.io/models/models2.json";
@@ -29,7 +29,7 @@
         "description": "<strong>Strong overall fast chat model</strong><br><ul><li>Fast responses</li><li>Chat based model</li><li>Trained by Mistral AI<li>Finetuned on OpenOrca dataset curated via <a href=\"https://atlas.nomic.ai/\">Nomic Atlas</a><li>Licensed for commercial use</ul>",
         "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
+        "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>\n"
     },
     {
         "order": "c",

@@ -42,7 +42,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Mistral",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Strong overall fast instruction following model</strong><br><ul><li>Fast responses</li><li>Trained by Mistral AI<li>Uncensored</li><li>Licensed for commercial use</li></ul>",
         "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf",
         "promptTemplate": "[INST] %1 [/INST]"

@@ -58,7 +58,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Falcon",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Very fast model with good quality</strong><br><ul><li>Fastest responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
         "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf",
         "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"

@@ -74,7 +74,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/orca-2-7b.Q4_0.gguf"
     },

@@ -89,7 +89,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/orca-2-13b.Q4_0.gguf"
     },

@@ -104,7 +104,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Strong overall larger model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf"
     },

@@ -119,7 +119,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA2",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/nous-hermes-llama2-13b.Q4_0.gguf",
         "promptTemplate": "### Instruction:\n%1\n\n### Response:\n"

@@ -135,7 +135,7 @@
         "parameters": "13 billion",
         "quant": "q4_0",
         "type": "LLaMA",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf"
     },

@@ -154,7 +154,7 @@
         "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/mpt-7b-chat-newbpe-q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
     },
     {
         "order": "j",

@@ -170,7 +170,7 @@
         "description": "<strong>Good model with novel architecture</strong><br><ul><li>Fast responses<li>Chat based<li>Trained by Mosaic ML<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/mpt-7b-chat.gguf4.Q4_0.gguf",
         "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n",
-        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+        "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>\n"
     },
     {
         "order": "k",

@@ -200,7 +200,7 @@
         "parameters": "3 billion",
         "quant": "q4_0",
         "type": "Replit",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use<li>WARNING: Not available for chat GUI</ul>",
         "url": "https://gpt4all.io/models/gguf/replit-code-v1_5-3b-newbpe-q4_0.gguf"

@@ -217,7 +217,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "Starcoder",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</ul>",
         "url": "https://gpt4all.io/models/gguf/starcoder-newbpe-q4_0.gguf"

@@ -234,7 +234,7 @@
         "parameters": "7 billion",
         "quant": "q4_0",
         "type": "LLaMA",
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "promptTemplate": "%1",
         "description": "<strong>Trained on collection of Python and TypeScript</strong><br><ul><li>Code completion based<li>WARNING: Not available for chat GUI</li>",
         "url": "https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf"

@@ -253,7 +253,7 @@
         "quant": "f16",
         "type": "Bert",
         "embeddingModel": true,
-        "systemPrompt": " ",
+        "systemPrompt": "",
         "description": "<strong>LocalDocs text embeddings model</strong><br><ul><li>For use with LocalDocs feature<li>Used for retrieval augmented generation (RAG)",
         "url": "https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf"
     },