mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-08 11:58:53 +00:00
fix chat-style prompt templates (#1970)
Also use a new version of Mistral OpenOrca.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -246,90 +246,6 @@ To do the same outside a session, the input has to be formatted manually. For ex
    The colors in my previous response are blue, green and red.
    ```

Ultimately, the method `GPT4All._format_chat_prompt_template()` is responsible for formatting templates. It can be
customized in a subclass. As an example:

=== "Custom Subclass"
    ``` py
    from itertools import cycle
    from gpt4all import GPT4All

    class RotatingTemplateGPT4All(GPT4All):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._templates = [
                "Respond like a pirate.",
                "Respond like a politician.",
                "Respond like a philosopher.",
                "Respond like a Klingon.",
            ]
            self._cycling_templates = cycle(self._templates)

        def _format_chat_prompt_template(
            self,
            messages: list,
            default_prompt_header: str = "",
            default_prompt_footer: str = "",
        ) -> str:
            full_prompt = default_prompt_header + "\n\n" if default_prompt_header != "" else ""
            for message in messages:
                if message["role"] == "user":
                    user_message = f"USER: {message['content']} {next(self._cycling_templates)}\n"
                    full_prompt += user_message
                if message["role"] == "assistant":
                    assistant_message = f"ASSISTANT: {message['content']}\n"
                    full_prompt += assistant_message
            full_prompt += "\n\n" + default_prompt_footer if default_prompt_footer != "" else ""
            print(full_prompt)
            return full_prompt
    ```

=== "GPT4All Custom Subclass Example"
    ``` py
    model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf')
    with model.chat_session():  # starting a session is optional in this example
        response1 = model.generate("hi, who are you?")
        print(response1)
        print()
        response2 = model.generate("what can you tell me about snakes?")
        print(response2)
        print()
        response3 = model.generate("what's your opinion on Chess?")
        print(response3)
        print()
        response4 = model.generate("tell me about ancient Rome.")
        print(response4)
    ```

=== "Possible Output"
    ```
    USER: hi, who are you? Respond like a pirate.

    Pirate: Ahoy there mateys! I be Cap'n Jack Sparrow of the Black Pearl.

    USER: what can you tell me about snakes? Respond like a politician.

    Politician: Snakes have been making headlines lately due to their ability to
    slither into tight spaces and evade capture, much like myself during my last
    election campaign. However, I believe that with proper education and
    understanding of these creatures, we can work together towards creating a
    safer environment for both humans and snakes alike.

    USER: what's your opinion on Chess? Respond like a philosopher.

    Philosopher: The game of chess is often used as an analogy to illustrate the
    complexities of life and decision-making processes. However, I believe that it
    can also be seen as a reflection of our own consciousness and subconscious mind.
    Just as each piece on the board has its unique role to play in shaping the
    outcome of the game, we too have different roles to fulfill in creating our own
    personal narrative.

    USER: tell me about ancient Rome. Respond like a Klingon.

    Klingon: Ancient Rome was once a great empire that ruled over much of Europe and
    the Mediterranean region. However, just as the Empire fell due to internal strife
    and external threats, so too did my own house come crashing down when I failed to
    protect our homeworld from invading forces.
    ```


### Introspection
A less apparent feature is the capacity to log the final prompt that gets sent to the model. It relies on
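The section above is removed from the Python docs because this commit makes the chat session itself responsible for applying the prompt template, and overriding `GPT4All._format_chat_prompt_template()` is deprecated (see the `DeprecationWarning` added further down). A minimal sketch of the replacement approach, assuming the `chat_session()` keyword arguments `system_prompt` and `prompt_template`; the model filename and template text are only examples:

``` py
from gpt4all import GPT4All

model = GPT4All('mistral-7b-openorca.gguf2.Q4_0.gguf')  # example model file

# The Python-level template uses '{0}' as the prompt placeholder; a literal
# '%1' is rejected by the check added to generate() in this commit.
with model.chat_session(
    system_prompt="You are a helpful assistant.",
    prompt_template="USER: {0}\nASSISTANT: ",
):
    print(model.generate("hi, who are you?", max_tokens=64))
```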
@@ -89,10 +89,12 @@ RecalculateCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_bool)
llmodel.llmodel_prompt.argtypes = [
    ctypes.c_void_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
    PromptCallback,
    ResponseCallback,
    RecalculateCallback,
    ctypes.POINTER(LLModelPromptContext),
    ctypes.c_bool,
]

llmodel.llmodel_prompt.restype = None
@@ -290,6 +292,7 @@ class LLModel:
    def prompt_model(
        self,
        prompt: str,
        prompt_template: str,
        callback: ResponseCallbackType,
        n_predict: int = 4096,
        top_k: int = 40,
@@ -300,6 +303,7 @@ class LLModel:
        repeat_last_n: int = 10,
        context_erase: float = 0.75,
        reset_context: bool = False,
        special: bool = False,
    ):
        """
        Generate response from model from a prompt.
@@ -326,9 +330,6 @@ class LLModel:
            prompt,
        )

        prompt_bytes = prompt.encode()
        prompt_ptr = ctypes.c_char_p(prompt_bytes)

        self._set_context(
            n_predict=n_predict,
            top_k=top_k,
@@ -343,16 +344,18 @@ class LLModel:

        llmodel.llmodel_prompt(
            self.model,
            prompt_ptr,
            ctypes.c_char_p(prompt.encode()),
            ctypes.c_char_p(prompt_template.encode()),
            PromptCallback(self._prompt_callback),
            ResponseCallback(self._callback_decoder(callback)),
            RecalculateCallback(self._recalculate_callback),
            self.context,
            special,
        )


    def prompt_model_streaming(
        self, prompt: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
        self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
    ) -> Iterable[str]:
        output_queue: Queue[str | Sentinel] = Queue()

@@ -369,15 +372,15 @@ class LLModel:

            return _generator_callback

        def run_llmodel_prompt(prompt: str, callback: ResponseCallbackType, **kwargs):
            self.prompt_model(prompt, callback, **kwargs)
        def run_llmodel_prompt(prompt: str, prompt_template: str, callback: ResponseCallbackType, **kwargs):
            self.prompt_model(prompt, prompt_template, callback, **kwargs)
            output_queue.put(Sentinel.TERMINATING_SYMBOL)

        # Kick off llmodel_prompt in separate thread so we can return generator
        # immediately
        thread = threading.Thread(
            target=run_llmodel_prompt,
            args=(prompt, _generator_callback_wrapper(callback)),
            args=(prompt, prompt_template, _generator_callback_wrapper(callback)),
            kwargs=kwargs,
        )
        thread.start()
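For reference, a minimal usage sketch of the updated low-level streaming signature, assuming a locally downloadable model file (the filename is only an example). The `LLModel` instance is reached through the `GPT4All` wrapper's `model` attribute, as in the `generate()` changes below, and the raw `%1` placeholder marks where the prompt text is substituted on the C++ side:

``` py
from gpt4all import GPT4All

gpt = GPT4All('mistral-7b-openorca.gguf2.Q4_0.gguf')  # example model file

# prompt_model_streaming() now takes the prompt and its template as separate
# arguments; the default callback simply lets every token through.
for token in gpt.model.prompt_model_streaming(
    "Name three colors.",
    "USER: %1\nASSISTANT: ",
    n_predict=64,
):
    print(token, end="", flush=True)
print()
```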
@@ -4,8 +4,10 @@ Python only API for running all GPT4All models.
from __future__ import annotations

import os
import re
import sys
import time
import warnings
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Union
@@ -314,6 +316,10 @@ class GPT4All:
            Either the entire completion or a generator that yields the completion token by token.
        """

        if re.search(r"%1(?![0-9])", self._current_prompt_template):
            raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
                             "placeholder, please use '{0}' instead.")

        # Preparing the model request
        generate_kwargs: Dict[str, Any] = dict(
            temp=temp,
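The lookahead in the new check matters: it rejects a bare `%1` in the Python-level template but still allows sequences such as `%10`, where the `1` is part of a longer number. A quick illustration of the regex on its own, using hypothetical template strings:

``` py
import re

pattern = r"%1(?![0-9])"

print(bool(re.search(pattern, "USER: %1\nASSISTANT: ")))     # True  -> generate() would raise ValueError
print(bool(re.search(pattern, "USER: {0}\nASSISTANT: ")))    # False -> accepted
print(bool(re.search(pattern, "give me %10 of the total")))  # False -> '%10' is not the placeholder
```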
@@ -327,16 +333,29 @@

        if self._is_chat_session_activated:
            # check if there is only one message, i.e. system prompt:
            generate_kwargs["reset_context"] = len(self.current_chat_session) == 1
            reset = len(self.current_chat_session) == 1
            generate_kwargs["reset_context"] = reset
            self.current_chat_session.append({"role": "user", "content": prompt})

            prompt = self._format_chat_prompt_template(
                messages=self.current_chat_session[-1:],
                default_prompt_header=self.current_chat_session[0]["content"]
                if generate_kwargs["reset_context"]
                else "",
            )
            if self._format_chat_prompt_template.__func__ is GPT4All._format_chat_prompt_template:
                if reset:
                    # ingest system prompt
                    self.model.prompt_model(self.current_chat_session[0]["content"], "%1",
                                            n_batch=n_batch, n_predict=0, special=True)
                prompt_template = self._current_prompt_template.format("%1")
            else:
                warnings.warn(
                    "_format_chat_prompt_template is deprecated. Please use a chat session with a prompt template.",
                    DeprecationWarning,
                )
                # special tokens won't be processed
                prompt = self._format_chat_prompt_template(
                    self.current_chat_session[-1:],
                    self.current_chat_session[0]["content"] if reset else "",
                )
                prompt_template = "%1"
        else:
            prompt_template = "%1"
            generate_kwargs["reset_context"] = True

        # Prepare the callback, process the model response
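For subclasses that still override the hook, such as the `RotatingTemplateGPT4All` example removed from the docs above, this branch keeps them working but routes them through the pass-through `"%1"` template and emits the warning. A small sketch of what such legacy code looks like from the caller's side; the model filename and the formatting logic are only examples:

``` py
from gpt4all import GPT4All


class LegacyTemplateGPT4All(GPT4All):
    # Overriding the old hook still works, but generate() detects the override,
    # formats the prompt through it, uses the "%1" pass-through template
    # (so special tokens are not processed), and emits a DeprecationWarning.
    def _format_chat_prompt_template(self, messages, default_prompt_header="", default_prompt_footer=""):
        return "".join(f"USER: {m['content']}\nASSISTANT: " for m in messages)


model = LegacyTemplateGPT4All('mistral-7b-openorca.gguf2.Q4_0.gguf')  # example model file
with model.chat_session():
    print(model.generate("hi, who are you?", max_tokens=64))  # emits the DeprecationWarning above
```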
@@ -365,14 +384,16 @@ class GPT4All:
        # Send the request to the model
        if streaming:
            return self.model.prompt_model_streaming(
                prompt=prompt,
                callback=_callback_wrapper(callback, output_collector),
                prompt,
                prompt_template,
                _callback_wrapper(callback, output_collector),
                **generate_kwargs,
            )

        self.model.prompt_model(
            prompt=prompt,
            callback=_callback_wrapper(callback, output_collector),
            prompt,
            prompt_template,
            _callback_wrapper(callback, output_collector),
            **generate_kwargs,
        )
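The non-streaming path mirrors the same change. A sketch of calling `prompt_model()` directly with a collecting callback, again treating the model filename and template as examples; the callback receives the token id and the decoded text and returns `True` to keep generating:

``` py
from gpt4all import GPT4All

gpt = GPT4All('mistral-7b-openorca.gguf2.Q4_0.gguf')  # example model file

chunks = []

def collect(token_id: int, response: str) -> bool:
    chunks.append(response)
    return True  # returning False would stop generation

# prompt and prompt_template are now separate positional arguments, matching
# the prompt/template pair of c_char_p entries in llmodel_prompt's argtypes above.
gpt.model.prompt_model("Name three colors.", "USER: %1\nASSISTANT: ", collect, n_predict=64)
print("".join(chunks))
```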
@@ -423,24 +444,6 @@ class GPT4All:
            Formatted prompt.
        """

        if isinstance(default_prompt_header, bool):
            import warnings

            warnings.warn(
                "Using True/False for the 'default_prompt_header' is deprecated. Use a string instead.",
                DeprecationWarning,
            )
            default_prompt_header = ""

        if isinstance(default_prompt_footer, bool):
            import warnings

            warnings.warn(
                "Using True/False for the 'default_prompt_footer' is deprecated. Use a string instead.",
                DeprecationWarning,
            )
            default_prompt_footer = ""

        full_prompt = default_prompt_header + "\n\n" if default_prompt_header != "" else ""

        for message in messages:
@@ -68,7 +68,7 @@ def get_long_description():

setup(
    name=package_name,
    version="2.2.1.post1",
    version="2.3.0",
    description="Python bindings for GPT4All",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",