Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-07-07 04:20:59 +00:00)

Commit 4b583dd759 (parent 14f4a1c4ba): deploying new version with streaming
@@ -170,25 +170,25 @@ workflows:
       branches:
         only:
           - main
-#  build-py-deploy:
-#    jobs:
-#      - build-py-linux:
-#          filters:
-#            branches:
-#              only:
-#      - build-py-macos:
-#          filters:
-#            branches:
-#              only:
-#      - build-py-windows:
-#          filters:
-#            branches:
-#              only:
-#      - store-and-upload-wheels:
-#          filters:
-#            branches:
-#              only:
-#          requires:
-#            - build-py-windows
-#            - build-py-linux
-#            - build-py-macos
+  build-py-deploy:
+    jobs:
+      - build-py-linux:
+          filters:
+            branches:
+              only:
+      - build-py-macos:
+          filters:
+            branches:
+              only:
+      - build-py-windows:
+          filters:
+            branches:
+              only:
+      - store-and-upload-wheels:
+          filters:
+            branches:
+              only:
+          requires:
+            - build-py-windows
+            - build-py-linux
+            - build-py-macos
@@ -155,24 +155,26 @@ class GPT4All():
             print("Model downloaded at: " + download_path)
         return download_path
 
-    def generate(self, prompt: str, **generate_kwargs) -> str:
+    def generate(self, prompt: str, streaming: bool = False, **generate_kwargs) -> str:
         """
         Surfaced method of running generate without accessing model object.
 
         Args:
             prompt: Raw string to be passed to model.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
             Raw string of generated model response.
         """
-        return self.model.generate(prompt, **generate_kwargs)
+        return self.model.generate(prompt, streaming=streaming, **generate_kwargs)
 
     def chat_completion(self,
                         messages: List[Dict],
                         default_prompt_header: bool = True,
                         default_prompt_footer: bool = True,
                         verbose: bool = True,
+                        streaming: bool = True,
                         **generate_kwargs) -> str:
         """
         Format list of message dictionaries into a prompt and call model
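With this change, callers of the high-level GPT4All wrapper can opt into token-by-token output. A minimal usage sketch (the model name is a placeholder and the GPT4All(model_name) constructor form is assumed, not taken from this commit):

    from gpt4all import GPT4All

    # Placeholder model name, for illustration only.
    gpt = GPT4All("ggml-gpt4all-j-v1.3-groovy")

    # streaming=True echoes tokens to stdout as they are generated;
    # the complete response string is still returned either way.
    response = gpt.generate("List three colors.", streaming=True)
    print(response)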
@@ -189,6 +191,7 @@ class GPT4All():
                 before user/assistant role messages.
             default_prompt_footer: If True (default), add default footer at end of prompt.
             verbose: If True (default), print full prompt and generated response.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
@@ -206,7 +209,7 @@ class GPT4All():
         if verbose:
             print(full_prompt)
 
-        response = self.model.generate(full_prompt, **generate_kwargs)
+        response = self.model.generate(full_prompt, streaming=streaming, **generate_kwargs)
 
         if verbose:
             print(response)
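chat_completion gains the same switch but defaults it to True, so chats stream live output unless told otherwise. Continuing the sketch above (same gpt instance; the message content is illustrative):

    messages = [{"role": "user", "content": "List three colors."}]

    # Pass streaming=False to suppress the live token stream; verbose printing
    # and the returned value are unaffected.
    response = gpt.chat_completion(messages, verbose=False, streaming=False)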
@@ -1,25 +1,23 @@
-from io import StringIO
 import pkg_resources
 import ctypes
 import os
 import platform
 import re
+import subprocess
 import sys
 
-class DualOutput:
-    def __init__(self, stdout, string_io):
-        self.stdout = stdout
-        self.string_io = string_io
+class DualStreamProcessor:
+    def __init__(self, stream=None):
+        self.stream = stream
+        self.output = ""
 
     def write(self, text):
-        self.stdout.write(text)
-        self.string_io.write(text)
+        cleaned_text = re.sub(r"\n(?!\n)", "", text)
+        if self.stream is not None:
+            self.stream.write(cleaned_text)
+            self.stream.flush()
+        self.output += cleaned_text
 
-    def flush(self):
-        # It's a good idea to also define a flush method that flushes both
-        # outputs, as sys.stdout is expected to have this method.
-        self.stdout.flush()
-        self.string_io.flush()
 
 # TODO: provide a config file to make this more robust
 LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
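The new helper both accumulates the response and, when handed a stream, echoes it live. A standalone sketch of its behavior (the class body mirrors the hunk above; the demo lines at the end are illustrative only):

    import re
    import sys

    class DualStreamProcessor:
        def __init__(self, stream=None):
            self.stream = stream   # optional live output stream, e.g. the real sys.stdout
            self.output = ""       # accumulated, cleaned response text

        def write(self, text):
            # Remove any newline that is not immediately followed by another newline.
            cleaned_text = re.sub(r"\n(?!\n)", "", text)
            if self.stream is not None:
                self.stream.write(cleaned_text)
                self.stream.flush()
            self.output += cleaned_text

    proc = DualStreamProcessor(stream=sys.stdout)   # echo while collecting
    proc.write("Hello\nworld\n\n")
    assert proc.output == "Helloworld\n"            # mid-text newlines stripped; one of the trailing pair survives

Note that, unlike the old DualOutput, the replacement defines no flush(), so code that calls sys.stdout.flush() while the processor is installed would raise AttributeError.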
@@ -175,7 +173,7 @@ class LLModel:
                  repeat_penalty: float = 1.2,
                  repeat_last_n: int = 10,
                  context_erase: float = .5,
-                 std_passthrough: bool = False) -> str:
+                 streaming: bool = False) -> str:
         """
         Generate response from model from a prompt.
 
@@ -183,12 +181,8 @@ class LLModel:
         ----------
         prompt: str
             Question, task, or conversation for model to respond to
-        add_default_header: bool, optional
-            Whether to add a prompt header (default is True)
-        add_default_footer: bool, optional
-            Whether to add a prompt footer (default is True)
-        verbose: bool, optional
-            Whether to print prompt and response
+        streaming: bool
+            Stream response to stdout
 
         Returns
         -------
@@ -198,13 +192,14 @@ class LLModel:
         prompt = prompt.encode('utf-8')
         prompt = ctypes.c_char_p(prompt)
 
-        # Change stdout to StringIO so we can collect response
         old_stdout = sys.stdout
-        collect_response = StringIO()
-        if std_passthrough:
-            sys.stdout = DualOutput(old_stdout, collect_response)
-        else:
-            sys.stdout = collect_response
+
+        stream_processor = DualStreamProcessor()
+
+        if streaming:
+            stream_processor.stream = sys.stdout
+
+        sys.stdout = stream_processor
 
         context = LLModelPromptContext(
             logits_size=logits_size,
@@ -227,14 +222,11 @@ class LLModel:
             ResponseCallback(self._response_callback),
             RecalculateCallback(self._recalculate_callback),
             context)
 
-        response = collect_response.getvalue()
+        # Revert to old stdout
         sys.stdout = old_stdout
 
-        # Remove the unnecessary new lines from response
-        response = re.sub(r"\n(?!\n)", "", response).strip()
-
-        return response
+        return stream_processor.output
 
     # Empty prompt callback
     @staticmethod
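Taken together, LLModel.generate() now routes the C callback's prints through the processor instead of a StringIO buffer, and the newline cleanup happens as tokens arrive rather than at the end. A simplified sketch of the new control flow (generate_response below is a stand-in for the real ctypes prompt call, not a function from the bindings):

    import sys

    def _generate_flow(generate_response, prompt, streaming=False):
        old_stdout = sys.stdout

        stream_processor = DualStreamProcessor()
        if streaming:
            stream_processor.stream = sys.stdout   # echo tokens as they arrive

        sys.stdout = stream_processor              # capture the response callback's prints
        generate_response(prompt)                  # stand-in for the llmodel ctypes call
        sys.stdout = old_stdout                    # revert to the real stdout

        return stream_processor.output             # cleaned response accumulated by write()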
@@ -78,6 +78,8 @@ setup(
         'dev': [
             'pytest',
             'twine',
+            'wheel',
+            'setuptools',
             'mkdocs-material',
             'mkautodoc',
             'mkdocstrings[python]',
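With wheel and setuptools added to the dev extra, the build tooling used by the re-enabled wheel jobs also comes along with a development install, e.g. pip install -e ".[dev]" run from the Python bindings directory (that exact invocation is an assumption, not part of this commit).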