Merge branch 'zylon-ai:main' into fix-setup

Mart 2024-05-10 18:01:02 +02:00 committed by GitHub
commit 090ce7c69e
7 changed files with 39 additions and 24 deletions

View File

@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
 <Cards>
   <Card
-    title="Node.js/TypeScript - WIP"
+    title="TypeScript"
     icon="fa-brands fa-node"
-    href="https://github.com/imartinez/privateGPT-typescript"
+    href="https://github.com/zylon-ai/privategpt-ts"
   />
   <Card
-    title="Python - Ready!"
+    title="Python"
     icon="fa-brands fa-python"
-    href="https://github.com/imartinez/pgpt_python"
+    href="https://github.com/zylon-ai/pgpt-python"
   />
   <br />
 </Cards>

View File

@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)
         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :], raw=resp
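Replacing eval() with json.loads() means the SageMaker response body is parsed as data rather than executed as Python. A minimal, self-contained sketch of the parsing step; the payload contents and the prompt value below are illustrative only, and the real hunk assumes json is imported at module level:

import json

# Illustrative payload shaped like the endpoint's generated_text response.
response_str = '[{"generated_text": "What is RAG? RAG combines retrieval with generation."}]'

# json.loads parses untrusted text safely; eval() would execute it as Python.
response_dict = json.loads(response_str)

prompt = "What is RAG?"
# Strip the echoed prompt, mirroring the slice used in the hunk above.
completion_text = response_dict[0]["generated_text"][len(prompt):]
print(completion_text)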

View File

@ -51,7 +51,7 @@ class LLMComponent:
"Local dependencies not found, install with `poetry install --extras llms-llama-cpp`" "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
) from e ) from e
prompt_style = get_prompt_style(settings.llamacpp.prompt_style) prompt_style = get_prompt_style(settings.llm.prompt_style)
settings_kwargs = { settings_kwargs = {
"tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp "tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
"top_k": settings.llamacpp.top_k, # ollama and llama-cpp "top_k": settings.llamacpp.top_k, # ollama and llama-cpp
@@ -109,15 +109,23 @@ class LLMComponent:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
+                    tokenizer=settings.llm.tokenizer,
+                    timeout=openai_settings.request_timeout,
+                    reuse_client=False,
                 )
             case "ollama":
                 try:
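With this hunk the openailike path reads the same settings.llm.prompt_style as the llama-cpp path and passes the resulting formatters into the client. A rough, self-contained sketch of that flow using toy stand-ins; the class and function names below are illustrative, not the project's actual implementations:

from dataclasses import dataclass
from typing import Callable, Literal


@dataclass
class LLMSettingsSketch:
    # Toy stand-in for the shared llm section of the settings.
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = "llama2"


def get_prompt_style_sketch(name: str) -> Callable[[str], str]:
    # Toy lookup standing in for get_prompt_style(): maps the literal to a formatter.
    styles = {
        "default": lambda msg: f"user: {msg}",
        "llama2": lambda msg: f"<s>[INST] {msg} [/INST]",
        "mistral": lambda msg: f"<s>[INST] {msg} [/INST]</s>",
    }
    return styles.get(name, styles["default"])


settings_llm = LLMSettingsSketch(prompt_style="mistral")
completion_to_prompt = get_prompt_style_sketch(settings_llm.prompt_style)

# Both the llamacpp and the openailike branches would now format prompts the same way.
print(completion_to_prompt("Summarize the ingested document."))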

View File

@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
+            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]`.\n"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
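Because the field is typed as a Literal, an unsupported style name is rejected when the settings are loaded rather than failing later inside prompt formatting. A compact sketch of that behaviour with a stripped-down stand-in model, not the project's actual settings class:

from typing import Literal

from pydantic import BaseModel, Field, ValidationError


class LLMSettingsSketch(BaseModel):
    # Stripped-down stand-in for LLMSettings; only the new field is modelled.
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
        "llama2",
        description="The prompt style to use for the chat engine.",
    )


print(LLMSettingsSketch().prompt_style)                        # "llama2" (default)
print(LLMSettingsSketch(prompt_style="mistral").prompt_style)  # explicit override

try:
    LLMSettingsSketch(prompt_style="alpaca")  # not one of the allowed literals
except ValidationError as e:
    print("rejected:", e.errors()[0]["type"])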
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
@@ -206,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until openailike server times out the request. Default is 120s. Format is float.",
+    )


 class OllamaSettings(BaseModel):
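The new field gives openailike requests a 120-second timeout by default, which any settings profile can override (as the vllm profile further down does with 600.0). A small sketch of how a YAML override would map onto the field, using a stripped-down stand-in model and assuming PyYAML is available:

import yaml  # assumes PyYAML is installed
from pydantic import BaseModel, Field


class OpenAISettingsSketch(BaseModel):
    # Stripped-down stand-in for OpenAISettings; only the new field is modelled.
    request_timeout: float = Field(
        120.0,
        description="Seconds before the openailike request times out.",
    )


# Hypothetical profile snippet mirroring the settings-vllm change below.
profile = yaml.safe_load("openai:\n  request_timeout: 600.0\n")
print(OpenAISettingsSketch(**profile["openai"]).request_timeout)  # 600.0 (profile override)
print(OpenAISettingsSketch().request_timeout)                     # 120.0 (default)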

View File

@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

View File

@@ -3,6 +3,9 @@ server:
 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1

 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0

View File

@ -5,7 +5,7 @@ server:
env_name: ${APP_ENV:prod} env_name: ${APP_ENV:prod}
port: ${PORT:8001} port: ${PORT:8001}
cors: cors:
enabled: false enabled: true
allow_origins: ["*"] allow_origins: ["*"]
allow_methods: ["*"] allow_methods: ["*"]
allow_headers: ["*"] allow_headers: ["*"]
@@ -36,6 +36,7 @@ ui:
 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
     top_n: 1

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting