Mirror of https://github.com/imartinez/privateGPT.git
Merge branch 'zylon-ai:main' into fix-setup

This commit is contained in:
commit 090ce7c69e
@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
 <Cards>
     <Card
-        title="Node.js/TypeScript - WIP"
+        title="TypeScript"
         icon="fa-brands fa-node"
-        href="https://github.com/imartinez/privateGPT-typescript"
+        href="https://github.com/zylon-ai/privategpt-ts"
     />
     <Card
-        title="Python - Ready!"
+        title="Python"
         icon="fa-brands fa-python"
-        href="https://github.com/imartinez/pgpt_python"
+        href="https://github.com/zylon-ai/pgpt-python"
     />
     <br />
 </Cards>
@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)

         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :], raw=resp
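Swapping eval for json.loads is a safety and robustness fix: the response body is JSON, and json.loads parses it without executing anything, whereas eval runs whatever Python the response happens to contain. A minimal sketch of the difference, with an illustrative payload (not captured from a real SageMaker endpoint):

import json

# Illustrative response body; real output depends on the deployed model server.
response_str = '[{"generated_text": "Hello! How can I help?"}]'

# json.loads only parses JSON (including true/false/null, which eval would choke on)
# and can never execute code embedded in the response.
response_dict = json.loads(response_str)
print(response_dict[0]["generated_text"])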
@@ -51,7 +51,7 @@ class LLMComponent:
                         "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                     ) from e

-                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 settings_kwargs = {
                     "tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
                     "top_k": settings.llamacpp.top_k, # ollama and llama-cpp
@@ -109,15 +109,23 @@ class LLMComponent:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
+                    tokenizer=settings.llm.tokenizer,
+                    timeout=openai_settings.request_timeout,
+                    reuse_client=False,
                 )
             case "ollama":
                 try:
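The messages_to_prompt and completion_to_prompt callables wired in above come from the prompt_style object returned by get_prompt_style. As a rough, hypothetical illustration of that interface only (the class below is a simplified stand-in, not the project's implementation, and the real callables receive llama_index chat messages rather than plain dicts):

from typing import Sequence


class MistralPromptStyleSketch:
    """Simplified stand-in for a 'mistral' prompt style (illustrative only)."""

    def completion_to_prompt(self, completion: str) -> str:
        # Wrap a bare completion request in the Mistral instruction template.
        return f"<s>[INST] {completion} [/INST]"

    def messages_to_prompt(self, messages: Sequence[dict]) -> str:
        # Fold system and user messages into one instruction block (simplified).
        merged = " ".join(m["content"] for m in messages)
        return f"<s>[INST] {merged} [/INST]"


style = MistralPromptStyleSketch()
print(style.completion_to_prompt("Summarize this document."))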
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
+            "If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]. "
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
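Declaring prompt_style as a Literal on a pydantic model means a typo or unsupported style is rejected when the settings are loaded, instead of surfacing later inside the chat engine. A small self-contained sketch of that behaviour (the class is a trimmed-down stand-in, not the real LLMSettings):

from typing import Literal

from pydantic import BaseModel, Field, ValidationError


class LLMSettingsSketch(BaseModel):
    # Mirrors the field added above, including its default.
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field("llama2")


print(LLMSettingsSketch().prompt_style)                        # llama2 (historic default)
print(LLMSettingsSketch(prompt_style="mistral").prompt_style)  # mistral

try:
    LLMSettingsSketch(prompt_style="alpaca")  # not one of the allowed literals
except ValidationError as err:
    print("rejected:", err.errors()[0]["loc"])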
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
-            "If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]. "
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
@@ -206,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until openailike server times out the request. Default is 120s. Format is float.",
+    )


 class OllamaSettings(BaseModel):
@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
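With prompt_style moved onto LLMSettings, the key now belongs under the llm: block of each profile rather than under llamacpp:. A small sketch of what the parsed profile looks like, using plain PyYAML as a stand-in for the project's settings loader:

import yaml  # assumes PyYAML is installed

profile = yaml.safe_load("""
llm:
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
  prompt_style: "mistral"

llamacpp:
  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
""")

# Every backend now reads the style from the llm block...
print(profile["llm"]["prompt_style"])         # mistral
# ...and the llamacpp block no longer carries it.
print("prompt_style" in profile["llamacpp"])  # False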
@@ -3,6 +3,9 @@ server:

 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1

 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0
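These values point at a local OpenAI-compatible server (such as vLLM), and the new request_timeout gives slow generations 600 seconds before the client gives up. One way to sanity-check such an endpoint from Python with the same values (assumes the openai client package is installed; the prompt is arbitrary):

from openai import OpenAI

# Standard OpenAI client pointed at the local OpenAI-compatible server;
# "EMPTY" is a placeholder key for servers that do not check authentication.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
    timeout=600.0,  # matches the request_timeout added above
)

completion = client.completions.create(
    model="facebook/opt-125m",
    prompt="PrivateGPT is",
    max_tokens=16,
)
print(completion.choices[0].text)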
@@ -5,7 +5,7 @@ server:
   env_name: ${APP_ENV:prod}
   port: ${PORT:8001}
   cors:
-    enabled: false
+    enabled: true
     allow_origins: ["*"]
     allow_methods: ["*"]
     allow_headers: ["*"]
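Turning cors.enabled on with wildcard origins, methods, and headers typically translates into FastAPI's CORSMiddleware being registered on the app, so browser front ends served from other hosts can call the API. A minimal sketch of that wiring (not the project's exact code):

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# Mirrors the profile above: any origin, method, and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)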
@@ -36,6 +36,7 @@ ui:

 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
   top_n: 1

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting