From e21bf20c10938b24711d9f2c765997f44d7e02a9 Mon Sep 17 00:00:00 2001
From: icsy7867
Date: Tue, 30 Apr 2024 03:53:10 -0400
Subject: [PATCH 1/5] feat: prompt_style applied to all LLMs + extra LLM
 params. (#1835)

* Moved prompt_style to the main LLM settings, since all LLMs from llama_index
  can utilize it. Also passed temperature, context window size, max_tokens,
  and max_new_tokens into the openailike implementation to help keep its
  settings consistent with the other implementations.

* Removed prompt_style from llamacpp entirely.

* Fixed settings-local.yaml to include prompt_style in the LLM settings
  instead of llamacpp.
---
 private_gpt/components/llm/llm_component.py | 11 +++++++---
 private_gpt/settings/settings.py            | 23 ++++++++++-----------
 settings-local.yaml                         |  4 ++--
 settings.yaml                               |  2 +-
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index baffa4e4..51d71a3e 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -51,7 +51,7 @@ class LLMComponent:
                         "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                     ) from e
 
-                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 settings_kwargs = {
                     "tfs_z": settings.llamacpp.tfs_z,  # ollama and llama-cpp
                     "top_k": settings.llamacpp.top_k,  # ollama and llama-cpp
@@ -109,15 +109,20 @@ class LLMComponent:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
-
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
                 )
             case "ollama":
                 try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 051cfcab..c4c5e20d 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
+            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]`.\n"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )
 
 
 class VectorstoreSettings(BaseModel):
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
diff --git a/settings-local.yaml b/settings-local.yaml
index c9d02742..48eeb0ea 100644
--- a/settings-local.yaml
+++ b/settings-local.yaml
@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"
 
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
 
@@ -24,4 +24,4 @@ vectorstore:
   database: qdrant
 
 qdrant:
-  path: local_data/private_gpt/qdrant
\ No newline at end of file
+  path: local_data/private_gpt/qdrant
diff --git a/settings.yaml b/settings.yaml
index e881a555..d8d2500c 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -36,6 +36,7 @@ ui:
 
 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
     top_n: 1
 
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0  # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting

From 9d0d614706581a8bfa57db45f62f84ab23d26f15 Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Tue, 30 Apr 2024 15:58:19 +0800
Subject: [PATCH 2/5] fix: Replacing unsafe `eval()` with `json.loads()`
 (#1890)

---
 private_gpt/components/llm/custom/sagemaker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py
index e20f5394..bd2aec18 100644
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
 
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)
 
         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :], raw=resp

From d13029a046f6e19e8ee65bef3acd96365c738df2 Mon Sep 17 00:00:00 2001
From: Fran García
Date: Fri, 10 May 2024 14:13:15 +0200
Subject: [PATCH 3/5] feat(docs): add privategpt-ts sdk (#1924)

---
 fern/docs/pages/api-reference/sdks.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fern/docs/pages/api-reference/sdks.mdx b/fern/docs/pages/api-reference/sdks.mdx
index 0172d9d4..f7cf6f84 100644
--- a/fern/docs/pages/api-reference/sdks.mdx
+++ b/fern/docs/pages/api-reference/sdks.mdx
@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
From 966af4771dbe5cf3fdf554b5fdf8f732407859c4 Mon Sep 17 00:00:00 2001
From: Fran García
Date: Fri, 10 May 2024 14:13:46 +0200
Subject: [PATCH 4/5] fix(settings): enable cors by default so it will work
 when using ts sdk (spa) (#1925)

---
 settings.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/settings.yaml b/settings.yaml
index d8d2500c..06fcd63d 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -5,7 +5,7 @@ server:
   env_name: ${APP_ENV:prod}
   port: ${PORT:8001}
   cors:
-    enabled: false
+    enabled: true
     allow_origins: ["*"]
     allow_methods: ["*"]
     allow_headers: ["*"]

From 45df99feb7eb308d7dd4770039814558b75d78ae Mon Sep 17 00:00:00 2001
From: jcbonnet-fwd <141936727+jcbonnet-fwd@users.noreply.github.com>
Date: Fri, 10 May 2024 16:44:08 +0200
Subject: [PATCH 5/5] Add timeout parameter for better support of openailike
 LLM tools on a local computer (like LM Studio). (#1858)

feat(llm): Improve settings of the OpenAILike LLM
---
 private_gpt/components/llm/llm_component.py | 3 +++
 private_gpt/settings/settings.py            | 4 ++++
 settings-vllm.yaml                          | 4 ++++
 3 files changed, 11 insertions(+)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 51d71a3e..c29638b1 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -123,6 +123,9 @@ class LLMComponent:
                     max_new_tokens=settings.llm.max_new_tokens,
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
+                    tokenizer=settings.llm.tokenizer,
+                    timeout=openai_settings.request_timeout,
+                    reuse_client=False,
                 )
             case "ollama":
                 try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index c4c5e20d..bd83fb8b 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -205,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until the openailike server times out the request. Default is 120s.",
+    )
 
 
 class OllamaSettings(BaseModel):
diff --git a/settings-vllm.yaml b/settings-vllm.yaml
index 5a0a68c6..1bfab6b2 100644
--- a/settings-vllm.yaml
+++ b/settings-vllm.yaml
@@ -3,6 +3,9 @@ server:
 
 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1
 
 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0
\ No newline at end of file
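
As a rough illustration of how these combined changes might be used with an OpenAI-compatible tool running locally (PATCH 5/5 mentions LM Studio), a settings profile could look like the sketch below. The endpoint, model name, and timeout are assumptions for illustration; LM Studio commonly serves its API at http://localhost:1234/v1, but none of these values come from the patches themselves.

    llm:
      mode: openailike
      prompt_style: "mistral"              # assumed; must match the model actually being served
      max_new_tokens: 512
      context_window: 3900
      tokenizer: mistralai/Mistral-7B-Instruct-v0.2
      temperature: 0.1

    openai:
      api_base: http://localhost:1234/v1   # assumed LM Studio default endpoint
      api_key: EMPTY
      model: local-model                   # placeholder identifier for the locally loaded model
      request_timeout: 600.0               # generous timeout so slow local generations are not cut off

With such a profile, LLMComponent builds the OpenAILike client from the shared llm settings (prompt style, token limits, temperature) and passes openai.request_timeout through as the client timeout, which is the behaviour these patches introduce.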