From e21bf20c10938b24711d9f2c765997f44d7e02a9 Mon Sep 17 00:00:00 2001
From: icsy7867
Date: Tue, 30 Apr 2024 03:53:10 -0400
Subject: [PATCH 1/5] feat: prompt_style applied to all LLMs + extra LLM
 params. (#1835)

* Moved prompt_style to the main LLM settings, since all LLMs from llama_index
  can utilize it. Also passed temperature, context window size, max_tokens,
  and max_new_tokens into the openailike implementation to help keep its
  settings consistent with the other implementations.

* Removed prompt_style from llamacpp entirely.

* Fixed settings-local.yaml to include prompt_style in the LLM settings
  instead of llamacpp.
---
 private_gpt/components/llm/llm_component.py | 11 +++++++---
 private_gpt/settings/settings.py            | 23 ++++++++++-----------
 settings-local.yaml                         |  4 ++--
 settings.yaml                               |  2 +-
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index baffa4e4..51d71a3e 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -51,7 +51,7 @@ class LLMComponent:
                         "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                     ) from e
 
-                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 settings_kwargs = {
                     "tfs_z": settings.llamacpp.tfs_z,  # ollama and llama-cpp
                     "top_k": settings.llamacpp.top_k,  # ollama and llama-cpp
@@ -109,15 +109,20 @@ class LLMComponent:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
-
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
                 )
             case "ollama":
                 try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 051cfcab..c4c5e20d 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
+            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]`.\n"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )
 
 
 class VectorstoreSettings(BaseModel):
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
diff --git a/settings-local.yaml b/settings-local.yaml
index c9d02742..48eeb0ea 100644
--- a/settings-local.yaml
+++ b/settings-local.yaml
@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"
 
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
 
@@ -24,4 +24,4 @@ vectorstore:
   database: qdrant
 
 qdrant:
-  path: local_data/private_gpt/qdrant
\ No newline at end of file
+  path: local_data/private_gpt/qdrant
diff --git a/settings.yaml b/settings.yaml
index e881a555..d8d2500c 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -36,6 +36,7 @@ ui:
 
 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
     top_n: 1
 
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0  # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting

From 9d0d614706581a8bfa57db45f62f84ab23d26f15 Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Tue, 30 Apr 2024 15:58:19 +0800
Subject: [PATCH 2/5] fix: Replacing unsafe `eval()` with `json.loads()`
 (#1890)

---
 private_gpt/components/llm/custom/sagemaker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py
index e20f5394..bd2aec18 100644
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
 
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)
 
         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :], raw=resp

From d13029a046f6e19e8ee65bef3acd96365c738df2 Mon Sep 17 00:00:00 2001
From: Fran García
Date: Fri, 10 May 2024 14:13:15 +0200
Subject: [PATCH 3/5] feat(docs): add privategpt-ts sdk (#1924)

---
 fern/docs/pages/api-reference/sdks.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fern/docs/pages/api-reference/sdks.mdx b/fern/docs/pages/api-reference/sdks.mdx
index 0172d9d4..f7cf6f84 100644
--- a/fern/docs/pages/api-reference/sdks.mdx
+++ b/fern/docs/pages/api-reference/sdks.mdx
@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
From 966af4771dbe5cf3fdf554b5fdf8f732407859c4 Mon Sep 17 00:00:00 2001
From: Fran García
Date: Fri, 10 May 2024 14:13:46 +0200
Subject: [PATCH 4/5] fix(settings): enable cors by default so it will work
 when using ts sdk (spa) (#1925)

---
 settings.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/settings.yaml b/settings.yaml
index d8d2500c..06fcd63d 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -5,7 +5,7 @@ server:
   env_name: ${APP_ENV:prod}
   port: ${PORT:8001}
   cors:
-    enabled: false
+    enabled: true
     allow_origins: ["*"]
     allow_methods: ["*"]
     allow_headers: ["*"]

From 45df99feb7eb308d7dd4770039814558b75d78ae Mon Sep 17 00:00:00 2001
From: jcbonnet-fwd <141936727+jcbonnet-fwd@users.noreply.github.com>
Date: Fri, 10 May 2024 16:44:08 +0200
Subject: [PATCH 5/5] Add timeout parameter for better support of openailike
 LLM tools on a local computer (like LM Studio). (#1858)

feat(llm): Improve settings of the OpenAILike LLM
---
 private_gpt/components/llm/llm_component.py | 3 +++
 private_gpt/settings/settings.py            | 4 ++++
 settings-vllm.yaml                          | 4 ++++
 3 files changed, 11 insertions(+)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 51d71a3e..c29638b1 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -123,6 +123,9 @@ class LLMComponent:
                     max_new_tokens=settings.llm.max_new_tokens,
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
+                    tokenizer=settings.llm.tokenizer,
+                    timeout=openai_settings.request_timeout,
+                    reuse_client=False,
                 )
             case "ollama":
                 try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index c4c5e20d..bd83fb8b 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -205,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until the openailike server times out the request. Default is 120s.",
+    )
 
 
 class OllamaSettings(BaseModel):
diff --git a/settings-vllm.yaml b/settings-vllm.yaml
index 5a0a68c6..1bfab6b2 100644
--- a/settings-vllm.yaml
+++ b/settings-vllm.yaml
@@ -3,6 +3,9 @@ server:
 
 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1
 
 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0
\ No newline at end of file
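
As a rough illustration of how these combined changes might be used with an OpenAI-compatible tool running locally (PATCH 5/5 mentions LM Studio), a settings profile could look like the sketch below. The endpoint, model name, and timeout are assumptions for illustration; LM Studio commonly serves its API at http://localhost:1234/v1, but none of these values come from the patches themselves.

    llm:
      mode: openailike
      prompt_style: "mistral"              # assumed; must match the model actually being served
      max_new_tokens: 512
      context_window: 3900
      tokenizer: mistralai/Mistral-7B-Instruct-v0.2
      temperature: 0.1

    openai:
      api_base: http://localhost:1234/v1   # assumed LM Studio default endpoint
      api_key: EMPTY
      model: local-model                   # placeholder identifier for the locally loaded model
      request_timeout: 600.0               # generous timeout so slow local generations are not cut off

With such a profile, LLMComponent builds the OpenAILike client from the shared llm settings (prompt style, token limits, temperature) and passes openai.request_timeout through as the client timeout, which is the behaviour these patches introduce.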