Merge branch 'zylon-ai:main' into fix-setup

Mart 2024-05-10 18:01:02 +02:00 committed by GitHub
commit 090ce7c69e
7 changed files with 39 additions and 24 deletions

View File

@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
 <Cards>
   <Card
-    title="Node.js/TypeScript - WIP"
+    title="TypeScript"
     icon="fa-brands fa-node"
-    href="https://github.com/imartinez/privateGPT-typescript"
+    href="https://github.com/zylon-ai/privategpt-ts"
   />
   <Card
-    title="Python - Ready!"
+    title="Python"
     icon="fa-brands fa-python"
-    href="https://github.com/imartinez/pgpt_python"
+    href="https://github.com/zylon-ai/pgpt-python"
   />
   <br />
 </Cards>

View File

@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)
         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :], raw=resp
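Replacing eval() with json.loads() means the SageMaker response body is parsed as data rather than executed as Python. A minimal, self-contained sketch of the parsing step; the payload contents and the prompt value below are illustrative only, and the real hunk assumes json is imported at module level:

import json

# Illustrative payload shaped like the endpoint's generated_text response.
response_str = '[{"generated_text": "What is RAG? RAG combines retrieval with generation."}]'

# json.loads parses untrusted text safely; eval() would execute it as Python.
response_dict = json.loads(response_str)

prompt = "What is RAG?"
# Strip the echoed prompt, mirroring the slice used in the hunk above.
completion_text = response_dict[0]["generated_text"][len(prompt):]
print(completion_text)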

View File

@ -51,7 +51,7 @@ class LLMComponent:
"Local dependencies not found, install with `poetry install --extras llms-llama-cpp`" "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
) from e ) from e
prompt_style = get_prompt_style(settings.llamacpp.prompt_style) prompt_style = get_prompt_style(settings.llm.prompt_style)
settings_kwargs = { settings_kwargs = {
"tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp "tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
"top_k": settings.llamacpp.top_k, # ollama and llama-cpp "top_k": settings.llamacpp.top_k, # ollama and llama-cpp
@@ -109,15 +109,23 @@ class LLMComponent:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
+                    tokenizer=settings.llm.tokenizer,
+                    timeout=openai_settings.request_timeout,
+                    reuse_client=False,
                 )
             case "ollama":
                 try:
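With this hunk the openailike path reads the same settings.llm.prompt_style as the llama-cpp path and passes the resulting formatters into the client. A rough, self-contained sketch of that flow using toy stand-ins; the class and function names below are illustrative, not the project's actual implementations:

from dataclasses import dataclass
from typing import Callable, Literal


@dataclass
class LLMSettingsSketch:
    # Toy stand-in for the shared llm section of the settings.
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = "llama2"


def get_prompt_style_sketch(name: str) -> Callable[[str], str]:
    # Toy lookup standing in for get_prompt_style(): maps the literal to a formatter.
    styles = {
        "default": lambda msg: f"user: {msg}",
        "llama2": lambda msg: f"<s>[INST] {msg} [/INST]",
        "mistral": lambda msg: f"<s>[INST] {msg} [/INST]</s>",
    }
    return styles.get(name, styles["default"])


settings_llm = LLMSettingsSketch(prompt_style="mistral")
completion_to_prompt = get_prompt_style_sketch(settings_llm.prompt_style)

# Both the llamacpp and the openailike branches would now format prompts the same way.
print(completion_to_prompt("Summarize the ingested document."))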

View File

@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
+            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]`.\n"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
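Because the field is typed as a Literal, an unsupported style name is rejected when the settings are loaded rather than failing later inside prompt formatting. A compact sketch of that behaviour with a stripped-down stand-in model, not the project's actual settings class:

from typing import Literal

from pydantic import BaseModel, Field, ValidationError


class LLMSettingsSketch(BaseModel):
    # Stripped-down stand-in for LLMSettings; only the new field is modelled.
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
        "llama2",
        description="The prompt style to use for the chat engine.",
    )


print(LLMSettingsSketch().prompt_style)                        # "llama2" (default)
print(LLMSettingsSketch(prompt_style="mistral").prompt_style)  # explicit override

try:
    LLMSettingsSketch(prompt_style="alpaca")  # not one of the allowed literals
except ValidationError as e:
    print("rejected:", e.errors()[0]["type"])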
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
@@ -206,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until openailike server times out the request. Default is 120s. Format is float.",
+    )


 class OllamaSettings(BaseModel):
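The new field gives openailike requests a 120-second timeout by default, which any settings profile can override (as the vllm profile further down does with 600.0). A small sketch of how a YAML override would map onto the field, using a stripped-down stand-in model and assuming PyYAML is available:

import yaml  # assumes PyYAML is installed
from pydantic import BaseModel, Field


class OpenAISettingsSketch(BaseModel):
    # Stripped-down stand-in for OpenAISettings; only the new field is modelled.
    request_timeout: float = Field(
        120.0,
        description="Seconds before the openailike request times out.",
    )


# Hypothetical profile snippet mirroring the settings-vllm change below.
profile = yaml.safe_load("openai:\n  request_timeout: 600.0\n")
print(OpenAISettingsSketch(**profile["openai"]).request_timeout)  # 600.0 (profile override)
print(OpenAISettingsSketch().request_timeout)                     # 120.0 (default)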

View File

@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"
 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

View File

@@ -3,6 +3,9 @@ server:
 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1

 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0

View File

@ -5,7 +5,7 @@ server:
env_name: ${APP_ENV:prod} env_name: ${APP_ENV:prod}
port: ${PORT:8001} port: ${PORT:8001}
cors: cors:
enabled: false enabled: true
allow_origins: ["*"] allow_origins: ["*"]
allow_methods: ["*"] allow_methods: ["*"]
allow_headers: ["*"] allow_headers: ["*"]
@@ -36,6 +36,7 @@ ui:
 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
     top_n: 1

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting