From f83abff8bc955a6952c92cc7bcb8985fcec93afa Mon Sep 17 00:00:00 2001 From: Marco Repetto <119503933+mrepetto-certx@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:08:48 +0200 Subject: [PATCH 01/17] feat(docker): set default Docker to use Ollama (#1812) --- Dockerfile.external | 2 +- docker-compose.yaml | 9 ++++++--- settings-docker.yaml | 11 +++++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Dockerfile.external b/Dockerfile.external index b56af501..3e2530e0 100644 --- a/Dockerfile.external +++ b/Dockerfile.external @@ -14,7 +14,7 @@ FROM base as dependencies WORKDIR /home/worker/app COPY pyproject.toml poetry.lock ./ -RUN poetry install --extras "ui vector-stores-qdrant" +RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama" FROM base as app diff --git a/docker-compose.yaml b/docker-compose.yaml index 7129b126..f12e1da9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,13 +1,16 @@ services: private-gpt: build: - dockerfile: Dockerfile.local + dockerfile: Dockerfile.external volumes: - ./local_data/:/home/worker/app/local_data - - ./models/:/home/worker/app/models ports: - 8001:8080 environment: PORT: 8080 PGPT_PROFILES: docker - PGPT_MODE: llamacpp + PGPT_MODE: ollama + ollama: + image: ollama/ollama:latest + volumes: + - ./models:/root/.ollama diff --git a/settings-docker.yaml b/settings-docker.yaml index d71c4070..cb85afdf 100644 --- a/settings-docker.yaml +++ b/settings-docker.yaml @@ -19,6 +19,17 @@ sagemaker: llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:} embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:} +ollama: + llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral} + embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text} + api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434} + tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0} + top_k: ${PGPT_OLLAMA_TOP_K:40} + top_p: ${PGPT_OLLAMA_TOP_P:0.9} + repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64} + repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2} + request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0} + ui: enabled: true path: / From 83adc12a8ef0fa0c13a0dec084fa596445fc9075 Mon Sep 17 00:00:00 2001 From: machatschek <46938423+machatschek@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:29:51 +0200 Subject: [PATCH 02/17] feat(RAG): Introduce SentenceTransformer Reranker (#1810) --- fern/docs.yml | 2 + fern/docs/pages/manual/reranker.mdx | 36 +++++++ poetry.lock | 119 +++++++++++++++++++++++- private_gpt/server/chat/chat_service.py | 21 +++-- private_gpt/settings/settings.py | 18 +++- pyproject.toml | 6 ++ settings.yaml | 4 + 7 files changed, 198 insertions(+), 8 deletions(-) create mode 100644 fern/docs/pages/manual/reranker.mdx diff --git a/fern/docs.yml b/fern/docs.yml index 2611dac8..be0d904c 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -64,6 +64,8 @@ navigation: contents: - page: LLM Backends path: ./docs/pages/manual/llms.mdx + - page: Reranking + path: ./docs/pages/manual/reranker.mdx - section: User Interface contents: - page: User interface (Gradio) Manual diff --git a/fern/docs/pages/manual/reranker.mdx b/fern/docs/pages/manual/reranker.mdx new file mode 100644 index 00000000..576e3e8f --- /dev/null +++ b/fern/docs/pages/manual/reranker.mdx @@ -0,0 +1,36 @@ +## Enhancing Response Quality with Reranking + +PrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM. 
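To make the mechanism concrete, here is a rough standalone sketch of what the cross-encoder reranker does: it scores each retrieved passage jointly with the query and keeps only the best matches for answer generation. It calls the `sentence-transformers` API directly rather than PrivateGPT's own wiring, and the query, passages and `top_n` value are invented for illustration; it assumes the `rerank-sentence-transformers` extra described below is installed.

```python
from sentence_transformers import CrossEncoder

# Same default model referenced later in this guide and in settings.yaml.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-2-v2")

query = "How do I enable reranking?"
retrieved = [
    "Reranking is configured in the rag section of settings.yaml.",
    "PrivateGPT supports several vector stores, such as Qdrant and Chroma.",
    "Set rag.rerank.enabled to true to activate the cross-encoder reranker.",
]

# A cross-encoder scores each (query, passage) pair jointly, which is
# slower than plain embedding similarity but considerably more accurate.
scores = reranker.predict([(query, passage) for passage in retrieved])

# Keep only the top_n passages for the final answer generation step.
top_n = 2
ranked = sorted(zip(scores, retrieved), key=lambda pair: pair[0], reverse=True)
for score, passage in ranked[:top_n]:
    print(f"{score:.3f}  {passage}")
```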
+ +### Enabling Reranking + +Document reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case. + +#### Additional Requirements + +Before enabling reranking, you must install additional dependencies: + +```bash +poetry install --extras rerank-sentence-transformers +``` + +This command installs dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only supported method by PrivateGPT for document reranking. + +#### Configuration + +To enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. Here are the key settings to consider: + +- `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`. +- `rerank`: + - `enabled`: Set to `true` to activate the reranking feature. + - `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`. + +Example configuration snippet: + +```yaml +rag: + similarity_top_k: 10 # Number of documents to retrieve and consider for reranking + rerank: + enabled: true + top_n: 3 # Number of top-ranked documents to use for generating the answer +``` \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 38c2374c..e7f186dd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4949,6 +4949,90 @@ tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] torch = ["safetensors[numpy]", "torch (>=1.10)"] +[[package]] +name = "scikit-learn" +version = "1.4.1.post1" +description = "A set of python modules for machine learning and data mining" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scikit-learn-1.4.1.post1.tar.gz", hash = "sha256:93d3d496ff1965470f9977d05e5ec3376fb1e63b10e4fda5e39d23c2d8969a30"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c540aaf44729ab5cd4bd5e394f2b375e65ceaea9cdd8c195788e70433d91bbc5"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4310bff71aa98b45b46cd26fa641309deb73a5d1c0461d181587ad4f30ea3c36"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f43dd527dabff5521af2786a2f8de5ba381e182ec7292663508901cf6ceaf6e"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c02e27d65b0c7dc32f2c5eb601aaf5530b7a02bfbe92438188624524878336f2"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-win_amd64.whl", hash = "sha256:629e09f772ad42f657ca60a1a52342eef786218dd20cf1369a3b8d085e55ef8f"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6145dfd9605b0b50ae72cdf72b61a2acd87501369a763b0d73d004710ebb76b5"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1afed6951bc9d2053c6ee9a518a466cbc9b07c6a3f9d43bfe734192b6125d508"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ce03506ccf5f96b7e9030fea7eb148999b254c44c10182ac55857bc9b5d4815f"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ba516fcdc73d60e7f48cbb0bccb9acbdb21807de3651531208aac73c758e3ab"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-win_amd64.whl", hash = "sha256:78cd27b4669513b50db4f683ef41ea35b5dddc797bd2bbd990d49897fd1c8a46"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a1e289f33f613cefe6707dead50db31930530dc386b6ccff176c786335a7b01c"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0df87de9ce1c0140f2818beef310fb2e2afdc1e66fc9ad587965577f17733649"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:712c1c69c45b58ef21635360b3d0a680ff7d83ac95b6f9b82cf9294070cda710"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1754b0c2409d6ed5a3380512d0adcf182a01363c669033a2b55cca429ed86a81"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-win_amd64.whl", hash = "sha256:1d491ef66e37f4e812db7e6c8286520c2c3fc61b34bf5e59b67b4ce528de93af"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aa0029b78ef59af22cfbd833e8ace8526e4df90212db7ceccbea582ebb5d6794"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e4c88436ac96bf69eb6d746ac76a574c314a23c6961b7d344b38877f20fee1"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cd3a77c32879311f2aa93466d3c288c955ef71d191503cf0677c3340ae8ae0"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a3ee19211ded1a52ee37b0a7b373a8bfc66f95353af058a210b692bd4cda0dd"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-win_amd64.whl", hash = "sha256:234b6bda70fdcae9e4abbbe028582ce99c280458665a155eed0b820599377d25"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5,<2.0" +scipy = ">=1.6.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.12.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, + {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, + {file = 
"scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, + {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, + {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, + {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, + {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, + {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, + {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, + {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, + {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, + {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<1.29.0" + +[package.extras] +dev = ["click", "cython-lint (>=0.12.2)", 
"doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "semantic-version" version = "2.10.0" @@ -4964,6 +5048,27 @@ files = [ dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1)", "coverage", "flake8", "nose2", "readme-renderer (<25.0)", "tox", "wheel", "zest.releaser[recommended]"] doc = ["Sphinx", "sphinx-rtd-theme"] +[[package]] +name = "sentence-transformers" +version = "2.6.1" +description = "Multilingual text embeddings" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "sentence-transformers-2.6.1.tar.gz", hash = "sha256:633ad6b70e390ea335de8689652a5d6c21a323b79ed19519c2f392451088487f"}, + {file = "sentence_transformers-2.6.1-py3-none-any.whl", hash = "sha256:a887e17696b513f99a709ce1f37fd547f53857aebe863785ede546c303b09ea0"}, +] + +[package.dependencies] +huggingface-hub = ">=0.15.1" +numpy = "*" +Pillow = "*" +scikit-learn = "*" +scipy = "*" +torch = ">=1.11.0" +tqdm = "*" +transformers = ">=4.32.0,<5.0.0" + [[package]] name = "setuptools" version = "69.0.2" @@ -5156,6 +5261,17 @@ files = [ [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] +[[package]] +name = "threadpoolctl" +version = "3.4.0" +description = "threadpoolctl" +optional = true +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"}, + {file = "threadpoolctl-3.4.0.tar.gz", hash = "sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"}, +] + [[package]] name = "tiktoken" version = "0.5.2" @@ -6206,6 +6322,7 @@ llms-ollama = ["llama-index-llms-ollama"] llms-openai = ["llama-index-llms-openai"] llms-openai-like = ["llama-index-llms-openai-like"] llms-sagemaker = ["boto3"] +rerank-sentence-transformers = ["sentence-transformers", "torch"] storage-nodestore-postgres = ["asyncpg", "llama-index-storage-docstore-postgres", "llama-index-storage-index-store-postgres", "psycopg2-binary"] ui = ["gradio"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] @@ -6215,4 +6332,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "3d5f21e5e41ea66d655891a6d9b01bcdd8348b275e27a54e90b65ac9d5719981" +content-hash = "0b3665bd11a604609249ff0267e4e5cf009881d16a84f9774fc54d45a1373e09" diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py index ea57f2c0..ae8cf008 100644 --- a/private_gpt/server/chat/chat_service.py +++ b/private_gpt/server/chat/chat_service.py @@ -9,6 +9,7 @@ from llama_index.core.indices import VectorStoreIndex from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor from llama_index.core.llms import ChatMessage, MessageRole from llama_index.core.postprocessor import ( + SentenceTransformerRerank, SimilarityPostprocessor, ) from llama_index.core.storage import StorageContext @@ -113,16 +114,24 @@ class ChatService: context_filter=context_filter, similarity_top_k=self.settings.rag.similarity_top_k, ) + node_postprocessors = [ + MetadataReplacementPostProcessor(target_metadata_key="window"), + SimilarityPostprocessor( 
+ similarity_cutoff=settings.rag.similarity_value + ), + ] + + if settings.rag.rerank.enabled: + rerank_postprocessor = SentenceTransformerRerank( + model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n + ) + node_postprocessors.append(rerank_postprocessor) + return ContextChatEngine.from_defaults( system_prompt=system_prompt, retriever=vector_index_retriever, llm=self.llm_component.llm, # Takes no effect at the moment - node_postprocessors=[ - MetadataReplacementPostProcessor(target_metadata_key="window"), - SimilarityPostprocessor( - similarity_cutoff=settings.rag.similarity_value - ), - ], + node_postprocessors=node_postprocessors, ) else: return SimpleChatEngine.from_defaults( diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 5896f00d..bc03e30a 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -284,15 +284,31 @@ class UISettings(BaseModel): ) +class RerankSettings(BaseModel): + enabled: bool = Field( + False, + description="This value controls whether a reranker should be included in the RAG pipeline.", + ) + model: str = Field( + "cross-encoder/ms-marco-MiniLM-L-2-v2", + description="Rerank model to use. Limited to SentenceTransformer cross-encoder models.", + ) + top_n: int = Field( + 2, + description="This value controls the number of documents returned by the RAG pipeline.", + ) + + class RagSettings(BaseModel): similarity_top_k: int = Field( 2, - description="This value controls the number of documents returned by the RAG pipeline", + description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.", ) similarity_value: float = Field( None, description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.", ) + rerank: RerankSettings class PostgresSettings(BaseModel): diff --git a/pyproject.toml b/pyproject.toml index d5689998..21d68a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,11 @@ asyncpg = {version="^0.29.0", optional = true} # Optional Sagemaker dependency boto3 = {version ="^1.34.51", optional = true} + +# Optional Reranker dependencies +torch = {version ="^2.1.2", optional = true} +sentence-transformers = {version ="^2.6.1", optional = true} + # Optional UI gradio = {version ="^4.19.2", optional = true} @@ -57,6 +62,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] vector-stores-postgres = ["llama-index-vector-stores-postgres"] storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"] +rerank-sentence-transformers = ["torch", "sentence-transformers"] [tool.poetry.group.dev.dependencies] black = "^22" diff --git a/settings.yaml b/settings.yaml index 87a63ef4..ce6a2b9f 100644 --- a/settings.yaml +++ b/settings.yaml @@ -47,6 +47,10 @@ rag: #This value controls how many "top" documents the RAG returns to use in the context. #similarity_value: 0.45 #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score. 
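+  #The rerank block below enables the optional cross-encoder reranker; install the rerank-sentence-transformers extra before setting enabled to true.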
+ rerank: + enabled: false + model: cross-encoder/ms-marco-MiniLM-L-2-v2 + top_n: 1 llamacpp: prompt_style: "mistral" From b3b0140e244e7a313bfaf4ef10eb0f7e4192710e Mon Sep 17 00:00:00 2001 From: Robin Boone <43806560+Robinsane@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:23:10 +0200 Subject: [PATCH 03/17] feat(llm): Ollama LLM-Embeddings decouple + longer keep_alive settings (#1800) --- .../embedding/embedding_component.py | 2 +- private_gpt/components/llm/llm_component.py | 20 +++++++++++++++++++ private_gpt/settings/settings.py | 8 ++++++++ settings-ollama.yaml | 2 ++ settings.yaml | 2 ++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index 2967c38b..77e8c3d4 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -70,7 +70,7 @@ class EmbeddingComponent: ollama_settings = settings.ollama self.embedding_model = OllamaEmbedding( model_name=ollama_settings.embedding_model, - base_url=ollama_settings.api_base, + base_url=ollama_settings.embedding_api_base, ) case "azopenai": try: diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index 4e46c250..dae997cc 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -1,4 +1,6 @@ import logging +from collections.abc import Callable +from typing import Any from injector import inject, singleton from llama_index.core.llms import LLM, MockLLM @@ -133,6 +135,24 @@ class LLMComponent: additional_kwargs=settings_kwargs, request_timeout=ollama_settings.request_timeout, ) + + if ( + ollama_settings.keep_alive + != ollama_settings.model_fields["keep_alive"].default + ): + # Modify Ollama methods to use the "keep_alive" field. + def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]: + def wrapper(*args: Any, **kwargs: Any) -> Any: + kwargs["keep_alive"] = ollama_settings.keep_alive + return func(*args, **kwargs) + + return wrapper + + Ollama.chat = add_keep_alive(Ollama.chat) + Ollama.stream_chat = add_keep_alive(Ollama.stream_chat) + Ollama.complete = add_keep_alive(Ollama.complete) + Ollama.stream_complete = add_keep_alive(Ollama.stream_complete) + case "azopenai": try: from llama_index.llms.azure_openai import ( # type: ignore diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index bc03e30a..7ec84a7b 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -209,6 +209,10 @@ class OllamaSettings(BaseModel): "http://localhost:11434", description="Base URL of Ollama API. Example: 'https://localhost:11434'.", ) + embedding_api_base: str = Field( + api_base, # default is same as api_base, unless specified differently + description="Base URL of Ollama embedding API. Defaults to the same value as api_base", + ) llm_model: str = Field( None, description="Model to use. Example: 'llama2-uncensored'.", @@ -217,6 +221,10 @@ class OllamaSettings(BaseModel): None, description="Model to use. Example: 'nomic-embed-text'.", ) + keep_alive: str = Field( + "5m", + description="Time the model will stay loaded in memory after a request. examples: 5m, 5h, '-1' ", + ) tfs_z: float = Field( 1.0, description="Tail free sampling is used to reduce the impact of less probable tokens from the output. 
A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.", diff --git a/settings-ollama.yaml b/settings-ollama.yaml index d7e1a12c..4f0be4ff 100644 --- a/settings-ollama.yaml +++ b/settings-ollama.yaml @@ -14,6 +14,8 @@ ollama: llm_model: mistral embedding_model: nomic-embed-text api_base: http://localhost:11434 + keep_alive: 5m + # embedding_api_base: http://ollama_embedding:11434 # uncomment if your embedding model runs on another ollama tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) diff --git a/settings.yaml b/settings.yaml index ce6a2b9f..11c3c42f 100644 --- a/settings.yaml +++ b/settings.yaml @@ -99,6 +99,8 @@ ollama: llm_model: llama2 embedding_model: nomic-embed-text api_base: http://localhost:11434 + keep_alive: 5m + # embedding_api_base: http://ollama_embedding:11434 # uncomment if your embedding model runs on another ollama request_timeout: 120.0 azopenai: From ea153fb92f1f61f64c0d04fff0048d4d00b6f8d0 Mon Sep 17 00:00:00 2001 From: Brett England Date: Tue, 2 Apr 2024 10:41:42 -0400 Subject: [PATCH 04/17] feat(scripts): Wipe qdrant and obtain db Stats command (#1783) --- Makefile | 3 + scripts/utils.py | 199 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 139 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index 67b76e40..7b9a8469 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,9 @@ api-docs: ingest: @poetry run python scripts/ingest_folder.py $(call args) +stats: + poetry run python scripts/utils.py stats + wipe: poetry run python scripts/utils.py wipe diff --git a/scripts/utils.py b/scripts/utils.py index 48068789..92fd5d8c 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -1,26 +1,12 @@ import argparse import os import shutil +from typing import Any, ClassVar from private_gpt.paths import local_data_path from private_gpt.settings.settings import settings -def wipe() -> None: - WIPE_MAP = { - "simple": wipe_simple, # node store - "chroma": wipe_chroma, # vector store - "postgres": wipe_postgres, # node, index and vector store - } - for dbtype in ("nodestore", "vectorstore"): - database = getattr(settings(), dbtype).database - func = WIPE_MAP.get(database) - if func: - func(dbtype) - else: - print(f"Unable to wipe database '{database}' for '{dbtype}'") - - def wipe_file(file: str) -> None: if os.path.isfile(file): os.remove(file) @@ -50,62 +36,149 @@ def wipe_tree(path: str) -> None: continue -def wipe_simple(dbtype: str) -> None: - assert dbtype == "nodestore" - from llama_index.core.storage.docstore.types import ( - DEFAULT_PERSIST_FNAME as DOCSTORE, - ) - from llama_index.core.storage.index_store.types import ( - DEFAULT_PERSIST_FNAME as INDEXSTORE, - ) +class Postgres: + tables: ClassVar[dict[str, list[str]]] = { + "nodestore": ["data_docstore", "data_indexstore"], + "vectorstore": ["data_embeddings"], + } - for store in (DOCSTORE, INDEXSTORE): - wipe_file(str((local_data_path / store).absolute())) + def __init__(self) -> None: + try: + import psycopg2 + except 
ModuleNotFoundError: + raise ModuleNotFoundError("Postgres dependencies not found") from None - -def wipe_postgres(dbtype: str) -> None: - try: - import psycopg2 - except ImportError as e: - raise ImportError("Postgres dependencies not found") from e - - cur = conn = None - try: - tables = { - "nodestore": ["data_docstore", "data_indexstore"], - "vectorstore": ["data_embeddings"], - }[dbtype] connection = settings().postgres.model_dump(exclude_none=True) - schema = connection.pop("schema_name") - conn = psycopg2.connect(**connection) - cur = conn.cursor() - for table in tables: - sql = f"DROP TABLE IF EXISTS {schema}.{table}" - cur.execute(sql) - print(f"Table {schema}.{table} dropped.") - conn.commit() - except psycopg2.Error as e: - print("Error:", e) - finally: - if cur: + self.schema = connection.pop("schema_name") + self.conn = psycopg2.connect(**connection) + + def wipe(self, storetype: str) -> None: + cur = self.conn.cursor() + try: + for table in self.tables[storetype]: + sql = f"DROP TABLE IF EXISTS {self.schema}.{table}" + cur.execute(sql) + print(f"Table {self.schema}.{table} dropped.") + self.conn.commit() + finally: cur.close() - if conn: - conn.close() + + def stats(self, store_type: str) -> None: + template = "SELECT '{table}', COUNT(*), pg_size_pretty(pg_total_relation_size('{table}')) FROM {table}" + sql = " UNION ALL ".join( + template.format(table=tbl) for tbl in self.tables[store_type] + ) + + cur = self.conn.cursor() + try: + print(f"Storage for Postgres {store_type}.") + print("{:<15} | {:>15} | {:>9}".format("Table", "Rows", "Size")) + print("-" * 45) # Print a line separator + + cur.execute(sql) + for row in cur.fetchall(): + formatted_row_count = f"{row[1]:,}" + print(f"{row[0]:<15} | {formatted_row_count:>15} | {row[2]:>9}") + + print() + finally: + cur.close() + + def __del__(self): + if hasattr(self, "conn") and self.conn: + self.conn.close() -def wipe_chroma(dbtype: str): - assert dbtype == "vectorstore" - wipe_tree(str((local_data_path / "chroma_db").absolute())) +class Simple: + def wipe(self, store_type: str) -> None: + assert store_type == "nodestore" + from llama_index.core.storage.docstore.types import ( + DEFAULT_PERSIST_FNAME as DOCSTORE, + ) + from llama_index.core.storage.index_store.types import ( + DEFAULT_PERSIST_FNAME as INDEXSTORE, + ) + + for store in (DOCSTORE, INDEXSTORE): + wipe_file(str((local_data_path / store).absolute())) + + +class Chroma: + def wipe(self, store_type: str) -> None: + assert store_type == "vectorstore" + wipe_tree(str((local_data_path / "chroma_db").absolute())) + + +class Qdrant: + COLLECTION = ( + "make_this_parameterizable_per_api_call" # ?! 
see vector_store_component.py + ) + + def __init__(self) -> None: + try: + from qdrant_client import QdrantClient # type: ignore + except ImportError: + raise ImportError("Qdrant dependencies not found") from None + self.client = QdrantClient(**settings().qdrant.model_dump(exclude_none=True)) + + def wipe(self, store_type: str) -> None: + assert store_type == "vectorstore" + try: + self.client.delete_collection(self.COLLECTION) + print("Collection dropped successfully.") + except Exception as e: + print("Error dropping collection:", e) + + def stats(self, store_type: str) -> None: + print(f"Storage for Qdrant {store_type}.") + try: + collection_data = self.client.get_collection(self.COLLECTION) + if collection_data: + # Collection Info + # https://qdrant.tech/documentation/concepts/collections/ + print(f"\tPoints: {collection_data.points_count:,}") + print(f"\tVectors: {collection_data.vectors_count:,}") + print(f"\tIndex Vectors: {collection_data.indexed_vectors_count:,}") + return + except ValueError: + pass + print("\t- Qdrant collection not found or empty") + + +class Command: + DB_HANDLERS: ClassVar[dict[str, Any]] = { + "simple": Simple, # node store + "chroma": Chroma, # vector store + "postgres": Postgres, # node, index and vector store + "qdrant": Qdrant, # vector store + } + + def for_each_store(self, cmd: str): + for store_type in ("nodestore", "vectorstore"): + database = getattr(settings(), store_type).database + handler_class = self.DB_HANDLERS.get(database) + if handler_class is None: + print(f"No handler found for database '{database}'") + continue + handler_instance = handler_class() # Instantiate the class + # If the DB can handle this cmd dispatch it. + if hasattr(handler_instance, cmd) and callable( + func := getattr(handler_instance, cmd) + ): + func(store_type) + else: + print( + f"Unable to execute command '{cmd}' on '{store_type}' in database '{database}'" + ) + + def execute(self, cmd: str) -> None: + if cmd in ("wipe", "stats"): + self.for_each_store(cmd) if __name__ == "__main__": - commands = { - "wipe": wipe, - } - parser = argparse.ArgumentParser() - parser.add_argument( - "mode", help="select a mode to run", choices=list(commands.keys()) - ) + parser.add_argument("mode", help="select a mode to run", choices=["wipe", "stats"]) args = parser.parse_args() - commands[args.mode.lower()]() + + Command().execute(args.mode.lower()) From bac818add51b104cda925b8f1f7b51448e935ca1 Mon Sep 17 00:00:00 2001 From: igeni Date: Tue, 2 Apr 2024 17:42:40 +0300 Subject: [PATCH 05/17] feat(code): improve concat of strings in ui (#1785) --- private_gpt/ui/ui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index 7c34e849..7ba1c1ff 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -103,12 +103,12 @@ class PrivateGptUi: sources_text = "\n\n\n" used_files = set() for index, source in enumerate(cur_sources, start=1): - if (source.file + "-" + source.page) not in used_files: + if f"{source.file}-{source.page}" not in used_files: sources_text = ( sources_text + f"{index}. 
{source.file} (page {source.page}) \n\n" ) - used_files.add(source.file + "-" + source.page) + used_files.add(f"{source.file}-{source.page}") full_response += sources_text yield full_response From f0b174c097c2d5e52deae8ef88de30a0d9013a38 Mon Sep 17 00:00:00 2001 From: Ingrid Stevens Date: Tue, 2 Apr 2024 16:52:27 +0200 Subject: [PATCH 06/17] feat(ui): Add Model Information to ChatInterface label --- private_gpt/ui/ui.py | 47 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index 7ba1c1ff..4206f653 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -1,4 +1,4 @@ -"""This file should be imported only and only if you want to run the UI locally.""" +"""This file should be imported if and only if you want to run the UI locally.""" import itertools import logging @@ -418,11 +418,54 @@ class PrivateGptUi: inputs=system_prompt_input, ) + def get_model_label() -> str | None: + """Get model label from llm mode setting YAML. + + Raises: + ValueError: If an invalid 'llm_mode' is encountered. + + Returns: + str: The corresponding model label. + """ + # Get model label from llm mode setting YAML + # Labels: local, openai, openailike, sagemaker, mock, ollama + config_settings = settings() + if config_settings is None: + raise ValueError("Settings are not configured.") + + # Get llm_mode from settings + llm_mode = config_settings.llm.mode + + # Mapping of 'llm_mode' to corresponding model labels + model_mapping = { + "llamacpp": config_settings.llamacpp.llm_hf_model_file, + "openai": config_settings.openai.model, + "openailike": config_settings.openai.model, + "sagemaker": config_settings.sagemaker.llm_endpoint_name, + "mock": llm_mode, + "ollama": config_settings.ollama.llm_model, + } + + if llm_mode not in model_mapping: + print(f"Invalid 'llm mode': {llm_mode}") + return None + + return model_mapping[llm_mode] + with gr.Column(scale=7, elem_id="col"): + # Determine the model label based on the value of PGPT_PROFILES + model_label = get_model_label() + if model_label is not None: + label_text = ( + f"LLM: {settings().llm.mode} | Model: {model_label}" + ) + else: + label_text = f"LLM: {settings().llm.mode}" + _ = gr.ChatInterface( self._chat, chatbot=gr.Chatbot( - label=f"LLM: {settings().llm.mode}", + label=label_text, show_copy_button=True, elem_id="chatbot", render=False, From 8a836e4651543f099c59e2bf497ab8c55a7cd2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=B2=D0=B0=D0=BD?= Date: Tue, 2 Apr 2024 17:55:05 +0300 Subject: [PATCH 07/17] feat(docs): Add guide Llama-CPP Linux AMD GPU support (#1782) --- fern/docs/pages/installation/installation.mdx | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx index b47e2b2b..31a7953c 100644 --- a/fern/docs/pages/installation/installation.mdx +++ b/fern/docs/pages/installation/installation.mdx @@ -300,6 +300,40 @@ llama_new_context_with_model: total VRAM used: 4857.93 MB (model: 4095.05 MB, co AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | ``` +##### Llama-CPP Linux AMD GPU support + +Linux GPU support is done through ROCm. 
+Some tips: +* Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) +* [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html) +```bash +wget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl +poetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl +``` +* Install bitsandbytes for ROCm +```bash +PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942 +BITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854 +git clone https://github.com/arlo-phoenix/bitsandbytes-rocm-5.6 +cd bitsandbytes-rocm-5.6 +git checkout ${BITSANDBYTES_VERSION} +make hip ROCM_TARGET=${PYTORCH_ROCM_ARCH} ROCM_HOME=/opt/rocm/ +pip install . --extra-index-url https://download.pytorch.org/whl/nightly +``` + +After that running the following command in the repository will install llama.cpp with GPU support: +```bash +LLAMA_CPP_PYTHON_VERSION=0.2.56 +DAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942 +CMAKE_ARGS="-DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ -DAMDGPU_TARGETS=${DAMDGPU_TARGETS}" poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python==${LLAMA_CPP_PYTHON_VERSION} +``` + +If your installation was correct, you should see a message similar to the following next time you start the server `BLAS = 1`. + +``` +AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | +``` + ##### Llama-CPP Known issues and Troubleshooting Execution of LLMs locally still has a lot of sharp edges, specially when running on non Linux platforms. 
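A quick way to confirm that the ROCm wheels and the hipBLAS build of llama-cpp-python installed above are actually in use is sketched below; it is an illustrative check rather than part of the setup, and the GGUF path is a placeholder for whatever model file you have downloaded.

```python
import torch
from llama_cpp import Llama

# On a working ROCm build of PyTorch, torch.version.hip is a version string
# (it is None on CPU/CUDA builds) and the AMD GPU shows up as available.
print("HIP runtime:", torch.version.hip)
print("GPU visible:", torch.cuda.is_available())

# llama-cpp-python compiled with -DLLAMA_HIPBLAS=ON reports BLAS = 1 and the
# number of offloaded layers in its verbose load log.
llm = Llama(
    model_path="models/mistral-7b-instruct.Q4_K_M.gguf",  # placeholder path
    n_gpu_layers=-1,  # offload every layer to the GPU
    verbose=True,
)
print(llm("Say hi in one word.", max_tokens=8)["choices"][0]["text"])
```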
From 94ef38cbba8fe5e406eb192efafe774b9aadb564 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 17:45:15 +0200 Subject: [PATCH 08/17] chore(main): release 0.5.0 (#1708) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 35 +++++++++++++++++++++++++++++++++++ version.txt | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5728a097..37e5dce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +## [0.5.0](https://github.com/zylon-ai/private-gpt/compare/v0.4.0...v0.5.0) (2024-04-02) + + +### Features + +* **code:** improve concat of strings in ui ([#1785](https://github.com/zylon-ai/private-gpt/issues/1785)) ([bac818a](https://github.com/zylon-ai/private-gpt/commit/bac818add51b104cda925b8f1f7b51448e935ca1)) +* **docker:** set default Docker to use Ollama ([#1812](https://github.com/zylon-ai/private-gpt/issues/1812)) ([f83abff](https://github.com/zylon-ai/private-gpt/commit/f83abff8bc955a6952c92cc7bcb8985fcec93afa)) +* **docs:** Add guide Llama-CPP Linux AMD GPU support ([#1782](https://github.com/zylon-ai/private-gpt/issues/1782)) ([8a836e4](https://github.com/zylon-ai/private-gpt/commit/8a836e4651543f099c59e2bf497ab8c55a7cd2e5)) +* **docs:** Feature/upgrade docs ([#1741](https://github.com/zylon-ai/private-gpt/issues/1741)) ([5725181](https://github.com/zylon-ai/private-gpt/commit/572518143ac46532382db70bed6f73b5082302c1)) +* **docs:** upgrade fern ([#1596](https://github.com/zylon-ai/private-gpt/issues/1596)) ([84ad16a](https://github.com/zylon-ai/private-gpt/commit/84ad16af80191597a953248ce66e963180e8ddec)) +* **ingest:** Created a faster ingestion mode - pipeline ([#1750](https://github.com/zylon-ai/private-gpt/issues/1750)) ([134fc54](https://github.com/zylon-ai/private-gpt/commit/134fc54d7d636be91680dc531f5cbe2c5892ac56)) +* **llm - embed:** Add support for Azure OpenAI ([#1698](https://github.com/zylon-ai/private-gpt/issues/1698)) ([1efac6a](https://github.com/zylon-ai/private-gpt/commit/1efac6a3fe19e4d62325e2c2915cd84ea277f04f)) +* **llm:** adds serveral settings for llamacpp and ollama ([#1703](https://github.com/zylon-ai/private-gpt/issues/1703)) ([02dc83e](https://github.com/zylon-ai/private-gpt/commit/02dc83e8e9f7ada181ff813f25051bbdff7b7c6b)) +* **llm:** Ollama LLM-Embeddings decouple + longer keep_alive settings ([#1800](https://github.com/zylon-ai/private-gpt/issues/1800)) ([b3b0140](https://github.com/zylon-ai/private-gpt/commit/b3b0140e244e7a313bfaf4ef10eb0f7e4192710e)) +* **llm:** Ollama timeout setting ([#1773](https://github.com/zylon-ai/private-gpt/issues/1773)) ([6f6c785](https://github.com/zylon-ai/private-gpt/commit/6f6c785dac2bbad37d0b67fda215784298514d39)) +* **local:** tiktoken cache within repo for offline ([#1467](https://github.com/zylon-ai/private-gpt/issues/1467)) ([821bca3](https://github.com/zylon-ai/private-gpt/commit/821bca32e9ee7c909fd6488445ff6a04463bf91b)) +* **nodestore:** add Postgres for the doc and index store ([#1706](https://github.com/zylon-ai/private-gpt/issues/1706)) ([68b3a34](https://github.com/zylon-ai/private-gpt/commit/68b3a34b032a08ca073a687d2058f926032495b3)) +* **rag:** expose similarity_top_k and similarity_score to settings ([#1771](https://github.com/zylon-ai/private-gpt/issues/1771)) ([087cb0b](https://github.com/zylon-ai/private-gpt/commit/087cb0b7b74c3eb80f4f60b47b3a021c81272ae1)) +* **RAG:** Introduce SentenceTransformer 
Reranker ([#1810](https://github.com/zylon-ai/private-gpt/issues/1810)) ([83adc12](https://github.com/zylon-ai/private-gpt/commit/83adc12a8ef0fa0c13a0dec084fa596445fc9075)) +* **scripts:** Wipe qdrant and obtain db Stats command ([#1783](https://github.com/zylon-ai/private-gpt/issues/1783)) ([ea153fb](https://github.com/zylon-ai/private-gpt/commit/ea153fb92f1f61f64c0d04fff0048d4d00b6f8d0)) +* **ui:** Add Model Information to ChatInterface label ([f0b174c](https://github.com/zylon-ai/private-gpt/commit/f0b174c097c2d5e52deae8ef88de30a0d9013a38)) +* **ui:** add sources check to not repeat identical sources ([#1705](https://github.com/zylon-ai/private-gpt/issues/1705)) ([290b9fb](https://github.com/zylon-ai/private-gpt/commit/290b9fb084632216300e89bdadbfeb0380724b12)) +* **UI:** Faster startup and document listing ([#1763](https://github.com/zylon-ai/private-gpt/issues/1763)) ([348df78](https://github.com/zylon-ai/private-gpt/commit/348df781b51606b2f9810bcd46f850e54192fd16)) +* **ui:** maintain score order when curating sources ([#1643](https://github.com/zylon-ai/private-gpt/issues/1643)) ([410bf7a](https://github.com/zylon-ai/private-gpt/commit/410bf7a71f17e77c4aec723ab80c233b53765964)) +* unify settings for vector and nodestore connections to PostgreSQL ([#1730](https://github.com/zylon-ai/private-gpt/issues/1730)) ([63de7e4](https://github.com/zylon-ai/private-gpt/commit/63de7e4930ac90dd87620225112a22ffcbbb31ee)) +* wipe per storage type ([#1772](https://github.com/zylon-ai/private-gpt/issues/1772)) ([c2d6948](https://github.com/zylon-ai/private-gpt/commit/c2d694852b4696834962a42fde047b728722ad74)) + + +### Bug Fixes + +* **docs:** Minor documentation amendment ([#1739](https://github.com/zylon-ai/private-gpt/issues/1739)) ([258d02d](https://github.com/zylon-ai/private-gpt/commit/258d02d87c5cb81d6c3a6f06aa69339b670dffa9)) +* Fixed docker-compose ([#1758](https://github.com/zylon-ai/private-gpt/issues/1758)) ([774e256](https://github.com/zylon-ai/private-gpt/commit/774e2560520dc31146561d09a2eb464c68593871)) +* **ingest:** update script label ([#1770](https://github.com/zylon-ai/private-gpt/issues/1770)) ([7d2de5c](https://github.com/zylon-ai/private-gpt/commit/7d2de5c96fd42e339b26269b3155791311ef1d08)) +* **settings:** set default tokenizer to avoid running make setup fail ([#1709](https://github.com/zylon-ai/private-gpt/issues/1709)) ([d17c34e](https://github.com/zylon-ai/private-gpt/commit/d17c34e81a84518086b93605b15032e2482377f7)) + ## [0.4.0](https://github.com/imartinez/privateGPT/compare/v0.3.0...v0.4.0) (2024-03-06) diff --git a/version.txt b/version.txt index 1d0ba9ea..8f0916f7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.4.0 +0.5.0 From f469b4619dde8e435dc71ee61614dc1eec23f842 Mon Sep 17 00:00:00 2001 From: imartinez Date: Tue, 2 Apr 2024 18:27:57 +0200 Subject: [PATCH 09/17] Add required Ollama setting --- private_gpt/settings/settings.py | 4 ++-- settings-ollama.yaml | 2 +- settings.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 7ec84a7b..5df68114 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -210,8 +210,8 @@ class OllamaSettings(BaseModel): description="Base URL of Ollama API. Example: 'https://localhost:11434'.", ) embedding_api_base: str = Field( - api_base, # default is same as api_base, unless specified differently - description="Base URL of Ollama embedding API. 
Defaults to the same value as api_base", + "http://localhost:11434", + description="Base URL of Ollama embedding API. Example: 'https://localhost:11434'.", ) llm_model: str = Field( None, diff --git a/settings-ollama.yaml b/settings-ollama.yaml index 4f0be4ff..13663dc7 100644 --- a/settings-ollama.yaml +++ b/settings-ollama.yaml @@ -14,8 +14,8 @@ ollama: llm_model: mistral embedding_model: nomic-embed-text api_base: http://localhost:11434 + embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama keep_alive: 5m - # embedding_api_base: http://ollama_embedding:11434 # uncomment if your embedding model runs on another ollama tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) diff --git a/settings.yaml b/settings.yaml index 11c3c42f..dfd719bc 100644 --- a/settings.yaml +++ b/settings.yaml @@ -99,8 +99,8 @@ ollama: llm_model: llama2 embedding_model: nomic-embed-text api_base: http://localhost:11434 + embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama keep_alive: 5m - # embedding_api_base: http://ollama_embedding:11434 # uncomment if your embedding model runs on another ollama request_timeout: 120.0 azopenai: From 08c4ab175ebe0c8b9ebb79c9b4fca959068458ea Mon Sep 17 00:00:00 2001 From: imartinez Date: Wed, 3 Apr 2024 10:59:35 +0200 Subject: [PATCH 10/17] Fix version in poetry --- poetry.lock | 2 -- pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index e7f186dd..279707d8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3173,7 +3173,6 @@ optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, - {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"}, {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"}, ] @@ -4438,7 +4437,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/pyproject.toml b/pyproject.toml index 21d68a85..21132b3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "private-gpt" -version = "0.4.0" +version = "0.5.0" description = "Private GPT" authors = ["Zylon "] From 347be643f7929c56382a77c3f45f0867605e0e0a Mon Sep 17 00:00:00 2001 From: Pablo Orgaz Date: Thu, 4 Apr 2024 14:37:29 +0200 Subject: [PATCH 11/17] fix(llm): special tokens and leading space (#1831) --- private_gpt/components/llm/custom/sagemaker.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py index 7c46111f..e20f5394 100644 --- a/private_gpt/components/llm/custom/sagemaker.py +++ b/private_gpt/components/llm/custom/sagemaker.py @@ -243,12 +243,19 @@ class SagemakerLLM(CustomLLM): event_stream = resp["Body"] start_json = b"{" stop_token = "<|endoftext|>" + first_token = True for line in LineIterator(event_stream): if line != b"" and start_json in line: data = json.loads(line[line.find(start_json) :].decode("utf-8")) - if data["token"]["text"] != stop_token: + special = data["token"]["special"] + stop = data["token"]["text"] == stop_token + if not special and not stop: delta = data["token"]["text"] + # trim the leading space for the first token if present + if first_token: + delta = delta.lstrip() + first_token = False text += delta yield CompletionResponse(delta=delta, text=text, raw=data) From 49ef729abc818f983770f0f3d16c18dfa661a5ef Mon Sep 17 00:00:00 2001 From: imartinez Date: Fri, 19 Apr 2024 15:38:25 +0200 Subject: [PATCH 12/17] Allow passing HF access token to download tokenizer. Fallback to default tokenizer. --- private_gpt/components/llm/llm_component.py | 23 +++++++++++++++------ private_gpt/settings/settings.py | 4 ++++ settings.yaml | 1 + 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index dae997cc..baffa4e4 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -22,13 +22,24 @@ class LLMComponent: @inject def __init__(self, settings: Settings) -> None: llm_mode = settings.llm.mode - if settings.llm.tokenizer: - set_global_tokenizer( - AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=settings.llm.tokenizer, - cache_dir=str(models_cache_path), + if settings.llm.tokenizer and settings.llm.mode != "mock": + # Try to download the tokenizer. If it fails, the LLM will still work + # using the default one, which is less accurate. + try: + set_global_tokenizer( + AutoTokenizer.from_pretrained( + pretrained_model_name_or_path=settings.llm.tokenizer, + cache_dir=str(models_cache_path), + token=settings.huggingface.access_token, + ) + ) + except Exception as e: + logger.warning( + "Failed to download tokenizer %s. 
Falling back to " + "default tokenizer.", + settings.llm.tokenizer, + e, ) - ) logger.info("Initializing the LLM in mode=%s", llm_mode) match settings.llm.mode: diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 5df68114..051cfcab 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -151,6 +151,10 @@ class HuggingFaceSettings(BaseModel): embedding_hf_model_name: str = Field( description="Name of the HuggingFace model to use for embeddings" ) + access_token: str = Field( + None, + description="Huggingface access token, required to download some models", + ) class EmbeddingSettings(BaseModel): diff --git a/settings.yaml b/settings.yaml index dfd719bc..e881a555 100644 --- a/settings.yaml +++ b/settings.yaml @@ -69,6 +69,7 @@ embedding: huggingface: embedding_hf_model_name: BAAI/bge-small-en-v1.5 + access_token: ${HUGGINGFACE_TOKEN:} vectorstore: database: qdrant From 947e737f300adf621d2261d527192f36f3387f8e Mon Sep 17 00:00:00 2001 From: dividebysandwich Date: Fri, 19 Apr 2024 15:40:00 +0200 Subject: [PATCH 13/17] fix: "no such group" error in Dockerfile, added docx2txt and cryptography deps (#1841) * Fixed "no such group" error in Dockerfile, added docx2txt to poetry so docx parsing works out of the box for docker containers * added cryptography dependency for pdf parsing --- Dockerfile.local | 3 +- poetry.lock | 91 +++++++++++++++++++++++------------------------- pyproject.toml | 2 ++ 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/Dockerfile.local b/Dockerfile.local index a5222116..980a9b08 100644 --- a/Dockerfile.local +++ b/Dockerfile.local @@ -33,7 +33,8 @@ ENV PORT=8080 EXPOSE 8080 # Prepare a non-root user -RUN adduser --system worker +RUN adduser --group worker +RUN adduser --system --ingroup worker worker WORKDIR /home/worker/app RUN mkdir local_data; chown worker local_data diff --git a/poetry.lock b/poetry.lock index 279707d8..961a1299 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -515,7 +515,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, @@ -934,57 +934,42 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "42.0.5" +version = "3.4.8" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-optional = true -python-versions = ">=3.7" +optional = false +python-versions = ">=3.6" files = [ - {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"}, - {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec"}, - {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb"}, - {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4"}, - {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278"}, - {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7"}, - {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee"}, - {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1"}, - {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d"}, - {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da"}, - {file = "cryptography-42.0.5-cp37-abi3-win32.whl", hash = "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74"}, - {file = "cryptography-42.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940"}, - {file = "cryptography-42.0.5-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8"}, - {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1"}, - {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e"}, - {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc"}, - {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a"}, - {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7"}, - {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922"}, - {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc"}, - {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30"}, - {file = "cryptography-42.0.5-cp39-abi3-win32.whl", hash = "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413"}, - {file = "cryptography-42.0.5-cp39-abi3-win_amd64.whl", hash = 
"sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400"}, - {file = "cryptography-42.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8"}, - {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2"}, - {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c"}, - {file = "cryptography-42.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576"}, - {file = "cryptography-42.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6"}, - {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e"}, - {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac"}, - {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"}, - {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"}, + {file = "cryptography-3.4.8-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14"}, + {file = "cryptography-3.4.8-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"}, + {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e"}, + {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085"}, + {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b"}, + {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb"}, + {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d"}, + {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89"}, + {file = "cryptography-3.4.8-cp36-abi3-win32.whl", hash = "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7"}, + {file = "cryptography-3.4.8-cp36-abi3-win_amd64.whl", hash = "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc"}, + {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5"}, + {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af"}, + {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a"}, + {file = 
"cryptography-3.4.8-pp37-pypy37_pp73-macosx_10_10_x86_64.whl", hash = "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06"}, + {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498"}, + {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7"}, + {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9"}, + {file = "cryptography-3.4.8-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e"}, + {file = "cryptography-3.4.8.tar.gz", hash = "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c"}, ] [package.dependencies] -cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} +cffi = ">=1.12" [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] -nox = ["nox"] -pep8test = ["check-sdist", "click", "mypy", "ruff"] -sdist = ["build"] +docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] +docstest = ["doc8", "pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] +sdist = ["setuptools-rust (>=0.11.4)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] -test-randomorder = ["pytest-randomly"] +test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"] [[package]] name = "cycler" @@ -1096,6 +1081,16 @@ idna = ["idna (>=2.1,<4.0)"] trio = ["trio (>=0.14,<0.23)"] wmi = ["wmi (>=1.5.1,<2.0.0)"] +[[package]] +name = "docx2txt" +version = "0.8" +description = "A pure python-based utility to extract text and images from docx files." 
+optional = false +python-versions = "*" +files = [ + {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"}, +] + [[package]] name = "email-validator" version = "2.1.0.post1" @@ -3173,6 +3168,7 @@ optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, + {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"}, {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"}, ] @@ -3937,7 +3933,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycparser" version = "2.21" description = "C parser in Python" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, @@ -4437,6 +4433,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -6330,4 +6327,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "0b3665bd11a604609249ff0267e4e5cf009881d16a84f9774fc54d45a1373e09" +content-hash = "992c2486ee05e66eab29026e4275dd5509074b38a31ead9db2271e6f94f6da08" diff --git a/pyproject.toml b/pyproject.toml index 21132b3d..3d6d1dde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,8 @@ injector = "^0.21.0" pyyaml = "^6.0.1" watchdog = "^4.0.0" transformers = "^4.38.2" +docx2txt = "^0.8" +cryptography = "^3.1" # LlamaIndex core libs llama-index-core = "^0.10.14" llama-index-readers-file = "^0.1.6" From 2a432bf9c5582a94eb4052b1e80cabdb118d298e Mon Sep 17 00:00:00 2001 From: Marco Repetto <119503933+mrepetto-certx@users.noreply.github.com> Date: Fri, 19 Apr 2024 06:42:19 -0700 Subject: [PATCH 14/17] fix: make embedding_api_base match api_base when on docker (#1859) --- settings-docker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/settings-docker.yaml b/settings-docker.yaml index cb85afdf..d8037fa6 100644 --- a/settings-docker.yaml +++ b/settings-docker.yaml @@ -23,6 +23,7 @@ ollama: llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral} embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text} api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434} + embedding_api_base: 
${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434} tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0} top_k: ${PGPT_OLLAMA_TOP_K:40} top_p: ${PGPT_OLLAMA_TOP_P:0.9} From c1802e7cf0e56a2603213ec3b6a4af8fadb8a17a Mon Sep 17 00:00:00 2001 From: Daniel Gallego Vico Date: Fri, 19 Apr 2024 17:10:58 +0200 Subject: [PATCH 15/17] fix(docs): Update installation.mdx (#1866) Update repo url --- fern/docs/pages/installation/installation.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx index 31a7953c..d1d18634 100644 --- a/fern/docs/pages/installation/installation.mdx +++ b/fern/docs/pages/installation/installation.mdx @@ -5,8 +5,8 @@ It is important that you review the Main Concepts before you start the installat * Clone PrivateGPT repository, and navigate to it: ```bash - git clone https://github.com/imartinez/privateGPT - cd privateGPT + git clone https://github.com/zylon-ai/private-gpt + cd private-gpt ``` * Install Python `3.11` (*if you do not have it already*). Ideally through a python version manager like `pyenv`. From e21bf20c10938b24711d9f2c765997f44d7e02a9 Mon Sep 17 00:00:00 2001 From: icsy7867 Date: Tue, 30 Apr 2024 03:53:10 -0400 Subject: [PATCH 16/17] feat: prompt_style applied to all LLMs + extra LLM params. (#1835) * Updated prompt_style to be moved to the main LLM setting since all LLMs from llama_index can utilize this. I also included temperature, context window size, max_tokens, max_new_tokens into the openailike to help ensure the settings are consistent from the other implementations. * Removed prompt_style from llamacpp entirely * Fixed settings-local.yaml to include prompt_style in the LLM settings instead of llamacpp. --- private_gpt/components/llm/llm_component.py | 11 +++++++--- private_gpt/settings/settings.py | 23 ++++++++++----------- settings-local.yaml | 4 ++-- settings.yaml | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index baffa4e4..51d71a3e 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -51,7 +51,7 @@ class LLMComponent: "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`" ) from e - prompt_style = get_prompt_style(settings.llamacpp.prompt_style) + prompt_style = get_prompt_style(settings.llm.prompt_style) settings_kwargs = { "tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp "top_k": settings.llamacpp.top_k, # ollama and llama-cpp @@ -109,15 +109,20 @@ class LLMComponent: raise ImportError( "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`" ) from e - + prompt_style = get_prompt_style(settings.llm.prompt_style) openai_settings = settings.openai self.llm = OpenAILike( api_base=openai_settings.api_base, api_key=openai_settings.api_key, model=openai_settings.model, is_chat_model=True, - max_tokens=None, + max_tokens=settings.llm.max_new_tokens, api_version="", + temperature=settings.llm.temperature, + context_window=settings.llm.context_window, + max_new_tokens=settings.llm.max_new_tokens, + messages_to_prompt=prompt_style.messages_to_prompt, + completion_to_prompt=prompt_style.completion_to_prompt, ) case "ollama": try: diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 051cfcab..c4c5e20d 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ 
-104,6 +104,17 @@ class LLMSettings(BaseModel): 0.1, description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.", ) + prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field( + "llama2", + description=( + "The prompt style to use for the chat engine. " + "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n" + "If `llama2` - use the llama2 prompt style from the llama_index. Based on ``, `[INST]` and `<>`.\n" + "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n" + "If `mistral` - use the `mistral prompt style. It shoudl look like [INST] {System Prompt} [/INST][INST] { UserInstructions } [/INST]" + "`llama2` is the historic behaviour. `default` might work better with your custom models." + ), + ) class VectorstoreSettings(BaseModel): @@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel): class LlamaCPPSettings(BaseModel): llm_hf_repo_id: str llm_hf_model_file: str - prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field( - "llama2", - description=( - "The prompt style to use for the chat engine. " - "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n" - "If `llama2` - use the llama2 prompt style from the llama_index. Based on ``, `[INST]` and `<>`.\n" - "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n" - "If `mistral` - use the `mistral prompt style. It shoudl look like [INST] {System Prompt} [/INST][INST] { UserInstructions } [/INST]" - "`llama2` is the historic behaviour. `default` might work better with your custom models." - ), - ) - tfs_z: float = Field( 1.0, description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.", diff --git a/settings-local.yaml b/settings-local.yaml index c9d02742..48eeb0ea 100644 --- a/settings-local.yaml +++ b/settings-local.yaml @@ -8,9 +8,9 @@ llm: max_new_tokens: 512 context_window: 3900 tokenizer: mistralai/Mistral-7B-Instruct-v0.2 + prompt_style: "mistral" llamacpp: - prompt_style: "mistral" llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf @@ -24,4 +24,4 @@ vectorstore: database: qdrant qdrant: - path: local_data/private_gpt/qdrant \ No newline at end of file + path: local_data/private_gpt/qdrant diff --git a/settings.yaml b/settings.yaml index e881a555..d8d2500c 100644 --- a/settings.yaml +++ b/settings.yaml @@ -36,6 +36,7 @@ ui: llm: mode: llamacpp + prompt_style: "mistral" # Should be matching the selected model max_new_tokens: 512 context_window: 3900 @@ -53,7 +54,6 @@ rag: top_n: 1 llamacpp: - prompt_style: "mistral" llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. 
A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting From 9d0d614706581a8bfa57db45f62f84ab23d26f15 Mon Sep 17 00:00:00 2001 From: Patrick Peng Date: Tue, 30 Apr 2024 15:58:19 +0800 Subject: [PATCH 17/17] fix: Replacing unsafe `eval()` with `json.loads()` (#1890) --- private_gpt/components/llm/custom/sagemaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py index e20f5394..bd2aec18 100644 --- a/private_gpt/components/llm/custom/sagemaker.py +++ b/private_gpt/components/llm/custom/sagemaker.py @@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM): response_body = resp["Body"] response_str = response_body.read().decode("utf-8") - response_dict = eval(response_str) + response_dict = json.loads(response_str) return CompletionResponse( text=response_dict[0]["generated_text"][len(prompt) :], raw=resp
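
The final patch above swaps Python's `eval()` for `json.loads()` when decoding the SageMaker response body. As a minimal, self-contained sketch of that parsing step (the helper name and example payload below are illustrative only, not part of the PrivateGPT codebase), the point of the change is that `eval()` would execute any Python expression embedded in the response, whereas `json.loads()` only deserializes JSON and raises an error on anything else:

```python
import json


def parse_sagemaker_completion(response_str: str, prompt: str) -> str:
    """Safely extract generated text from a SageMaker inference response.

    json.loads() only parses JSON data; a malformed or malicious payload
    raises json.JSONDecodeError instead of executing code, which is the
    risk the previous eval() call carried.
    """
    response_dict = json.loads(response_str)
    # Typical text-generation responses look like:
    # [{"generated_text": "<prompt><completion>"}]
    return response_dict[0]["generated_text"][len(prompt):]


if __name__ == "__main__":
    body = '[{"generated_text": "Hello, world! Sure, here is an answer."}]'
    print(parse_sagemaker_completion(body, prompt="Hello, world!"))
```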