feat(RAG): Introduce SentenceTransformer Reranker (#1810)

This commit is contained in:
machatschek 2024-04-02 10:29:51 +02:00 committed by GitHub
parent f83abff8bc
commit 83adc12a8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 198 additions and 8 deletions

View File

@ -64,6 +64,8 @@ navigation:
contents:
- page: LLM Backends
path: ./docs/pages/manual/llms.mdx
- page: Reranking
path: ./docs/pages/manual/reranker.mdx
- section: User Interface
contents:
- page: User interface (Gradio) Manual

View File

@ -0,0 +1,36 @@
## Enhancing Response Quality with Reranking
PrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM.
### Enabling Reranking
Document reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case.
#### Additional Requirements
Before enabling reranking, you must install additional dependencies:
```bash
poetry install --extras rerank-sentence-transformers
```
This command installs dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only supported method by PrivateGPT for document reranking.
#### Configuration
To enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. Here are the key settings to consider:
- `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`.
- `rerank`:
- `enabled`: Set to `true` to activate the reranking feature.
- `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`.
Example configuration snippet:
```yaml
rag:
similarity_top_k: 10 # Number of documents to retrieve and consider for reranking
rerank:
enabled: true
top_n: 3 # Number of top-ranked documents to use for generating the answer
```

119
poetry.lock generated
View File

@ -4949,6 +4949,90 @@ tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"]
testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"]
torch = ["safetensors[numpy]", "torch (>=1.10)"]
[[package]]
name = "scikit-learn"
version = "1.4.1.post1"
description = "A set of python modules for machine learning and data mining"
optional = true
python-versions = ">=3.9"
files = [
{file = "scikit-learn-1.4.1.post1.tar.gz", hash = "sha256:93d3d496ff1965470f9977d05e5ec3376fb1e63b10e4fda5e39d23c2d8969a30"},
{file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c540aaf44729ab5cd4bd5e394f2b375e65ceaea9cdd8c195788e70433d91bbc5"},
{file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4310bff71aa98b45b46cd26fa641309deb73a5d1c0461d181587ad4f30ea3c36"},
{file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f43dd527dabff5521af2786a2f8de5ba381e182ec7292663508901cf6ceaf6e"},
{file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c02e27d65b0c7dc32f2c5eb601aaf5530b7a02bfbe92438188624524878336f2"},
{file = "scikit_learn-1.4.1.post1-cp310-cp310-win_amd64.whl", hash = "sha256:629e09f772ad42f657ca60a1a52342eef786218dd20cf1369a3b8d085e55ef8f"},
{file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6145dfd9605b0b50ae72cdf72b61a2acd87501369a763b0d73d004710ebb76b5"},
{file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1afed6951bc9d2053c6ee9a518a466cbc9b07c6a3f9d43bfe734192b6125d508"},
{file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce03506ccf5f96b7e9030fea7eb148999b254c44c10182ac55857bc9b5d4815f"},
{file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ba516fcdc73d60e7f48cbb0bccb9acbdb21807de3651531208aac73c758e3ab"},
{file = "scikit_learn-1.4.1.post1-cp311-cp311-win_amd64.whl", hash = "sha256:78cd27b4669513b50db4f683ef41ea35b5dddc797bd2bbd990d49897fd1c8a46"},
{file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a1e289f33f613cefe6707dead50db31930530dc386b6ccff176c786335a7b01c"},
{file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0df87de9ce1c0140f2818beef310fb2e2afdc1e66fc9ad587965577f17733649"},
{file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:712c1c69c45b58ef21635360b3d0a680ff7d83ac95b6f9b82cf9294070cda710"},
{file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1754b0c2409d6ed5a3380512d0adcf182a01363c669033a2b55cca429ed86a81"},
{file = "scikit_learn-1.4.1.post1-cp312-cp312-win_amd64.whl", hash = "sha256:1d491ef66e37f4e812db7e6c8286520c2c3fc61b34bf5e59b67b4ce528de93af"},
{file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aa0029b78ef59af22cfbd833e8ace8526e4df90212db7ceccbea582ebb5d6794"},
{file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e4c88436ac96bf69eb6d746ac76a574c314a23c6961b7d344b38877f20fee1"},
{file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cd3a77c32879311f2aa93466d3c288c955ef71d191503cf0677c3340ae8ae0"},
{file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a3ee19211ded1a52ee37b0a7b373a8bfc66f95353af058a210b692bd4cda0dd"},
{file = "scikit_learn-1.4.1.post1-cp39-cp39-win_amd64.whl", hash = "sha256:234b6bda70fdcae9e4abbbe028582ce99c280458665a155eed0b820599377d25"},
]
[package.dependencies]
joblib = ">=1.2.0"
numpy = ">=1.19.5,<2.0"
scipy = ">=1.6.0"
threadpoolctl = ">=2.0.0"
[package.extras]
benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"]
docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"]
tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"]
[[package]]
name = "scipy"
version = "1.12.0"
description = "Fundamental algorithms for scientific computing in Python"
optional = true
python-versions = ">=3.9"
files = [
{file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"},
{file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"},
{file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"},
{file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"},
{file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"},
{file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"},
{file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"},
{file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"},
{file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"},
{file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"},
{file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"},
{file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"},
{file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"},
{file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"},
{file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"},
{file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"},
{file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"},
{file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"},
{file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"},
{file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"},
{file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"},
{file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"},
{file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"},
{file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"},
{file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"},
]
[package.dependencies]
numpy = ">=1.22.4,<1.29.0"
[package.extras]
dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"]
doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"]
test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]
name = "semantic-version"
version = "2.10.0"
@ -4964,6 +5048,27 @@ files = [
dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1)", "coverage", "flake8", "nose2", "readme-renderer (<25.0)", "tox", "wheel", "zest.releaser[recommended]"]
doc = ["Sphinx", "sphinx-rtd-theme"]
[[package]]
name = "sentence-transformers"
version = "2.6.1"
description = "Multilingual text embeddings"
optional = true
python-versions = ">=3.8.0"
files = [
{file = "sentence-transformers-2.6.1.tar.gz", hash = "sha256:633ad6b70e390ea335de8689652a5d6c21a323b79ed19519c2f392451088487f"},
{file = "sentence_transformers-2.6.1-py3-none-any.whl", hash = "sha256:a887e17696b513f99a709ce1f37fd547f53857aebe863785ede546c303b09ea0"},
]
[package.dependencies]
huggingface-hub = ">=0.15.1"
numpy = "*"
Pillow = "*"
scikit-learn = "*"
scipy = "*"
torch = ">=1.11.0"
tqdm = "*"
transformers = ">=4.32.0,<5.0.0"
[[package]]
name = "setuptools"
version = "69.0.2"
@ -5156,6 +5261,17 @@ files = [
[package.extras]
doc = ["reno", "sphinx", "tornado (>=4.5)"]
[[package]]
name = "threadpoolctl"
version = "3.4.0"
description = "threadpoolctl"
optional = true
python-versions = ">=3.8"
files = [
{file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"},
{file = "threadpoolctl-3.4.0.tar.gz", hash = "sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"},
]
[[package]]
name = "tiktoken"
version = "0.5.2"
@ -6206,6 +6322,7 @@ llms-ollama = ["llama-index-llms-ollama"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-sagemaker = ["boto3"]
rerank-sentence-transformers = ["sentence-transformers", "torch"]
storage-nodestore-postgres = ["asyncpg", "llama-index-storage-docstore-postgres", "llama-index-storage-index-store-postgres", "psycopg2-binary"]
ui = ["gradio"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
@ -6215,4 +6332,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "3d5f21e5e41ea66d655891a6d9b01bcdd8348b275e27a54e90b65ac9d5719981"
content-hash = "0b3665bd11a604609249ff0267e4e5cf009881d16a84f9774fc54d45a1373e09"

View File

@ -9,6 +9,7 @@ from llama_index.core.indices import VectorStoreIndex
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.postprocessor import (
SentenceTransformerRerank,
SimilarityPostprocessor,
)
from llama_index.core.storage import StorageContext
@ -113,16 +114,24 @@ class ChatService:
context_filter=context_filter,
similarity_top_k=self.settings.rag.similarity_top_k,
)
node_postprocessors = [
MetadataReplacementPostProcessor(target_metadata_key="window"),
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
),
]
if settings.rag.rerank.enabled:
rerank_postprocessor = SentenceTransformerRerank(
model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n
)
node_postprocessors.append(rerank_postprocessor)
return ContextChatEngine.from_defaults(
system_prompt=system_prompt,
retriever=vector_index_retriever,
llm=self.llm_component.llm, # Takes no effect at the moment
node_postprocessors=[
MetadataReplacementPostProcessor(target_metadata_key="window"),
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
),
],
node_postprocessors=node_postprocessors,
)
else:
return SimpleChatEngine.from_defaults(

View File

@ -284,15 +284,31 @@ class UISettings(BaseModel):
)
class RerankSettings(BaseModel):
enabled: bool = Field(
False,
description="This value controls whether a reranker should be included in the RAG pipeline.",
)
model: str = Field(
"cross-encoder/ms-marco-MiniLM-L-2-v2",
description="Rerank model to use. Limited to SentenceTransformer cross-encoder models.",
)
top_n: int = Field(
2,
description="This value controls the number of documents returned by the RAG pipeline.",
)
class RagSettings(BaseModel):
similarity_top_k: int = Field(
2,
description="This value controls the number of documents returned by the RAG pipeline",
description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
)
similarity_value: float = Field(
None,
description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.",
)
rerank: RerankSettings
class PostgresSettings(BaseModel):

View File

@ -37,6 +37,11 @@ asyncpg = {version="^0.29.0", optional = true}
# Optional Sagemaker dependency
boto3 = {version ="^1.34.51", optional = true}
# Optional Reranker dependencies
torch = {version ="^2.1.2", optional = true}
sentence-transformers = {version ="^2.6.1", optional = true}
# Optional UI
gradio = {version ="^4.19.2", optional = true}
@ -57,6 +62,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
rerank-sentence-transformers = ["torch", "sentence-transformers"]
[tool.poetry.group.dev.dependencies]
black = "^22"

View File

@ -47,6 +47,10 @@ rag:
#This value controls how many "top" documents the RAG returns to use in the context.
#similarity_value: 0.45
#This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score.
rerank:
enabled: false
model: cross-encoder/ms-marco-MiniLM-L-2-v2
top_n: 1
llamacpp:
prompt_style: "mistral"