diff --git a/private_gpt/components/vector_store/vector_store_component.py b/private_gpt/components/vector_store/vector_store_component.py
index f9932b57..37bef89a 100644
--- a/private_gpt/components/vector_store/vector_store_component.py
+++ b/private_gpt/components/vector_store/vector_store_component.py
@@ -135,6 +135,7 @@ class VectorStoreComponent:
         similarity_top_k: int = 2,
     ) -> VectorIndexRetriever:
         # This way we support qdrant (using doc_ids) and the rest (using filters)
+        similarity_top_k = self.settings.rag.similarity_top_k
         return VectorIndexRetriever(
             index=index,
             similarity_top_k=similarity_top_k,
diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index 5369200b..e4e52ead 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -8,6 +8,9 @@ from llama_index.core.chat_engine.types import (
 from llama_index.core.indices import VectorStoreIndex
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.postprocessor import (
+    SimilarityPostprocessor,
+)
 from llama_index.core.storage import StorageContext
 from llama_index.core.types import TokenGen
 from pydantic import BaseModel
@@ -20,6 +23,7 @@ from private_gpt.components.vector_store.vector_store_component import (
 )
 from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
+from private_gpt.settings.settings import Settings
 
 
 class Completion(BaseModel):
@@ -68,14 +72,18 @@ class ChatEngineInput:
 
 @singleton
 class ChatService:
+    settings: Settings
+
     @inject
     def __init__(
         self,
+        settings: Settings,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
+        self.settings = settings
         self.llm_component = llm_component
         self.embedding_component = embedding_component
         self.vector_store_component = vector_store_component
@@ -98,6 +106,7 @@ class ChatService:
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> BaseChatEngine:
+        settings = self.settings
         if use_context:
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index, context_filter=context_filter
@@ -108,6 +117,9 @@ class ChatService:
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=[
                     MetadataReplacementPostProcessor(target_metadata_key="window"),
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    ),
                 ],
             )
         else:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 41771055..f1910a4b 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -280,6 +280,17 @@ class UISettings(BaseModel):
     )
 
 
+class RagSettings(BaseModel):
+    similarity_top_k: int = Field(
+        2,
+        description="This value controls the number of documents returned by the RAG pipeline",
+    )
+    similarity_value: float = Field(
+        None,
+        description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.",
+    )
+
+
 class PostgresSettings(BaseModel):
     host: str = Field(
         "localhost",
@@ -375,6 +386,7 @@ class Settings(BaseModel):
     azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
     nodestore: NodeStoreSettings
+    rag: RagSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None
 
diff --git a/settings.yaml b/settings.yaml
index 0b4cb341..1d423177 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -42,6 +42,12 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
+rag:
+  similarity_top_k: 2
+  #This value controls how many "top" documents the RAG returns to use in the context.
+  #similarity_value: 0.45
+  #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score.
+
 llamacpp:
   prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
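
Note on the new cutoff: the patch wires llama-index's `SimilarityPostprocessor` into the context chat engine, which drops any retrieved node whose similarity score falls below `similarity_cutoff`. A minimal sketch of that behaviour is below; it assumes `llama-index-core` is installed, and the node texts and scores are invented purely for illustration (0.45 mirrors the commented-out `similarity_value` example in `settings.yaml`).

```python
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, TextNode

# Fake retrieval results; the scores are made up for illustration only.
nodes = [
    NodeWithScore(node=TextNode(text="Relevant passage"), score=0.82),
    NodeWithScore(node=TextNode(text="Loosely related passage"), score=0.30),
]

# Same postprocessor the patch adds to node_postprocessors, using the
# cutoff that settings.rag.similarity_value would supply (e.g. 0.45).
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.45)
kept = postprocessor.postprocess_nodes(nodes)

# Only nodes scoring at or above the cutoff survive.
print([n.score for n in kept])  # -> [0.82]
```

When `similarity_value` is left unset (the default `None`), the postprocessor is constructed with `similarity_cutoff=None`, so it effectively filters nothing and all `similarity_top_k` nodes still reach the context.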