Added RAG settings to settings.py, vector_store_component and chat_service to support similarity_top_k and similarity_value

Wesley Stewart 2024-03-20 16:42:18 +00:00
parent 572518143a
commit 5fcca04256
4 changed files with 31 additions and 0 deletions

private_gpt/components/vector_store/vector_store_component.py

@@ -135,6 +135,7 @@ class VectorStoreComponent:
         similarity_top_k: int = 2,
     ) -> VectorIndexRetriever:
         # This way we support qdrant (using doc_ids) and the rest (using filters)
+        similarity_top_k = self.settings.rag.similarity_top_k
         return VectorIndexRetriever(
             index=index,
             similarity_top_k=similarity_top_k,
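The configured value now shadows the similarity_top_k keyword argument, so call sites can no longer override it per request. A minimal sketch of the resulting behavior (vector_store_component and index are assumed to be wired up elsewhere; this snippet is illustrative, not part of the commit):

# Sketch: whatever the caller passes, the configured value wins.
retriever = vector_store_component.get_retriever(
    index=index,
    similarity_top_k=10,  # ignored: self.settings.rag.similarity_top_k takes precedence
)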

private_gpt/server/chat/chat_service.py

@@ -8,6 +8,9 @@ from llama_index.core.chat_engine.types import (
 from llama_index.core.indices import VectorStoreIndex
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.postprocessor import (
+    SimilarityPostprocessor,
+)
 from llama_index.core.storage import StorageContext
 from llama_index.core.types import TokenGen
 from pydantic import BaseModel
@@ -20,6 +23,7 @@ from private_gpt.components.vector_store.vector_store_component import (
 )
 from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
+from private_gpt.settings.settings import Settings
 
 
 class Completion(BaseModel):
@@ -68,14 +72,18 @@ class ChatEngineInput:
 @singleton
 class ChatService:
+    settings: Settings
+
     @inject
     def __init__(
         self,
+        settings: Settings,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
+        self.settings = settings
         self.llm_component = llm_component
         self.embedding_component = embedding_component
         self.vector_store_component = vector_store_component
@@ -98,6 +106,7 @@ class ChatService:
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> BaseChatEngine:
+        settings = self.settings
         if use_context:
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index, context_filter=context_filter
@@ -108,6 +117,9 @@ class ChatService:
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=[
                     MetadataReplacementPostProcessor(target_metadata_key="window"),
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    ),
                 ],
             )
         else:
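SimilarityPostprocessor drops any retrieved node whose score falls below similarity_cutoff; when similarity_value is left unset the cutoff is None and nothing is filtered, which keeps the previous behavior. A standalone sketch of the filtering (the example texts and scores are invented for illustration):

from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, TextNode

nodes = [
    NodeWithScore(node=TextNode(text="relevant chunk"), score=0.82),
    NodeWithScore(node=TextNode(text="weak match"), score=0.30),
]
# With a cutoff of 0.45, only the first node survives.
filtered = SimilarityPostprocessor(similarity_cutoff=0.45).postprocess_nodes(nodes)
print([n.score for n in filtered])  # [0.82]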

private_gpt/settings/settings.py

@@ -280,6 +280,17 @@ class UISettings(BaseModel):
     )
 
 
+class RagSettings(BaseModel):
+    similarity_top_k: int = Field(
+        2,
+        description="This value controls the number of documents returned by the RAG pipeline.",
+    )
+    similarity_value: float = Field(
+        None,
+        description="If set, any documents retrieved from the RAG must meet this minimum match score. Acceptable values are between 0 and 1.",
+    )
+
+
 class PostgresSettings(BaseModel):
     host: str = Field(
         "localhost",
@@ -375,6 +386,7 @@ class Settings(BaseModel):
     azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
     nodestore: NodeStoreSettings
+    rag: RagSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None

settings.yaml

@@ -42,6 +42,12 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
   temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
+rag:
+  similarity_top_k: 2
+  # This value controls how many "top" documents the RAG returns to use in the context.
+  #similarity_value: 0.45
+  # This setting is disabled by default. If you enable it, the RAG will only use documents that meet the configured similarity score.
+
 llamacpp:
   prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
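To enable the cutoff, uncomment similarity_value and adjust both values to taste, e.g.:

rag:
  similarity_top_k: 5     # return the five best-matching chunks
  similarity_value: 0.45  # discard chunks scoring below 0.45

A profile file such as settings-local.yaml can override the same keys through the project's existing profiles mechanism.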