Added RAG settings to settings.py, vector_store and chat_service to support similarity_top_k and similarity_value
parent 572518143a
commit 5fcca04256
vector_store_component.py

@@ -135,6 +135,7 @@ class VectorStoreComponent:
         similarity_top_k: int = 2,
     ) -> VectorIndexRetriever:
         # This way we support qdrant (using doc_ids) and the rest (using filters)
+        similarity_top_k = self.settings.rag.similarity_top_k
         return VectorIndexRetriever(
             index=index,
             similarity_top_k=similarity_top_k,
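The added line deserves a note: it unconditionally replaces the caller-supplied similarity_top_k argument with the configured value. A minimal, self-contained sketch of that behavior (the stub classes below are illustrative, not privateGPT code):

class _RagStub:
    similarity_top_k = 5  # hypothetical configured value

class _SettingsStub:
    rag = _RagStub()

def get_top_k(settings: _SettingsStub, similarity_top_k: int = 2) -> int:
    # Mirrors the patched code path: the parameter is shadowed by the setting.
    similarity_top_k = settings.rag.similarity_top_k
    return similarity_top_k

assert get_top_k(_SettingsStub(), similarity_top_k=10) == 5  # the setting wins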
chat_service.py

@@ -8,6 +8,9 @@ from llama_index.core.chat_engine.types import (
 from llama_index.core.indices import VectorStoreIndex
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.postprocessor import (
+    SimilarityPostprocessor,
+)
 from llama_index.core.storage import StorageContext
 from llama_index.core.types import TokenGen
 from pydantic import BaseModel
@@ -20,6 +23,7 @@ from private_gpt.components.vector_store.vector_store_component import (
 )
 from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
+from private_gpt.settings.settings import Settings


 class Completion(BaseModel):
@@ -68,14 +72,18 @@ class ChatEngineInput:

 @singleton
 class ChatService:
+    settings: Settings
+
     @inject
     def __init__(
         self,
+        settings: Settings,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
+        self.settings = settings
         self.llm_component = llm_component
         self.embedding_component = embedding_component
         self.vector_store_component = vector_store_component
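For readers unfamiliar with the decorators: @singleton and @inject come from the injector library that privateGPT uses for dependency injection, so the Settings instance arrives through the constructor automatically. A rough sketch under that assumption (the Settings class here is a stand-in, not the real one):

from injector import Injector, inject, singleton

class Settings:  # stand-in for private_gpt.settings.settings.Settings
    pass

@singleton
class ChatService:
    @inject
    def __init__(self, settings: Settings) -> None:
        self.settings = settings

service = Injector().get(ChatService)  # auto-binding builds the dependency graph
assert isinstance(service.settings, Settings)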
@@ -98,6 +106,7 @@ class ChatService:
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> BaseChatEngine:
+        settings = self.settings
         if use_context:
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index, context_filter=context_filter
@@ -108,6 +117,9 @@ class ChatService:
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=[
                     MetadataReplacementPostProcessor(target_metadata_key="window"),
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    ),
                 ],
             )
         else:
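For context, a minimal sketch of what the newly added SimilarityPostprocessor does: nodes scoring below similarity_cutoff are dropped before they reach the LLM. The 0.45 cutoff and the toy nodes are illustrative only:

from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, TextNode

nodes = [
    NodeWithScore(node=TextNode(text="strong match"), score=0.9),
    NodeWithScore(node=TextNode(text="weak match"), score=0.2),
]
kept = SimilarityPostprocessor(similarity_cutoff=0.45).postprocess_nodes(nodes)
assert [n.score for n in kept] == [0.9]  # the weak node is filtered out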
settings.py

@@ -280,6 +280,17 @@ class UISettings(BaseModel):
     )


+class RagSettings(BaseModel):
+    similarity_top_k: int = Field(
+        2,
+        description="This value controls the number of documents returned by the RAG pipeline",
+    )
+    similarity_value: float = Field(
+        None,
+        description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.",
+    )
+
+
 class PostgresSettings(BaseModel):
     host: str = Field(
         "localhost",
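The new class behaves like any other Pydantic model in the settings tree. A quick illustration (assuming Pydantic, which these settings already use; note the None default on a float field passes because Pydantic does not validate defaults):

from pydantic import BaseModel, Field

class RagSettings(BaseModel):  # trimmed copy of the class added above
    similarity_top_k: int = Field(2)
    similarity_value: float = Field(None)

rag = RagSettings(similarity_top_k=5)
assert rag.similarity_top_k == 5
assert rag.similarity_value is None  # cutoff stays disabled until configured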
@@ -375,6 +386,7 @@ class Settings(BaseModel):
     azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
     nodestore: NodeStoreSettings
+    rag: RagSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None

settings.yaml

@@ -42,6 +42,12 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

+rag:
+  similarity_top_k: 2
+  # This value controls how many "top" documents the RAG returns to use in the context.
+  # similarity_value: 0.45
+  # Disabled by default. If you enable this setting, the RAG will only use documents that meet the given match score.
+
 llamacpp:
   prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
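privateGPT reads this file through its own settings loader; as a rough illustration only (plain PyYAML rather than the project's loader, and assuming the file sits in the working directory), the new block parses to values that line up with the RagSettings defaults:

import yaml  # PyYAML

with open("settings.yaml") as f:
    raw = yaml.safe_load(f)

assert raw["rag"]["similarity_top_k"] == 2
# similarity_value is commented out above, so RagSettings falls back to its
# default of None and no similarity cutoff is applied.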