diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index efa4a194..3277af0f 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -104,6 +104,7 @@ class ChatService:
             embed_model=embedding_component.embedding_model,
             show_progress=True,
         )
+        self.default_context_template = settings.rag.default_context_template
 
     def _chat_engine(
         self,
@@ -113,6 +114,10 @@ class ChatService:
     ) -> BaseChatEngine:
         settings = self.settings
         if use_context:
+            if self.default_context_template is not None:
+                context_template = self.default_context_template
+            else:
+                context_template = None
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index,
                 context_filter=context_filter,
@@ -139,6 +144,7 @@ class ChatService:
                 retriever=vector_index_retriever,
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=node_postprocessors,
+                context_template=context_template,
             )
         else:
             return SimpleChatEngine.from_defaults(
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 0589f13a..52bc8edb 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -396,6 +396,13 @@ class RerankSettings(BaseModel):
 
 
 class RagSettings(BaseModel):
+    default_context_template: str | None = Field(
+        None,
+        description=(
+            "The default context template to use for the chat engine when using RAG. "
+            "If none is given, llama_index's default context template is used."
+        ),
+    )
     similarity_top_k: int = Field(
         2,
         description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
diff --git a/settings.yaml b/settings.yaml
index eda1af86..cb23b14e 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -57,6 +57,11 @@ llm:
   temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
+  default_context_template: |
+    Context information is below.
+    --------------------
+    {context_str}
+    --------------------
   similarity_top_k: 2
   #This value controls how many "top" documents the RAG returns to use in the context.
   #similarity_value: 0.45
@@ -70,11 +75,11 @@ summarize:
   use_async: true
 
 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings
 
 llamacpp:
   llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
@@ -150,3 +155,4 @@ gemini:
   api_key: ${GOOGLE_API_KEY:}
   model: models/gemini-pro
   embedding_model: models/embedding-001
+
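Reviewer note: the snippet below is an illustrative sketch, not part of the patch. It assumes a current llama-index install with a working LLM and embedding model (llama_index defaults to OpenAI, so OPENAI_API_KEY must be set); the document text and question are invented for the example. It demonstrates the one fact the patch relies on: llama_index's ContextChatEngine.from_defaults accepts a context_template string containing a {context_str} placeholder, which is exactly what the new rag.default_context_template setting carries through ChatService._chat_engine.

    # Illustrative only: pass a template like the one in settings.yaml
    # directly to llama_index's ContextChatEngine, as the patched
    # ChatService._chat_engine now does.
    from llama_index.core import Document, VectorStoreIndex
    from llama_index.core.chat_engine import ContextChatEngine

    # Same shape as the new `rag.default_context_template` value;
    # `{context_str}` is filled with the retrieved chunks at query time.
    context_template = (
        "Context information is below.\n"
        "--------------------\n"
        "{context_str}\n"
        "--------------------\n"
    )

    # Tiny in-memory index standing in for PrivateGPT's ingested documents.
    index = VectorStoreIndex.from_documents(
        [Document(text="PrivateGPT runs entirely on your own machine.")]
    )

    chat_engine = ContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=2),
        context_template=context_template,  # None falls back to llama_index's default
    )
    print(chat_engine.chat("Where does PrivateGPT run?"))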