diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index efa4a194..3277af0f 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -104,6 +104,7 @@ class ChatService:
             embed_model=embedding_component.embedding_model,
             show_progress=True,
         )
+        self.default_context_template = settings.rag.default_context_template
 
     def _chat_engine(
         self,
@@ -113,6 +114,10 @@ class ChatService:
     ) -> BaseChatEngine:
         settings = self.settings
         if use_context:
+            if self.default_context_template is not None:
+                context_template = self.default_context_template
+            else:
+                context_template = None
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index,
                 context_filter=context_filter,
@@ -139,6 +144,7 @@ class ChatService:
                 retriever=vector_index_retriever,
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=node_postprocessors,
+                context_template=context_template,
             )
         else:
             return SimpleChatEngine.from_defaults(
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 0589f13a..52bc8edb 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -396,6 +396,13 @@ class RerankSettings(BaseModel):
 
 
 class RagSettings(BaseModel):
+    default_context_template: str | None = Field(
+        None,
+        description=(
+            "The default context template to use for the chat engine when using RAG. "
+            "If none is given, llama_index's default context template is used."
+        ),
+    )
     similarity_top_k: int = Field(
         2,
         description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
diff --git a/settings.yaml b/settings.yaml
index eda1af86..cb23b14e 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -57,6 +57,11 @@ llm:
   temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
+  default_context_template: |
+    Context information is below.
+    --------------------
+    {context_str}
+    --------------------
   similarity_top_k: 2
   #This value controls how many "top" documents the RAG returns to use in the context.
   #similarity_value: 0.45
@@ -70,11 +75,11 @@ summarize:
   use_async: true
 
 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings
 
 llamacpp:
   llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
@@ -150,3 +155,4 @@ gemini:
   api_key: ${GOOGLE_API_KEY:}
   model: models/gemini-pro
   embedding_model: models/embedding-001
+
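Reviewer note: the snippet below is an illustrative sketch, not part of the patch. It assumes a current llama-index install with a working LLM and embedding model (llama_index defaults to OpenAI, so OPENAI_API_KEY must be set); the document text and question are invented for the example. It demonstrates the one fact the patch relies on: llama_index's ContextChatEngine.from_defaults accepts a context_template string containing a {context_str} placeholder, which is exactly what the new rag.default_context_template setting carries through ChatService._chat_engine.

    # Illustrative only: pass a template like the one in settings.yaml
    # directly to llama_index's ContextChatEngine, as the patched
    # ChatService._chat_engine now does.
    from llama_index.core import Document, VectorStoreIndex
    from llama_index.core.chat_engine import ContextChatEngine

    # Same shape as the new `rag.default_context_template` value;
    # `{context_str}` is filled with the retrieved chunks at query time.
    context_template = (
        "Context information is below.\n"
        "--------------------\n"
        "{context_str}\n"
        "--------------------\n"
    )

    # Tiny in-memory index standing in for PrivateGPT's ingested documents.
    index = VectorStoreIndex.from_documents(
        [Document(text="PrivateGPT runs entirely on your own machine.")]
    )

    chat_engine = ContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=2),
        context_template=context_template,  # None falls back to llama_index's default
    )
    print(chat_engine.chat("Where does PrivateGPT run?"))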