Added new system prompt

Saurab-Shrestha
2024-05-29 10:55:42 +05:45
parent 474698834f
commit 759767dc1b
4 changed files with 31 additions and 18 deletions


@@ -70,7 +70,6 @@ class ChatEngineInput:
             chat_history=chat_history,
         )
 
-
 @singleton
 class ChatService:
     settings: Settings
@@ -126,7 +125,7 @@ class ChatService:
                 model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n
             )
             node_postprocessors.append(rerank_postprocessor)
 
         return ContextChatEngine.from_defaults(
             system_prompt=system_prompt,
             retriever=vector_index_retriever,
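For orientation, this is the wiring the hunks above touch: the rerank postprocessor is appended to node_postprocessors and handed to ContextChatEngine.from_defaults together with the system prompt. A minimal sketch assuming llama-index's public API; the helper function and literal values are illustrative, not the repository's code:

from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.retrievers import BaseRetriever


def build_chat_engine(
    vector_index_retriever: BaseRetriever, system_prompt: str
) -> ContextChatEngine:
    # Mirrors settings.rag.rerank in this repo's config (illustrative values).
    node_postprocessors = [
        SentenceTransformerRerank(
            model="cross-encoder/ms-marco-MiniLM-L-2-v2",  # settings.rag.rerank.model
            top_n=2,  # settings.rag.rerank.top_n
        )
    ]
    return ContextChatEngine.from_defaults(
        retriever=vector_index_retriever,
        system_prompt=system_prompt,
        node_postprocessors=node_postprocessors,
    )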
@@ -189,17 +188,19 @@ class ChatService:
         )
         system_prompt = (
             """
-            You are a helpful, respectful and honest question-answering assistant.
-            Your role is to provide accurate and informative responses based solely
-            on the context provided for each query. If the answer cannot be found in
-            the given context, you must state that the answer is not present rather
-            than speculating or making up information. Always follow the user's
-            instructions carefully and answer as helpfully as possible while strictly
-            adhering to the context boundaries. Do not reference the instructions or
-            context you were given when generating responses.
+            You are QuickGPT, a helpful assistant by Quickfox Consulting.
+            Responses should be based on the context documents provided
+            and should be relevant, informative, and easy to understand.
+            You should aim to deliver high-quality responses that are
+            respectful and helpful, using clear and concise language.
+            Avoid providing information outside of the context documents unless
+            it is necessary for clarity or completeness. Focus on providing
+            accurate and reliable answers based on the given context.
+            If answer is not in the context documents, just say I don't have answer
+            in respectful way.
             """
         )
         chat_history = (
             chat_engine_input.chat_history if chat_engine_input.chat_history else None
         )
 


@@ -195,7 +195,9 @@ async def prompt_completion(
             )
         )
         return history_messages
-    user_message = OpenAIMessage(content=body.prompt, role="user")
+    message = body.prompt
+    # message = body.prompt + 'Only answer if there is answer in the provided documents'
+    user_message = OpenAIMessage(content=message, role="user")
     user_message_json = {
         'text': body.prompt,
     }
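The net effect of this hunk is that the prompt now flows through a local message variable, leaving a hook for appending guardrail text (the commented-out line) before it is wrapped as a chat message. A sketch of that pattern with a stand-in OpenAIMessage model, since the project's own class is not part of this diff:

from pydantic import BaseModel


class OpenAIMessage(BaseModel):
    # Stand-in for the project's own OpenAI-style message model.
    content: str
    role: str


def build_user_message(prompt: str, restrict_to_context: bool = False) -> OpenAIMessage:
    message = prompt
    if restrict_to_context:
        # The commented-out experiment from the diff, expressed as an option.
        message = prompt + " Only answer if there is answer in the provided documents"
    return OpenAIMessage(content=message, role="user")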


@@ -28,11 +28,21 @@ class CRUDDocuments(CRUDBase[Document, DocumentCreate, DocumentUpdate]):
     def get_documents_by_departments(
         self, db: Session, *, department_id: int
     ) -> List[Document]:
+        all_department_id = 1  # department ID for "ALL" is 1
         return (
             db.query(self.model)
             .join(document_department_association)
             .join(Department)
-            .filter(document_department_association.c.department_id == department_id)
+            .filter(
+                or_(
+                    and_(
+                        document_department_association.c.department_id == department_id,
+                    ),
+                    and_(
+                        document_department_association.c.department_id == all_department_id,
+                    ),
+                )
+            )
             .order_by(desc(getattr(Document, 'uploaded_at')))
             .all()
         )
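The query above is correct, but each and_() wraps a single condition, so the or_() reduces to a two-value membership test. An equivalent, tighter version, sketched assuming the same Document / Department models and association table as the repository:

from typing import List

from sqlalchemy.orm import Session


def get_documents_by_departments(db: Session, *, department_id: int) -> List["Document"]:
    all_department_id = 1  # department ID for "ALL" is 1
    return (
        db.query(Document)
        .join(document_department_association)
        .join(Department)
        # in_() over both IDs is equivalent to or_(and_(...), and_(...)).
        .filter(
            document_department_association.c.department_id.in_(
                [department_id, all_department_id]
            )
        )
        # Document.uploaded_at.desc() avoids desc(getattr(Document, 'uploaded_at')).
        .order_by(Document.uploaded_at.desc())
        .all()
    )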


@@ -51,14 +51,14 @@ rag:
   #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score.
   rerank:
     enabled: true
-    model: cross-encoder/ms-marco-MiniLM-L-2-v2
+    model: mixedbread-ai/mxbai-embed-large-v1
     top_n: 2
 
 llamacpp:
   # llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
   # llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
-  llm_hf_repo_id: qwp4w3hyb/Hermes-2-Pro-Llama-3-8B-iMat-GGUF
-  llm_hf_model_file: hermes-2-pro-llama-3-8b-imat-Q6_K.gguf
+  llm_hf_repo_id: NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF
+  llm_hf_model_file: Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q6_K.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
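One caution on the rerank change, raised tentatively: mixedbread-ai/mxbai-embed-large-v1 is a bi-encoder embedding model, while SentenceTransformerRerank scores query/passage pairs with a cross-encoder; mixedbread's dedicated rerankers (e.g. mixedbread-ai/mxbai-rerank-base-v1) are the cross-encoder counterparts. A standalone check of such a reranker, purely illustrative and not part of this commit:

from sentence_transformers import CrossEncoder

# Model name is an assumption (mixedbread's reranker line), not this commit's value.
ranker = CrossEncoder("mixedbread-ai/mxbai-rerank-base-v1")
scores = ranker.predict(
    [
        ("what does tail free sampling do?", "Tail free sampling reduces the impact of less probable tokens."),
        ("what does tail free sampling do?", "The office is closed on Saturdays."),
    ]
)
print(scores)  # the relevant passage should score higher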
@@ -68,11 +68,11 @@ llamacpp:
 embedding:
   # Should be matching the value above in most cases
   mode: huggingface
-  ingest_mode: simple
+  ingest_mode: parallel
   embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
   huggingface:
-    embedding_hf_model_name: Snowflake/snowflake-arctic-embed-l
+    embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1
     access_token: ${HUGGINGFACE_TOKEN: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}
 
 vectorstore:
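Also worth double-checking: embed_dim stays at 384 (the BAAI/bge-small-en-v1.5 size) while the embedding model becomes mixedbread-ai/mxbai-embed-large-v1, which produces 1024-dimensional vectors, so a dimension mismatch is likely at ingest or query time if the vector store enforces it. A quick way to confirm the model's output size (sketch; downloads the model):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
print(model.get_sentence_embedding_dimension())  # expected: 1024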