Mirror of https://github.com/imartinez/privateGPT.git, synced 2025-07-10 05:44:29 +00:00
Added build history
This commit is contained in: parent 44d94e145e · commit bf135b1692
.env (2 changes)

@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 
 SUPER_ADMIN_EMAIL=superadmin@email.com
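For context, these variables are typically read at application startup. The following is a minimal sketch of how the updated values might be consumed; the use of python-dotenv and the dictionary shape are illustrative assumptions, not part of this diff.

import os
from dotenv import load_dotenv  # assumption: python-dotenv is installed

# Load key/value pairs from .env into the process environment.
load_dotenv()

# Read the database settings touched by this commit; defaults mirror the file.
db_settings = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "5432")),
    "user": os.getenv("DB_USER", "postgres"),
    "password": os.getenv("DB_PASSWORD", "admin"),
    "dbname": os.getenv("DB_NAME", "QuickGpt"),
}
print(db_settings)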
@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]]
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
        "json_schema_extra": {
            "examples": [
                {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                    "prompt": "How do you fry an egg?",
                    "system_prompt": "You are a rapper. Always answer with a rap.",
                    "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
        }
    }
 
 
 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
-
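For reference, a request matching the updated CompletionsBody schema could look like the sketch below. The field names and example values come from the schema above; the endpoint path, host, port, auth header, and the use of the requests library are assumptions for illustration only.

import requests  # assumption: the API is exercised over HTTP with the requests library

# Hypothetical request body following the updated CompletionsBody schema.
payload = {
    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
    "history": [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hello, how can I help you?"},
    ],
    "prompt": "How do you fry an egg?",
    "system_prompt": "You are a rapper. Always answer with a rap.",
    "use_context": False,
    "stream": False,
}

# Placeholder URL and token; adjust to the deployment being targeted.
response = requests.post(
    "http://localhost:8001/v1/completions",
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code, response.json())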
@@ -159,11 +169,31 @@ async def prompt_completion(
        )
    if (chat_history is None) and (chat_history.user_id != current_user.id):
        raise HTTPException(
-            status_code=404, detail="Chat history not found")
+            status_code=404, detail="Chat not found")
 
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+    _history = body.history if body.history else []
 
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == 'user':
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user"
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant"
+                    )
+                )
+        return history_messages
+
+    user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
@@ -174,9 +204,10 @@ async def prompt_completion(
     if body.system_prompt:
         messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
-
+
+    all_messages = [*build_history(), user_message]
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
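In isolation, the new build_history() helper maps each prior turn onto an OpenAIMessage with the matching role, and all_messages prepends that list to the current user message. Below is a minimal, self-contained sketch of the same idea; the Message class is a local stand-in for OpenAIMessage (which lives elsewhere in the codebase), so names here are illustrative rather than the project's own API.

from pydantic import BaseModel

class Message(BaseModel):
    # Stand-in for OpenAIMessage; only the fields used by build_history().
    role: str
    content: str

def build_history(history: list[Message]) -> list[Message]:
    # Same mapping as the helper added in this commit: user turns stay "user",
    # every other role is normalised to "assistant".
    return [
        Message(content=m.content, role="user" if m.role == "user" else "assistant")
        for m in history
    ]

history = [
    Message(role="user", content="Hello!"),
    Message(role="assistant", content="Hello, how can I help you?"),
]
user_message = Message(role="user", content="How do you fry an egg?")

# Equivalent of: all_messages = [*build_history(), user_message]
all_messages = [*build_history(history), user_message]
print([m.role for m in all_messages])  # ['user', 'assistant', 'user']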
@@ -45,8 +45,8 @@ llm:
 
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
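The llm_hf_repo_id / llm_hf_model_file pair identifies which GGUF file to pull from Hugging Face. A minimal sketch of the corresponding download step is shown below; it assumes the huggingface_hub package and is not necessarily how this project's own setup script performs the download.

from huggingface_hub import hf_hub_download  # assumption: huggingface_hub is installed

# Fetch the GGUF weights referenced by the updated settings; the file is
# cached in huggingface_hub's default cache directory.
model_path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
)
print(model_path)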