From bf135b1692f160131123e09cd91efcffa0346dab Mon Sep 17 00:00:00 2001
From: Saurab-Shrestha
Date: Wed, 10 Apr 2024 15:13:06 +0545
Subject: [PATCH] Added build history

---
 .env                                          |  2 +-
 .../server/completions/completions_router.py | 47 +++++++++++++++----
 settings.yaml                                 |  4 +-
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/.env b/.env
index fdfe9c80..f218b889 100644
--- a/.env
+++ b/.env
@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 
 SUPER_ADMIN_EMAIL=superadmin@email.com
diff --git a/private_gpt/server/completions/completions_router.py b/private_gpt/server/completions/completions_router.py
index 448d5116..734f1799 100644
--- a/private_gpt/server/completions/completions_router.py
+++ b/private_gpt/server/completions/completions_router.py
@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]]
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
         "json_schema_extra": {
             "examples": [
                 {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                     "prompt": "How do you fry an egg?",
                     "system_prompt": "You are a rapper. Always answer with a rap.",
                     "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
         }
     }
 
-
 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
 
@@ -159,11 +169,31 @@ async def prompt_completion(
         )
     if (chat_history is None) and (chat_history.user_id != current_user.id):
         raise HTTPException(
-            status_code=404, detail="Chat history not found")
+            status_code=404, detail="Chat not found")
 
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+    _history = body.history if body.history else []
 
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == 'user':
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user"
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant"
+                    )
+                )
+        return history_messages
+
+    user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
@@ -174,9 +204,10 @@ async def prompt_completion(
     if body.system_prompt:
         messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
-
+
+    all_messages = [*build_history(), user_message]
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
diff --git a/settings.yaml b/settings.yaml
index 449e017c..8910af65 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -45,8 +45,8 @@ llm:
 
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
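
Note: below is a minimal client-side sketch of exercising the new optional "history" field. It assumes the router serves the completions route at POST /v1/completions on a locally running instance; the host, port, route, and auth header are assumptions for illustration, not part of this patch.

# Sketch: call the updated completions endpoint with prior turns (assumed URL/auth).
import requests

payload = {
    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
    "history": [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hello, how can I help you?"},
    ],
    "prompt": "How do you fry an egg?",
    "use_context": False,
    "stream": False,
}

response = requests.post(
    "http://localhost:8001/v1/completions",          # assumed route and port
    json=payload,
    headers={"Authorization": "Bearer <token>"},      # placeholder credentials
    timeout=60,
)
response.raise_for_status()
print(response.json())

When "history" is omitted, the handler falls back to an empty list, so existing single-turn clients keep working unchanged.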
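
The settings.yaml change points llamacpp at a quantized GGUF artifact, so llm_hf_repo_id and llm_hf_model_file must name a repository and file that actually exist on Hugging Face. A hedged sketch of pre-fetching that file with huggingface_hub follows; the local_dir value is an assumption, and the project's own setup tooling may already handle the download.

# Sketch: pre-download the GGUF file referenced by settings.yaml.
# repo_id/filename mirror the patched settings; local_dir is an assumed target.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
    local_dir="models",
)
print(model_path)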