Added chat history building to prompt completion

Saurab-Shrestha 2024-04-10 15:13:06 +05:45
parent 44d94e145e
commit bf135b1692
3 changed files with 42 additions and 11 deletions

.env

@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 SUPER_ADMIN_EMAIL=superadmin@email.com
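
For context, these variables typically end up in a SQLAlchemy connection URL along the lines of the sketch below; the exact settings wiring in this repo is an assumption.

import os

from sqlalchemy import create_engine

# Assemble a Postgres URL from the .env values above (wiring assumed).
DATABASE_URL = (
    f"postgresql://{os.environ['DB_USER']}:{os.environ['DB_PASSWORD']}"
    f"@{os.environ['DB_HOST']}:{os.environ['DB_PORT']}/{os.environ['DB_NAME']}"
)
engine = create_engine(DATABASE_URL)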


@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]]
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
         "json_schema_extra": {
             "examples": [
                 {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                     "prompt": "How do you fry an egg?",
                     "system_prompt": "You are a rapper. Always answer with a rap.",
                     "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
         }
     }

 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
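
For reference, a minimal client call exercising the new history field might look like the sketch below; the /v1/completions route is inferred from the router prefix, and the port, token, and payload values are assumptions.

import requests

# Hypothetical request against a local instance; URL and token are assumed.
response = requests.post(
    "http://localhost:8001/v1/completions",
    headers={"Authorization": "Bearer <token>"},  # router requires auth
    json={
        "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
        "history": [
            {"role": "user", "content": "Hello!"},
            {"role": "assistant", "content": "Hello, how can I help you?"},
        ],
        "prompt": "How do you fry an egg?",
        "stream": False,
    },
)
print(response.json())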
@@ -159,11 +169,31 @@ async def prompt_completion(
     )
     if (chat_history is None) or (chat_history.user_id != current_user.id):
         raise HTTPException(
-            status_code=404, detail="Chat history not found")
+            status_code=404, detail="Chat not found")
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+    _history = body.history if body.history else []
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == 'user':
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user"
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant"
+                    )
+                )
+        return history_messages
     user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
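
The two branches of build_history differ only in the role string, so the loop can be collapsed. A minimal equivalent sketch, assuming every non-"user" entry should be coerced to "assistant" exactly as the branches above do:

def build_history() -> list[OpenAIMessage]:
    # Map each prior turn to an OpenAIMessage, coercing unknown roles.
    return [
        OpenAIMessage(
            content=interaction.content,
            role="user" if interaction.role == "user" else "assistant",
        )
        for interaction in _history
    ]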
@@ -174,9 +204,10 @@ async def prompt_completion(
     if body.system_prompt:
         messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
+    all_messages = [*build_history(), user_message]
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
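
One caveat visible in this hunk: the system prompt is still inserted into the old messages list, while ChatBody now receives all_messages, so the system prompt never reaches the model. A hedged sketch of one way to keep it, reusing only names from the diff:

all_messages = [*build_history(), user_message]
if body.system_prompt:
    # Prepend the system prompt to the list actually passed to ChatBody.
    all_messages.insert(
        0, OpenAIMessage(content=body.system_prompt, role="system")
    )
chat_body = ChatBody(
    messages=all_messages,
    use_context=body.use_context,
    stream=body.stream,
    include_sources=body.include_sources,
)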


@@ -45,8 +45,8 @@ llm:
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
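
If the new GGUF file is not already present locally, it can be pre-fetched with huggingface_hub; the target directory below is an assumption.

from huggingface_hub import hf_hub_download

# Download the quantized model named in the new settings.
hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
    local_dir="models",  # assumed destination; adjust to your setup
)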