Added build history

Saurab-Shrestha 2024-04-10 15:13:06 +05:45
parent 44d94e145e
commit bf135b1692
3 changed files with 42 additions and 11 deletions

.env
View File

@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 SUPER_ADMIN_EMAIL=superadmin@email.com
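
The .env change only swaps the database password. As a minimal sketch of how these values could be turned into a connection string — assuming python-dotenv and PostgreSQL, since the router below imports sqlalchemy.orm.Session but the project's actual settings loader is not part of this diff:

# Illustrative only: QuickGpt's real settings loader is not shown in this commit.
import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

load_dotenv()  # reads the .env file above from the working directory

def database_url() -> str:
    # Assemble a PostgreSQL URL from the DB_* values defined in .env.
    return (
        f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}"
        f"@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
    )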

View File

@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]] = None
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
         "json_schema_extra": {
             "examples": [
                 {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                     "prompt": "How do you fry an egg?",
                     "system_prompt": "You are a rapper. Always answer with a rap.",
                     "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
         }
     }
-
 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
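
With history now part of CompletionsBody, a resumed-conversation request could look like the sketch below. The route path under the router's /v1 prefix, the port, and the bearer header are assumptions not visible in this diff; only the body shape comes from the model above.

# Hypothetical client call: route path, port, and auth header are assumptions.
import requests

payload = {
    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
    "history": [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hello, how can I help you?"},
    ],
    "prompt": "How do you fry an egg?",
    "system_prompt": "You are a rapper. Always answer with a rap.",
    "stream": False,
}
response = requests.post(
    "http://localhost:8001/v1/completions",
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(response.json())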
@@ -159,11 +169,31 @@ async def prompt_completion(
     )
     if (chat_history is None) or (chat_history.user_id != current_user.id):
         raise HTTPException(
-            status_code=404, detail="Chat history not found")
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+            status_code=404, detail="Chat not found")
+    _history = body.history if body.history else []
+
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == "user":
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user",
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant",
+                    )
+                )
+        return history_messages
+
+    user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
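
build_history maps every non-"user" role in the incoming history to "assistant", so a "system" entry in the replayed history would be relabeled too. A condensed, behavior-equivalent sketch, with a stand-in Msg type in place of OpenAIMessage:

# Standalone sketch; Msg stands in for OpenAIMessage from private_gpt.
from dataclasses import dataclass

@dataclass
class Msg:
    content: str
    role: str

def build_history(history: list[Msg]) -> list[Msg]:
    # Same effect as the if/else above: anything not "user" becomes "assistant".
    return [
        Msg(m.content, "user" if m.role == "user" else "assistant")
        for m in history
    ]

print(build_history([Msg("Hello!", "user"), Msg("Hi!", "assistant")]))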
@@ -174,9 +204,10 @@ async def prompt_completion(
+    all_messages = [*build_history(), user_message]
     if body.system_prompt:
-        messages.insert(0, OpenAIMessage(
+        all_messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
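
The list handed to ChatBody therefore puts the optional system prompt first, then the replayed history, then the new user prompt. Continuing with the Msg stand-in from the previous sketch:

# Message order handed to ChatBody, using Msg and build_history from above.
history = [Msg("Hello!", "user"), Msg("Hi!", "assistant")]
user_message = Msg("How do you fry an egg?", "user")
all_messages = [*build_history(history), user_message]
all_messages.insert(0, Msg("You are a rapper. Always answer with a rap.", "system"))
print([m.role for m in all_messages])  # ['system', 'user', 'assistant', 'user']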

View File

@@ -45,8 +45,8 @@ llm:
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
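
The settings change points llamacpp at a quantized OpenHermes 2.5 build. If the weights need to be fetched ahead of time, a sketch using huggingface_hub — privateGPT's usual setup script can also download the file referenced by these two keys:

# Pre-download the GGUF file referenced by the settings above.
from huggingface_hub import hf_hub_download  # assumes huggingface_hub is installed

path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
)
print(f"Model saved to {path}")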