Mirror of https://github.com/imartinez/privateGPT.git, synced 2025-07-10 05:44:29 +00:00
Added build history
This commit is contained in: parent 44d94e145e · commit bf135b1692
.env (2 changes)

@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 
 SUPER_ADMIN_EMAIL=superadmin@email.com
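For context, these variables are typically read at application startup. The following is a minimal sketch of how the updated values might be consumed; the use of python-dotenv and the dictionary shape are illustrative assumptions, not part of this diff.

import os
from dotenv import load_dotenv  # assumption: python-dotenv is installed

# Load key/value pairs from .env into the process environment.
load_dotenv()

# Read the database settings touched by this commit; defaults mirror the file.
db_settings = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "5432")),
    "user": os.getenv("DB_USER", "postgres"),
    "password": os.getenv("DB_PASSWORD", "admin"),
    "dbname": os.getenv("DB_NAME", "QuickGpt"),
}
print(db_settings)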
@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]]
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
        "json_schema_extra": {
            "examples": [
                {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                    "prompt": "How do you fry an egg?",
                    "system_prompt": "You are a rapper. Always answer with a rap.",
                    "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
        }
    }
 
 
 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
-
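For reference, a request matching the updated CompletionsBody schema could look like the sketch below. The field names and example values come from the schema above; the endpoint path, host, port, auth header, and the use of the requests library are assumptions for illustration only.

import requests  # assumption: the API is exercised over HTTP with the requests library

# Hypothetical request body following the updated CompletionsBody schema.
payload = {
    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
    "history": [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hello, how can I help you?"},
    ],
    "prompt": "How do you fry an egg?",
    "system_prompt": "You are a rapper. Always answer with a rap.",
    "use_context": False,
    "stream": False,
}

# Placeholder URL and token; adjust to the deployment being targeted.
response = requests.post(
    "http://localhost:8001/v1/completions",
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code, response.json())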
@@ -159,11 +169,31 @@ async def prompt_completion(
        )
    if (chat_history is None) and (chat_history.user_id != current_user.id):
        raise HTTPException(
-            status_code=404, detail="Chat history not found")
+            status_code=404, detail="Chat not found")
 
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+    _history = body.history if body.history else []
 
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == 'user':
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user"
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant"
+                    )
+                )
+        return history_messages
+
+    user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
@@ -174,9 +204,10 @@ async def prompt_completion(
     if body.system_prompt:
         messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
-
+
+    all_messages = [*build_history(), user_message]
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
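In isolation, the new build_history() helper maps each prior turn onto an OpenAIMessage with the matching role, and all_messages prepends that list to the current user message. Below is a minimal, self-contained sketch of the same idea; the Message class is a local stand-in for OpenAIMessage (which lives elsewhere in the codebase), so names here are illustrative rather than the project's own API.

from pydantic import BaseModel

class Message(BaseModel):
    # Stand-in for OpenAIMessage; only the fields used by build_history().
    role: str
    content: str

def build_history(history: list[Message]) -> list[Message]:
    # Same mapping as the helper added in this commit: user turns stay "user",
    # every other role is normalised to "assistant".
    return [
        Message(content=m.content, role="user" if m.role == "user" else "assistant")
        for m in history
    ]

history = [
    Message(role="user", content="Hello!"),
    Message(role="assistant", content="Hello, how can I help you?"),
]
user_message = Message(role="user", content="How do you fry an egg?")

# Equivalent of: all_messages = [*build_history(), user_message]
all_messages = [*build_history(history), user_message]
print([m.role for m in all_messages])  # ['user', 'assistant', 'user']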
@@ -45,8 +45,8 @@ llm:
 
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
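The llm_hf_repo_id / llm_hf_model_file pair identifies which GGUF file to pull from Hugging Face. A minimal sketch of the corresponding download step is shown below; it assumes the huggingface_hub package and is not necessarily how this project's own setup script performs the download.

from huggingface_hub import hf_hub_download  # assumption: huggingface_hub is installed

# Fetch the GGUF weights referenced by the updated settings; the file is
# cached in huggingface_hub's default cache directory.
model_path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",
)
print(model_path)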