mirror of https://github.com/imartinez/privateGPT.git
synced 2025-07-11 14:23:04 +00:00

Added build history

parent 44d94e145e
commit bf135b1692
.env
@@ -4,7 +4,7 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 
 SUPER_ADMIN_EMAIL=superadmin@email.com
@@ -4,7 +4,7 @@ from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from fastapi import APIRouter, Depends, Request, Security, HTTPException, status
 from private_gpt.server.ingest.ingest_service import IngestService
 from pydantic import BaseModel
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from sqlalchemy.orm import Session
 import traceback
 import logging
@@ -28,6 +28,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 
 class CompletionsBody(BaseModel):
     conversation_id: uuid.UUID
+    history: Optional[list[OpenAIMessage]]
     prompt: str
     system_prompt: str | None = None
     use_context: bool = False
@@ -39,7 +40,17 @@ class CompletionsBody(BaseModel):
         "json_schema_extra": {
             "examples": [
                 {
-                    "conversation_id": 123,
+                    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+                    "history": [
+                        {
+                            "role": "user",
+                            "content": "Hello!"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello, how can I help you?"
+                        }
+                    ],
                     "prompt": "How do you fry an egg?",
                     "system_prompt": "You are a rapper. Always answer with a rap.",
                     "stream": False,
@@ -50,7 +61,6 @@ class CompletionsBody(BaseModel):
         }
     }
 
-
 class ChatContentCreate(BaseModel):
     content: Dict[str, Any]
 
@@ -159,11 +169,31 @@ async def prompt_completion(
     )
     if (chat_history is None) and (chat_history.user_id != current_user.id):
         raise HTTPException(
-            status_code=404, detail="Chat history not found")
+            status_code=404, detail="Chat not found")
 
-    user_message = OpenAIMessage(content=body.prompt, role="user")
-    user_message = user_message.model_dump(mode="json")
+    _history = body.history if body.history else []
 
+    def build_history() -> list[OpenAIMessage]:
+        history_messages: list[OpenAIMessage] = []
+        for interaction in _history:
+            role = interaction.role
+            if role == 'user':
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="user"
+                    )
+                )
+            else:
+                history_messages.append(
+                    OpenAIMessage(
+                        content=interaction.content,
+                        role="assistant"
+                    )
+                )
+        return history_messages
+
+    user_message = OpenAIMessage(content=body.prompt, role="user")
     user_message_json = {
         'text': body.prompt,
     }
@@ -174,9 +204,10 @@ async def prompt_completion(
     if body.system_prompt:
         messages.insert(0, OpenAIMessage(
             content=body.system_prompt, role="system"))
 
+    all_messages = [*build_history(), user_message]
     chat_body = ChatBody(
-        messages=messages,
+        messages=all_messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
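
For reference, the new history field added above can be exercised with a plain HTTP request. The sketch below is not part of the commit: the host, port, route, and bearer-token header are assumptions (matching upstream privateGPT defaults); only the request-body shape comes from this diff.

# Minimal sketch: POST a completion request that carries prior turns
# in the new "history" field.
import requests

payload = {
    "conversation_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
    "history": [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hello, how can I help you?"},
    ],
    "prompt": "How do you fry an egg?",
    "use_context": False,
    "stream": False,
}

resp = requests.post(
    "http://localhost:8001/v1/completions",        # assumed host, port, and route
    json=payload,
    headers={"Authorization": "Bearer <token>"},   # auth implied by the router dependencies; token format assumed
    timeout=60,
)
print(resp.json())
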
@@ -45,8 +45,8 @@ llm:
 
 llamacpp:
   prompt_style: "chatml"
-  llm_hf_repo_id: NousResearch/Hermes-2-Pro-Mistral-7B
-  llm_hf_model_file: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
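
The settings change above points llama.cpp at a different quantized GGUF, which has to exist locally before it can be loaded. A minimal pre-fetch sketch, not part of the commit: the repo id and file name are taken from the new settings values, while the use of huggingface_hub and the models/ target directory are assumptions.

# Minimal sketch: download the GGUF referenced by the updated settings.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",  # from the new llm_hf_repo_id
    filename="openhermes-2.5-mistral-7b.Q5_K_M.gguf",   # from the new llm_hf_model_file
    local_dir="models",                                  # assumed target directory
)
print(model_path)
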