mirror of https://github.com/imartinez/privateGPT.git
Added llama3 prompt
commit 1d6fc7144a
parent 3282d52bf2
poetry.lock (generated) | 13 lines changed
@@ -7006,6 +7006,17 @@ files = [
     {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
 ]
 
+[[package]]
+name = "xlsxwriter"
+version = "3.2.0"
+description = "A Python module for creating Excel XLSX files."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"},
+    {file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"},
+]
+
 [[package]]
 name = "yarl"
 version = "1.9.4"
@@ -7215,4 +7226,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "d7f6a81832b4a41ebd23b4e2041beb53bc26437212204254c87e33d1a4e68b8d"
+content-hash = "0eabe198d1cce12a17bce03fe3f6d2aad58c74d9b5e19a54db40271f92293e83"
@@ -138,6 +138,40 @@ class Llama2PromptStyle(AbstractPromptStyle):
         )
 
 
+class Llama3PromptStyle(AbstractPromptStyle):
+    """Simple prompt style that uses llama 3 prompt style.
+
+    Inspired by llama_index/legacy/llms/llama_utils.py
+
+    It transforms the sequence of messages into a prompt that should look like:
+    ```
+    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+    You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
+    {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    ```
+    """
+
+    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
+        prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
+        for message in messages:
+            role = message.role
+            content = message.content or ""
+            if role.lower() == "system":
+                message_from_user = f"{content.strip()}"
+                prompt += message_from_user
+            elif role.lower() == "user":
+                prompt += "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
+                message_from_user = f"{content.strip()}<|eot_id|>\n"
+                prompt += message_from_user
+        prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
+        print("THE PROMPT MESSAGE: ", prompt)
+        return prompt
+
+    def _completion_to_prompt(self, completion: str) -> str:
+        return self._messages_to_prompt(
+            [ChatMessage(content=completion, role=MessageRole.USER)]
+        )
+
 class TagPromptStyle(AbstractPromptStyle):
     """Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.
 
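For reference, the template that the new class produces can be sanity-checked in isolation. The snippet below is a minimal standalone sketch that mirrors the `_messages_to_prompt` loop using plain (role, content) tuples, so it needs no privateGPT or llama-index imports; `render_llama3_prompt` is an illustrative name, not part of this commit.

```python
# Standalone sketch of the Llama 3 chat template built by Llama3PromptStyle.
# `render_llama3_prompt` is illustrative; the real class works on ChatMessage objects.
def render_llama3_prompt(messages: list[tuple[str, str]]) -> str:
    prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
    for role, content in messages:
        if role.lower() == "system":
            prompt += content.strip()
        elif role.lower() == "user":
            prompt += "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
            prompt += f"{content.strip()}<|eot_id|>\n"
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
    return prompt


print(render_llama3_prompt([
    ("system", "You are a helpful assistant"),
    ("user", "Summarize the uploaded document."),
]))
```

The output matches the docstring above: system header and content, an `<|eot_id|>` boundary, the user turn, and a trailing assistant header left open for the model to complete.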
@@ -226,6 +260,8 @@ def get_prompt_style(
         return DefaultPromptStyle()
     elif prompt_style == "llama2":
         return Llama2PromptStyle()
+    elif prompt_style == "llama3":
+        return Llama3PromptStyle()
     elif prompt_style == "tag":
         return TagPromptStyle()
     elif prompt_style == "mistral":
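Assuming the surrounding dispatcher lives in `private_gpt/components/llm/prompt_helper.py` as in upstream privateGPT (the module path is not shown in this diff), the new branch can be exercised directly; if the wiring matches upstream, this is the class selected when `llamacpp.prompt_style` is set to `"llama3"` in the settings.

```python
# Hypothetical usage; the import path is assumed from the upstream privateGPT layout.
from private_gpt.components.llm.prompt_helper import Llama3PromptStyle, get_prompt_style

style = get_prompt_style("llama3")
assert isinstance(style, Llama3PromptStyle)
```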
@@ -15,6 +15,8 @@ router = APIRouter(prefix="/c", tags=["Chat Histories"])
 @router.get("", response_model=list[schemas.ChatHistory])
 def list_chat_histories(
     db: Session = Depends(deps.get_db),
+    skip: int = 0,
+    limit: int = 100,
     current_user: models.User = Security(
         deps.get_current_user,
     ),
@@ -24,7 +26,7 @@ def list_chat_histories(
     """
     try:
         chat_histories = crud.chat.get_chat_history(
-            db, user_id=current_user.id)
+            db, user_id=current_user.id, skip=skip, limit=limit)
         return chat_histories
     except Exception as e:
         print(traceback.format_exc())
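With `skip` and `limit` exposed as query parameters, clients can page through chat histories instead of always receiving the full list. A minimal sketch of such a call, assuming the API is served locally on port 8000 and protected by a bearer token (both assumptions, not shown in the diff):

```python
# Hypothetical client call; base URL and auth header are assumptions.
import requests

resp = requests.get(
    "http://localhost:8000/c",          # router prefix "/c" from the diff above
    params={"skip": 0, "limit": 20},    # new pagination query parameters
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
histories = resp.json()
print(f"fetched {len(histories)} chat histories (at most 20)")
```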
@@ -23,8 +23,7 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix='/documents', tags=['Documents'])
 
 
-CHECKER = False
-ENABLE_MAKER = False
+ENABLE_MAKER_CHECKER = False
 
 def get_username(db, id):
     user = crud.user.get_by_id(db=db, id=id)
@@ -299,7 +298,7 @@ async def upload_documents(
         logger.info(
             f"{original_filename} is uploaded by {current_user.username} in {departments.departments_ids}")
 
-        if not ENABLE_MAKER:
+        if not ENABLE_MAKER_CHECKER:
             checker_in = schemas.DocumentUpdate(
                 id=document.id,
                 status=MakerCheckerStatus.APPROVED.value
@@ -343,7 +342,7 @@ async def verify_documents(
     )
 
 
-    if CHECKER:
+    if ENABLE_MAKER_CHECKER:
         if document.verified:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
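The three overlapping flags collapse into a single `ENABLE_MAKER_CHECKER` switch: with it off, uploads are auto-approved; with it on, an already-verified document cannot be verified again. A simplified illustration of that gating (the real handlers persist through `crud` and raise `HTTPException` instead):

```python
# Simplified illustration of the single maker-checker switch; not the actual handlers.
ENABLE_MAKER_CHECKER = False


def on_upload(document: dict) -> dict:
    if not ENABLE_MAKER_CHECKER:
        # Maker-checker disabled: approve the document immediately on upload.
        document["status"] = "APPROVED"
    return document


def on_verify(document: dict) -> dict:
    if ENABLE_MAKER_CHECKER and document.get("verified"):
        # Maker-checker enabled: reject a second verification attempt.
        raise ValueError("document already verified")
    document["verified"] = True
    return document


print(on_upload({"id": 1}))   # {'id': 1, 'status': 'APPROVED'}
```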
@@ -27,12 +27,14 @@ class CRUDChat(CRUDBase[ChatHistory, ChatHistoryCreate, ChatHistoryCreate]):
         return chat_history
 
     def get_chat_history(
-        self, db: Session, *,user_id:int
+        self, db: Session, *,user_id:int, skip: int = 0, limit: int =100
     ) -> List[ChatHistory]:
         return (
             db.query(self.model)
             .filter(ChatHistory.user_id == user_id)
             .order_by(desc(getattr(ChatHistory, 'created_at')))
+            .offset(skip)
+            .limit(limit)
             .all()
         )
 
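The new keyword-only `skip`/`limit` arguments translate directly into SQL `OFFSET`/`LIMIT` on the ordered query. The following self-contained sketch reproduces the pattern against an in-memory SQLite database; the `ChatHistory` model here is illustrative, not the project's actual model.

```python
# Self-contained sketch of the OFFSET/LIMIT pagination pattern used above.
from sqlalchemy import Column, DateTime, Integer, create_engine, desc, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class ChatHistory(Base):  # illustrative stand-in for the project's model
    __tablename__ = "chat_history"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, index=True)
    created_at = Column(DateTime, server_default=func.now())


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as db:
    db.add_all([ChatHistory(user_id=1) for _ in range(25)])
    db.commit()

    page = (
        db.query(ChatHistory)
        .filter(ChatHistory.user_id == 1)
        .order_by(desc(ChatHistory.created_at))
        .offset(10)   # skip the first 10 rows
        .limit(5)     # return at most 5 rows
        .all()
    )
    print(len(page))  # 5
```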
@@ -57,6 +57,7 @@ uuid = "^1.30"
 openpyxl = "^3.1.2"
 pandas = "^2.2.2"
 fastapi-pagination = "^0.12.23"
+xlsxwriter = "^3.2.0"
 
 [tool.poetry.extras]
 ui = ["gradio"]
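`xlsxwriter` joins the runtime dependencies, presumably for Excel exports (the consuming code is not part of this diff). A minimal sketch of the XlsxWriter API; the file name and columns are illustrative:

```python
# Minimal XlsxWriter sketch; file name and columns are illustrative.
import xlsxwriter

workbook = xlsxwriter.Workbook("chat_histories.xlsx")
worksheet = workbook.add_worksheet("Chats")

worksheet.write_row(0, 0, ["id", "user_id", "created_at"])   # header row
worksheet.write_row(1, 0, [1, 42, "2024-05-01 10:30:00"])    # sample data row

workbook.close()  # finalize and write the file to disk
```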
@@ -44,9 +44,9 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
 
 llamacpp:
-  prompt_style: "chatml"
-  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
-  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
+  prompt_style: "default"
+  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
+  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
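The default llama.cpp model moves to a Llama 3 8B Instruct GGUF build, while `prompt_style` is left at `"default"` here; routing prompts through the new class would require `prompt_style: "llama3"`. If the project follows privateGPT's usual local setup flow, the new weights can be fetched with `huggingface_hub` (a sketch; the local directory is an assumption, and upstream privateGPT normally wraps this in its own setup script):

```python
# Sketch: download the GGUF file referenced by the new settings.
# repo_id and filename come from the diff above; local_dir is an assumption.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="bartowski/Meta-Llama-3-8B-Instruct-GGUF",
    filename="Meta-Llama-3-8B-Instruct-Q6_K.gguf",
    local_dir="models",
)
print(path)
```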