Added llama3 prompt

Saurab-Shrestha 2024-04-24 17:15:13 +05:45
parent 3282d52bf2
commit 1d6fc7144a
7 changed files with 61 additions and 10 deletions

poetry.lock generated

@@ -7006,6 +7006,17 @@ files = [
{file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
]
[[package]]
name = "xlsxwriter"
version = "3.2.0"
description = "A Python module for creating Excel XLSX files."
optional = false
python-versions = ">=3.6"
files = [
{file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"},
{file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"},
]
[[package]]
name = "yarl"
version = "1.9.4"
@@ -7215,4 +7226,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "d7f6a81832b4a41ebd23b4e2041beb53bc26437212204254c87e33d1a4e68b8d"
content-hash = "0eabe198d1cce12a17bce03fe3f6d2aad58c74d9b5e19a54db40271f92293e83"


@@ -138,6 +138,40 @@ class Llama2PromptStyle(AbstractPromptStyle):
)
class Llama3PromptStyle(AbstractPromptStyle):
    """Simple prompt style that uses the Llama 3 prompt format.

    Inspired by llama_index/legacy/llms/llama_utils.py

    It transforms the sequence of messages into a prompt that should look like:
    ```
    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
    {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
    ```
    """

    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
        prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        for message in messages:
            role = message.role
            content = message.content or ""
            if role.lower() == "system":
                message_from_user = f"{content.strip()}"
                prompt += message_from_user
            elif role.lower() == "user":
                prompt += "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
                message_from_user = f"{content.strip()}<|eot_id|>\n"
                prompt += message_from_user
        prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
        print("THE PROMPT MESSAGE: ", prompt)
        return prompt

    def _completion_to_prompt(self, completion: str) -> str:
        return self._messages_to_prompt(
            [ChatMessage(content=completion, role=MessageRole.USER)]
        )
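For illustration only (not part of the commit): a minimal usage sketch of the new style. The import path assumes a recent llama-index layout (older versions expose `ChatMessage`/`MessageRole` under `llama_index.llms`), and the no-argument construction of `Llama3PromptStyle` is an assumption.

```python
from llama_index.core.llms import ChatMessage, MessageRole  # assumed import path

style = Llama3PromptStyle()
prompt = style._messages_to_prompt(
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant"),
        ChatMessage(role=MessageRole.USER, content="What is retrieval-augmented generation?"),
    ]
)
# `prompt` now starts with the <|begin_of_text|>/system header, carries the user
# turn wrapped in <|eot_id|> markers, and ends with an open assistant header,
# matching the template in the docstring above.
print(prompt)
```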
class TagPromptStyle(AbstractPromptStyle):
"""Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.
@@ -226,6 +260,8 @@ def get_prompt_style(
        return DefaultPromptStyle()
    elif prompt_style == "llama2":
        return Llama2PromptStyle()
    elif prompt_style == "llama3":
        return Llama3PromptStyle()
    elif prompt_style == "tag":
        return TagPromptStyle()
    elif prompt_style == "mistral":

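Hypothetical wiring, assuming a settings file that sets `llamacpp.prompt_style: "llama3"` so that the new branch of the factory above is taken:

```python
style = get_prompt_style("llama3")          # returns the new Llama3PromptStyle
assert isinstance(style, Llama3PromptStyle)
prompt = style._completion_to_prompt("Summarize this document.")
```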

@@ -15,6 +15,8 @@ router = APIRouter(prefix="/c", tags=["Chat Histories"])
@router.get("", response_model=list[schemas.ChatHistory])
def list_chat_histories(
    db: Session = Depends(deps.get_db),
    skip: int = 0,
    limit: int = 100,
    current_user: models.User = Security(
        deps.get_current_user,
    ),
@@ -24,7 +26,7 @@ def list_chat_histories(
    """
    try:
        chat_histories = crud.chat.get_chat_history(
-            db, user_id=current_user.id)
+            db, user_id=current_user.id, skip=skip, limit=limit)
        return chat_histories
    except Exception as e:
        print(traceback.format_exc())

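A minimal sketch of how the new `skip`/`limit` query parameters could be exercised; `app` and `token` are placeholders for the FastAPI application and an auth token, neither of which appears in this diff.

```python
from fastapi.testclient import TestClient

client = TestClient(app)  # `app` is assumed to include the router above

# First page: at most the 20 most recent chat histories for the authenticated user.
resp = client.get(
    "/c",
    params={"skip": 0, "limit": 20},
    headers={"Authorization": f"Bearer {token}"},
)
assert resp.status_code == 200
assert len(resp.json()) <= 20

# Next page.
resp = client.get(
    "/c",
    params={"skip": 20, "limit": 20},
    headers={"Authorization": f"Bearer {token}"},
)
```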

@@ -23,8 +23,7 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix='/documents', tags=['Documents'])

-CHECKER = False
-ENABLE_MAKER = False
+ENABLE_MAKER_CHECKER = False


def get_username(db, id):
    user = crud.user.get_by_id(db=db, id=id)
@@ -299,7 +298,7 @@ async def upload_documents(
    logger.info(
        f"{original_filename} is uploaded by {current_user.username} in {departments.departments_ids}")
-    if not ENABLE_MAKER:
+    if not ENABLE_MAKER_CHECKER:
        checker_in = schemas.DocumentUpdate(
            id=document.id,
            status=MakerCheckerStatus.APPROVED.value
@@ -343,7 +342,7 @@ async def verify_documents(
    )
-    if CHECKER:
+    if ENABLE_MAKER_CHECKER:
if document.verified:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,

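A self-contained sketch (not the repo's code; names other than `ENABLE_MAKER_CHECKER` are illustrative) of the behaviour the two hunks above converge on: a single flag now decides both whether uploads are auto-approved and whether the verification step applies.

```python
ENABLE_MAKER_CHECKER = False  # one switch for the maker and checker steps

def on_upload(document) -> None:
    if not ENABLE_MAKER_CHECKER:
        # Maker-checker disabled: uploaded documents are approved immediately.
        document.status = "APPROVED"

def on_verify(document) -> None:
    if ENABLE_MAKER_CHECKER:
        if document.verified:
            raise ValueError("Document already verified")
        document.verified = True
```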

@@ -27,12 +27,14 @@ class CRUDChat(CRUDBase[ChatHistory, ChatHistoryCreate, ChatHistoryCreate]):
        return chat_history

    def get_chat_history(
-        self, db: Session, *,user_id:int
+        self, db: Session, *,user_id:int, skip: int = 0, limit: int =100
    ) -> List[ChatHistory]:
        return (
            db.query(self.model)
            .filter(ChatHistory.user_id == user_id)
            .order_by(desc(getattr(ChatHistory, 'created_at')))
            .offset(skip)
            .limit(limit)
            .all()
        )


@@ -57,6 +57,7 @@ uuid = "^1.30"
openpyxl = "^3.1.2"
pandas = "^2.2.2"
fastapi-pagination = "^0.12.23"
xlsxwriter = "^3.2.0"
[tool.poetry.extras]
ui = ["gradio"]

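The commit does not show where the new `xlsxwriter` dependency is used; the snippet below is only a minimal illustration of the library (presumably added for an Excel export), with a hypothetical file name and columns.

```python
import xlsxwriter

workbook = xlsxwriter.Workbook("chat_histories.xlsx")   # hypothetical export file
worksheet = workbook.add_worksheet()
worksheet.write_row(0, 0, ["id", "user_id", "created_at"])  # header row
worksheet.write_row(1, 0, [1, 42, "2024-04-24"])            # example data row
workbook.close()
```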

@@ -44,9 +44,9 @@ llm:
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2

llamacpp:
-  prompt_style: "chatml"
-  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
-  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
+  prompt_style: "default"
+  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
+  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
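Illustration only of how these sampling settings map onto a llama-cpp-python completion call; the keyword names are assumed to match llama-cpp-python's `create_completion`, and the model path is a placeholder for the GGUF file referenced above.

```python
from llama_cpp import Llama

llm = Llama(model_path="models/Meta-Llama-3-8B-Instruct-Q6_K.gguf")  # placeholder path
out = llm(
    "Q: What does tail free sampling do?\nA:",
    max_tokens=128,
    tfs_z=1.0,   # 1.0 disables tail free sampling
    top_k=40,    # consider only the 40 most likely tokens
    top_p=0.9,   # nucleus sampling threshold
)
print(out["choices"][0]["text"])
```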