Added new docker files

Saurab-Shrestha 2024-04-28 11:25:38 +05:45
parent 1d6fc7144a
commit c7aac53cd9
15 changed files with 76 additions and 54 deletions

.env

@@ -11,8 +11,10 @@ SUPER_ADMIN_EMAIL=superadmin@email.com
SUPER_ADMIN_PASSWORD=supersecretpassword
SUPER_ADMIN_ACCOUNT_NAME=superaccount
SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
ACCESS_TOKEN_EXPIRE_MINUTES=720
SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
ACCESS_TOKEN_EXPIRE_MINUTES=240
REFRESH_TOKEN_EXPIRE_MINUTES=1400
SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
SMTP_PASSWORD=*G15y^N0reP!y
LDAP_SERVER=ldap://192.168.101.111
LDAP_ENABLE=False
ENABLE_MAKER_CHECKER=False
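
Note: the new SECRET_KEY and REFRESH_KEY are 64-character hex strings committed to the repository. A minimal sketch for generating fresh values with the standard-library secrets module (any 32-byte random hex works here):

    import secrets

    # 32 random bytes -> 64 hex characters, one independent value per key
    print("SECRET_KEY=" + secrets.token_hex(32))
    print("REFRESH_KEY=" + secrets.token_hex(32))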


@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
ninja-build \
build-essential \
pkg-config \
wget \
make \
cmake \
g++ \
gcc
wget
# Switch to the Python image for the final build stage
FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080
ENV PORT=80
EXPOSE 80
# Prepare a non-root user
RUN adduser --system worker
RUN adduser --group worker
RUN adduser --system --ingroup worker worker
WORKDIR /home/worker/app
RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts
ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
USER worker
ENTRYPOINT python -m private_gpt
ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
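
Note: the ENTRYPOINT now starts the ASGI server directly instead of running python -m private_gpt. A rough Python equivalent of what the container executes, assuming private_gpt.main exposes an app object:

    import uvicorn

    # Same effect as: uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
    uvicorn.run("private_gpt.main:app", host="0.0.0.0", port=80)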


@@ -6,11 +6,12 @@ services:
- ./local_data/:/home/worker/app/local_data
- ./models/:/home/worker/app/models
ports:
- 8000:8000
- 80:80
environment:
PORT: 8000
PORT: 80
PGPT_PROFILES: docker
PGPT_MODE: local
env_file:
- .env
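
Note: the host mapping (80:80), the PORT variable, and the Dockerfile's EXPOSE 80 now agree. A small illustrative sketch of how an app can honor PORT at startup (the actual lookup inside private_gpt may differ):

    import os

    # Read the port injected by docker-compose, defaulting to 80
    port = int(os.environ.get("PORT", "80"))
    print(f"Serving on port {port}")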


@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
from private_gpt.settings.settings import Settings
logger = logging.getLogger(__name__)
local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
@singleton
class LLMComponent:
llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
def __init__(self, settings: Settings) -> None:
llm_mode = settings.llm.mode
if settings.llm.tokenizer:
set_global_tokenizer(
AutoTokenizer.from_pretrained(
local_path
try:
set_global_tokenizer(
AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=settings.llm.tokenizer,
cache_dir=str(models_cache_path),
token=settings.huggingface.access_token,
)
)
)
except Exception as e:
logger.warning(
"Failed to download tokenizer %s: %s. Falling back to "
"default tokenizer.",
settings.llm.tokenizer,
e,
)
logger.info("Initializing the LLM in mode=%s", llm_mode)
match settings.llm.mode:


@@ -166,7 +166,8 @@ class ChatService:
)
system_prompt = (
"""
You should answer questions only in English or Nepali.
You are a helpful assistant that should answer questions
only in English or Nepali.
Responses should be based on the context documents provided
and should be relevant, informative, and easy to understand.
You should aim to deliver high-quality responses that are


@@ -70,7 +70,6 @@ async def chunks_retrieval(
action='Chat',
details={
"query": body.text,
'user': current_user.username,
},
user_id=current_user.id
)


@@ -208,7 +208,6 @@ async def create_documents(
status=MakerCheckerStatus.PENDING,
doc_type_id=departments.doc_type_id,
)
print("DOCUMENT CREATE: ", docs_in)
document = crud.documents.create(db=db, obj_in=docs_in)
department_ids = department_ids if department_ids else "1"
department_ids = [int(number) for number in department_ids.split(",")]


@@ -43,12 +43,12 @@ def register_user(
department_id=department.id,
checker=True if role == 'OPERATOR' else False
)
# try:
# send_registration_email(fullname, email, password)
# except Exception as e:
# logging.info(f"Failed to send registration email: {str(e)}")
# raise HTTPException(
# status_code=500, detail=f"Failed to send registration email.")
try:
send_registration_email(fullname, email, password)
except Exception as e:
logging.error(f"Failed to send registration email: {str(e)}")
raise HTTPException(
status_code=500, detail="Failed to send registration email.")
return crud.user.create(db, obj_in=user_in)
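
Note: send_registration_email is project code that is not shown in this diff. A minimal sketch of what such a helper could look like with the standard-library smtplib and the SMTP_* values from .env (function body, port, and message text here are illustrative assumptions):

    import smtplib
    from email.message import EmailMessage

    def send_registration_email(fullname: str, email: str, password: str) -> None:
        msg = EmailMessage()
        msg["Subject"] = "Your QuickGPT account"
        msg["From"] = "noreply@gibl.com.np"      # SMTP_USERNAME
        msg["To"] = email
        msg.set_content(f"Hello {fullname}, your temporary password is: {password}")
        with smtplib.SMTP("mail.gibl.com.np", 587) as smtp:  # SMTP_SERVER
            smtp.starttls()
            smtp.login("noreply@gibl.com.np", "<SMTP_PASSWORD>")
            smtp.send_message(msg)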


@@ -7,13 +7,13 @@ from datetime import datetime
from typing import Any, List
from sqlalchemy.orm import Session
from sqlalchemy import select
from fastapi_pagination import Page, paginate
from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile
from private_gpt.users.api import deps
from private_gpt.constants import UNCHECKED_DIR
from private_gpt.users.constants.role import Role
from private_gpt.users.core.config import settings
from private_gpt.users import crud, models, schemas
from private_gpt.server.ingest.ingest_router import create_documents, ingest
from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix='/documents', tags=['Documents'])
ENABLE_MAKER_CHECKER = False
ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER
def get_username(db, id):
user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
status_code=500,
detail="Internal Server Error.",
)
@router.post('/upload', response_model=schemas.Document)
async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
user_id=current_user.id
)
if document.doc_type_id == 2:
if document.doc_type_id == 2: # For OCR
return await process_ocr(request, unchecked_path)
elif document.doc_type_id == 3:
elif document.doc_type_id == 3: # For BOTH
return await process_both_ocr(request, unchecked_path)
else:
return await ingest(request, unchecked_path)
return await ingest(request, unchecked_path) # For pdf
elif checker_in.status == MakerCheckerStatus.REJECTED.value:


@@ -3,9 +3,11 @@ from functools import lru_cache
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
PROJECT_NAME: str = "QuickGPT"
API_V1_STR: str = "/v1"
SECRET_KEY: str
REFRESH_KEY: str
ACCESS_TOKEN_EXPIRE_MINUTES: int
REFRESH_TOKEN_EXPIRE_MINUTES: int
@@ -30,7 +32,8 @@ class Settings(BaseSettings):
LDAP_SERVER: str
LDAP_ENABLE: bool
ENABLE_MAKER_CHECKER: bool
class Config:
case_sensitive = True
env_file = ".env"
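
Note: with pydantic's BaseSettings, each declared field is populated from the environment or the env_file; required fields without defaults (SECRET_KEY, REFRESH_KEY, ENABLE_MAKER_CHECKER, ...) raise a validation error if unset. A minimal sketch of the same pattern:

    from pydantic_settings import BaseSettings

    class DemoSettings(BaseSettings):
        SECRET_KEY: str            # required: read from env or .env
        LDAP_ENABLE: bool = False  # optional: default applies if unset

        class Config:
            env_file = ".env"

    settings = DemoSettings()  # validates on construction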


@@ -1,4 +1,3 @@
import os
import random
import string
from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union
from jose import JWTError, jwt
from passlib.context import CryptContext
from private_gpt.users.core.config import settings
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1 # 24 hrs
REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1 # 24 hrs # Default Value
REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days # Default Value
ALGORITHM = "HS256"
# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY'] # should be kept secret
# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY'] # should be kept secret
JWT_SECRET_KEY = "QUICKGPT"
JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
JWT_SECRET_KEY = settings.SECRET_KEY
JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
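
Note: tokens are now signed with the keys from settings instead of hard-coded strings. A sketch of the usual python-jose pattern these constants feed into (the project's real token helpers may differ):

    from datetime import datetime, timedelta
    from jose import jwt

    def create_access_token(subject: str) -> str:
        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
        return jwt.encode({"sub": subject, "exp": expire}, JWT_SECRET_KEY, algorithm=ALGORITHM)

    token = create_access_token("alice")
    # Raises jose.JWTError / ExpiredSignatureError on a bad or expired token
    claims = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])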


@@ -40,10 +40,10 @@ print("LLM model downloaded!")
# Download Tokenizer
print(f"Downloading tokenizer {settings().llm.tokenizer}")
local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
AutoTokenizer.from_pretrained(
local_path
pretrained_model_name_or_path=settings().llm.tokenizer,
cache_dir=models_cache_path,
)
print("Tokenizer downloaded!")
print("Setup done")
print("Setup done")


@@ -9,16 +9,28 @@ embedding:
mode: ${PGPT_MODE:sagemaker}
llamacpp:
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}
huggingface:
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}
sagemaker:
llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
ollama:
llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
top_k: ${PGPT_OLLAMA_TOP_K:40}
top_p: ${PGPT_OLLAMA_TOP_P:0.9}
repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
ui:
enabled: true
path: /
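
Note: values of the form ${NAME:default} are environment placeholders with a fallback. An illustrative sketch of that expansion rule (private_gpt ships its own settings loader; this is not its code):

    import os
    import re

    def expand(value: str) -> str:
        # Replace ${NAME:default} with os.environ["NAME"], else the default
        return re.sub(
            r"\$\{(\w+):([^}]*)\}",
            lambda m: os.environ.get(m.group(1), m.group(2)),
            value,
        )

    print(expand("${PGPT_OLLAMA_LLM_MODEL:mistral}"))  # -> "mistral" unless overridden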


@@ -25,7 +25,7 @@ embedding:
ingest_mode: simple
huggingface:
embedding_hf_model_name: BAAI/bge-large-en-v1.5
embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1
vectorstore:
database: qdrant


@@ -7,7 +7,7 @@ server:
cors:
enabled: true
allow_credentials: true
allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
allow_origins: ["*"]
allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
allow_headers: ["*"]
auth:
@@ -44,9 +44,9 @@ llm:
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
llamacpp:
prompt_style: "default"
llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
prompt_style: "chatml"
llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)