Added new docker files

Saurab-Shrestha 2024-04-28 11:25:38 +05:45
parent 1d6fc7144a
commit c7aac53cd9
15 changed files with 76 additions and 54 deletions

.env

@@ -11,8 +11,10 @@ SUPER_ADMIN_EMAIL=superadmin@email.com
 SUPER_ADMIN_PASSWORD=supersecretpassword
 SUPER_ADMIN_ACCOUNT_NAME=superaccount
-SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
-ACCESS_TOKEN_EXPIRE_MINUTES=720
+SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
+REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
+ACCESS_TOKEN_EXPIRE_MINUTES=240
 REFRESH_TOKEN_EXPIRE_MINUTES=1400
 SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y
 LDAP_SERVER=ldap://192.168.101.111
 LDAP_ENABLE=False
+ENABLE_MAKER_CHECKER=False
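For reference, a 64-hex-character key of the shape used for SECRET_KEY above can be generated with Python's standard library; a minimal sketch:

import secrets

# Prints a 64-character hex string suitable for use as a signing key,
# the same shape as the SECRET_KEY value above.
print(secrets.token_hex(32))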

Dockerfile

@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget \
-    make \
-    cmake \
-    g++ \
-    gcc
+    wget

 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
 FROM base as app

 ENV PYTHONUNBUFFERED=1
-ENV PORT=8080
-EXPOSE 8080
+ENV PORT=80
+EXPOSE 80

 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser --group worker
+RUN adduser --system --ingroup worker worker
 WORKDIR /home/worker/app

 RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts

 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
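The new ENTRYPOINT serves the app with uvicorn instead of python -m private_gpt. A minimal sketch of the equivalent programmatic startup, assuming private_gpt.main exposes an ASGI/FastAPI instance named app (implied by the uvicorn target, not shown elsewhere in this diff):

import uvicorn

# Equivalent to: ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
if __name__ == "__main__":
    uvicorn.run("private_gpt.main:app", host="0.0.0.0", port=80)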

docker-compose file

@@ -6,11 +6,12 @@ services:
       - ./local_data/:/home/worker/app/local_data
       - ./models/:/home/worker/app/models
     ports:
-      - 8000:8000
+      - 80:80
     environment:
-      PORT: 8000
+      PORT: 80
       PGPT_PROFILES: docker
       PGPT_MODE: local
     env_file:
       - .env

LLM component

@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
 from private_gpt.settings.settings import Settings
 logger = logging.getLogger(__name__)
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"

 @singleton
 class LLMComponent:
     llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
         if settings.llm.tokenizer:
-            set_global_tokenizer(
-                AutoTokenizer.from_pretrained(
-                    local_path
-                )
-            )
+            try:
+                set_global_tokenizer(
+                    AutoTokenizer.from_pretrained(
+                        pretrained_model_name_or_path=settings.llm.tokenizer,
+                        cache_dir=str(models_cache_path),
+                        token=settings.huggingface.access_token,
+                    )
+                )
+            except Exception as e:
+                logger.warning(
+                    "Failed to download tokenizer %s: %s. Falling back to "
+                    "default tokenizer.",
+                    settings.llm.tokenizer,
+                    e,
+                )

         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
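The same download-with-fallback pattern, isolated as a runnable sketch. The import paths for AutoTokenizer and set_global_tokenizer are assumptions (they are not shown in this hunk), and set_tokenizer_with_fallback is an illustrative name, not a function from the codebase:

import logging

from transformers import AutoTokenizer  # assumed import, not shown in the diff
from llama_index.core.utils import set_global_tokenizer  # assumed import

logger = logging.getLogger(__name__)


def set_tokenizer_with_fallback(name: str, cache_dir: str, token: str | None = None) -> None:
    try:
        set_global_tokenizer(
            AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=name,
                cache_dir=cache_dir,
                token=token,
            )
        )
    except Exception as e:
        # Any download or auth failure leaves the library's default tokenizer in place.
        logger.warning(
            "Failed to download tokenizer %s: %s. Falling back to default tokenizer.",
            name,
            e,
        )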

Chat service

@@ -166,7 +166,8 @@ class ChatService:
         )
         system_prompt = (
             """
-            You should answer questions only in English or Nepali.
+            You are a helpful assistant that should answer questions
+            only in English or Nepali.
             Responses should be based on the context documents provided
             and should be relevant, informative, and easy to understand.
             You should aim to deliver high-quality responses that are

Chunks router

@@ -70,7 +70,6 @@ async def chunks_retrieval(
         action='Chat',
         details={
             "query": body.text,
-            'user': current_user.username,
         },
         user_id=current_user.id
     )

Ingest router

@@ -208,7 +208,6 @@ async def create_documents(
         status=MakerCheckerStatus.PENDING,
         doc_type_id=departments.doc_type_id,
     )
-    print("DOCUMENT CREATE: ", docs_in)
     document = crud.documents.create(db=db, obj_in=docs_in)
     department_ids = department_ids if department_ids else "1"
     department_ids = [int(number) for number in department_ids.split(",")]

User registration router

@@ -43,12 +43,12 @@ def register_user(
         department_id=department.id,
         checker= True if role == 'OPERATOR' else False
     )
-    # try:
-    #     send_registration_email(fullname, email, password)
-    # except Exception as e:
-    #     logging.info(f"Failed to send registration email: {str(e)}")
-    #     raise HTTPException(
-    #         status_code=500, detail=f"Failed to send registration email.")
+    try:
+        send_registration_email(fullname, email, password)
+    except Exception as e:
+        logging.info(f"Failed to send registration email: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Failed to send registration email.")
     return crud.user.create(db, obj_in=user_in)

Documents router

@@ -7,13 +7,13 @@ from datetime import datetime
 from typing import Any, List
 from sqlalchemy.orm import Session
-from sqlalchemy import select
 from fastapi_pagination import Page, paginate
 from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile

 from private_gpt.users.api import deps
 from private_gpt.constants import UNCHECKED_DIR
 from private_gpt.users.constants.role import Role
+from private_gpt.users.core.config import settings
 from private_gpt.users import crud, models, schemas
 from private_gpt.server.ingest.ingest_router import create_documents, ingest
 from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)

 router = APIRouter(prefix='/documents', tags=['Documents'])

-ENABLE_MAKER_CHECKER = False
+ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER

 def get_username(db, id):
     user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
             status_code=500,
             detail="Internal Server Error.",
         )
-
 @router.post('/upload', response_model=schemas.Document)
 async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
             user_id=current_user.id
         )

-        if document.doc_type_id == 2:
+        if document.doc_type_id == 2:  # For OCR
             return await process_ocr(request, unchecked_path)
-        elif document.doc_type_id == 3:
+        elif document.doc_type_id == 3:  # For BOTH
             return await process_both_ocr(request, unchecked_path)
         else:
-            return await ingest(request, unchecked_path)
+            return await ingest(request, unchecked_path)  # For pdf
     elif checker_in.status == MakerCheckerStatus.REJECTED.value:

Users core config

@@ -3,9 +3,11 @@ from functools import lru_cache
 from pydantic_settings import BaseSettings

 class Settings(BaseSettings):
-    PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
+    PROJECT_NAME: str = "QuickGPT"
     API_V1_STR: str = "/v1"
     SECRET_KEY: str
+    REFRESH_KEY: str
     ACCESS_TOKEN_EXPIRE_MINUTES: int
     REFRESH_TOKEN_EXPIRE_MINUTES: int
@@ -30,7 +32,8 @@ class Settings(BaseSettings):
     LDAP_SERVER: str
     LDAP_ENABLE: bool
+    ENABLE_MAKER_CHECKER: bool

     class Config:
         case_sensitive = True
         env_file = ".env"
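With pydantic-settings, each typed field on the class is populated from the matching variable in .env, so the new REFRESH_KEY and ENABLE_MAKER_CHECKER entries flow through automatically. A minimal sketch with just the new fields (subset chosen for illustration):

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    SECRET_KEY: str
    REFRESH_KEY: str
    ENABLE_MAKER_CHECKER: bool

    class Config:
        case_sensitive = True
        env_file = ".env"


settings = Settings()
# Mirrors the documents-router change above:
ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER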

Users core security

@@ -1,4 +1,3 @@
-import os
 import random
 import string
 from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union
 from jose import JWTError, jwt
 from passlib.context import CryptContext
+from private_gpt.users.core.config import settings

-ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 12 hrs
-REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 24 hrs  # Default value
+REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days  # Default value
 ALGORITHM = "HS256"
-# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY']    # should be kept secret
-# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY']    # should be kept secret
-JWT_SECRET_KEY = "QUICKGPT"
-JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
+JWT_SECRET_KEY = settings.SECRET_KEY
+JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY

 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
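A sketch of how the settings-backed keys would typically be used with python-jose; create_access_token and decode_access_token are illustrative helpers, not functions shown in this diff, and they rely on the ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES, and JWT_SECRET_KEY constants defined above:

from datetime import datetime, timedelta

from jose import jwt


def create_access_token(subject: str) -> str:
    # Signs a token that expires after ACCESS_TOKEN_EXPIRE_MINUTES.
    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    return jwt.encode({"sub": subject, "exp": expire}, JWT_SECRET_KEY, algorithm=ALGORITHM)


def decode_access_token(token: str) -> dict:
    # Raises jose.JWTError if the signature is invalid or the token has expired.
    return jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])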

scripts/setup

@@ -40,10 +40,10 @@ print("LLM model downloaded!")

 # Download Tokenizer
 print(f"Downloading tokenizer {settings().llm.tokenizer}")
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 AutoTokenizer.from_pretrained(
-    local_path
+    pretrained_model_name_or_path=settings().llm.tokenizer,
+    cache_dir=models_cache_path,
 )
 print("Tokenizer downloaded!")

 print("Setup done")

Docker profile settings (YAML)

@@ -9,16 +9,28 @@ embedding:
   mode: ${PGPT_MODE:sagemaker}

 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}

 huggingface:
-  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}

 sagemaker:
   llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}

+ollama:
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
+  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
+  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
+  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
+  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
+  top_k: ${PGPT_OLLAMA_TOP_K:40}
+  top_p: ${PGPT_OLLAMA_TOP_P:0.9}
+  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
+  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
+  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
+
 ui:
   enabled: true
   path: /

Local profile settings (YAML)

@@ -25,7 +25,7 @@ embedding:
   ingest_mode: simple

 huggingface:
-  embedding_hf_model_name: BAAI/bge-large-en-v1.5
+  embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1

 vectorstore:
   database: qdrant

Main settings (YAML)

@@ -7,7 +7,7 @@ server:
   cors:
     enabled: true
     allow_credentials: true
-    allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
+    allow_origins: ["*"]
     allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
     allow_headers: ["*"]
   auth:
@@ -44,9 +44,9 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2

 llamacpp:
-  prompt_style: "default"
-  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
-  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
+  prompt_style: "chatml"
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)