diff --git a/.env b/.env
index 56559ddd..d36234af 100644
--- a/.env
+++ b/.env
@@ -11,8 +11,10 @@
 SUPER_ADMIN_EMAIL=superadmin@email.com
 SUPER_ADMIN_PASSWORD=supersecretpassword
 SUPER_ADMIN_ACCOUNT_NAME=superaccount
-SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
-ACCESS_TOKEN_EXPIRE_MINUTES=720
+SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
+REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
+
+ACCESS_TOKEN_EXPIRE_MINUTES=240
 REFRESH_TOKEN_EXPIRE_MINUTES=1400
 
 SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@
 SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y
 LDAP_SERVER=ldap://192.168.101.111
-LDAP_ENABLE=False
\ No newline at end of file
+LDAP_ENABLE=False
+ENABLE_MAKER_CHECKER=False
\ No newline at end of file
diff --git a/Dockerfile.local b/Dockerfile.local
index 93f4ae1a..8775ada7 100644
--- a/Dockerfile.local
+++ b/Dockerfile.local
@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget \
-    make \
-    cmake \
-    g++ \
-    gcc
+    wget
 
 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
 FROM base as app
 
 ENV PYTHONUNBUFFERED=1
-ENV PORT=8080
-EXPOSE 8080
+ENV PORT=80
+EXPOSE 80
 
 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser --group worker
+RUN adduser --system --ingroup worker worker
 WORKDIR /home/worker/app
 
 RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts
 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 
 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 56a79d72..563655f4 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -6,11 +6,12 @@ services:
       - ./local_data/:/home/worker/app/local_data
       - ./models/:/home/worker/app/models
     ports:
-      - 8000:8000
+      - 80:80
     environment:
-      PORT: 8000
+      PORT: 80
       PGPT_PROFILES: docker
       PGPT_MODE: local
     env_file:
       - .env
+
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 9a3b25ad..24aab6db 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
 from private_gpt.settings.settings import Settings
 
 logger = logging.getLogger(__name__)
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 
 @singleton
 class LLMComponent:
     llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
         if settings.llm.tokenizer:
-            set_global_tokenizer(
-                AutoTokenizer.from_pretrained(
-                    local_path
+            try:
+                set_global_tokenizer(
+                    AutoTokenizer.from_pretrained(
+                        pretrained_model_name_or_path=settings.llm.tokenizer,
+                        cache_dir=str(models_cache_path),
+                        token=settings.huggingface.access_token,
+                    )
                 )
-            )
+            except Exception as e:
+                logger.warning(
+                    "Failed to download tokenizer %s (%s). Falling back to "
+                    "default tokenizer.",
+                    settings.llm.tokenizer,
+                    e,
+                )
+
         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index fa5fe94b..2a97a5c2 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -166,7 +166,8 @@ class ChatService:
         )
         system_prompt = (
             """
-            You should answer questions only in English or Nepali.
+            You are a helpful assistant that should answer questions
+            only in English or Nepali.
             Responses should be based on the context documents provided
             and should be relevant, informative, and easy to understand.
             You should aim to deliver high-quality responses that are
diff --git a/private_gpt/server/chunks/chunks_router.py b/private_gpt/server/chunks/chunks_router.py
index ff9a9d5f..4a658f00 100644
--- a/private_gpt/server/chunks/chunks_router.py
+++ b/private_gpt/server/chunks/chunks_router.py
@@ -70,7 +70,6 @@ async def chunks_retrieval(
         action='Chat',
         details={
             "query": body.text,
-            'user': current_user.username,
         },
         user_id=current_user.id
     )
diff --git a/private_gpt/server/ingest/ingest_router.py b/private_gpt/server/ingest/ingest_router.py
index 6074a949..df14fe75 100644
--- a/private_gpt/server/ingest/ingest_router.py
+++ b/private_gpt/server/ingest/ingest_router.py
@@ -208,7 +208,6 @@ async def create_documents(
         status=MakerCheckerStatus.PENDING,
         doc_type_id=departments.doc_type_id,
     )
-    print("DOCUMENT CREATE: ", docs_in)
     document = crud.documents.create(db=db, obj_in=docs_in)
     department_ids = department_ids if department_ids else "1"
     department_ids = [int(number) for number in department_ids.split(",")]
diff --git a/private_gpt/users/api/v1/routers/auth.py b/private_gpt/users/api/v1/routers/auth.py
index 8b91cff8..1ee87760 100644
--- a/private_gpt/users/api/v1/routers/auth.py
+++ b/private_gpt/users/api/v1/routers/auth.py
@@ -43,12 +43,12 @@ def register_user(
         department_id=department.id,
         checker= True if role == 'OPERATOR' else False
     )
-    # try:
-    #     send_registration_email(fullname, email, password)
-    # except Exception as e:
-    #     logging.info(f"Failed to send registration email: {str(e)}")
-    #     raise HTTPException(
-    #         status_code=500, detail=f"Failed to send registration email.")
+    try:
+        send_registration_email(fullname, email, password)
+    except Exception as e:
+        logging.error(f"Failed to send registration email: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail="Failed to send registration email.")
     return crud.user.create(db, obj_in=user_in)
diff --git a/private_gpt/users/api/v1/routers/documents.py b/private_gpt/users/api/v1/routers/documents.py
index 2242996e..a828d19a 100644
--- a/private_gpt/users/api/v1/routers/documents.py
+++ b/private_gpt/users/api/v1/routers/documents.py
@@ -7,13 +7,13 @@ from datetime import datetime
 from typing import Any, List
 
 from sqlalchemy.orm import Session
-from sqlalchemy import select
 from fastapi_pagination import Page, paginate
 from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile
 
 from private_gpt.users.api import deps
 from private_gpt.constants import UNCHECKED_DIR
 from private_gpt.users.constants.role import Role
+from private_gpt.users.core.config import settings
 from private_gpt.users import crud, models, schemas
 from private_gpt.server.ingest.ingest_router import create_documents, ingest
 from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@
 logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix='/documents', tags=['Documents'])
-ENABLE_MAKER_CHECKER = False
+ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER
 
 def get_username(db, id):
     user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
             status_code=500,
             detail="Internal Server Error.",
         )
-
 
 @router.post('/upload', response_model=schemas.Document)
 async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
                 user_id=current_user.id
             )
 
-            if document.doc_type_id == 2:
+            if document.doc_type_id == 2:  # For OCR
                 return await process_ocr(request, unchecked_path)
-            elif document.doc_type_id == 3:
+            elif document.doc_type_id == 3:  # For BOTH
                 return await process_both_ocr(request, unchecked_path)
             else:
-                return await ingest(request, unchecked_path)
+                return await ingest(request, unchecked_path)  # For PDF
 
         elif checker_in.status == MakerCheckerStatus.REJECTED.value:
diff --git a/private_gpt/users/core/config.py b/private_gpt/users/core/config.py
index dca6f9b9..c2854130 100644
--- a/private_gpt/users/core/config.py
+++ b/private_gpt/users/core/config.py
@@ -3,9 +3,11 @@ from functools import lru_cache
 from pydantic_settings import BaseSettings
 
 
 class Settings(BaseSettings):
-    PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
+    PROJECT_NAME: str = "QuickGPT"
     API_V1_STR: str = "/v1"
     SECRET_KEY: str
+    REFRESH_KEY: str
+
     ACCESS_TOKEN_EXPIRE_MINUTES: int
     REFRESH_TOKEN_EXPIRE_MINUTES: int
@@ -30,7 +32,8 @@ class Settings(BaseSettings):
     LDAP_SERVER: str
     LDAP_ENABLE: bool
-
+    ENABLE_MAKER_CHECKER: bool
+
     class Config:
         case_sensitive = True
         env_file = ".env"
diff --git a/private_gpt/users/core/security.py b/private_gpt/users/core/security.py
index bfa2e0e4..f3ec97f8 100644
--- a/private_gpt/users/core/security.py
+++ b/private_gpt/users/core/security.py
@@ -1,4 +1,3 @@
-import os
 import random
 import string
 from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union
 
 from jose import JWTError, jwt
 from passlib.context import CryptContext
+from private_gpt.users.core.config import settings
 
-ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 12 hrs
-REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 24 hrs (default)
+REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days (default)
 ALGORITHM = "HS256"
-# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY']  # should be kept secret
-# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY']  # should be kept secret
-JWT_SECRET_KEY = "QUICKGPT"
-JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
+JWT_SECRET_KEY = settings.SECRET_KEY
+JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY
 
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
diff --git a/scripts/setup b/scripts/setup
index cbf49577..edba1049 100755
--- a/scripts/setup
+++ b/scripts/setup
@@ -40,10 +40,10 @@ print("LLM model downloaded!")
 
 # Download Tokenizer
 print(f"Downloading tokenizer {settings().llm.tokenizer}")
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 AutoTokenizer.from_pretrained(
-    local_path
+    pretrained_model_name_or_path=settings().llm.tokenizer,
+    cache_dir=models_cache_path,
 )
 print("Tokenizer downloaded!")
 
-print("Setup done")
\ No newline at end of file
+print("Setup done")
diff --git a/settings-docker.yaml b/settings-docker.yaml
index d71c4070..cbd352f8 100644
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@@ -9,16 +9,28 @@ embedding:
   mode: ${PGPT_MODE:sagemaker}
 
 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}
 
 huggingface:
-  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}
 
 sagemaker:
   llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
 
+ollama:
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
+  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
+  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
+  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
+  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
+  top_k: ${PGPT_OLLAMA_TOP_K:40}
+  top_p: ${PGPT_OLLAMA_TOP_P:0.9}
+  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
+  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
+  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
+
 ui:
   enabled: true
   path: /
diff --git a/settings-local.yaml b/settings-local.yaml
index 102eee4c..ed6c522b 100644
--- a/settings-local.yaml
+++ b/settings-local.yaml
@@ -25,7 +25,7 @@ embedding:
   ingest_mode: simple
 
 huggingface:
-  embedding_hf_model_name: BAAI/bge-large-en-v1.5
+  embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1
 
 vectorstore:
   database: qdrant
diff --git a/settings.yaml b/settings.yaml
index 5ce50b55..89aa4bcd 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -7,7 +7,7 @@ server:
   cors:
     enabled: true
     allow_credentials: true
-    allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
+    allow_origins: ["*"]
     allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
     allow_headers: ["*"]
   auth:
@@ -44,9 +44,9 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
 
 llamacpp:
-  prompt_style: "default"
-  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
-  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
+  prompt_style: "chatml"
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0  # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.
   top_k: 40  # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9  # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)