Added new docker files

Saurab-Shrestha 2024-04-28 11:25:38 +05:45
parent 1d6fc7144a
commit c7aac53cd9
15 changed files with 76 additions and 54 deletions

.env

@@ -11,8 +11,10 @@ SUPER_ADMIN_EMAIL=superadmin@email.com
 SUPER_ADMIN_PASSWORD=supersecretpassword
 SUPER_ADMIN_ACCOUNT_NAME=superaccount
-SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
-ACCESS_TOKEN_EXPIRE_MINUTES=720
+SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
+REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
+ACCESS_TOKEN_EXPIRE_MINUTES=240
 REFRESH_TOKEN_EXPIRE_MINUTES=1400
 SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y
 LDAP_SERVER=ldap://192.168.101.111
 LDAP_ENABLE=False
+ENABLE_MAKER_CHECKER=False
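For reference, a 64-hex-character key of the shape used for SECRET_KEY above can be generated with Python's standard library; a minimal sketch:

import secrets

# Prints a 64-character hex string suitable for use as a signing key,
# the same shape as the SECRET_KEY value above.
print(secrets.token_hex(32))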

Dockerfile

@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget \
-    make \
-    cmake \
-    g++ \
-    gcc
+    wget

 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
 FROM base as app

 ENV PYTHONUNBUFFERED=1
-ENV PORT=8080
-EXPOSE 8080
+ENV PORT=80
+EXPOSE 80

 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser --group worker
+RUN adduser --system --ingroup worker worker
 WORKDIR /home/worker/app

 RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts

 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
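The new ENTRYPOINT serves the app with uvicorn instead of python -m private_gpt. A minimal sketch of the equivalent programmatic startup, assuming private_gpt.main exposes an ASGI/FastAPI instance named app (implied by the uvicorn target, not shown elsewhere in this diff):

import uvicorn

# Equivalent to: ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
if __name__ == "__main__":
    uvicorn.run("private_gpt.main:app", host="0.0.0.0", port=80)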

docker-compose file

@@ -6,11 +6,12 @@ services:
       - ./local_data/:/home/worker/app/local_data
       - ./models/:/home/worker/app/models
     ports:
-      - 8000:8000
+      - 80:80
     environment:
-      PORT: 8000
+      PORT: 80
       PGPT_PROFILES: docker
       PGPT_MODE: local
     env_file:
       - .env

LLM component

@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
 from private_gpt.settings.settings import Settings
 logger = logging.getLogger(__name__)
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"

 @singleton
 class LLMComponent:
     llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
         if settings.llm.tokenizer:
-            set_global_tokenizer(
-                AutoTokenizer.from_pretrained(
-                    local_path
-                )
-            )
+            try:
+                set_global_tokenizer(
+                    AutoTokenizer.from_pretrained(
+                        pretrained_model_name_or_path=settings.llm.tokenizer,
+                        cache_dir=str(models_cache_path),
+                        token=settings.huggingface.access_token,
+                    )
+                )
+            except Exception as e:
+                logger.warning(
+                    "Failed to download tokenizer %s: %s. Falling back to "
+                    "default tokenizer.",
+                    settings.llm.tokenizer,
+                    e,
+                )

         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
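The same download-with-fallback pattern, isolated as a runnable sketch. The import paths for AutoTokenizer and set_global_tokenizer are assumptions (they are not shown in this hunk), and set_tokenizer_with_fallback is an illustrative name, not a function from the codebase:

import logging

from transformers import AutoTokenizer  # assumed import, not shown in the diff
from llama_index.core.utils import set_global_tokenizer  # assumed import

logger = logging.getLogger(__name__)


def set_tokenizer_with_fallback(name: str, cache_dir: str, token: str | None = None) -> None:
    try:
        set_global_tokenizer(
            AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=name,
                cache_dir=cache_dir,
                token=token,
            )
        )
    except Exception as e:
        # Any download or auth failure leaves the library's default tokenizer in place.
        logger.warning(
            "Failed to download tokenizer %s: %s. Falling back to default tokenizer.",
            name,
            e,
        )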

Chat service

@@ -166,7 +166,8 @@ class ChatService:
         )
         system_prompt = (
             """
-            You should answer questions only in English or Nepali.
+            You are a helpful assistant that should answer questions
+            only in English or Nepali.
             Responses should be based on the context documents provided
             and should be relevant, informative, and easy to understand.
             You should aim to deliver high-quality responses that are

Chunks router

@@ -70,7 +70,6 @@ async def chunks_retrieval(
         action='Chat',
         details={
             "query": body.text,
-            'user': current_user.username,
         },
         user_id=current_user.id
     )

Ingest router

@@ -208,7 +208,6 @@ async def create_documents(
         status=MakerCheckerStatus.PENDING,
         doc_type_id=departments.doc_type_id,
     )
-    print("DOCUMENT CREATE: ", docs_in)
     document = crud.documents.create(db=db, obj_in=docs_in)
     department_ids = department_ids if department_ids else "1"
     department_ids = [int(number) for number in department_ids.split(",")]

User registration router

@@ -43,12 +43,12 @@ def register_user(
         department_id=department.id,
         checker= True if role == 'OPERATOR' else False
     )
-    # try:
-    #     send_registration_email(fullname, email, password)
-    # except Exception as e:
-    #     logging.info(f"Failed to send registration email: {str(e)}")
-    #     raise HTTPException(
-    #         status_code=500, detail=f"Failed to send registration email.")
+    try:
+        send_registration_email(fullname, email, password)
+    except Exception as e:
+        logging.info(f"Failed to send registration email: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Failed to send registration email.")
     return crud.user.create(db, obj_in=user_in)

Documents router

@@ -7,13 +7,13 @@ from datetime import datetime
 from typing import Any, List
 from sqlalchemy.orm import Session
-from sqlalchemy import select
 from fastapi_pagination import Page, paginate
 from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile

 from private_gpt.users.api import deps
 from private_gpt.constants import UNCHECKED_DIR
 from private_gpt.users.constants.role import Role
+from private_gpt.users.core.config import settings
 from private_gpt.users import crud, models, schemas
 from private_gpt.server.ingest.ingest_router import create_documents, ingest
 from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)

 router = APIRouter(prefix='/documents', tags=['Documents'])

-ENABLE_MAKER_CHECKER = False
+ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER

 def get_username(db, id):
     user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
             status_code=500,
             detail="Internal Server Error.",
         )
-
 @router.post('/upload', response_model=schemas.Document)
 async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
             user_id=current_user.id
         )

-        if document.doc_type_id == 2:
+        if document.doc_type_id == 2:  # For OCR
             return await process_ocr(request, unchecked_path)
-        elif document.doc_type_id == 3:
+        elif document.doc_type_id == 3:  # For BOTH
             return await process_both_ocr(request, unchecked_path)
         else:
-            return await ingest(request, unchecked_path)
+            return await ingest(request, unchecked_path)  # For pdf
     elif checker_in.status == MakerCheckerStatus.REJECTED.value:

Users core config

@@ -3,9 +3,11 @@ from functools import lru_cache
 from pydantic_settings import BaseSettings

 class Settings(BaseSettings):
-    PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
+    PROJECT_NAME: str = "QuickGPT"
     API_V1_STR: str = "/v1"
     SECRET_KEY: str
+    REFRESH_KEY: str
     ACCESS_TOKEN_EXPIRE_MINUTES: int
     REFRESH_TOKEN_EXPIRE_MINUTES: int
@@ -30,7 +32,8 @@ class Settings(BaseSettings):
     LDAP_SERVER: str
     LDAP_ENABLE: bool
+    ENABLE_MAKER_CHECKER: bool

     class Config:
         case_sensitive = True
         env_file = ".env"
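With pydantic-settings, each typed field on the class is populated from the matching variable in .env, so the new REFRESH_KEY and ENABLE_MAKER_CHECKER entries flow through automatically. A minimal sketch with just the new fields (subset chosen for illustration):

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    SECRET_KEY: str
    REFRESH_KEY: str
    ENABLE_MAKER_CHECKER: bool

    class Config:
        case_sensitive = True
        env_file = ".env"


settings = Settings()
# Mirrors the documents-router change above:
ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER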

Users core security

@@ -1,4 +1,3 @@
-import os
 import random
 import string
 from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union
 from jose import JWTError, jwt
 from passlib.context import CryptContext
+from private_gpt.users.core.config import settings

-ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 12 hrs
-REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 24 hrs  # Default value
+REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days  # Default value
 ALGORITHM = "HS256"
-# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY']    # should be kept secret
-# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY']    # should be kept secret
-JWT_SECRET_KEY = "QUICKGPT"
-JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
+JWT_SECRET_KEY = settings.SECRET_KEY
+JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY

 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
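A sketch of how the settings-backed keys would typically be used with python-jose; create_access_token and decode_access_token are illustrative helpers, not functions shown in this diff, and they rely on the ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES, and JWT_SECRET_KEY constants defined above:

from datetime import datetime, timedelta

from jose import jwt


def create_access_token(subject: str) -> str:
    # Signs a token that expires after ACCESS_TOKEN_EXPIRE_MINUTES.
    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    return jwt.encode({"sub": subject, "exp": expire}, JWT_SECRET_KEY, algorithm=ALGORITHM)


def decode_access_token(token: str) -> dict:
    # Raises jose.JWTError if the signature is invalid or the token has expired.
    return jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])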

scripts/setup

@@ -40,10 +40,10 @@ print("LLM model downloaded!")

 # Download Tokenizer
 print(f"Downloading tokenizer {settings().llm.tokenizer}")
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 AutoTokenizer.from_pretrained(
-    local_path
+    pretrained_model_name_or_path=settings().llm.tokenizer,
+    cache_dir=models_cache_path,
 )
 print("Tokenizer downloaded!")

 print("Setup done")

Docker profile settings (YAML)

@@ -9,16 +9,28 @@ embedding:
   mode: ${PGPT_MODE:sagemaker}

 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}

 huggingface:
-  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}

 sagemaker:
   llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}

+ollama:
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
+  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
+  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
+  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
+  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
+  top_k: ${PGPT_OLLAMA_TOP_K:40}
+  top_p: ${PGPT_OLLAMA_TOP_P:0.9}
+  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
+  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
+  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
+
 ui:
   enabled: true
   path: /

Local profile settings (YAML)

@@ -25,7 +25,7 @@ embedding:
   ingest_mode: simple

 huggingface:
-  embedding_hf_model_name: BAAI/bge-large-en-v1.5
+  embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1

 vectorstore:
   database: qdrant

Main settings (YAML)

@@ -7,7 +7,7 @@ server:
   cors:
     enabled: true
     allow_credentials: true
-    allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
+    allow_origins: ["*"]
     allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
     allow_headers: ["*"]
   auth:
@@ -44,9 +44,9 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2

 llamacpp:
-  prompt_style: "default"
-  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
-  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
+  prompt_style: "chatml"
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)