Added new docker files

Saurab-Shrestha 2024-04-28 11:25:38 +05:45
parent 1d6fc7144a
commit c7aac53cd9
15 changed files with 76 additions and 54 deletions

.env

@@ -11,8 +11,10 @@ SUPER_ADMIN_EMAIL=superadmin@email.com
SUPER_ADMIN_PASSWORD=supersecretpassword
SUPER_ADMIN_ACCOUNT_NAME=superaccount
SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
ACCESS_TOKEN_EXPIRE_MINUTES=720
SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
ACCESS_TOKEN_EXPIRE_MINUTES=240
REFRESH_TOKEN_EXPIRE_MINUTES=1400
SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
SMTP_PASSWORD=*G15y^N0reP!y
LDAP_SERVER=ldap://192.168.101.111
LDAP_ENABLE=False
ENABLE_MAKER_CHECKER=False
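
Note: the new SECRET_KEY and REFRESH_KEY are 64-character hex strings committed to the repository. A minimal sketch for generating fresh values with the standard-library secrets module (any 32-byte random hex works here):

    import secrets

    # 32 random bytes -> 64 hex characters, one independent value per key
    print("SECRET_KEY=" + secrets.token_hex(32))
    print("REFRESH_KEY=" + secrets.token_hex(32))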


@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
ninja-build \
build-essential \
pkg-config \
wget \
make \
cmake \
g++ \
gcc
wget
# Switch to the Python image for the final build stage
FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080
ENV PORT=80
EXPOSE 80
# Prepare a non-root user
RUN adduser --system worker
RUN adduser --group worker
RUN adduser --system --ingroup worker worker
WORKDIR /home/worker/app
RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts
ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
USER worker
ENTRYPOINT python -m private_gpt
ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
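
Note: the ENTRYPOINT now starts the ASGI server directly instead of running python -m private_gpt. A rough Python equivalent of what the container executes, assuming private_gpt.main exposes an app object:

    import uvicorn

    # Same effect as: uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
    uvicorn.run("private_gpt.main:app", host="0.0.0.0", port=80)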


@@ -6,11 +6,12 @@ services:
- ./local_data/:/home/worker/app/local_data
- ./models/:/home/worker/app/models
ports:
- 8000:8000
- 80:80
environment:
PORT: 8000
PORT: 80
PGPT_PROFILES: docker
PGPT_MODE: local
env_file:
- .env
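
Note: the host mapping (80:80), the PORT variable, and the Dockerfile's EXPOSE 80 now agree. A small illustrative sketch of how an app can honor PORT at startup (the actual lookup inside private_gpt may differ):

    import os

    # Read the port injected by docker-compose, defaulting to 80
    port = int(os.environ.get("PORT", "80"))
    print(f"Serving on port {port}")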


@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
from private_gpt.settings.settings import Settings
logger = logging.getLogger(__name__)
local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
@singleton
class LLMComponent:
llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
def __init__(self, settings: Settings) -> None:
llm_mode = settings.llm.mode
if settings.llm.tokenizer:
set_global_tokenizer(
AutoTokenizer.from_pretrained(
local_path
try:
set_global_tokenizer(
AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=settings.llm.tokenizer,
cache_dir=str(models_cache_path),
token=settings.huggingface.access_token,
)
)
)
except Exception as e:
logger.warning(
"Failed to download tokenizer %s: %s. Falling back to "
"default tokenizer.",
settings.llm.tokenizer,
e,
)
logger.info("Initializing the LLM in mode=%s", llm_mode)
match settings.llm.mode:


@@ -166,7 +166,8 @@ class ChatService:
)
system_prompt = (
"""
You should answer questions only in English or Nepali.
You are a helpful assistant that should answer questions
only in English or Nepali.
Responses should be based on the context documents provided
and should be relevant, informative, and easy to understand.
You should aim to deliver high-quality responses that are


@@ -70,7 +70,6 @@ async def chunks_retrieval(
action='Chat',
details={
"query": body.text,
'user': current_user.username,
},
user_id=current_user.id
)


@@ -208,7 +208,6 @@ async def create_documents(
status=MakerCheckerStatus.PENDING,
doc_type_id=departments.doc_type_id,
)
print("DOCUMENT CREATE: ", docs_in)
document = crud.documents.create(db=db, obj_in=docs_in)
department_ids = department_ids if department_ids else "1"
department_ids = [int(number) for number in department_ids.split(",")]


@@ -43,12 +43,12 @@ def register_user(
department_id=department.id,
checker=True if role == 'OPERATOR' else False
)
# try:
# send_registration_email(fullname, email, password)
# except Exception as e:
# logging.info(f"Failed to send registration email: {str(e)}")
# raise HTTPException(
# status_code=500, detail=f"Failed to send registration email.")
try:
send_registration_email(fullname, email, password)
except Exception as e:
logging.error(f"Failed to send registration email: {str(e)}")
raise HTTPException(
status_code=500, detail="Failed to send registration email.")
return crud.user.create(db, obj_in=user_in)
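
Note: send_registration_email is project code that is not shown in this diff. A minimal sketch of what such a helper could look like with the standard-library smtplib and the SMTP_* values from .env (function body, port, and message text here are illustrative assumptions):

    import smtplib
    from email.message import EmailMessage

    def send_registration_email(fullname: str, email: str, password: str) -> None:
        msg = EmailMessage()
        msg["Subject"] = "Your QuickGPT account"
        msg["From"] = "noreply@gibl.com.np"      # SMTP_USERNAME
        msg["To"] = email
        msg.set_content(f"Hello {fullname}, your temporary password is: {password}")
        with smtplib.SMTP("mail.gibl.com.np", 587) as smtp:  # SMTP_SERVER
            smtp.starttls()
            smtp.login("noreply@gibl.com.np", "<SMTP_PASSWORD>")
            smtp.send_message(msg)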


@@ -7,13 +7,13 @@ from datetime import datetime
from typing import Any, List
from sqlalchemy.orm import Session
from sqlalchemy import select
from fastapi_pagination import Page, paginate
from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile
from private_gpt.users.api import deps
from private_gpt.constants import UNCHECKED_DIR
from private_gpt.users.constants.role import Role
from private_gpt.users.core.config import settings
from private_gpt.users import crud, models, schemas
from private_gpt.server.ingest.ingest_router import create_documents, ingest
from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix='/documents', tags=['Documents'])
ENABLE_MAKER_CHECKER = False
ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER
def get_username(db, id):
user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
status_code=500,
detail="Internal Server Error.",
)
@router.post('/upload', response_model=schemas.Document)
async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
user_id=current_user.id
)
if document.doc_type_id == 2:
if document.doc_type_id == 2: # For OCR
return await process_ocr(request, unchecked_path)
elif document.doc_type_id == 3:
elif document.doc_type_id == 3: # For BOTH
return await process_both_ocr(request, unchecked_path)
else:
return await ingest(request, unchecked_path)
return await ingest(request, unchecked_path) # For pdf
elif checker_in.status == MakerCheckerStatus.REJECTED.value:


@@ -3,9 +3,11 @@ from functools import lru_cache
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
PROJECT_NAME: str = "QuickGPT"
API_V1_STR: str = "/v1"
SECRET_KEY: str
REFRESH_KEY: str
ACCESS_TOKEN_EXPIRE_MINUTES: int
REFRESH_TOKEN_EXPIRE_MINUTES: int
@@ -30,7 +32,8 @@ class Settings(BaseSettings):
LDAP_SERVER: str
LDAP_ENABLE: bool
ENABLE_MAKER_CHECKER: bool
class Config:
case_sensitive = True
env_file = ".env"
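
Note: with pydantic's BaseSettings, each declared field is populated from the environment or the env_file; required fields without defaults (SECRET_KEY, REFRESH_KEY, ENABLE_MAKER_CHECKER, ...) raise a validation error if unset. A minimal sketch of the same pattern:

    from pydantic_settings import BaseSettings

    class DemoSettings(BaseSettings):
        SECRET_KEY: str            # required: read from env or .env
        LDAP_ENABLE: bool = False  # optional: default applies if unset

        class Config:
            env_file = ".env"

    settings = DemoSettings()  # validates on construction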


@@ -1,4 +1,3 @@
import os
import random
import string
from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union
from jose import JWTError, jwt
from passlib.context import CryptContext
from private_gpt.users.core.config import settings
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1 # 24 hrs
REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1 # 24 hrs # Default Value
REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days # Default Value
ALGORITHM = "HS256"
# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY'] # should be kept secret
# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY'] # should be kept secret
JWT_SECRET_KEY = "QUICKGPT"
JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
JWT_SECRET_KEY = settings.SECRET_KEY
JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
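
Note: tokens are now signed with the keys from settings instead of hard-coded strings. A sketch of the usual python-jose pattern these constants feed into (the project's real token helpers may differ):

    from datetime import datetime, timedelta
    from jose import jwt

    def create_access_token(subject: str) -> str:
        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
        return jwt.encode({"sub": subject, "exp": expire}, JWT_SECRET_KEY, algorithm=ALGORITHM)

    token = create_access_token("alice")
    # Raises jose.JWTError / ExpiredSignatureError on a bad or expired token
    claims = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])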


@@ -40,10 +40,10 @@ print("LLM model downloaded!")
# Download Tokenizer
print(f"Downloading tokenizer {settings().llm.tokenizer}")
local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
AutoTokenizer.from_pretrained(
local_path
pretrained_model_name_or_path=settings().llm.tokenizer,
cache_dir=models_cache_path,
)
print("Tokenizer downloaded!")
print("Setup done")
print("Setup done")


@@ -9,16 +9,28 @@ embedding:
mode: ${PGPT_MODE:sagemaker}
llamacpp:
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}
huggingface:
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}
sagemaker:
llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
ollama:
llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
top_k: ${PGPT_OLLAMA_TOP_K:40}
top_p: ${PGPT_OLLAMA_TOP_P:0.9}
repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
ui:
enabled: true
path: /
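
Note: values of the form ${NAME:default} are environment placeholders with a fallback. An illustrative sketch of that expansion rule (private_gpt ships its own settings loader; this is not its code):

    import os
    import re

    def expand(value: str) -> str:
        # Replace ${NAME:default} with os.environ["NAME"], else the default
        return re.sub(
            r"\$\{(\w+):([^}]*)\}",
            lambda m: os.environ.get(m.group(1), m.group(2)),
            value,
        )

    print(expand("${PGPT_OLLAMA_LLM_MODEL:mistral}"))  # -> "mistral" unless overridden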


@@ -25,7 +25,7 @@ embedding:
ingest_mode: simple
huggingface:
embedding_hf_model_name: BAAI/bge-large-en-v1.5
embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1
vectorstore:
database: qdrant


@@ -7,7 +7,7 @@ server:
cors:
enabled: true
allow_credentials: true
allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
allow_origins: ["*"]
allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
allow_headers: ["*"]
auth:
@@ -44,9 +44,9 @@ llm:
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
llamacpp:
prompt_style: "default"
llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
prompt_style: "chatml"
llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)