mirror of https://github.com/imartinez/privateGPT.git
synced 2025-07-16 16:32:20 +00:00

Added new docker files

This commit is contained in:
  parent 1d6fc7144a
  commit c7aac53cd9

.env (9 changed lines):
@@ -11,8 +11,10 @@ SUPER_ADMIN_EMAIL=superadmin@email.com
 SUPER_ADMIN_PASSWORD=supersecretpassword
 SUPER_ADMIN_ACCOUNT_NAME=superaccount

-SECRET_KEY=ba9dc3f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
-ACCESS_TOKEN_EXPIRE_MINUTES=720
+SECRET_KEY=ba9d23f976cf8fb40519dcd152a8d7d21c0b7861d841711cdb2602be8e85fd7c
+REFRESH_KEY=ba9dc3f976cf8fb12312dve22a8d7d21c0b7861d841711cdb2602be8e85fdde
+ACCESS_TOKEN_EXPIRE_MINUTES=240
+REFRESH_TOKEN_EXPIRE_MINUTES=1400

 SMTP_SERVER=mail.gibl.com.np
@@ -22,4 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y

 LDAP_SERVER=ldap://192.168.101.111
 LDAP_ENABLE=False
+ENABLE_MAKER_CHECKER=False
Dockerfile:

@@ -10,11 +10,7 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget \
-    make \
-    cmake \
-    g++ \
-    gcc
+    wget

 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base
@@ -50,11 +46,12 @@ RUN poetry run python scripts/setup
 FROM base as app

 ENV PYTHONUNBUFFERED=1
-ENV PORT=8080
-EXPOSE 8080
+ENV PORT=80
+EXPOSE 80

 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser --group worker
+RUN adduser --system --ingroup worker worker
 WORKDIR /home/worker/app

 RUN mkdir local_data; chown worker local_data
@@ -68,4 +65,4 @@ COPY --chown=worker scripts/ scripts
 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT uvicorn private_gpt.main:app --host 0.0.0.0 --port 80
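The new ENTRYPOINT launches the API with uvicorn instead of `python -m private_gpt`, which assumes `private_gpt/main.py` exposes a module-level ASGI application named `app`. A minimal sketch of what `uvicorn private_gpt.main:app` expects to import (illustrative only, not the repo's actual main.py):

from fastapi import FastAPI

# uvicorn resolves "private_gpt.main:app" to this module-level attribute
# and serves it on the host/port given on the command line (0.0.0.0:80).
app = FastAPI()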
docker-compose.yaml:

@@ -6,11 +6,12 @@ services:
       - ./local_data/:/home/worker/app/local_data
       - ./models/:/home/worker/app/models
     ports:
-      - 8000:8000
+      - 80:80
     environment:
-      PORT: 8000
+      PORT: 80
       PGPT_PROFILES: docker
       PGPT_MODE: local
+    env_file:
+      - .env
private_gpt/components/llm/llm_component.py:

@@ -11,7 +11,6 @@ from private_gpt.paths import models_cache_path, models_path
 from private_gpt.settings.settings import Settings

 logger = logging.getLogger(__name__)
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 @singleton
 class LLMComponent:
     llm: LLM
@@ -20,11 +19,22 @@ class LLMComponent:
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
         if settings.llm.tokenizer:
-            set_global_tokenizer(
-                AutoTokenizer.from_pretrained(
-                    local_path
-                )
-            )
+            try:
+                set_global_tokenizer(
+                    AutoTokenizer.from_pretrained(
+                        pretrained_model_name_or_path=settings.llm.tokenizer,
+                        cache_dir=str(models_cache_path),
+                        token=settings.huggingface.access_token,
+                    )
+                )
+            except Exception as e:
+                logger.warning(
+                    "Failed to download tokenizer %s. Falling back to "
+                    "default tokenizer.",
+                    settings.llm.tokenizer,
+                    e,
+                )

         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
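For reference, the same download-with-fallback pattern in isolation; a minimal sketch assuming only `transformers`, with illustrative names not taken from the repo:

import logging

from transformers import AutoTokenizer

logger = logging.getLogger(__name__)

def load_tokenizer(repo_id, cache_dir, token=None):
    # Try to fetch the configured tokenizer from the Hugging Face Hub;
    # return None on failure so the caller keeps its default tokenizer.
    try:
        return AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=repo_id,
            cache_dir=cache_dir,
            token=token,
        )
    except Exception as e:
        logger.warning("Failed to download tokenizer %s: %s", repo_id, e)
        return None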
private_gpt/server/chat/chat_service.py:

@@ -166,7 +166,8 @@ class ChatService:
         )
         system_prompt = (
             """
-            You should answer questions only in English or Nepali.
+            You are a helpful assistant that should answer questions
+            only in English or Nepali.
             Responses should be based on the context documents provided
             and should be relevant, informative, and easy to understand.
             You should aim to deliver high-quality responses that are
private_gpt/server/chunks/chunks_router.py:

@@ -70,7 +70,6 @@ async def chunks_retrieval(
         action='Chat',
         details={
             "query": body.text,
-            'user': current_user.username,
         },
         user_id=current_user.id
     )
private_gpt/server/ingest/ingest_router.py:

@@ -208,7 +208,6 @@ async def create_documents(
         status=MakerCheckerStatus.PENDING,
         doc_type_id=departments.doc_type_id,
     )
-    print("DOCUMENT CREATE: ", docs_in)
     document = crud.documents.create(db=db, obj_in=docs_in)
     department_ids = department_ids if department_ids else "1"
     department_ids = [int(number) for number in department_ids.split(",")]
User registration endpoint:

@@ -43,12 +43,12 @@ def register_user(
         department_id=department.id,
         checker= True if role == 'OPERATOR' else False
     )
-    # try:
-    #     send_registration_email(fullname, email, password)
-    # except Exception as e:
-    #     logging.info(f"Failed to send registration email: {str(e)}")
-    #     raise HTTPException(
-    #         status_code=500, detail=f"Failed to send registration email.")
+    try:
+        send_registration_email(fullname, email, password)
+    except Exception as e:
+        logging.info(f"Failed to send registration email: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Failed to send registration email.")
     return crud.user.create(db, obj_in=user_in)
Documents router:

@@ -7,13 +7,13 @@ from datetime import datetime

 from typing import Any, List
 from sqlalchemy.orm import Session
 from sqlalchemy import select
 from fastapi_pagination import Page, paginate
 from fastapi import APIRouter, Depends, HTTPException, status, Security, Request, File, UploadFile

 from private_gpt.users.api import deps
 from private_gpt.constants import UNCHECKED_DIR
 from private_gpt.users.constants.role import Role
+from private_gpt.users.core.config import settings
 from private_gpt.users import crud, models, schemas
 from private_gpt.server.ingest.ingest_router import create_documents, ingest
 from private_gpt.users.models.document import MakerCheckerActionType, MakerCheckerStatus
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix='/documents', tags=['Documents'])

-ENABLE_MAKER_CHECKER = False
+ENABLE_MAKER_CHECKER = settings.ENABLE_MAKER_CHECKER

 def get_username(db, id):
     user = crud.user.get_by_id(db=db, id=id)
@@ -259,7 +259,6 @@ def update_department(
             status_code=500,
             detail="Internal Server Error.",
         )
-

 @router.post('/upload', response_model=schemas.Document)
 async def upload_documents(
@@ -384,12 +383,12 @@ async def verify_documents(
         user_id=current_user.id
     )

-        if document.doc_type_id == 2:
+        if document.doc_type_id == 2:    # For OCR
             return await process_ocr(request, unchecked_path)
-        elif document.doc_type_id == 3:
+        elif document.doc_type_id == 3:  # For BOTH
             return await process_both_ocr(request, unchecked_path)
         else:
-            return await ingest(request, unchecked_path)
+            return await ingest(request, unchecked_path)  # For pdf

     elif checker_in.status == MakerCheckerStatus.REJECTED.value:
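The branch above dispatches on magic numbers; the inline comments added by this commit give their meaning. A hedged sketch that makes the mapping explicit (the enum is an illustration, not code from the repo):

from enum import IntEnum

class DocType(IntEnum):
    OCR = 2   # image-only documents, routed to process_ocr()
    BOTH = 3  # mixed documents, routed to process_both_ocr()
# Any other doc_type_id falls through to plain ingest(), e.g. ordinary PDFs.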
private_gpt/users/core/config.py:

@@ -3,9 +3,11 @@ from functools import lru_cache
 from pydantic_settings import BaseSettings

 class Settings(BaseSettings):
-    PROJECT_NAME: str = "AUTHENTICATION AND AUTHORIZATION"
+    PROJECT_NAME: str = "QuickGPT"
     API_V1_STR: str = "/v1"
     SECRET_KEY: str
+    REFRESH_KEY: str

     ACCESS_TOKEN_EXPIRE_MINUTES: int
+    REFRESH_TOKEN_EXPIRE_MINUTES: int

@@ -30,7 +32,8 @@ class Settings(BaseSettings):

     LDAP_SERVER: str
     LDAP_ENABLE: bool
+
+    ENABLE_MAKER_CHECKER: bool

     class Config:
         case_sensitive = True
         env_file = ".env"
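config.py imports `lru_cache`, the usual pydantic-settings idiom for exposing a single cached Settings instance. A minimal sketch, continuing the module above, of how the `settings` object imported elsewhere in this commit is typically constructed (the accessor name is an assumption):

from functools import lru_cache

@lru_cache
def get_settings() -> Settings:
    # Values such as SECRET_KEY, REFRESH_KEY and ENABLE_MAKER_CHECKER are
    # read from .env via the Config class above.
    return Settings()

settings = get_settings()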
private_gpt/users/core/security.py:

@@ -1,4 +1,3 @@
-import os
 import random
 import string
 from datetime import datetime, timedelta
@@ -6,14 +5,13 @@ from typing import Dict, Any, Optional, Union

 from jose import JWTError, jwt
 from passlib.context import CryptContext
+from private_gpt.users.core.config import settings

-ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 12 hrs
-REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days
+ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 1  # 12 hrs # Default Value
+REFRESH_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7  # 7 days # Default Value
 ALGORITHM = "HS256"
-# JWT_SECRET_KEY = os.environ['JWT_SECRET_KEY']  # should be kept secret
-# JWT_REFRESH_SECRET_KEY = os.environ['JWT_REFRESH_SECRET_KEY']  # should be kept secret
-JWT_SECRET_KEY = "QUICKGPT"
-JWT_REFRESH_SECRET_KEY = "QUICKGPT_REFRESH"
+JWT_SECRET_KEY = settings.SECRET_KEY
+JWT_REFRESH_SECRET_KEY = settings.REFRESH_KEY

 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
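With the signing secrets now sourced from Settings rather than hard-coded, token creation elsewhere in this module presumably combines the constants above. A minimal hedged sketch using python-jose, continuing the module above (the function name and payload shape are assumptions, not the repo's code):

from datetime import datetime, timedelta

from jose import jwt

def create_access_token(subject: str) -> str:
    # Sign a short-lived access token with the .env-provided secret.
    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    return jwt.encode({"sub": subject, "exp": expire}, JWT_SECRET_KEY, algorithm=ALGORITHM)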
scripts/setup:

@@ -40,10 +40,10 @@ print("LLM model downloaded!")

 # Download Tokenizer
 print(f"Downloading tokenizer {settings().llm.tokenizer}")
-local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1"
 AutoTokenizer.from_pretrained(
-    local_path
+    pretrained_model_name_or_path=settings().llm.tokenizer,
+    cache_dir=models_cache_path,
 )
 print("Tokenizer downloaded!")

 print("Setup done")
settings-docker.yaml:

@@ -9,16 +9,28 @@ embedding:
   mode: ${PGPT_MODE:sagemaker}

 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/OpenHermes-2.5-Mistral-7B-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:openhermes-2.5-mistral-7b.Q5_K_M.gguf}

 huggingface:
-  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:mixedbread-ai/mxbai-embed-large-v1}

 sagemaker:
   llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}

+ollama:
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
+  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
+  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
+  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
+  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
+  top_k: ${PGPT_OLLAMA_TOP_K:40}
+  top_p: ${PGPT_OLLAMA_TOP_P:0.9}
+  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
+  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
+  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}

 ui:
   enabled: true
   path: /
Another settings profile:

@@ -25,7 +25,7 @@ embedding:
   ingest_mode: simple

 huggingface:
-  embedding_hf_model_name: BAAI/bge-large-en-v1.5
+  embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1

 vectorstore:
   database: qdrant
settings.yaml:

@@ -7,7 +7,7 @@ server:
   cors:
     enabled: true
     allow_credentials: true
-    allow_origins: ["http://localhost:80", "http://10.1.101.125", "http://quickgpt.gibl.com.np"]
+    allow_origins: ["*"]
     allow_methods: ["DELETE", "GET", "POST", "PUT", "OPTIONS", "PATCH"]
     allow_headers: ["*"]
   auth:
@@ -44,9 +44,9 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2

 llamacpp:
-  prompt_style: "default"
-  llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
-  llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
+  prompt_style: "chatml"
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)