Updated docker compose for GPU compatibility

Saurab-Shrestha 2024-05-02 17:32:29 +05:45
parent 1963190d16
commit 4472add3c2
11 changed files with 3510 additions and 72 deletions

.env (8 changed lines)

@@ -1,10 +1,10 @@
 PORT=8000
 ENVIRONMENT=dev
-DB_HOST=localhost
+DB_HOST=db
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 SUPER_ADMIN_EMAIL=superadmin@email.com
@@ -24,5 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y
 LDAP_SERVER=ldap://192.168.101.111
-LDAP_ENABLE=False
-ENABLE_MAKER_CHECKER=False
+LDAP_ENABLE=True
+ENABLE_MAKER_CHECKER=True


@@ -35,6 +35,10 @@ WORKDIR /home/worker/app
 RUN mkdir local_data; chown worker local_data
 RUN mkdir models; chown worker models
 RUN mkdir tiktoken_cache; chown worker tiktoken_cache
+RUN mkdir static; chown worker static
+RUN mkdir static/unchecked; chown worker static/unchecked
+RUN mkdir static/checked; chown worker static/checked
+RUN mkdir uploads; chown worker uploads
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker alembic/ alembic
@@ -46,6 +50,16 @@ COPY --chown=worker *.ini ./
 COPY --chown=worker docker-entrypoint.sh /home/worker/app/
 RUN chmod +x /home/worker/app/docker-entrypoint.sh
+
+########################################
+#### ---- Set up NVIDIA-Docker ---- ####
+########################################
+## ref: https://github.com/NVIDIA/nvidia-docker/wiki/Installation-(Native-GPU-Support)#usage
+ENV TOKENIZERS_PARALLELISM=false
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,video,utility
+
+# Set the user to run the container
 USER worker
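
The NVIDIA_* variables baked into the image above only take effect when the container is started under the NVIDIA container runtime; Docker itself ignores them. A sketch of one way to select that runtime per service in Compose (the service name and build context are assumptions, not part of this diff):

services:
  private-gpt:
    build: .          # hypothetical service for the image built here
    runtime: nvidia   # requires the NVIDIA Container Toolkit on the host
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,video,utility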


@@ -10,7 +10,14 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget
+    wget \
+    nvidia-container-runtime
+
+# Set up Nvidia Container Toolkit
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0"
 
 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base


@ -1,54 +0,0 @@
"""Chat items
Revision ID: 739fb4ac6615
Revises:
Create Date: 2024-05-01 19:20:19.652290
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '739fb4ac6615'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('chat_history',
sa.Column('conversation_id', sa.UUID(), nullable=False),
sa.Column('title', sa.String(length=255), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('_title_generated', sa.Boolean(), nullable=True),
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('conversation_id')
)
op.create_table('chat_items',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('index', sa.Integer(), nullable=False),
sa.Column('sender', sa.String(length=225), nullable=False),
sa.Column('content', sa.JSON(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('like', sa.Boolean(), nullable=True),
sa.Column('conversation_id', sa.UUID(), nullable=False),
sa.ForeignKeyConstraint(['conversation_id'], ['chat_history.conversation_id'], ),
sa.PrimaryKeyConstraint('id')
)
# op.create_unique_constraint('unique_user_role', 'user_roles', ['user_id', 'role_id', 'company_id'])
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
# op.drop_constraint('unique_user_role', 'user_roles', type_='unique')
op.drop_table('chat_items')
op.drop_table('chat_history')
# ### end Alembic commands ###


@@ -25,6 +25,8 @@ services:
     image: ollama/ollama:latest
     volumes:
       - ./models:/root/.ollama
+    ports:
+      - 11434:11434
 
   db:
     image: postgres:15-alpine
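
Publishing port 11434 makes Ollama reachable from the host, but GPU access for the service is a separate concern. A hedged sketch of how it is commonly granted in Compose v2 via a device reservation (the deploy block is an assumption; it does not appear in the visible diff):

services:
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ./models:/root/.ollama
    ports:
      - 11434:11434
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]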


@@ -3,7 +3,12 @@ from pathlib import Path
 
 PROJECT_ROOT_PATH: Path = Path(__file__).parents[1]
 script_dir = os.path.dirname(os.path.abspath(__file__))
-UPLOAD_DIR = os.path.join(script_dir, "static/checked") # Actual upload path for uploaded file
-UNCHECKED_DIR = os.path.join(script_dir, "static/unchecked") # Actual upload path for uploaded file
-OCR_UPLOAD = os.path.join(script_dir, 'uploads') # temporary upload path for scanned pdf file
+# Create directories if they don't exist
+UPLOAD_DIR = os.path.join(script_dir, "static/checked")
+os.makedirs(UPLOAD_DIR, exist_ok=True) # Actual upload path for uploaded file
+UNCHECKED_DIR = os.path.join(script_dir, "static/unchecked")
+os.makedirs(UNCHECKED_DIR, exist_ok=True) # Actual upload path for uploaded file
+OCR_UPLOAD = os.path.join(script_dir, 'uploads')
+os.makedirs(OCR_UPLOAD, exist_ok=True) # Temporary upload path for scanned pdf file


@@ -20,7 +20,7 @@ sagemaker:
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
 
 ollama:
-  llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3}
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:openhermes}
   embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:mxbai-embed-large}
   api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
   tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}


@@ -14,9 +14,9 @@ embedding:
   embed_dim: 768
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
-  api_base: http://localhost:11434
+  api_base: http://ollama-1:11434
 
 nodestore:
   database: postgres
@@ -25,7 +25,7 @@ vectorstore:
   database: postgres
 
 postgres:
-  host: localhost
+  host: db
   port: 5432
   database: postgres
   user: postgres


@@ -11,7 +11,7 @@ embedding:
   mode: ollama
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
   api_base: http://localhost:11434
   embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama


@@ -49,7 +49,7 @@ rag:
   #similarity_value: 0.45
   #This value is disabled by default. If you enable this setting, the RAG will only use articles that meet a certain percentage score.
 rerank:
-  enabled: true
+  enabled: false
   model: cross-encoder/ms-marco-MiniLM-L-2-v2
   top_n: 3
@@ -99,10 +99,10 @@ openai:
   model: gpt-3.5-turbo
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
-  api_base: http://localhost:11434
-  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
+  api_base: http://privategpt-ollama-1:11434
+  embedding_api_base: http://privategpt-ollama-1:11434 # change if your embedding model runs on another ollama
   keep_alive: 5m
   request_timeout: 120.0
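
Across these settings files the Ollama endpoint is spelled three ways (ollama, ollama-1, privategpt-ollama-1). The last form is the default name Compose v2 gives a container, <project>-<service>-<index>, so it breaks if the project name changes. One way to keep api_base stable, sketched here as an assumption rather than something in this commit:

services:
  ollama:
    image: ollama/ollama:latest
    container_name: privategpt-ollama-1   # pin the name the settings files expect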

sql_dump.sql (new file, 3464 lines)

File diff suppressed because one or more lines are too long