Updated docker compose for GPU compatibility

Saurab-Shrestha 2024-05-02 17:32:29 +05:45
parent 1963190d16
commit 4472add3c2
11 changed files with 3510 additions and 72 deletions

.env (8 changed lines)

@@ -1,10 +1,10 @@
 PORT=8000
 ENVIRONMENT=dev
-DB_HOST=localhost
+DB_HOST=db
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
 DB_NAME=QuickGpt
 SUPER_ADMIN_EMAIL=superadmin@email.com
@@ -24,5 +24,5 @@ SMTP_USERNAME=noreply@gibl.com.np
 SMTP_PASSWORD=*G15y^N0reP!y
 LDAP_SERVER=ldap://192.168.101.111
-LDAP_ENABLE=False
-ENABLE_MAKER_CHECKER=False
+LDAP_ENABLE=True
+ENABLE_MAKER_CHECKER=True


@@ -35,6 +35,10 @@ WORKDIR /home/worker/app
 RUN mkdir local_data; chown worker local_data
 RUN mkdir models; chown worker models
 RUN mkdir tiktoken_cache; chown worker tiktoken_cache
+RUN mkdir static; chown worker static
+RUN mkdir static/unchecked; chown worker static/unchecked
+RUN mkdir static/checked; chown worker static/checked
+RUN mkdir uploads; chown worker uploads
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker alembic/ alembic
@@ -46,6 +50,16 @@ COPY --chown=worker *.ini ./
 COPY --chown=worker docker-entrypoint.sh /home/worker/app/
 RUN chmod +x /home/worker/app/docker-entrypoint.sh
+
+########################################
+#### ---- Set up NVIDIA-Docker ---- ####
+########################################
+## ref: https://github.com/NVIDIA/nvidia-docker/wiki/Installation-(Native-GPU-Support)#usage
+ENV TOKENIZERS_PARALLELISM=false
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,video,utility
+
+# Set the user to run the container
 USER worker
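
The NVIDIA_* variables baked into the image above only take effect when the container is started under the NVIDIA container runtime; Docker itself ignores them. A sketch of one way to select that runtime per service in Compose (the service name and build context are assumptions, not part of this diff):

services:
  private-gpt:
    build: .          # hypothetical service for the image built here
    runtime: nvidia   # requires the NVIDIA Container Toolkit on the host
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,video,utility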


@@ -10,7 +10,14 @@ RUN apt-get update && apt-get install -y \
     ninja-build \
     build-essential \
     pkg-config \
-    wget
+    wget \
+    nvidia-container-runtime
+
+# Set up Nvidia Container Toolkit
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0"
 
 # Switch to the Python image for the final build stage
 FROM python:3.11.6-slim-bookworm as base


@ -1,54 +0,0 @@
"""Chat items
Revision ID: 739fb4ac6615
Revises:
Create Date: 2024-05-01 19:20:19.652290
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '739fb4ac6615'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('chat_history',
sa.Column('conversation_id', sa.UUID(), nullable=False),
sa.Column('title', sa.String(length=255), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('_title_generated', sa.Boolean(), nullable=True),
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('conversation_id')
)
op.create_table('chat_items',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('index', sa.Integer(), nullable=False),
sa.Column('sender', sa.String(length=225), nullable=False),
sa.Column('content', sa.JSON(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('like', sa.Boolean(), nullable=True),
sa.Column('conversation_id', sa.UUID(), nullable=False),
sa.ForeignKeyConstraint(['conversation_id'], ['chat_history.conversation_id'], ),
sa.PrimaryKeyConstraint('id')
)
# op.create_unique_constraint('unique_user_role', 'user_roles', ['user_id', 'role_id', 'company_id'])
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
# op.drop_constraint('unique_user_role', 'user_roles', type_='unique')
op.drop_table('chat_items')
op.drop_table('chat_history')
# ### end Alembic commands ###


@@ -25,6 +25,8 @@ services:
     image: ollama/ollama:latest
     volumes:
       - ./models:/root/.ollama
+    ports:
+      - 11434:11434
 
   db:
     image: postgres:15-alpine
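
Publishing port 11434 makes Ollama reachable from the host, but GPU access for the service is a separate concern. A hedged sketch of how it is commonly granted in Compose v2 via a device reservation (the deploy block is an assumption; it does not appear in the visible diff):

services:
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ./models:/root/.ollama
    ports:
      - 11434:11434
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]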


@@ -3,7 +3,12 @@ from pathlib import Path
 
 PROJECT_ROOT_PATH: Path = Path(__file__).parents[1]
 script_dir = os.path.dirname(os.path.abspath(__file__))
-UPLOAD_DIR = os.path.join(script_dir, "static/checked") # Actual upload path for uploaded file
-UNCHECKED_DIR = os.path.join(script_dir, "static/unchecked") # Actual upload path for uploaded file
-OCR_UPLOAD = os.path.join(script_dir, 'uploads') # temporary upload path for scanned pdf file
+# Create directories if they don't exist
+UPLOAD_DIR = os.path.join(script_dir, "static/checked")
+os.makedirs(UPLOAD_DIR, exist_ok=True) # Actual upload path for uploaded file
+UNCHECKED_DIR = os.path.join(script_dir, "static/unchecked")
+os.makedirs(UNCHECKED_DIR, exist_ok=True) # Actual upload path for uploaded file
+OCR_UPLOAD = os.path.join(script_dir, 'uploads')
+os.makedirs(OCR_UPLOAD, exist_ok=True) # Temporary upload path for scanned pdf file


@@ -20,7 +20,7 @@ sagemaker:
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
 
 ollama:
-  llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3}
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:openhermes}
   embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:mxbai-embed-large}
   api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
   tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}


@@ -14,9 +14,9 @@ embedding:
   embed_dim: 768
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
-  api_base: http://localhost:11434
+  api_base: http://ollama-1:11434
 
 nodestore:
   database: postgres
@@ -25,7 +25,7 @@ vectorstore:
   database: postgres
 
 postgres:
-  host: localhost
+  host: db
   port: 5432
   database: postgres
   user: postgres


@@ -11,7 +11,7 @@ embedding:
   mode: ollama
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
   api_base: http://localhost:11434
   embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama


@@ -49,7 +49,7 @@ rag:
   #similarity_value: 0.45
   #This value is disabled by default. If you enable this setting, the RAG will only use articles that meet a certain percentage score.
 rerank:
-  enabled: true
+  enabled: false
   model: cross-encoder/ms-marco-MiniLM-L-2-v2
   top_n: 3
@@ -99,10 +99,10 @@ openai:
   model: gpt-3.5-turbo
 
 ollama:
-  llm_model: llama3
+  llm_model: openhermes
   embedding_model: mxbai-embed-large
-  api_base: http://localhost:11434
-  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
+  api_base: http://privategpt-ollama-1:11434
+  embedding_api_base: http://privategpt-ollama-1:11434 # change if your embedding model runs on another ollama
   keep_alive: 5m
   request_timeout: 120.0
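
Across these settings files the Ollama endpoint is spelled three ways (ollama, ollama-1, privategpt-ollama-1). The last form is the default name Compose v2 gives a container, <project>-<service>-<index>, so it breaks if the project name changes. One way to keep api_base stable, sketched here as an assumption rather than something in this commit:

services:
  ollama:
    image: ollama/ollama:latest
    container_name: privategpt-ollama-1   # pin the name the settings files expect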

sql_dump.sql (new file, 3464 lines)

File diff suppressed because one or more lines are too long