diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..2bcdfd92b --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +models/ diff --git a/.gitignore b/.gitignore index c4bbee8ba..359f7a03d 100644 --- a/.gitignore +++ b/.gitignore @@ -25,10 +25,10 @@ lib/ lib64/ parts/ sdist/ -models + var/ wheels/ -models/ +models/* pip-wheel-metadata/ share/python-wheels/ *.egg-info/ diff --git a/Dockerfile b/Dockerfile-llmserver similarity index 51% rename from Dockerfile rename to Dockerfile-llmserver index 297e5d8cc..c36567381 100644 --- a/Dockerfile +++ b/Dockerfile-llmserver @@ -10,10 +10,12 @@ WORKDIR /app COPY . /app -RUN pip install -r requirements.txt -EXPOSE 7860 +# upgrade pip +RUN pip3 install --upgrade pip + +RUN pip install --no-cache-dir -r requirements.txt + EXPOSE 8000 -CMD ["python", "pilot/server/llmserver.py"] -CMD ["python", "pilot/server/webserver.py"] +CMD ["python3", "pilot/server/llmserver.py"] diff --git a/Dockerfile-webserver b/Dockerfile-webserver new file mode 100644 index 000000000..c0f5a1f3e --- /dev/null +++ b/Dockerfile-webserver @@ -0,0 +1,21 @@ +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 + +RUN apt-get update && apt-get install -y \ + git \ + python3 \ + pip + + +WORKDIR /app + +COPY . /app + + +# upgrade pip +RUN pip3 install --upgrade pip + +RUN pip install --no-cache-dir -r requirements.txt + +EXPOSE 7860 + +CMD ["python3", "pilot/server/webserver.py"] diff --git a/docker-compose.yml b/docker-compose.yml index 6938457c4..4a35b5a5e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,16 +13,31 @@ services: volumes: - my-db:/var/lib/mysql restart: unless-stopped - db-gpt: + webserver: build: context: . - dockerfile: Dockerfile - container_name: db-gpt + dockerfile: Dockerfile-webserver + environment: + - MODEL_SERVER=http://llmserver:8000 + - LOCAL_DB_HOST=db + env_file: + - .env ports: - 7860:7860 expose: - 7860 restart: unless-stopped + llmserver: + build: + context: . + dockerfile: Dockerfile-llmserver + volumes: + - ./models:/app/models + env_file: + - .env + ports: + - 8000:8000 + restart: unless-stopped deploy: resources: reservations: @@ -31,5 +46,7 @@ services: device_ids: ['1', '0'] capabilities: [gpu] + + volumes: my-db: