feat: Support 8-bit quantization and 4-bit quantization for multi-gpu inference

This commit is contained in:
FangYin Cheng
2023-08-02 15:51:57 +08:00
parent e16a5ccfc9
commit d8a4b776d5
8 changed files with 368 additions and 93 deletions

View File

@@ -1,25 +1,48 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
# Image used by the FROM instruction below; override with --build-arg BASE_IMAGE=<image>.
ARG BASE_IMAGE="nvidia/cuda:11.8.0-devel-ubuntu22.04"
FROM ${BASE_IMAGE}
# Re-declared (without a default) after FROM so the name is in scope inside this build stage.
ARG BASE_IMAGE
# Install the OS-level tools the later build steps call: git, python3, pip and wget.
# The apt package lists are deleted in the same layer that downloads them, so they
# are not stored in the resulting image.
RUN apt-get update && apt-get install -y git python3 pip wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# download code from GitHub: https://github.com/csunny/DB-GPT
# ENV DBGPT_VERSION="v0.3.3"
# RUN wget https://github.com/csunny/DB-GPT/archive/refs/tags/$DBGPT_VERSION.zip
# Build-time switches (set with --build-arg):
#   BUILD_LOCAL_CODE  "false" -> the code is cloned from git into /app during the build;
#                     "true"  -> the copied build context is moved into /app instead.
#   LANGUAGE          "zh" installs the zh_core_web_sm wheel fetched from its GitHub release;
#                     any other value installs it via `python3 -m spacy download`.
#   PIP_INDEX_URL     package index passed to pip3 with -i.
ARG BUILD_LOCAL_CODE="false"
ARG LANGUAGE="en"
ARG PIP_INDEX_URL="https://pypi.org/simple"
# Also exported as an environment variable of the image.
ENV PIP_INDEX_URL=$PIP_INDEX_URL
# clone latest code, and rename to /app
RUN git clone https://github.com/csunny/DB-GPT.git /app
# COPY only requirements.txt first to leverage Docker cache
COPY ./requirements.txt /tmp/requirements.txt
WORKDIR /app
# Install the Python dependencies in a single layer:
#   1. upgrade pip;
#   2. install everything listed in requirements.txt (relative path, so it is resolved
#      against the current working directory) without keeping pip's download cache;
#   3. install seaborn and mpld3;
#   4. download the zh_core_web_sm 3.5.0 wheel from its GitHub release, install it,
#      and delete the downloaded file;
#   5. remove pip's cache directory.
RUN pip3 install --upgrade pip \
&& pip3 install --no-cache-dir -r requirements.txt \
&& pip3 install seaborn mpld3 \
&& wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
&& pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
&& rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
&& rm -rf `pip3 cache dir`
# Install the Python dependencies from /tmp/requirements.txt.
# When BUILD_LOCAL_CODE is "false", the latest code is first cloned from git into /app
# and its requirements.txt replaces /tmp/requirements.txt; for any other value the
# /tmp/requirements.txt that is already in place is used as-is.
# TODO: download by version, like: https://github.com/eosphoros-ai/DB-GPT/archive/refs/tags/$DBGPT_VERSION.zip
# Every pip call passes --no-cache-dir so no download cache is written into this layer,
# and the index URL is quoted so it always reaches pip as exactly one argument.
RUN pip3 install --upgrade pip -i "$PIP_INDEX_URL" --no-cache-dir \
    && if [ "${BUILD_LOCAL_CODE}" = "false" ]; then \
        git clone https://github.com/eosphoros-ai/DB-GPT.git /app \
        && cp /app/requirements.txt /tmp/requirements.txt; \
    fi \
    && pip3 install -r /tmp/requirements.txt -i "$PIP_INDEX_URL" --no-cache-dir \
    && rm /tmp/requirements.txt
# RUN python3 -m spacy download zh_core_web_sm
# Install the spaCy model zh_core_web_sm.
#   LANGUAGE = "zh": fetch the 3.5.0 wheel from its GitHub release with wget, install it
#                    with pip3, then delete the downloaded wheel.
#   any other value: let spaCy download the model directly.
# pip's cache directory is removed afterwards, within this same layer. The index URL and
# the cache path are quoted so each is passed to its command as a single argument.
RUN if [ "${LANGUAGE}" = "zh" ]; then \
        wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
        && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i "$PIP_INDEX_URL" \
        && rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl; \
    else \
        python3 -m spacy download zh_core_web_sm; \
    fi \
    && rm -rf "$(pip3 cache dir)"
ARG BUILD_LOCAL_CODE="false"
# COPY the rest of the app
COPY . /tmp/app
# TODO: Need to find a better way to determine whether to build docker image with local code.
# BUILD_LOCAL_CODE = "true": move the copy staged at /tmp/app to /app, then delete its
# logs, pilot/data and pilot/message directories.
# Any other value: the staged copy is not needed, so /tmp/app is deleted.
RUN if [ "${BUILD_LOCAL_CODE}" = "true" ]; then \
        mv /tmp/app / \
        && rm -rf /app/logs \
        && rm -rf /app/pilot/data \
        && rm -rf /app/pilot/message; \
    else \
        rm -rf /tmp/app; \
    fi
EXPOSE 5000