mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-13 13:10:29 +00:00
feat(KnowledgeBase):Add Word97-2003 (.doc) Binary File parsing module (#2544)
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
FROM eosphorosai/dbgpt:latest
|
||||
FROM eosphorosai/dbgpt-full:latest
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ARG PIP_INDEX_URL="https://mirrors.aliyun.com/pypi/simple"
|
||||
ARG PIP_INDEX_URL="https://pypi.tuna.tsinghua.edu.cn/simple"
|
||||
ARG USERNAME
|
||||
ARG DEFAULT_VEN=/opt/.uv.venv
|
||||
ARG EXTRAS="base,proxy_openai,rag,storage_chromadb, storage_elasticsearch,cuda121,hf,quant_bnb,dbgpts"
|
||||
ARG DEFAULT_VENV=/opt/.uv.venv
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
USER root
|
||||
@@ -11,30 +12,38 @@ USER root
|
||||
# between the container user (root) and the host user,
|
||||
# and to resolve the issue of the host user lacking write permissions.
|
||||
RUN . .devcontainer/.env && \
|
||||
groupadd -g $USER_GID $USERNAME && \
|
||||
groupadd -g $USER_GID $GROUPNAME && \
|
||||
useradd -u $USER_UID -g $USER_GID -m $USERNAME && \
|
||||
chown -R $USER_UID:$USER_GID /app
|
||||
RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
curl \
|
||||
wget \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
default-libmysqlclient-dev \
|
||||
ssh zsh autojump curl git-flow vim sudo \
|
||||
&& python${PYTHON_VERSION} -m pip install --upgrade pip \
|
||||
&& python${PYTHON_VERSION} -m pip install --upgrade pipx \
|
||||
&& pipx install -i $PIP_INDEX_URL uv --global \
|
||||
&& chown -R $USERNAME:$USERNAME $DEFAULT_VEN \
|
||||
&& chown -R $USERNAME:$GROUPNAME $DEFAULT_VENV \
|
||||
&& echo "$USERNAME ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
|
||||
&& chmod 0440 /etc/sudoers.d/$USERNAME
|
||||
USER $USERNAME
|
||||
ENV UV_LINK_MODE=copy \
|
||||
PIP_INDEX_URL=$PIP_INDEX_URL \
|
||||
VIRTUAL_ENV=$DEFAULT_VEN \
|
||||
UV_PROJECT_ENVIRONMENT=$DEFAULT_VEN \
|
||||
UV_PYTHON=$DEFAULT_VEN/bin/python3
|
||||
VIRTUAL_ENV=$DEFAULT_VENV \
|
||||
UV_PROJECT_ENVIRONMENT=$DEFAULT_VENV \
|
||||
UV_PYTHON=$DEFAULT_VENV/bin/python3 \
|
||||
UV_INDEX=$PIP_INDEX_URL \
|
||||
UV_DEFAULT_INDEX=$PIP_INDEX_URL
|
||||
|
||||
RUN . $DEFAULT_VEN/bin/activate && \
|
||||
uv pip install --prefix $VIRTUAL_ENV -r pyproject.toml --all-extras --index-url=$PIP_INDEX_URL && \
|
||||
uv pip install --prefix $VIRTUAL_ENV -r requirements/dev-requirements.txt --index-url=$PIP_INDEX_URL && \
|
||||
uv pip install --prefix $VIRTUAL_ENV -r requirements/lint-requirements.txt --index-url=$PIP_INDEX_URL && \
|
||||
RUN sed -i "s|/app/\.venv|${FINAL_VENV_NAME}|g" /${DEFAULT_VENV}/bin/activate && \
|
||||
pip config set global.index-url $PIP_INDEX_URL && \
|
||||
pip config set global.trusted-host $(echo "$PIP_INDEX_URL" | sed -E 's|^https?://([^/]+).*|\1|') && \
|
||||
. $DEFAULT_VENV/bin/activate && \
|
||||
extras=$(echo $EXTRAS | tr ',' '\n' | while read extra; do echo "--extra $extra"; done | tr '\n' ' ') && \
|
||||
uv sync -v --active --all-packages $extras --default-index $PIP_INDEX_URL && \
|
||||
uv pip -v install --prefix $VIRTUAL_ENV -r requirements/dev-requirements.txt && \
|
||||
uv pip -v install --prefix $VIRTUAL_ENV -r requirements/lint-requirements.txt && \
|
||||
cp .devcontainer/dbgpt.pth /opt/.uv.venv/lib/python${PYTHON_VERSION}/site-packages/dbgpt.pth && \
|
||||
python -c "import dbgpt; print(dbgpt.__version__)"
|
36
.devcontainer/README.md
Normal file
36
.devcontainer/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Developing inside a Container
|
||||
Use VS Code's Dev Containers extension to build a containerized development environment. The environment is based on the `eosphorosai/dbgpt:latest` image, which avoids repeated dependency installation and improves development efficiency.
|
||||
NOTE: **Compatible with Linux and Windows Subsystem for Linux (WSL) environments only.**
|
||||
# Setup
|
||||
|
||||
- Follow the guide [Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) to set up the Dev Container:
|
||||
- Install the **Dev Containers** extension.
|
||||
|
||||
- Before the first launch, run the `.devcontainer/init_env.sh` script from the project root directory on the **host** machine.
|
||||
- Create a `models` directory in the project root and download `text2vec-large-chinese` into `models/text2vec-large-chinese`.
|
||||
- Use the shortcut `Ctrl+Shift+P` to open the command palette, then enter `Dev Containers: Open Folder in Container`.
|
||||
|
||||
# Develop
|
||||
After the Dev Container has started successfully, open a terminal and follow the steps below.
|
||||
|
||||
- Activate the virtual environment
|
||||
```bash
|
||||
. /opt/.uv.venv/bin/activate
|
||||
```
|
||||
|
||||
- Customize the configuration file
|
||||
|
||||
You can copy the configuration file to the `.devcontainer` directory and rename it to `dev.toml` to avoid committing your personal configurations to the repository.
|
||||
```bash
|
||||
cp configs/dbgpt-app-config.example.toml .devcontainer/dev.toml
|
||||
```
|
||||
|
||||
- Start the service
|
||||
|
||||
```bash
|
||||
dbgpt start webserver --config .devcontainer/dev.toml
|
||||
```
|
||||
|
||||
# Create A Pull Request
|
||||
|
||||
Please refer to [CONTRIBUTING.md](../CONTRIBUTING.md). Before running any `make` targets or `git commit`, remember to deactivate the virtual environment that is currently active in the development environment.
|
@@ -20,10 +20,19 @@ printf "OS=%s\nUSERNAME=%s\nUSER_UID=%s\nGROUPNAME=%s\nUSER_GID=%s\n" \
|
||||
|
||||
# sharing-git-credentials see https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials
|
||||
init_ssh_agent(){
|
||||
if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
|
||||
RUNNING_AGENT="$(ps -ax | grep '''ssh-agent -s''' | grep -v grep | wc -l)"
|
||||
if [ "$RUNNING_AGENT" = "0" ]; then
|
||||
ssh-agent -s &> $HOME/.ssh/ssh-agent
|
||||
fi
|
||||
eval $(cat $HOME/.ssh/ssh-agent) > /dev/null
|
||||
ssh-add 2> /dev/null
|
||||
echo $SSH_AUTH_SOCK
|
||||
fi
|
||||
# Define code block to insert (with unique identifier comment)
|
||||
SSH_AGENT_CODE='# SSH Agent Auto Management[ID:ssh_agent_v1]
|
||||
if [ -z "$SSH_AUTH_SOCK" ]; then
|
||||
RUNNING_AGENT="$(ps -ax | grep '\''ssh-agent -s'\'' | grep -v grep | wc -l | tr -d '\''[:space:]'\'')"
|
||||
if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
|
||||
RUNNING_AGENT="$(ps -ax | grep '\''ssh-agent -s'\'' | grep -v grep | wc -l)"
|
||||
if [ "$RUNNING_AGENT" = "0" ]; then
|
||||
ssh-agent -s &> $HOME/.ssh/ssh-agent
|
||||
fi
|
||||
@@ -32,11 +41,7 @@ if [ -z "$SSH_AUTH_SOCK" ]; then
|
||||
fi
|
||||
# END_SSH_AGENT_CODE'
|
||||
|
||||
# Auto detect shell type
|
||||
TARGET_FILE="$HOME/.bashrc"
|
||||
if [[ "$SHELL" == *"zsh"* ]]; then
|
||||
TARGET_FILE="$HOME/.zshrc"
|
||||
fi
|
||||
|
||||
# Create .ssh directory if not exists
|
||||
mkdir -p "$HOME/.ssh"
|
||||
@@ -45,6 +50,9 @@ mkdir -p "$HOME/.ssh"
|
||||
if ! grep -q 'END_SSH_AGENT_CODE' "$TARGET_FILE"; then
|
||||
echo "Adding SSH agent management code to ${TARGET_FILE}..."
|
||||
echo "$SSH_AGENT_CODE" >> "$TARGET_FILE"
|
||||
if [[ "$SHELL" == *"zsh"* ]]; then
|
||||
echo "$SSH_AGENT_CODE" >> "$HOME/.zshrc"
|
||||
fi
|
||||
echo "Code added successfully. Please run source ${TARGET_FILE} to apply changes immediately"
|
||||
else
|
||||
echo "Existing SSH agent code detected, no need to add again"
|
||||
|
@@ -35,8 +35,8 @@ fi
|
||||
|
||||
# Configuration section remains the same...
|
||||
# Apply custom configuration
|
||||
if [ -f /workspace/.devcontainer/zshrc-config ]; then
|
||||
cp /workspace/.devcontainer/zshrc-config ~/.zshrc
|
||||
if [ -f /app/.devcontainer/zshrc-config ]; then
|
||||
cp /app/.devcontainer/zshrc-config ~/.zshrc
|
||||
else
|
||||
# Generate basic .zshrc if no custom configuration exists
|
||||
cat << EOF >> ~/.zshrc
|
||||
|
Reference in New Issue
Block a user