feat(KnowledgeBase):Add Word97-2003 (.doc) Binary File parsing module (#2544)

This commit is contained in:
geebytes
2025-03-29 11:05:06 +08:00
committed by GitHub
parent 7c05bc1784
commit c86243aa62
16 changed files with 2724 additions and 524 deletions

View File

@@ -1,8 +1,9 @@
# Dev-container image setup: base image, build arguments, and workspace location.
# NOTE(review): this text is a rendered diff, so the superseded and the replacement
# version of a changed line appear back to back (for example the two FROM lines and
# the two PIP_INDEX_URL lines below) -- confirm which one is live in the real file.
FROM eosphorosai/dbgpt:latest
FROM eosphorosai/dbgpt-full:latest
# Python minor version; used below to pick the python<version>-dev package, the
# interpreter that bootstraps pip/pipx, and the site-packages path.
ARG PYTHON_VERSION=3.11
# Package index passed to pipx, pip config, and uv in the later steps.
ARG PIP_INDEX_URL="https://mirrors.aliyun.com/pypi/simple"
ARG PIP_INDEX_URL="https://pypi.tuna.tsinghua.edu.cn/simple"
# Name of the non-root development user. No default is given here -- presumably
# supplied as a build argument; verify against the devcontainer configuration.
ARG USERNAME
ARG DEFAULT_VEN=/opt/.uv.venv
# Comma-separated project extras; the dependency-install step turns each entry
# into an `--extra <name>` flag for `uv sync`.
ARG EXTRAS="base,proxy_openai,rag,storage_chromadb, storage_elasticsearch,cuda121,hf,quant_bnb,dbgpts"
# Path of the virtual environment used by uv.
ARG DEFAULT_VENV=/opt/.uv.venv
# Copy the build context into /app and run the following setup steps as root.
WORKDIR /app
COPY . .
USER root
@@ -11,30 +12,38 @@ USER root
# between the container user (root) and the host user,
# and to resolve the issue of the host user lacking write permissions.
# Source .devcontainer/.env, create the group and user with the UID/GID it provides,
# and give that UID/GID ownership of /app.
# The .env file is expected to define USER_UID, USER_GID, USERNAME and GROUPNAME --
# TODO confirm against the script that generates it.
# NOTE(review): two groupadd lines appear because this view shows the old ($USERNAME)
# and new ($GROUPNAME) diff lines together.
RUN . .devcontainer/.env && \
groupadd -g $USER_GID $USERNAME && \
groupadd -g $USER_GID $GROUPNAME && \
useradd -u $USER_UID -g $USER_GID -m $USERNAME && \
chown -R $USER_UID:$USER_GID /app
# Install OS-level development tooling with apt, upgrade pip and pipx for the selected
# Python version, install uv globally through pipx from PIP_INDEX_URL, hand ownership
# of the virtual environment to the development user, and grant that user
# passwordless sudo via a 0440 drop-in under /etc/sudoers.d.
# NOTE(review): two chown lines appear because this view shows the old ($DEFAULT_VEN)
# and new ($DEFAULT_VENV) diff lines together. $GROUPNAME is not declared as an ARG in
# the visible lines -- confirm where it is set for this RUN step.
RUN apt-get update && apt-get install -y \
git \
curl \
wget \
python${PYTHON_VERSION}-dev \
default-libmysqlclient-dev \
ssh zsh autojump curl git-flow vim sudo \
&& python${PYTHON_VERSION} -m pip install --upgrade pip \
&& python${PYTHON_VERSION} -m pip install --upgrade pipx \
&& pipx install -i $PIP_INDEX_URL uv --global \
&& chown -R $USERNAME:$USERNAME $DEFAULT_VEN \
&& chown -R $USERNAME:$GROUPNAME $DEFAULT_VENV \
&& echo "$USERNAME ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME
# Drop root: the remaining instructions run as the development user.
USER $USERNAME
# Environment for uv and pip: copy link mode, the package index, and the location
# and interpreter of the virtual environment.
# NOTE(review): the $DEFAULT_VEN lines are the superseded side of the diff and the
# $DEFAULT_VENV / UV_INDEX / UV_DEFAULT_INDEX lines are the replacement side; in this
# merged view the continuation backslashes do not line up, so check the real file.
ENV UV_LINK_MODE=copy \
PIP_INDEX_URL=$PIP_INDEX_URL \
VIRTUAL_ENV=$DEFAULT_VEN \
UV_PROJECT_ENVIRONMENT=$DEFAULT_VEN \
UV_PYTHON=$DEFAULT_VEN/bin/python3
VIRTUAL_ENV=$DEFAULT_VENV \
UV_PROJECT_ENVIRONMENT=$DEFAULT_VENV \
UV_PYTHON=$DEFAULT_VENV/bin/python3 \
UV_INDEX=$PIP_INDEX_URL \
UV_DEFAULT_INDEX=$PIP_INDEX_URL
# Install the project's Python dependencies into the virtual environment.
# NOTE(review): this view merges two versions of the step. The first RUN (activate,
# then three `uv pip install` calls with --index-url) is the superseded side; the
# second RUN starting with `sed` is the replacement side.
# Replacement flow, in order:
#   - rewrite "/app/.venv" in the venv's activate script to ${FINAL_VENV_NAME}
#     (FINAL_VENV_NAME is not defined in the visible lines -- confirm where it is set);
#   - point pip's global index-url at PIP_INDEX_URL and set trusted-host to the host
#     part of that URL (scheme and path stripped by the sed expression);
#   - activate the venv and expand the comma-separated EXTRAS into `--extra <name>` flags;
#   - run `uv sync` with those flags, then install the dev and lint requirement files;
#   - copy .devcontainer/dbgpt.pth into site-packages (the target path hard-codes
#     /opt/.uv.venv rather than using $DEFAULT_VENV);
#   - import dbgpt and print its version as a final check of the install.
RUN . $DEFAULT_VEN/bin/activate && \
uv pip install --prefix $VIRTUAL_ENV -r pyproject.toml --all-extras --index-url=$PIP_INDEX_URL && \
uv pip install --prefix $VIRTUAL_ENV -r requirements/dev-requirements.txt --index-url=$PIP_INDEX_URL && \
uv pip install --prefix $VIRTUAL_ENV -r requirements/lint-requirements.txt --index-url=$PIP_INDEX_URL && \
RUN sed -i "s|/app/\.venv|${FINAL_VENV_NAME}|g" /${DEFAULT_VENV}/bin/activate && \
pip config set global.index-url $PIP_INDEX_URL && \
pip config set global.trusted-host $(echo "$PIP_INDEX_URL" | sed -E 's|^https?://([^/]+).*|\1|') && \
. $DEFAULT_VENV/bin/activate && \
extras=$(echo $EXTRAS | tr ',' '\n' | while read extra; do echo "--extra $extra"; done | tr '\n' ' ') && \
uv sync -v --active --all-packages $extras --default-index $PIP_INDEX_URL && \
uv pip -v install --prefix $VIRTUAL_ENV -r requirements/dev-requirements.txt && \
uv pip -v install --prefix $VIRTUAL_ENV -r requirements/lint-requirements.txt && \
cp .devcontainer/dbgpt.pth /opt/.uv.venv/lib/python${PYTHON_VERSION}/site-packages/dbgpt.pth && \
python -c "import dbgpt; print(dbgpt.__version__)"

36
.devcontainer/README.md Normal file
View File

@@ -0,0 +1,36 @@
# Developing inside a Container
Use VS Code's **Dev Containers** extension to build a containerized development environment. The environment is based on the eosphorosai/dbgpt:latest image, which avoids repeated dependency installation and speeds up development.
NOTE: **Compatible with Linux and Windows Subsystem for Linux (WSL) environments only.**
# Setup
- Follow the guide [Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) to set up the Dev Container:
- Install the **Dev Containers** extension.
- Before the first launch, run the `.devcontainer/init_env.sh` script from the project root directory on the **host** machine.
- Create a `models` directory in the project root and download text2vec-large-chinese into `models/text2vec-large-chinese`.
- Use the shortcut `Ctrl+Shift+P` to open the command palette, then enter `Dev Containers: Open Folder in Container`.
# Develop
After the Dev Container has started successfully, open a terminal inside it and follow these steps:
- Activate the virtual environment
```bash
. /opt/.uv.venv/bin/activate
```
- Customize the configuration file
You can copy the configuration file to the `.devcontainer` directory and rename it to `dev.toml` to avoid committing your personal configurations to the repository.
```bash
cp configs/dbgpt-app-config.example.toml .devcontainer/dev.toml
```
- Start the service
```bash
dbgpt start webserver --config .devcontainer/dev.toml
```
# Create A Pull Request
Please refer to [CONTRIBUTING.md](../CONTRIBUTING.md). Before running any `make` command or `git commit`, remember to deactivate the virtual environment that is active in the development environment.

View File

@@ -20,10 +20,19 @@ printf "OS=%s\nUSERNAME=%s\nUSER_UID=%s\nGROUPNAME=%s\nUSER_GID=%s\n" \
# sharing-git-credentials see https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials
init_ssh_agent(){
if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
RUNNING_AGENT="$(ps -ax | grep '''ssh-agent -s''' | grep -v grep | wc -l)"
if [ "$RUNNING_AGENT" = "0" ]; then
ssh-agent -s &> $HOME/.ssh/ssh-agent
fi
eval $(cat $HOME/.ssh/ssh-agent) > /dev/null
ssh-add 2> /dev/null
echo $SSH_AUTH_SOCK
fi
# Define code block to insert (with unique identifier comment)
SSH_AGENT_CODE='# SSH Agent Auto Management[ID:ssh_agent_v1]
if [ -z "$SSH_AUTH_SOCK" ]; then
RUNNING_AGENT="$(ps -ax | grep '\''ssh-agent -s'\'' | grep -v grep | wc -l | tr -d '\''[:space:]'\'')"
if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
RUNNING_AGENT="$(ps -ax | grep '\''ssh-agent -s'\'' | grep -v grep | wc -l)"
if [ "$RUNNING_AGENT" = "0" ]; then
ssh-agent -s &> $HOME/.ssh/ssh-agent
fi
@@ -32,11 +41,7 @@ if [ -z "$SSH_AUTH_SOCK" ]; then
fi
# END_SSH_AGENT_CODE'
# Auto detect shell type
TARGET_FILE="$HOME/.bashrc"
if [[ "$SHELL" == *"zsh"* ]]; then
TARGET_FILE="$HOME/.zshrc"
fi
# Create .ssh directory if not exists
mkdir -p "$HOME/.ssh"
@@ -45,6 +50,9 @@ mkdir -p "$HOME/.ssh"
if ! grep -q 'END_SSH_AGENT_CODE' "$TARGET_FILE"; then
echo "Adding SSH agent management code to ${TARGET_FILE}..."
echo "$SSH_AGENT_CODE" >> "$TARGET_FILE"
if [[ "$SHELL" == *"zsh"* ]]; then
echo "$SSH_AGENT_CODE" >> "$HOME/.zshrc"
fi
echo "Code added successfully. Please run source ${TARGET_FILE} to apply changes immediately"
else
echo "Existing SSH agent code detected, no need to add again"

View File

@@ -35,8 +35,8 @@ fi
# Configuration section remains the same...
# Apply custom configuration
if [ -f /workspace/.devcontainer/zshrc-config ]; then
cp /workspace/.devcontainer/zshrc-config ~/.zshrc
if [ -f /app/.devcontainer/zshrc-config ]; then
cp /app/.devcontainer/zshrc-config ~/.zshrc
else
# Generate basic .zshrc if no custom configuration exists
cat << EOF >> ~/.zshrc