feat(ChatExcel): ChatExcel Use AntV Chart

1. Native support for AntV Chart
This commit is contained in:
yhjun1026
2023-11-10 16:50:44 +08:00
152 changed files with 7524 additions and 924 deletions


@@ -23,6 +23,15 @@ WEB_SERVER_PORT=7860
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b-v1.5
## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL.
## Of course you can specify your model path according to LLM_MODEL_PATH
## In DB-GPT, the priority from high to low to read model path:
## 1. environment variable with key: {LLM_MODEL}_MODEL_PATH (Avoid multi-model conflicts)
## 2. environment variable with key: MODEL_PATH
## 3. environment variable with key: LLM_MODEL_PATH
## 4. the config in /pilot/configs/model_config.LLM_MODEL_CONFIG
# LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
MAX_POSITION_EMBEDDINGS=4096
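As a rough illustration of the lookup order described in the comments above (a sketch only; the exact spelling of the per-model key for a given model name is an assumption to verify against model_config), a `.env` might combine these keys like this:

```shell
# Sketch of the model-path lookup order described above (illustrative only).
LLM_MODEL=vicuna-13b-v1.5

# 1. Highest priority: a per-model key of the form {LLM_MODEL}_MODEL_PATH
#    (exact key spelling for a given model is not shown here; check model_config).
# 2. Next: the generic MODEL_PATH override.
MODEL_PATH=/app/models/vicuna-13b-v1.5
# 3. Then: LLM_MODEL_PATH.
# LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# 4. Finally: the entry in /pilot/configs/model_config.LLM_MODEL_CONFIG.
```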

CODE_OF_CONDUCT (new file, 126 lines)

@@ -0,0 +1,126 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
[INSERT CONTACT METHOD].
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
*Community Impact*: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
*Consequence*: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
*Community Impact*: A violation through a single incident or series of
actions.
*Consequence*: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
*Community Impact*: A serious violation of community standards, including
sustained inappropriate behavior.
*Consequence*: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
*Community Impact*: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
*Consequence*: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].


@@ -25,8 +25,8 @@
<a href="https://github.com/eosphoros-ai/DB-GPT/issues">
<img alt="Open Issues" src="https://img.shields.io/github/issues-raw/eosphoros-ai/DB-GPT" />
</a>
<a href="https://discord.gg/vqBrcV7Nd">
<img alt="Discord" src="https://dcbadge.vercel.app/api/server/vqBrcV7Nd?compact=true&style=flat" />
<a href="https://discord.gg/nASQyBjvY">
<img alt="Discord" src="https://dcbadge.vercel.app/api/server/nASQyBjvY?compact=true&style=flat" />
</a>
<a href="https://codespaces.new/eosphoros-ai/DB-GPT">
<img alt="Open in GitHub Codespaces" src="https://github.com/codespaces/badge.svg" />
@@ -34,7 +34,7 @@
</p>
[**简体中文**](README.zh.md) |[**Discord**](https://discord.gg/vqBrcV7Nd) |[**Documents**](https://db-gpt.readthedocs.io/en/latest/)|[**Wechat**](https://github.com/eosphoros-ai/DB-GPT/blob/main/README.zh.md#%E8%81%94%E7%B3%BB%E6%88%91%E4%BB%AC)|[**Community**](https://github.com/eosphoros-ai/community)
[**简体中文**](README.zh.md) |[**Discord**](https://discord.gg/nASQyBjvY) |[**Documents**](https://db-gpt.readthedocs.io/en/latest/)|[**Wechat**](https://github.com/eosphoros-ai/DB-GPT/blob/main/README.zh.md#%E8%81%94%E7%B3%BB%E6%88%91%E4%BB%AC)|[**Community**](https://github.com/eosphoros-ai/community)
</div>
## What is DB-GPT?
@@ -43,8 +43,8 @@ DB-GPT is an experimental open-source project that uses localized GPT large mode
## Contents
- [install](#install)
- [demo](#demo)
- [Install](#install)
- [Demo](#demo)
- [introduction](#introduction)
- [features](#features)
- [contribution](#contribution)
@@ -75,8 +75,8 @@ Run on an RTX 4090 GPU.
![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)
[**Usage Tutorial**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html)
- [**Install**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html)
- [**Install Step by Step**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html)
- [**Install**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy.html)
- [**Install Step by Step**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy.html)
- [**Docker Install**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/docker/docker.html)
- [**Docker Compose**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/docker_compose/docker_compose.html)
- [**How to Use**](https://db-gpt.readthedocs.io/en/latest/getting_started/application/chatdb/chatdb.html)
@@ -107,11 +107,11 @@ Currently, we have released multiple key features, which are listed below to dem
- Multi-Agents&Plugins
Supports custom plug-ins to perform tasks, natively supports the Auto-GPT plug-in model, and the Agents protocol adopts the Agent Protocol standard
It supports custom plug-ins to perform tasks, natively supports the Auto-GPT plug-in model, and the Agents protocol adopts the Agent Protocol standard.
- Fine-tuning text2SQL
An automated fine-tuning lightweight framework built around large language models, Text2SQL data sets, LoRA/QLoRA/Pturning and other fine-tuning methods, making TextSQL fine-tuning as convenient as an assembly line. [DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub)
An automated fine-tuning lightweight framework built around large language models, Text2SQL data sets, LoRA/QLoRA/Pturning, and other fine-tuning methods, making TextSQL fine-tuning as convenient as an assembly line. [DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub)
- Multi LLMs Support, Supports multiple large language models, currently supporting
@@ -141,7 +141,7 @@ Currently, we have released multiple key features, which are listed below to dem
- [Llama2-Chinese-13b-Chat](https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat)
- [OpenLLaMa OpenInstruct](https://huggingface.co/VMware/open-llama-7b-open-instruct)
etc.
Etc.
- Support API Proxy LLMs
- [x] [ChatGPT](https://api.openai.com/)
@@ -151,7 +151,7 @@ Currently, we have released multiple key features, which are listed below to dem
- Privacy and security
The privacy and security of data are ensured through various technologies such as privatized large models and proxy desensitization.
The privacy and security of data are ensured through various technologies, such as privatized large models and proxy desensitization.
- Support Datasources
@@ -177,7 +177,7 @@ Currently, we have released multiple key features, which are listed below to dem
| [StarRocks](https://github.com/StarRocks/starrocks) | No | TODO |
## Introduction
Is the architecture of the entire DB-GPT shown in the following figure:
The architecture of the entire DB-GPT is shown.
<p align="center">
<img src="./assets/DB-GPT.png" width="800" />
@@ -185,7 +185,7 @@ Is the architecture of the entire DB-GPT shown in the following figure:
The core capabilities mainly consist of the following parts:
1. Multi-Models: Support multi-LLMs, such as LLaMA/LLaMA2、CodeLLaMA、ChatGLM, QWen、Vicuna and proxy model ChatGPT、Baichuan、tongyi、wenxin etc
2. Knowledge Based QA: You can perform high-quality intelligent Q&A based on local documents such as pdf, word, excel and other data.
2. Knowledge-Based QA: You can perform high-quality intelligent Q&A based on local documents such as PDF, word, excel, and other data.
3. Embedding: Unified data vector storage and indexing, Embed data as vectors and store them in vector databases, providing content similarity search.
4. Multi-Datasources: Used to connect different modules and data sources to achieve data flow and interaction.
5. Multi-Agents: Provides Agent and plugin mechanisms, allowing users to customize and enhance the system's behavior.
@@ -199,7 +199,7 @@ The core capabilities mainly consist of the following parts:
### SubModule
- [DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub) Text-to-SQL performance by applying Supervised Fine-Tuning (SFT) on large language models.
- [DB-GPT-Plugins](https://github.com/eosphoros-ai/DB-GPT-Plugins) DB-GPT Plugins, Can run autogpt plugin directly
- [DB-GPT-Plugins](https://github.com/eosphoros-ai/DB-GPT-Plugins) DB-GPT Plugins Can run autogpt plugin directly
- [DB-GPT-Web](https://github.com/eosphoros-ai/DB-GPT-Web) ChatUI for DB-GPT
## Image
@@ -213,7 +213,7 @@ The core capabilities mainly consist of the following parts:
## Contribution
- Please run `black .` before submitting the code. contributing guidelines, [how to contribution](https://github.com/csunny/DB-GPT/blob/main/CONTRIBUTING.md)
- Please run `black .` before submitting the code. Contributing guidelines, [how to contribute](https://github.com/csunny/DB-GPT/blob/main/CONTRIBUTING.md)
## RoadMap
@@ -224,7 +224,7 @@ The core capabilities mainly consist of the following parts:
### KBQA RAG optimization
- [x] Multi Documents
- [x] PDF
- [x] Excel, csv
- [x] Excel, CSV
- [x] Word
- [x] Text
- [x] MarkDown
@@ -235,7 +235,7 @@ The core capabilities mainly consist of the following parts:
- [ ] Graph Database
- [ ] Neo4j Graph
- [ ] Nebula Graph
- [x] Multi Vector Database
- [x] Multi-Vector Database
- [x] Chroma
- [x] Milvus
- [x] Weaviate
@@ -330,8 +330,8 @@ As of October 10, 2023, by fine-tuning an open-source model of 13 billion parame
The MIT License (MIT)
## Contact Information
We are working on building a community, if you have any ideas about building the community, feel free to contact us.
[![](https://dcbadge.vercel.app/api/server/vqBrcV7Nd?compact=true&style=flat)](https://discord.gg/vqBrcV7Nd)
We are working on building a community, if you have any ideas for building the community, feel free to contact us.
[![](https://dcbadge.vercel.app/api/server/nASQyBjvY?compact=true&style=flat)](https://discord.gg/nASQyBjvY)
<p align="center">
<img src="./assets/wechat.jpg" width="300px" />


@@ -22,15 +22,15 @@
<a href="https://github.com/eosphoros-ai/DB-GPT/issues">
<img alt="Open Issues" src="https://img.shields.io/github/issues-raw/csunny/DB-GPT" />
</a>
<a href="https://discord.gg/vqBrcV7Nd">
<img alt="Discord" src="https://dcbadge.vercel.app/api/server/vqBrcV7Nd?compact=true&style=flat" />
<a href="https://discord.gg/nASQyBjvY">
<img alt="Discord" src="https://dcbadge.vercel.app/api/server/nASQyBjvY?compact=true&style=flat" />
</a>
<a href="https://codespaces.new/eosphoros-ai/DB-GPT">
<img alt="Open in GitHub Codespaces" src="https://github.com/codespaces/badge.svg" />
</a>
</p>
[**English**](README.md)|[**Discord**](https://discord.gg/vqBrcV7Nd)|[**文档**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/)|[**微信**](https://github.com/csunny/DB-GPT/blob/main/README.zh.md#%E8%81%94%E7%B3%BB%E6%88%91%E4%BB%AC)|[**社区**](https://github.com/eosphoros-ai/community)
[**English**](README.md)|[**Discord**](https://discord.gg/nASQyBjvY)|[**文档**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/)|[**微信**](https://github.com/csunny/DB-GPT/blob/main/README.zh.md#%E8%81%94%E7%B3%BB%E6%88%91%E4%BB%AC)|[**社区**](https://github.com/eosphoros-ai/community)
</div>
## DB-GPT 是什么?
@@ -91,9 +91,9 @@ DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地
![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)
![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)
[**教程**](https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html)
- [**安装**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/install/deploy/deploy.html)
- [**Install Step by Step**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/install/deploy/deploy.html)
[**教程**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh-cn/latest/getting_started/install/deploy.html)
- [**安装**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh-cn/latest/getting_started/install/deploy.html)
- [**Install Step by Step**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh-cn/latest/getting_started/install/deploy.html)
- [**Docker安装**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/install/docker/docker.html)
- [**Docker Compose安装**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/install/docker_compose/docker_compose.html)
- [**产品使用手册**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/application/chatdb/chatdb.html)

assets/wechat.jpg (new binary file, 240 KiB; binary content not shown)


@@ -28,7 +28,9 @@ WORKDIR /app
# RUN pip3 install -i $PIP_INDEX_URL ".[all]"
RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
&& pip3 install -i $PIP_INDEX_URL ".[$DB_GPT_INSTALL_MODEL]"
&& pip3 install -i $PIP_INDEX_URL ".[$DB_GPT_INSTALL_MODEL]" \
# install openai for proxyllm
&& pip3 install -i $PIP_INDEX_URL ".[openai]"
RUN (if [ "${LANGUAGE}" = "zh" ]; \
# language is zh, download zh_core_web_sm from github
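For orientation, building this image with a specific install profile might look like the sketch below; the `DB_GPT_INSTALL_MODEL` build argument comes from the Dockerfile above, while the tag and the `default` value are assumptions.

```shell
# Sketch: build the image with a chosen install profile.
# The build-arg name is taken from the Dockerfile; the tag and value are illustrative.
docker build \
  --build-arg DB_GPT_INSTALL_MODEL=default \
  -t eosphorosai/dbgpt:latest .
```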


@@ -7,6 +7,16 @@ services:
restart: unless-stopped
networks:
- dbgptnet
api-server:
image: eosphorosai/dbgpt:latest
command: dbgpt start apiserver --controller_addr http://controller:8000
restart: unless-stopped
depends_on:
- controller
networks:
- dbgptnet
ports:
- 8100:8100/tcp
llm-worker:
image: eosphorosai/dbgpt:latest
command: dbgpt start worker --model_name vicuna-13b-v1.5 --model_path /app/models/vicuna-13b-v1.5 --port 8001 --controller_addr http://controller:8000
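With the new `api-server` service above, a quick smoke test after starting the stack could look like this sketch; the model-list endpoint and `EMPTY` token mirror the OpenAI-compatible API docs added later in this commit, and whether the Authorization header is required depends on how `--api_keys` is configured.

```shell
# Sketch: bring the cluster up and probe the API server on the mapped port 8100.
docker compose up -d
curl http://127.0.0.1:8100/api/v1/models \
  -H "Authorization: Bearer EMPTY" \
  -H "Content-Type: application/json"
```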

docs/_static/css/custom.css (new vendored file, 928 lines)

@@ -0,0 +1,928 @@
/* override default colors used in the Sphinx theme */
:root {
--tabs-color-label-active: #0475DE;
--tabs-color-label-hover: #0475DE;
--buttons-color-blue: #0475DE;
--tabs-color-label-inactive: #9E9E9E;
--tabs-color-overline: #e0e0e0;
--tabs-color-underline: #e0e0e0;
--border-color-gray: #e0e0e0;
--background-color-light-gray:#fafafa;
--background-color-disabled: #9E9E9E;
--pst-color-link: 4, 117, 222;
--pst-color-primary: 4, 117, 222;
--pst-color-text-secondary: #616161;
--blue: #0475DE;
--sidebar-top: 5em;
}
/* Remove flicker for announcement top bar replacement */
.header-item.announcement {
background-color: white;
color: white;
padding: 0;
}
/* Make the book theme secondary nav stick below the new main top nav */
.header-article {
top: 58px;
z-index: 900 !important;
}
.toctree-l1.has-children {
font-weight: bold;
}
.toctree-l2 {
font-weight: normal;
}
div.navbar-brand-box {
padding-top: 4em;
}
td p {
margin-left: 0.75rem;
}
table.longtable.table.autosummary {
table-layout: fixed;
}
.table.autosummary td {
width: 100%;
}
tr.row-odd {
background-color: #f9fafb;
}
/* For Algolia search box
* overflow-y: to flow-over horizontally into main content
* height: to prevent topbar overlap
*/
#site-navigation {
overflow-y: auto;
height: calc(100vh - var(--sidebar-top));
position: sticky;
top: var(--sidebar-top) !important;
}
/* Center the algolia search bar*/
#search-input {
text-align: center;
}
.algolia-autocomplete {
width: 100%;
margin: auto;
}
/* Hide confusing "<-" back arrow in navigation for larger displays */
@media (min-width: 768px) {
#navbar-toggler {
display: none;
}
}
/* Make navigation scrollable on mobile, by making algolia not overflow */
@media (max-width: 768px) {
#site-navigation {
overflow-y: scroll;
}
.algolia-autocomplete .ds-dropdown-menu{
min-width: 250px;
}
}
/* sphinx-panels overrides the content width to 1140 for large displays.*/
@media (min-width: 1200px) {
.container, .container-lg, .container-md, .container-sm, .container-xl {
max-width: 1400px !important;
}
}
.bottom-right-promo-banner {
position: fixed;
bottom: 100px;
right: 20px;
width: 270px;
}
@media (max-width: 1500px) {
.bottom-right-promo-banner {
display: none;
}
}
@media screen and (max-width: 767px) {
.remove-mobile {
display: none;
}
}
@media screen and (max-width: 767px) {
.row-2-column {
flex-direction: column;
margin-top: 20px;
}
}
/* Make Algolia search box scrollable */
.algolia-autocomplete .ds-dropdown-menu {
height: 60vh !important;
overflow-y: scroll !important;
}
.bd-sidebar__content {
overflow-y: unset !important;
}
.bd-sidebar__top {
display: flex;
flex-direction: column;
}
.bd-sidebar li {
position: relative;
word-wrap: break-word;
}
nav.bd-links {
flex: 1;
}
nav.bd-links::-webkit-scrollbar-thumb {
background-color: #ccc;
}
nav.bd-links::-webkit-scrollbar {
width: 5px;
}
dt:target, span.highlighted {
background-color: white;
}
div.sphx-glr-bigcontainer {
display: inline-block;
width: 100%;
}
td.tune-colab,
th.tune-colab {
border: 1px solid #dddddd;
text-align: left;
padding: 8px;
}
/* Adjustment to Sphinx Book Theme */
.table td {
/* Remove row spacing on the left */
padding-left: 0;
}
.table thead th {
/* Remove row spacing on the left */
padding-left: 0;
}
img.inline-figure {
/* Override the display: block for img */
display: inherit !important;
}
#version-warning-banner {
/* Make version warning clickable */
z-index: 1;
margin-left: 0;
/* 20% is for ToC rightbar */
/* 2 * 1.5625em is for horizontal margins */
width: calc(100% - 20% - 2 * 1.5625em);
}
/* allow scrollable images */
.figure {
max-width: 100%;
overflow-x: auto;
}
img.horizontal-scroll {
max-width: none;
}
.clear-both {
clear: both;
min-height: 100px;
margin-top: 15px;
}
.buttons-float-left {
width: 150px;
float: left;
}
.buttons-float-right {
width: 150px;
float: right;
}
.card-body {
padding: 0.5rem !important;
}
/* custom css for pre elements */
pre {
/* Wrap code blocks instead of horizontal scrolling. */
white-space: pre-wrap;
box-shadow: none;
border-color: var(--border-color-gray);
background-color: var(--background-color-light-gray);
border-radius:0.25em;
}
/* notebook formatting */
.cell .cell_output {
max-height: 250px;
overflow-y: auto;
font-weight: bold;
}
/* Yellow doesn't render well on light background */
.cell .cell_output pre .-Color-Yellow {
color: #785840;
}
/* Newlines (\a) and spaces (\20) before each parameter */
.sig-param::before {
content: "\a\20\20\20\20";
white-space: pre;
}
/* custom css for outlined buttons */
.btn-outline-info:hover span, .btn-outline-primary:hover span {
color: #fff;
}
.btn-outline-info, .btn-outline-primary{
border-color: var(--buttons-color-blue);
}
.btn-outline-info:hover, .btn-outline-primary:hover{
border-color: var(--buttons-color-blue);
background-color: var(--buttons-color-blue);
}
.btn-outline-info.active:not(:disabled):not(.disabled), .btn-outline-info:not(:disabled):not(.disabled):active, .show>.btn-outline-info.dropdown-toggle {
border-color: var(--buttons-color-blue);
background-color: var(--buttons-color-blue);
color: #fff;
}
.btn-info, .btn-info:hover, .btn-info:focus {
border-color: var(--buttons-color-blue);
background-color: var(--buttons-color-blue);
}
.btn-info:hover{
opacity: 90%;
}
.btn-info:disabled{
border-color: var(--background-color-disabled);
background-color: var(--background-color-disabled);
opacity: 100%;
}
.btn-info.active:not(:disabled):not(.disabled), .btn-info:not(:disabled):not(.disabled):active, .show>.btn-info.dropdown-toggle {
border-color: var(--buttons-color-blue);
background-color: var(--buttons-color-blue);
}
.topnav {
background-color: white;
border-bottom: 1px solid rgba(0, 0, 0, .1);
display: flex;
align-items: center;
}
/* Content wrapper for the unified nav link / menus */
.top-nav-content {
max-width: 1400px;
width: 100%;
margin-left: auto;
margin-right: auto;
padding: 0 1.5rem;
display: flex;
align-items: center;
justify-content: space-between;
}
@media (max-width: 900px) {
/* If the window is too small, hide the custom sticky navigation bar at the top of the page.
Also make the pydata-sphinx-theme nav bar, which usually sits below the top nav bar, stick
to the top of the page.
*/
.top-nav-content {
display: none;
}
div.header-article.row.sticky-top.noprint {
position: sticky;
top: 0;
}
}
/* Styling the links and menus in the top nav */
.top-nav-content a {
text-decoration: none;
color: black;
font-size: 17px;
}
.top-nav-content a:hover {
color: #007bff;
}
/* The left part are the links and menus */
.top-nav-content > .left {
display: flex;
white-space: nowrap;
}
.top-nav-content .left > * {
margin-right: 8px;
}
.top-nav-content .left > a,
.top-nav-content .left > .menu > a {
text-align: center;
padding: 14px 16px;
border-bottom: 2px solid white;
}
.top-nav-content .menu:hover > a,
.top-nav-content .left > a:hover {
border-bottom: 2px solid #007bff;
}
/* Special styling for the Ray logo */
.top-nav-content .left > a.ray-logo {
width: 90px;
padding: 10px 0;
}
.top-nav-content .left > a.ray-logo:hover {
border-bottom: 2px solid white;
}
/* Styling the dropdown menus */
.top-nav-content .menu {
display: flex;
}
.top-nav-content .menu > a > .down-caret {
margin-left: 8px;
}
.top-nav-content .menu > ul {
display: none;
}
.top-nav-content > button.try-anyscale > span {
margin: 0 12px;
}
.top-nav-content .menu:hover > ul {
display: flex;
flex-direction: column;
align-items: flex-start;
box-shadow: 0 5px 15px 0 rgb(0 0 0 / 10%);
padding: 15px;
width: 330px;
position: absolute;
z-index: 2000;
background-color: white;
top: 58px;
}
.top-nav-content .menu:hover > ul > li {
list-style: none;
padding: 5px 0;
}
.top-nav-content .menu:hover > ul > li span {
display: block;
}
.top-nav-content .menu:hover > ul > li span.secondary {
color: #787878;
}
/* Styling the "Try Anyscale" button */
.top-nav-content > button.try-anyscale {
float: right;
border-radius: 6px;
background-color: #e7f2fa;
padding-left: 12px;
padding-right: 12px;
margin-left: 12px;
height: 40px;
border: none;
white-space: nowrap;
}
@media (max-width: 1000px) {
.top-nav-content > button.try-anyscale {
display: none;
}
}
/* custom css for tabs*/
.tabbed-set>label,.tabbed-set>label:hover {
border-bottom: 1px solid var(--border-color-gray);
color:var(--tabs-color-label-inactive);
font-weight: 500;
}
.tabbed-set>input:checked+label{
border-bottom: 0.125em solid;
color:var(--tabs-color-label-active);
}
.tabbed-label{
margin-bottom:0;
}
/* custom css for jupyter cells */
div.cell div.cell_input{
border: 1px var(--border-color-gray) solid;
background-color: var(--background-color-light-gray);
border-radius:0.25em;
border-left-color: var(--green);
border-left-width: medium;
}
/* custom css for table */
table {
border-color: var(--border-color-gray);
}
/* custom css for topic component */
div.topic{
border: 1px solid var(--border-color-gray);
border-radius:0.25em;
}
.topic {
background-color: var(--background-color-light-gray);
}
/* custom css for card component */
.card{
border-color: var(--border-color-gray);
}
.card-footer{
background-color: var(--background-color-light-gray);
border-top-color: var(--border-color-gray);
}
/* custom css for section navigation component */
.bd-toc nav>.nav {
border-left-color: var(--border-color-gray);
}
/* custom css for up and down arrows in collapsible cards */
details.dropdown .summary-up, details.dropdown .summary-down {
top: 1em;
}
/* remove focus border in collapsible admonition buttons */
.toggle.admonition button.toggle-button:focus {
outline: none;
}
/* custom css for shadow class */
.shadow {
box-shadow: 0 0.2rem 0.5rem rgb(0 0 0 / 5%), 0 0 0.0625rem rgb(0 0 0 / 10%) !important;
}
/* custom css for text area */
textarea {
border-color: var(--border-color-gray);
}
/* custom css for footer */
footer {
margin-top: 1rem;
padding:1em 0;
border-top-color: var(--border-color-gray);
}
.footer p{
color: var(--pst-color-text-secondary);
}
/* Make the hover color of tag/gallery buttons differ from "active" */
.tag.btn-outline-primary:hover {
background-color: rgba(20, 99, 208, 0.62) !important;
}
span.rst-current-version > span.fa.fa-book {
/* Move the book icon away from the top right
* corner of the version flyout menu */
margin: 10px 0px 0px 5px;
}
/*Extends the docstring signature box.*/
.rst-content dl:not(.docutils) dt {
display: block;
padding: 10px;
word-wrap: break-word;
padding-right: 100px;
}
/*Lists in an admonition note do not have awkward whitespace below.*/
.rst-content .admonition-note .section ul {
margin-bottom: 0;
}
/*Properties become blue (classmethod, staticmethod, property)*/
.rst-content dl dt em.property {
color: #2980b9;
text-transform: uppercase;
}
.rst-content .section ol p,
.rst-content .section ul p {
margin-bottom: 0;
}
/* Adjustment to Version block */
.rst-versions {
z-index: 1200 !important;
}
.image-header {
display: flex;
flex-direction: row;
align-items: center;
padding-left: 16px;
padding-right:16px;
gap: 16px;
}
.info-box {
box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.05);
border-radius: 8px;
padding: 20px;
}
.info-box:hover{
box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
}
.no-underline{
text-decoration: none;
}
.no-underline:hover{
text-decoration: none;
}
.icon-hover:hover{
height: 30px ;
width: 30px;
}
.info-box-2 {
background-color: #F9FAFB;
border-radius: 8px;
padding-right: 16px;
padding-left: 16px;
padding-bottom: 24px;
padding-top: 4px;
}
.bold-link {
color: #000000 !important;
font-weight: 600;
}
.community-box {
border: 1px solid #D2DCE6;
border-radius: 8px;
display: flex;
margin-bottom: 16px;
}
.community-box:hover {
box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.05);
text-decoration: none;
}
.community-box p {
margin-top: 1rem !important;
}
.tab-pane pre {
margin: 0;
padding: 0;
max-height: 252px;
overflow-y: auto;
}
.grid-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px,1fr));
grid-gap: 16px;
}
.grid-item {
padding: 20px;
}
.nav-pills {
background-color: #F9FAFB;
color: #000000;
padding: 8px;
border-bottom:none;
border-radius: 8px;
}
.nav-pills .nav-link.active {
background-color: #FFFFFF !important;
box-shadow: 0px 3px 14px 2px rgba(3,28,74,0.12);
border-radius: 8px;
padding: 20px;
color: #000000;
font-weight: 500;
}
.searchDiv {
width: 100%;
position: relative;
display: block;
}
.searchTerm {
width: 80%;
border: 2px solid var(--blue);
padding: 5px;
height: 45px;
border-radius: 5px;
outline: none;
}
.searchButton {
width: 40px;
height: 45px;
border: 1px solid var(--blue);
background: var(--blue);
color: #fff;
border-radius: 5px;
cursor: pointer;
font-size: 20px;
}
/*Resize the wrap to see the search bar change!*/
.searchWrap {
width: 100%;
position: relative;
margin: 15px;
top: 50%;
left: 50%;
transform: translate(-50%, -10%);
text-align: center;
}
.sd-card {
border: none !important;
}
.tag {
margin-bottom: 5px;
font-size: small;
}
/* Override float positioning of next-prev buttons so that
they take up space normally, and we can put other stuff at
the bottom of the page. */
.prev-next-area {
display: flex;
flex-direction: row;
}
.prev-next-area a.left-prev {
margin-right: auto;
width: fit-content;
float: none;
}
.prev-next-area a.right-next {
margin-left: auto;
width: fit-content;
float: none;
}
/* CSAT widgets */
#csat-inputs {
display: flex;
flex-direction: row;
align-items: center;
}
.csat-hidden {
display: none !important;
}
#csat-feedback-label {
color: #000;
font-weight: 500;
}
.csat-button {
margin-left: 16px;
padding: 8px 16px 8px 16px;
border-radius: 4px;
border: 1px solid #D2DCE6;
background: #FFF;
display: flex;
flex-direction: row;
align-items: center;
justify-content: center;
cursor: pointer;
width: 85px;
}
#csat-textarea-group {
display: flex;
flex-direction: column;
}
#csat-submit {
margin-left: auto;
font-weight: 700;
border: none;
margin-top: 12px;
cursor: pointer;
}
#csat-feedback-received {
display: flex;
flex-direction: row;
align-items: center;
justify-content: center;
}
.csat-button-active {
border: 1px solid #000;
}
.csat-icon {
margin-right: 4px;
}
footer.col.footer {
display: flex;
flex-direction: row;
}
footer.col.footer > p {
margin-left: auto;
}
#csat {
min-width: 60%;
}
#csat-textarea {
resize: none;
}
/* Ray Assistant */
.container-xl.blurred {
filter: blur(5px);
}
.chat-widget {
position: fixed;
bottom: 10px;
right: 10px;
z-index: 1000;
}
.chat-popup {
display: none;
position: fixed;
top: 20%;
left: 50%;
transform: translate(-50%, -20%);
width: 50%;
height: 70%;
background-color: white;
border: 1px solid #ccc;
border-radius: 10px;
box-shadow: 0 5px 10px rgba(0,0,0,0.1);
z-index: 1001;
max-height: 1000px;
overflow: hidden;
padding-bottom: 40px;
}
.chatFooter {
position: absolute;
bottom: 0;
right: 0;
width: 100%;
background-color: #f8f9fa;
}
#openChatBtn {
background-color: #000;
color: #fff;
width: 70px;
height: 70px;
border-radius: 10px;
border: none;
display: flex;
align-items: center;
justify-content: center;
}
#closeChatBtn {
border: none;
background-color: transparent;
color: #000;
font-size: 1.2em;
}
#closeChatBtn:hover {
color: #888;
}
.chatHeader {
display: flex;
justify-content: space-between;
align-items: center;
}
.chatContentContainer {
padding: 15px;
max-height: calc(100% - 80px);
overflow-y: auto;
}
.chatContentContainer input {
margin-top: 10px;
margin-bottom: 10px;
}
#result{
padding: 15px;
border-radius: 10px;
margin-top: 10px;
margin-bottom: 10px;
background-color: #f8f9fa;
max-height: calc(100% - 20px);
overflow-y: auto;
}
.chatContentContainer textarea {
flex-grow: 1;
min-width: 50px;
max-height: 40px;
resize: none;
}
.searchBtn {
white-space: nowrap;
}
.input-group {
display: flex;
align-items: stretch;
}
/* Kapa Ask AI button */
#kapa-widget-container figure {
padding: 0 !important;
}
.mantine-Modal-root figure {
padding: 0 !important;
}
@font-face {
font-family: "Linux Biolinum Keyboard";
src: url(../fonts/LinBiolinum_Kah.ttf);
}
.keys {
font-family: "Linux Biolinum Keyboard", sans-serif;
}
.bd-article-container h1, .bd-article-container h2, .bd-article-container h3, .bd-article-container h4, .bd-article-container h5, .bd-article-container p.caption {
color: black;
}

docs/_static/css/examples.css (new vendored file, 218 lines)

@@ -0,0 +1,218 @@
#site-navigation {
width: 330px !important;
border-right: none;
margin-left: 32px;
overflow-y: auto;
max-height: calc(100vh - var(--sidebar-top));
position: sticky;
top: var(--sidebar-top) !important;
z-index: 1000;
}
#site-navigation h5 {
font-size: 16px;
font-weight: 600;
color: #000;
}
#site-navigation h6 {
font-size: 14px;
font-weight: 600;
color: #000;
text-transform: uppercase;
}
/* Hide the default sidebar content */
#site-navigation > div.bd-sidebar__content {
display: none;
}
#site-navigation > div.rtd-footer-container {
display: none;
}
.searchDiv {
margin-bottom: 2em;
}
#searchInput {
width: 100%;
color: #5F6469;
border: 1px solid #D2DCE6;
height: 50px;
border-radius: 4px;
background-color: #F9FAFB;
background-image: url("data:image/svg+xml,%3Csvg width='25' height='25' viewBox='0 0 25 25' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cg id='Systems / search-line' clip-path='url(%23clip0_1_150)'%3E%3Crect width='24' height='24' transform='translate(0.398529 0.0546875)' fill='%23F9FAFB'/%3E%3Cg id='Group'%3E%3Cpath id='Vector' d='M18.4295 16.6717L22.7125 20.9537L21.2975 22.3687L17.0155 18.0857C15.4223 19.3629 13.4405 20.0576 11.3985 20.0547C6.43053 20.0547 2.39853 16.0227 2.39853 11.0547C2.39853 6.08669 6.43053 2.05469 11.3985 2.05469C16.3665 2.05469 20.3985 6.08669 20.3985 11.0547C20.4014 13.0967 19.7068 15.0784 18.4295 16.6717ZM16.4235 15.9297C17.6926 14.6246 18.4014 12.8751 18.3985 11.0547C18.3985 7.18669 15.2655 4.05469 11.3985 4.05469C7.53053 4.05469 4.39853 7.18669 4.39853 11.0547C4.39853 14.9217 7.53053 18.0547 11.3985 18.0547C13.219 18.0576 14.9684 17.3488 16.2735 16.0797L16.4235 15.9297V15.9297Z' fill='%238C9196'/%3E%3C/g%3E%3C/g%3E%3Cdefs%3E%3CclipPath id='clip0_1_150'%3E%3Crect width='24' height='24' fill='white' transform='translate(0.398529 0.0546875)'/%3E%3C/clipPath%3E%3C/defs%3E%3C/svg%3E%0A");
background-repeat: no-repeat;
background-position-x: 0.5em;
background-position-y: center;
background-size: 1.5em;
padding-left: 3em;
}
#searchInput::placeholder {
color: #5F6469;
opacity: 1;
}
.tag {
margin-bottom: 5px;
font-size: small;
color: #000000;
border: 1px solid #D2DCE6;
border-radius: 14px;
display: flex;
flex-direction: row;
align-items: center;
width: fit-content;
gap: 1em;
}
.tag.btn-outline-primary {
color: #000000;
padding: 3px 12px 3px 12px;
line-height: 20px;
}
.tag-btn-wrapper {
display: flex;
flex-direction: row;
flex-wrap: wrap;
gap: 1em;
}
div.sd-container-fluid.docutils > div {
gap: var(--ray-example-gallery-gap-y) var(--ray-example-gallery-gap-x);
display: grid;
grid-template-columns: 1fr;
}
/* Reflow to a 2-column format for normal screens */
@media screen and (min-width: 768px) {
div.sd-container-fluid.docutils > div {
grid-template-columns: 1fr 1fr;
}
}
div.gallery-item {
width: auto;
}
div.gallery-item > div.sd-card {
border-radius: 8px;
box-shadow: 0px 4px 10px 0px rgba(0, 0, 0, 0.05) !important;
}
/* Example gallery "Tutorial" title */
div.sd-card-title > span.sd-bg-success.sd-bg-text-success {
color: #2F80ED !important;
font-weight: 500;
background: linear-gradient(180deg, rgba(25, 177, 226, 0.2) 0%, rgba(0, 109, 255, 0.2) 100%);
background-color: initial !important;
}
/* Example gallery "Code example" title */
div.sd-card-title > span.sd-bg-secondary.sd-bg-text-secondary {
color: #219653 !important;
font-weight: 500;
background: linear-gradient(180deg, rgba(29, 151, 108, 0.2) 0%, rgba(0, 226, 147, 0.2) 100%);
background-color: initial !important;
}
/* Example gallery "Blog" title */
div.sd-card-title > span.sd-bg-primary.sd-bg-text-primary {
color: #F2994A !important;
font-weight: 500;
background: linear-gradient(180deg, rgba(255, 230, 5, 0.2) 0%, rgba(255, 185, 80, 0.2) 100%);
background-color: initial !important;
}
/* Example gallery "Video" title */
div.sd-card-title > span.sd-bg-warning.sd-bg-text-warning {
color: #EB5757 !important;
font-weight: 500;
background: linear-gradient(180deg, rgba(150, 7, 7, 0.2) 0%, rgba(255, 115, 115, 0.2) 100%);
background-color: initial !important;
}
/* Example gallery "Course" title */
div.sd-card-title > span.sd-bg-info.sd-bg-text-info {
color: #7A64FF !important;
font-weight: 500;
background: linear-gradient(180deg, rgba(53, 25, 226, 0.2) 0%, rgba(183, 149, 255, 0.2) 100%);
background-color: initial !important;
}
div.sd-card-body > p.sd-card-text > a {
text-align: initial;
}
div.sd-card-body > p.sd-card-text > a > span {
color: rgb(81, 81, 81);
}
#main-content {
max-width: 100%;
}
#noMatches {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
#noMatchesInnerContent {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
#noMatches.hidden,.gallery-item.hidden {
display: none !important;
}
.btn-primary {
color: #004293;
background: rgba(61, 138, 233, 0.20);
padding: 3px 12px 3px 12px;
border: 1px solid #D2DCE6;
}
button.try-anyscale {
background-color: initial !important;
width: fit-content;
padding: 0 !important;
margin-left: auto !important;
float: initial !important;
}
button.try-anyscale > svg {
display: none;
}
button.try-anyscale > i {
display: none;
}
button.try-anyscale > span {
margin: 0;
text-decoration-line: underline;
font-weight: 500;
color: #000;
}
.top-nav-content {
justify-content: initial;
}
/* Hide nav bar that has github, fullscreen, and print icons */
div.header-article.row.sticky-top.noprint {
display: none !important;
}
/* Hide the footer with 'prev article' and 'next article' buttons */
.footer-article.hidden {
display: none !important;
}

docs/_static/css/termynal.css (new vendored file, 108 lines)

@@ -0,0 +1,108 @@
/**
* termynal.js
*
* @author Ines Montani <ines@ines.io>
* @version 0.0.1
* @license MIT
*/
:root {
--color-bg: #252a33;
--color-text: #eee;
--color-text-subtle: #a2a2a2;
}
[data-termynal] {
width: auto;
max-width: 100%;
background: var(--color-bg);
color: var(--color-text);
font-size: 18px;
font-family: 'Fira Mono', Consolas, Menlo, Monaco, 'Courier New', Courier, monospace;
border-radius: 4px;
padding: 75px 45px 35px;
position: relative;
-webkit-box-sizing: border-box;
box-sizing: border-box;
}
[data-termynal]:before {
content: '';
position: absolute;
top: 15px;
left: 15px;
display: inline-block;
width: 15px;
height: 15px;
border-radius: 50%;
/* A little hack to display the window buttons in one pseudo element. */
background: #d9515d;
-webkit-box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930;
box-shadow: 25px 0 0 #f4c025, 50px 0 0 #3ec930;
}
[data-termynal]:after {
content: 'bash';
position: absolute;
color: var(--color-text-subtle);
top: 5px;
left: 0;
width: 100%;
text-align: center;
}
[data-ty] {
display: block;
line-height: 2;
}
[data-ty]:before {
/* Set up defaults and ensure empty lines are displayed. */
content: '';
display: inline-block;
vertical-align: middle;
}
[data-ty="input"]:before,
[data-ty-prompt]:before {
margin-right: 0.75em;
color: var(--color-text-subtle);
}
[data-ty="input"]:before {
content: '$';
}
[data-ty][data-ty-prompt]:before {
content: attr(data-ty-prompt);
}
[data-ty-cursor]:after {
content: attr(data-ty-cursor);
font-family: monospace;
margin-left: 0.5em;
-webkit-animation: blink 1s infinite;
animation: blink 1s infinite;
}
a[data-terminal-control] {
text-align: right;
display: block;
color: #aebbff;
}
/* Cursor animation */
@-webkit-keyframes blink {
50% {
opacity: 0;
}
}
@keyframes blink {
50% {
opacity: 0;
}
}

docs/_static/css/use_cases.css (new vendored file, 23 lines)

@@ -0,0 +1,23 @@
.query-param-ref-wrapper {
display: flex;
justify-content: center;
align-items: center;
border: 1px solid #8C9196;
border-radius: 8px;
}
.example-gallery-link {
padding: 1em 2em 1em 2em;
text-decoration: none !important;
color: black !important;
display: flex;
align-items: center;
}
/* Shooting star icon next to gallery links */
a.example-gallery-link::before {
content: url("data:image/svg+xml,%3Csvg width='24' height='24' viewBox='0 0 24 24' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cg id='Group'%3E%3Cpath id='Vector' d='M15.199 9.945C14.7653 9.53412 14.4863 8.98641 14.409 8.394L14.006 5.311L11.276 6.797C10.7511 7.08302 10.1436 7.17943 9.55597 7.07L6.49997 6.5L7.06997 9.556C7.1794 10.1437 7.08299 10.7511 6.79697 11.276L5.31097 14.006L8.39397 14.409C8.98603 14.4865 9.53335 14.7655 9.94397 15.199L12.082 17.456L13.418 14.649C13.6744 14.1096 14.1087 13.6749 14.648 13.418L17.456 12.082L15.199 9.945ZM15.224 15.508L13.011 20.158C12.9691 20.2459 12.9065 20.3223 12.8285 20.3806C12.7505 20.4389 12.6594 20.4774 12.5633 20.4926C12.4671 20.5079 12.3686 20.4995 12.2764 20.4682C12.1842 20.4369 12.101 20.3836 12.034 20.313L8.49197 16.574C8.39735 16.4742 8.27131 16.41 8.13497 16.392L3.02797 15.724C2.93149 15.7113 2.83954 15.6753 2.76006 15.6191C2.68058 15.563 2.61596 15.4883 2.57177 15.4016C2.52758 15.3149 2.50514 15.2187 2.5064 15.1214C2.50765 15.0241 2.53256 14.9285 2.57897 14.843L5.04097 10.319C5.10642 10.198 5.12831 10.0582 5.10297 9.923L4.15997 4.86C4.14207 4.76417 4.14778 4.66541 4.17662 4.57229C4.20546 4.47916 4.25656 4.39446 4.3255 4.32553C4.39444 4.25659 4.47913 4.20549 4.57226 4.17665C4.66539 4.14781 4.76414 4.14209 4.85997 4.16L9.92297 5.103C10.0582 5.12834 10.198 5.10645 10.319 5.041L14.843 2.579C14.9286 2.53257 15.0242 2.50769 15.1216 2.50648C15.219 2.50528 15.3152 2.52781 15.4019 2.57211C15.4887 2.61641 15.5633 2.68116 15.6194 2.76076C15.6755 2.84036 15.7114 2.93242 15.724 3.029L16.392 8.135C16.4099 8.27134 16.4742 8.39737 16.574 8.492L20.313 12.034C20.3836 12.101 20.4369 12.1842 20.4682 12.2765C20.4995 12.3687 20.5079 12.4671 20.4926 12.5633C20.4774 12.6595 20.4389 12.7505 20.3806 12.8285C20.3223 12.9065 20.2459 12.9691 20.158 13.011L15.508 15.224C15.3835 15.2832 15.2832 15.3835 15.224 15.508ZM16.021 17.435L17.435 16.021L21.678 20.263L20.263 21.678L16.021 17.435Z' fill='black'/%3E%3C/g%3E%3C/svg%3E%0A");
display: flex;
align-items: center;
margin-right: 0.5em;
}


@@ -14,7 +14,7 @@ project = "DB-GPT"
copyright = "2023, csunny"
author = "csunny"
version = "👏👏 0.4.0"
version = "👏👏 0.4.1"
html_title = project + " " + version
# -- General configuration ---------------------------------------------------
@@ -30,10 +30,24 @@ extensions = [
"myst_nb",
"sphinx_copybutton",
"sphinx_panels",
"sphinx_tabs.tabs",
"IPython.sphinxext.ipython_console_highlighting",
"sphinx.ext.autosectionlabel",
]
source_suffix = [".ipynb", ".html", ".md", ".rst"]
myst_enable_extensions = [
"dollarmath",
"amsmath",
"deflist",
"html_admonition",
"html_image",
"colon_fence",
"smartquotes",
"replacements",
]
# autodoc_pydantic_model_show_json = False
# autodoc_pydantic_field_list_validators = False
# autodoc_pydantic_config_members = False
@@ -53,8 +67,18 @@ locales_dirs = ["./locales/"]
gettext_compact = False
gettext_uuid = True
def setup(app):
app.add_css_file("css/custom.css")
app.add_css_file("css/examples.css")
app.add_css_file("css/termynal.css")
# app.add_css_file("css/use_cases.css")
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "furo"
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]


@@ -4,6 +4,12 @@ ChatData generates SQL from natural language and executes it. ChatDB involves co
Database, including metadata about databases, tables, and
fields.![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/d8bfeee9-e982-465e-a2b8-1164b673847e)
```{admonition} LLMs suitable for the ChatData scene
* chatgpt3.5
* chatgpt4
* Vicuna-v1.5
```
### 1.Choose Datasource
If you are using DB-GPT for the first time, you need to add a data source and set the relevant connection information


@@ -3,6 +3,11 @@ ChatExcel
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/4113ac15-83c2-4350-86c0-5fc795677abd)
ChatExcel uses natural language to analyze and query Excel data.![db plugins demonstration](../../../../assets/chat_excel/chat_excel_1.png)
```{admonition} LLMs suitable for the ChatExcel scene
* chatgpt3.5
* chatgpt4
```
### 1.Select And Upload Excel or CSV File
Select your excel or csv file to upload and start the conversation.
```{tip}


@@ -4,6 +4,11 @@ The purpose of the DB-GPT Dashboard is to empower data analysts with efficiency.
technology, allowing business analysts to perform self-service analysis directly using natural language and gain
insights into their respective areas of business.
```{admonition} LLMs suitable for the Dashboard scene
* chatgpt3.5
* chatgpt4
```
```{note} Dashboard now supports these datasource types
* Mysql
* Sqlite


@@ -1,6 +1,6 @@
LLM USE FAQ
==================================
##### Q1:how to use openai chatgpt service
##### Q1: how to use openai chatgpt service
change your LLM_MODEL in `.env`
````shell
LLM_MODEL=proxyllm
@@ -15,7 +15,7 @@ PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
make sure your openapi API_KEY is available
##### Q2 What difference between `python dbgpt_server --light` and `python dbgpt_server`
##### Q2: What difference between `python dbgpt_server --light` and `python dbgpt_server`
```{note}
* `python dbgpt_server --light` dbgpt_server does not start the llm service. Users can deploy the llm service separately by using `python llmserver`, and dbgpt_server accesses the llm service through set the LLM_SERVER environment variable in .env. The purpose is to allow for the separate deployment of dbgpt's backend service and llm service.
@@ -35,7 +35,7 @@ python pilot/server/dbgpt_server.py --light
```
##### Q3 How to use MultiGPUs
##### Q3: How to use MultiGPUs
DB-GPT will use all available gpu by default. And you can modify the setting `CUDA_VISIBLE_DEVICES=0,1` in `.env` file
to use the specific gpu IDs.
@@ -52,7 +52,7 @@ CUDA_VISIBLE_DEVICES=3,4,5,6 python3 pilot/server/dbgpt_server.py
You can modify the setting `MAX_GPU_MEMORY=xxGib` in `.env` file to configure the maximum memory used by each GPU.
##### Q4 Not Enough Memory
##### Q4: Not Enough Memory
DB-GPT supported 8-bit quantization and 4-bit quantization.
@@ -60,9 +60,9 @@ You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` in `.env
Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit quantization can run with 48 GB of VRAM.
Note: you need to install the latest dependencies according to [requirements.txt](https://github.com/eosphoros-ai/DB-GPT/blob/main/requirements.txt).
Note: you need to install the quantization dependencies with `pip install -e ".[quantization]"`
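A minimal `.env` sketch of the quantization switches mentioned above (enable only one of the two):

```shell
# 8-bit quantization (about 80 GB of VRAM for Llama-2-70b, per the note above)
QUANTIZE_8bit=True
# ...or 4-bit quantization (about 48 GB of VRAM for Llama-2-70b)
# QUANTIZE_4bit=True
```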
##### Q5 How to Add LLM Service dynamic local mode
##### Q5: How to Add LLM Service dynamic local mode
Now DB-GPT through multi-llm service switch, so how to add llm service dynamic,
@@ -75,7 +75,7 @@ eg: dbgpt model start --model_name chatglm2-6b --model_path /root/DB-GPT/models/
chatgpt
eg: dbgpt model start --model_name chatgpt_proxyllm --model_path chatgpt_proxyllm --proxy_api_key ${OPENAI_KEY} --proxy_server_url {OPENAI_URL}
```
##### Q6 How to Add LLM Service dynamic in remote mode
##### Q6: How to Add LLM Service dynamic in remote mode
If you deploy the LLM service on a remote machine instance and want to add the model service to the dbgpt server for management,
use dbgpt start worker and set --controller_addr.
@@ -88,13 +88,13 @@ eg: dbgpt start worker --model_name vicuna-13b-v1.5 \
```
##### Q7 dbgpt command not found
##### Q7: dbgpt command not found
```commandline
pip install -e "pip install -e ".[default]"
```
##### Q8 When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.
##### Q8: When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.
```commandline
@@ -103,4 +103,14 @@ pip install -e "pip install -e ".[default]"
automatically determined
```
##### Q9: How to customize model path and prompt template
DB-GPT will read the model path from `pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`.
Of course, you can use the environment variable `LLM_MODEL_PATH` to specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model prompt template.
```
LLM_MODEL=vicuna-13b-v1.5
LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
```


@@ -18,7 +18,7 @@ DB-GPT product is a Web application that you can chat database, chat knowledge,
:name: deploy
:hidden:
./install/deploy/deploy.md
./install/deploy.rst
./install/docker/docker.md
./install/docker_compose/docker_compose.md
./install/cluster/cluster.rst


@@ -77,3 +77,4 @@ By analyzing this information, we can identify performance bottlenecks in model
./vms/standalone.md
./vms/index.md
./openai.md


@@ -0,0 +1,51 @@
OpenAI-Compatible RESTful APIs
==================================
(openai-apis-index)=
### Install Preparation
You must [deploy DB-GPT cluster](https://db-gpt.readthedocs.io/en/latest/getting_started/install/cluster/vms/index.html) first.
### Launch Model API Server
```bash
dbgpt start apiserver --controller_addr http://127.0.0.1:8000 --api_keys EMPTY
```
By default, the Model API Server starts on port 8100.
### Validate with cURL
#### List models
```bash
curl http://127.0.0.1:8100/api/v1/models \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json"
```
#### Chat completions
```bash
curl http://127.0.0.1:8100/api/v1/chat/completions \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{"model": "vicuna-13b-v1.5", "messages": [{"role": "user", "content": "hello"}]}'
```
### Validate with OpenAI Official SDK
#### Chat completions
```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "vicuna-13b-v1.5"
completion = openai.ChatCompletion.create(
model=model,
messages=[{"role": "user", "content": "hello"}]
)
# print the completion
print(completion.choices[0].message.content)
```


@@ -0,0 +1,425 @@
.. _installation:
Installation From Source
========================
To get started, install DB-GPT with the following steps.
1. Preparation
-----------------
**Download DB-GPT**
.. code-block:: shell
git clone https://github.com/eosphoros-ai/DB-GPT.git
**Install Miniconda**
We use SQLite as the default database, so there is no need to install a database. If you choose to connect to other databases, you can follow our tutorial for installation and configuration.
For the entire installation process of DB-GPT, we use the miniconda3 virtual environment. Create a virtual environment and install the Python dependencies.
`How to install Miniconda <https://docs.conda.io/en/latest/miniconda.html>`_
.. code-block:: shell
python>=3.10
conda create -n dbgpt_env python=3.10
conda activate dbgpt_env
# it will take some minutes
pip install -e ".[default]"
.. code-block:: shell
cp .env.template .env
2. Deploy LLM Service
---------------------
DB-GPT can be deployed on servers with low hardware requirements or on servers with high hardware requirements.
If you have low hardware requirements, you can install DB-GPT using a third-party LLM REST API service such as OpenAI, Azure, or Tongyi.
.. tip::
Our project can achieve over 85% of OpenAI's performance.
.. note::
Make sure you have installed git-lfs.
CentOS: yum install git-lfs
Ubuntu: apt-get install git-lfs
macOS: brew install git-lfs
.. tabs::
.. tab:: OpenAI
Installing Dependencies
.. code-block::
pip install -e ".[openai]"
Download embedding model
.. code-block:: shell
cd DB-GPT
mkdir models && cd models
#### embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
Configure LLM_MODEL, PROXY_API_URL and API_KEY in `.env` file
.. code-block:: shell
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
.. tip::
Make sure your .env configuration is not overwritten
.. tab:: Vicuna
`Vicuna-v1.5 <https://huggingface.co/lmsys/vicuna-13b-v1.5>`_, based on Llama-2, has been released; we recommend you set `LLM_MODEL=vicuna-13b-v1.5` to try this model.
.. list-table:: vicuna-v1.5 hardware requirements
:widths: 50 50 50
:header-rows: 1
* - Model
- Quantize
- VRAM Size
* - vicuna-7b-v1.5
- 4-bit
- 8 GB
* - vicuna-7b-v1.5
- 8-bit
- 12 GB
* - vicuna-13b-v1.5
- 4-bit
- 12 GB
* - vicuna-13b-v1.5
- 8-bit
- 20 GB
.. code-block:: shell
cd DB-GPT
mkdir models && cd models
#### embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
#### llm model; if you use the OpenAI, Azure, or Tongyi LLM API service, you don't need to download an llm model
git clone https://huggingface.co/lmsys/vicuna-13b-v1.5
The model files are large and will take a long time to download.
**Configure LLM_MODEL in `.env` file**
.. code-block:: shell
LLM_MODEL=vicuna-13b-v1.5
.. tab:: Baichuan
.. list-table:: Baichuan hardware requirements
:widths: 50 50 50
:header-rows: 1
* - Model
- Quantize
- VRAM Size
* - baichuan-7b
- 4-bit
- 8 GB
* - baichuan-7b
- 8-bit
- 12 GB
* - baichuan-13b
- 4-bit
- 12 GB
* - baichuan-13b
- 8-bit
- 20 GB
.. code-block:: shell
cd DB-GPT
mkdir models && cd models
#### embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
#### llm model
git clone https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
or
git clone https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat
The model files are large and will take a long time to download.
**Configure LLM_MODEL in `.env` file**
Please rename the Baichuan model path to "baichuan2-13b" or "baichuan2-7b".
.. code-block:: shell
LLM_MODEL=baichuan2-13b
.. tab:: ChatGLM
.. code-block:: shell
cd DB-GPT
mkdir models && cd models
#### embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
#### llm model
git clone https://huggingface.co/THUDM/chatglm2-6b
The model files are large and will take a long time to download.
**Configure LLM_MODEL in `.env` file**
Please rename the ChatGLM model path to "chatglm2-6b".
.. code-block:: shell
LLM_MODEL=chatglm2-6b
.. tab:: Other LLM API
Download embedding model
.. code-block:: shell
cd DB-GPT
mkdir models && cd models
#### embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
DB-GPT now supports these LLM REST API types:
.. note::
* OpenAI
* Azure
* Aliyun tongyi
* Baidu wenxin
* Zhipu
* Baichuan
* Bard
Configure LLM_MODEL, PROXY_API_URL, and API_KEY in the `.env` file
.. code-block:: shell
#OpenAI
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
#Azure
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-azure-sk}
PROXY_API_BASE=https://{your domain}.openai.azure.com/
PROXY_API_TYPE=azure
PROXY_SERVER_URL=xxxx
PROXY_API_VERSION=2023-05-15
PROXYLLM_BACKEND=gpt-35-turbo
#Aliyun tongyi
LLM_MODEL=tongyi_proxyllm
TONGYI_PROXY_API_KEY={your-tongyi-sk}
PROXY_SERVER_URL={your_service_url}
## Baidu wenxin
LLM_MODEL=wenxin_proxyllm
PROXY_SERVER_URL={your_service_url}
WEN_XIN_MODEL_VERSION={version}
WEN_XIN_API_KEY={your-wenxin-sk}
WEN_XIN_SECRET_KEY={your-wenxin-sct}
## Zhipu
LLM_MODEL=zhipu_proxyllm
PROXY_SERVER_URL={your_service_url}
ZHIPU_MODEL_VERSION={version}
ZHIPU_PROXY_API_KEY={your-zhipu-sk}
## Baichuan
LLM_MODEL=bc_proxyllm
PROXY_SERVER_URL={your_service_url}
BAICHUN_MODEL_NAME={version}
BAICHUAN_PROXY_API_KEY={your-baichuan-sk}
BAICHUAN_PROXY_API_SECRET={your-baichuan-sct}
## bard
LLM_MODEL=bard_proxyllm
PROXY_SERVER_URL={your_service_url}
# From https://bard.google.com/ : open DevTools (F12) -> Application -> copy the __Secure-1PSID cookie value
BARD_PROXY_API_KEY={your-bard-token}
.. tip::
Make sure your .env configuration is not overwritten
.. tab:: llama.cpp
DB-GPT already supports `llama.cpp <https://github.com/ggerganov/llama.cpp>`_ via `llama-cpp-python <https://github.com/abetlen/llama-cpp-python>`_ .
**Preparing Model Files**
To use llama.cpp, you need to prepare a model file in GGUF format. There are two common ways to obtain one; you can choose either:
**1. Download a pre-converted model file.**
Suppose you want to use `Vicuna 13B v1.5 <https://huggingface.co/lmsys/vicuna-13b-v1.5>`_ , you can download the file already converted from `TheBloke/vicuna-13B-v1.5-GGUF <https://huggingface.co/TheBloke/vicuna-13B-v1.5-GGUF>`_ , only one file is needed. Download it to the `models` directory and rename it to `ggml-model-q4_0.gguf`.
.. code-block::
wget https://huggingface.co/TheBloke/vicuna-13B-v1.5-GGUF/resolve/main/vicuna-13b-v1.5.Q4_K_M.gguf -O models/ggml-model-q4_0.gguf
**2. Convert It Yourself**
You can convert the model file yourself according to the instructions in `llama.cpp#prepare-data--run <https://github.com/ggerganov/llama.cpp#prepare-data--run>`_ , and put the converted file in the models directory and rename it to `ggml-model-q4_0.gguf`.
**Installing Dependencies**
llama.cpp is an optional dependency in DB-GPT, and you can manually install it using the following command:
.. code-block::
pip install -e ".[llama_cpp]"
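Note that this installs a default (CPU) build of llama-cpp-python. If you want GPU offloading (see `llama_cpp_n_gpu_layers` below), the package usually has to be rebuilt with CUDA enabled; the exact flags depend on your llama-cpp-python version, so the following is only a sketch, please check the llama-cpp-python documentation:
.. code-block:: shell
# assumption: cuBLAS build flags for llama-cpp-python; verify for your version
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install -e ".[llama_cpp]"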
**3. Modifying the Configuration File**
Next, you can directly modify your `.env` file to enable llama.cpp.
.. code-block::
LLM_MODEL=llama-cpp
llama_cpp_prompt_template=vicuna_v1.1
Then you can run it according to `Run <https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html#run>`_
**More Configurations**
In DB-GPT, the model configuration can be done through `{model name}_{config key}`.
.. list-table:: More Configurations
:widths: 50 50 50
:header-rows: 1
* - Environment Variable Key
- Default
- Description
* - llama_cpp_prompt_template
- None
- Prompt template name. Now supports: zero_shot, vicuna_v1.1, alpaca, llama-2, baichuan-chat, internlm-chat. If None, the prompt template is automatically determined from the model path.
* - llama_cpp_model_path
- None
- Model path
* - llama_cpp_n_gpu_layers
- 1000000000
- Number of layers to offload to the GPU. Set this to 1000000000 to offload all layers to the GPU. If your GPU VRAM is not enough, you can set a lower number, e.g. 10.
* - llama_cpp_n_threads
- None
- Number of threads to use. If None, the number of threads is automatically determined
* - llama_cpp_n_batch
- 512
- Maximum number of prompt tokens to batch together when calling llama_eval
* - llama_cpp_n_gqa
- None
- Grouped-query attention. Must be 8 for llama-2 70b.
* - llama_cpp_rms_norm_eps
- 5e-06
- 5e-6 is a good value for llama-2 models.
* - llama_cpp_cache_capacity
- None
- Maximum cache capacity. Examples: 2000MiB, 2GiB
* - llama_cpp_prefer_cpu
- False
- If a GPU is available, it is preferred by default; set prefer_cpu=True to prefer the CPU instead.
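Combining a few of the keys from the table above, an illustrative `.env` fragment for llama.cpp could look like this (the values are examples only, not recommendations):
.. code-block:: shell
LLM_MODEL=llama-cpp
llama_cpp_prompt_template=vicuna_v1.1
# offload only 10 layers to the GPU when VRAM is limited
llama_cpp_n_gpu_layers=10
# maximum cache capacity (see the table above)
llama_cpp_cache_capacity=2GiB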
.. tab:: vllm
vLLM is a fast and easy-to-use library for LLM inference and serving.
**Running vLLM**
**1. Installing Dependencies**
vLLM is an optional dependency in DB-GPT, and you can manually install it using the following command:
.. code-block::
pip install -e ".[vllm]"
**2. Modifying the Configuration File**
Next, you can directly modify your `.env` file to enable vLLM.
.. code-block::
LLM_MODEL=vicuna-13b-v1.5
MODEL_TYPE=vllm
You can view the models supported by vLLM `here <https://vllm.readthedocs.io/en/latest/models/supported_models.html#supported-models>`_
Then you can run it according to `Run <https://db-gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html#run>`_
3. Prepare SQL example (Optional)
-----------------------------------
**(Optional) Load examples into SQLite**
.. code-block:: shell
bash ./scripts/examples/load_examples.sh
On the Windows platform:
.. code-block:: shell
.\scripts\examples\load_examples.bat
4. Run the DB-GPT server
--------------------------
.. code-block:: shell
python pilot/server/dbgpt_server.py
**Open http://localhost:5000 with your browser to see the product.**
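If the LLM service is deployed separately (for example on another machine), you can start only the web server in light mode, as described in the LLM FAQ; this assumes LLM_MODEL and MODEL_SERVER are already set in your `.env` file:
.. code-block:: shell
python pilot/server/dbgpt_server.py --light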
View File
@@ -6,7 +6,9 @@ LLM Model Name, see /pilot/configs/model_config.LLM_MODEL_CONFIG
* LLM_MODEL=vicuna-13b
MODEL_SERVER_ADDRESS
* MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY
* LIMIT_MODEL_CONCURRENCY=5
@@ -59,11 +61,11 @@ Embedding Chunk size, default 500
Embedding Chunk Overlap, default 100
* KNOWLEDGE_CHUNK_OVERLAP=100
embeding recall top k,5
embedding recall top k,5
* KNOWLEDGE_SEARCH_TOP_SIZE=5
embeding recall max token ,2000
embedding recall max token ,2000
* KNOWLEDGE_SEARCH_MAX_TOKEN=5
```
@@ -84,21 +86,6 @@ embeding recall max token ,2000
* WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network
```
```{admonition} Vector Store SETTINGS
#### Chroma
* VECTOR_STORE_TYPE=Chroma
#### MILVUS
* VECTOR_STORE_TYPE=Milvus
* MILVUS_URL=127.0.0.1
* MILVUS_PORT=19530
* MILVUS_USERNAME
* MILVUS_PASSWORD
* MILVUS_SECURE=
#### WEAVIATE
* WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network
```
```{admonition} Multi-GPU Setting
See https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/
If CUDA_VISIBLE_DEVICES is not configured, all available gpus will be used
View File
@@ -24,9 +24,12 @@ PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
#Azure
LLM_MODEL=chatgpt_proxyllm
OPENAI_API_TYPE=azure
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://xx.openai.azure.com/v1/chat/completions
PROXY_API_KEY={your-azure-sk}
PROXY_API_BASE=https://{your domain}.openai.azure.com/
PROXY_API_TYPE=azure
PROXY_SERVER_URL=xxxx
PROXY_API_VERSION=2023-05-15
PROXYLLM_BACKEND=gpt-35-turbo
#Aliyun tongyi
LLM_MODEL=tongyi_proxyllm
View File
@@ -91,35 +91,6 @@ It's very important for DB-GPT, DB-GPT also provide standard, extendable interfa
./modules/knowledge.rst
./modules/vector.rst
Use Cases
---------
| Best Practices and built-in implementations for common DB-GPT use cases:
- `Sql generation and diagnosis <./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and diagnosis.
- `knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A important scene for user to chat with database documents, codes, bugs and schemas.
- `Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use multi models to chat.
- `Querying Database Data <./use_cases/query_database_data.html>`_: Query and Analysis data from databases and give charts.
- `Interacting with apis <./use_cases/interacting_with_api.html>`_: Interact with apis, such as create a table, deploy a database cluster, create a database and so on.
- `Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to Plugin use tools to manage databases autonomoly.
.. toctree::
:maxdepth: 2
:caption: Use Cases
:name: use_cases
:hidden:
./use_cases/sql_generation_and_diagnosis.md
./use_cases/knownledge_based_qa.md
./use_cases/chatbots.md
./use_cases/query_database_data.md
./use_cases/interacting_with_api.md
./use_cases/tool_use_with_plugin.md
Reference
-----------
View File
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-09-26 17:51+0800\n"
"POT-Creation-Date: 2023-11-03 15:33+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/application/chatdb/chatdb.md:1
#: 0cf45852c1fd430090da81836bc961c7
#: c1489293ce464cee9577b0aa9a3f3037
msgid "ChatData & ChatDB"
msgstr "ChatData & ChatDB"
#: ../../getting_started/application/chatdb/chatdb.md:3
#: 6dc94a787ff844caa21074d71aaf351a
#: 2c421938f270427dbd0ffff892b1a5a1
#, fuzzy
msgid ""
"ChatData generates SQL from natural language and executes it. ChatDB "
@@ -38,47 +38,68 @@ msgstr ""
"plugins demonstration](../../../../assets/chat_data/chat_data.jpg)"
#: ../../getting_started/application/chatdb/chatdb.md:3
#: ../../getting_started/application/chatdb/chatdb.md:20
#: ../../getting_started/application/chatdb/chatdb.md:24
#: ../../getting_started/application/chatdb/chatdb.md:28
#: ../../getting_started/application/chatdb/chatdb.md:43
#: ../../getting_started/application/chatdb/chatdb.md:48
#: ../../getting_started/application/chatdb/chatdb.md:26
#: ../../getting_started/application/chatdb/chatdb.md:30
#: ../../getting_started/application/chatdb/chatdb.md:34
#: ../../getting_started/application/chatdb/chatdb.md:49
#: ../../getting_started/application/chatdb/chatdb.md:54
#: 826032e82a0a40b2bd122a90a35d0161 91652ef9e3224290b0c89112bcca4474
#: d396ffa33eef4bef8471040369414420 d7f176a7794048d3ac3573970db86d9d
#: f80e5611eca64f86baeeed6c860061f9
#: ../../getting_started/application/chatdb/chatdb.md:60
#: 1467691a6012498795a94a14f7eba307 32315140835948c58e1721c7e2fa88a9
#: 3b8e3c3396ff47348105a6dec9e755ba a314854e9be945dd88ad241bfa340870
#: d94d5f0e608f4399a0e10d593f0ab1da e0ca6ec1841040bc828ce2ef29c387b6
msgid "db plugins demonstration"
msgstr "db plugins demonstration"
#: ../../getting_started/application/chatdb/chatdb.md:7
#: aa0f978d3ad34b71aacf143a4c807ba1
#: 67cb0954cfa54e629b75cf9a241f6b9d
#, fuzzy
msgid "The LLM (Language Model) suitable for the ChatData scene is"
msgstr "ChatData场景适用的LLM * chatgpt3.5. * chatgpt4. * Vicuna-v1.5."
#: ../../getting_started/application/chatdb/chatdb.md:8
#: c973e19574e2405a96eb003c64063bfc
msgid "chatgpt3.5."
msgstr ""
#: ../../getting_started/application/chatdb/chatdb.md:9
#: 649b2382378c416591db7038a269c33b
msgid "chatgpt4."
msgstr ""
#: ../../getting_started/application/chatdb/chatdb.md:10
#: fac49de88fe3409f818193b953714cb9
msgid "Vicuna-v1.5."
msgstr ""
#: ../../getting_started/application/chatdb/chatdb.md:13
#: 8bd004993a834b0797ebcb5b6a6b1a23
msgid "1.Choose Datasource"
msgstr "1.Choose Datasource"
#: ../../getting_started/application/chatdb/chatdb.md:9
#: 8a2338e2fbae44f1b61b2fcf062499d3
#: ../../getting_started/application/chatdb/chatdb.md:15
#: 34abfdced7804b51a212c0e567ffda6b
msgid ""
"If you are using DB-GPT for the first time, you need to add a data source"
" and set the relevant connection information for the data source."
msgstr "如果你是第一次使用DB-GPT, 首先需要添加数据源,设置数据源的相关连接信息"
#: ../../getting_started/application/chatdb/chatdb.md:13
#: f1d165ab8b564445880e581a2e554434
#: ../../getting_started/application/chatdb/chatdb.md:19
#: 3a8d16a8a32c4ac5affbd8093677b4f8
msgid "there are some example data in DB-GPT-NEW/DB-GPT/docker/examples"
msgstr "在DB-GPT-NEW/DB-GPT/docker/examples有数据示例"
#: ../../getting_started/application/chatdb/chatdb.md:15
#: dd390cb518094c96bf5430bfa821830f
#: ../../getting_started/application/chatdb/chatdb.md:21
#: 2c3333a2705648148f79623c220d90cd
msgid "you can execute sql script to generate data."
msgstr "你可以通过执行sql脚本生成测试数据"
#: ../../getting_started/application/chatdb/chatdb.md:18
#: aebd974d23124daa80af6d74431d1ce3
#: ../../getting_started/application/chatdb/chatdb.md:24
#: 4994182137574d14a3eefb421ceccd8e
msgid "1.1 Datasource management"
msgstr "1.1 Datasource management"
#: ../../getting_started/application/chatdb/chatdb.md:20
#: af4d12aaed5c4fc484a3e7a755a666c2
#: ../../getting_started/application/chatdb/chatdb.md:26
#: 94680e1487d84092abc51a7da9bf1075
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52)"
@@ -86,13 +107,13 @@ msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52)"
#: ../../getting_started/application/chatdb/chatdb.md:22
#: 34b7b9ce0f0142af8179a8e1763a32f8
#: ../../getting_started/application/chatdb/chatdb.md:28
#: 236dbd6d6cb4467593bf30597ecb215c
msgid "1.2 Connection management"
msgstr "1.2 Connection管理"
#: ../../getting_started/application/chatdb/chatdb.md:24
#: 00a1af9f4e0a45b9a398f641c8198114
#: ../../getting_started/application/chatdb/chatdb.md:30
#: 6611193e600c4452ac8a9769c6230590
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6)"
@@ -100,13 +121,13 @@ msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6)"
#: ../../getting_started/application/chatdb/chatdb.md:26
#: 3b8efc25b482480b8d0f4afe5304ece0
#: ../../getting_started/application/chatdb/chatdb.md:32
#: 7cceb9703af54970bee4a50fb07d4509
msgid "1.3 Add Datasource"
msgstr "1.3 添加Datasource"
#: ../../getting_started/application/chatdb/chatdb.md:28
#: d36a476e1eb34a46b2d35e6c1c4c39dd
#: ../../getting_started/application/chatdb/chatdb.md:34
#: 83c9e18cb87b4f0d9b0ce5e68b7fea77
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085)"
@@ -114,54 +135,54 @@ msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085)"
#: ../../getting_started/application/chatdb/chatdb.md:31
#: 9205388f91404099bf1add6d55f33801
#: ../../getting_started/application/chatdb/chatdb.md:37
#: 143fb04274cd486687c5766179f6103e
msgid "now DB-GPT support Datasource Type"
msgstr "DB-GPT支持数据源类型"
#: ../../getting_started/application/chatdb/chatdb.md:33
#: 197722ccd9e54f8196e3037f0ebd4165
#: ../../getting_started/application/chatdb/chatdb.md:39
#: 8bcf83e66b2d4d858407fc2b21b8fe85
msgid "Mysql"
msgstr "Mysql"
#: ../../getting_started/application/chatdb/chatdb.md:34
#: e859c194648440b19941a42635f37ac5
#: ../../getting_started/application/chatdb/chatdb.md:40
#: cd74abd5d6f4410ca001a3de2685e768
msgid "Sqlite"
msgstr "Sqlite"
#: ../../getting_started/application/chatdb/chatdb.md:35
#: 91c695f437064f01bf1d7c85a0ecf5b4
#: ../../getting_started/application/chatdb/chatdb.md:41
#: fc5e01baba43449f8c3eb9b4b36a0ed8
msgid "DuckDB"
msgstr "DuckDB"
#: ../../getting_started/application/chatdb/chatdb.md:36
#: 0a8ff591969c4944890415a84aa64173
#: ../../getting_started/application/chatdb/chatdb.md:42
#: 10b6fe2153cd4ceba949687a54c3a68c
msgid "Clickhouse"
msgstr "Clickhouse"
#: ../../getting_started/application/chatdb/chatdb.md:37
#: d52ec849653141dc95862e82ce5777e0
#: ../../getting_started/application/chatdb/chatdb.md:43
#: 9ce0a41784f041d39138a81099c386e9
#, fuzzy
msgid "Mssql"
msgstr "Mysql"
#: ../../getting_started/application/chatdb/chatdb.md:38
#: 430a72d857114422aeecd5595df41881
#: ../../getting_started/application/chatdb/chatdb.md:44
#: 4af6eb835e954e0d937e98b308fb512b
msgid "Spark"
msgstr "Spark"
#: ../../getting_started/application/chatdb/chatdb.md:41
#: b615a70971e7443291ba33e8bc12b437
#: ../../getting_started/application/chatdb/chatdb.md:47
#: 8aaa3a73090b4805b2dddf1cc355d83c
msgid "2.ChatData"
msgstr "2.ChatData"
#: ../../getting_started/application/chatdb/chatdb.md:42
#: e3542c64926143958e71c7cb21d25c78
#: ../../getting_started/application/chatdb/chatdb.md:48
#: a34c79c99bd34233ae92d3090ff0b877
msgid "Preview Mode"
msgstr "Preview Mode"
#: ../../getting_started/application/chatdb/chatdb.md:43
#: e32f26b7c22141e181b5345a644dffd5
#: ../../getting_started/application/chatdb/chatdb.md:49
#: 39e31a2a01494d4191d415a2240e026d
#, fuzzy
msgid ""
"After successfully setting up the data source, you can start conversing "
@@ -173,13 +194,13 @@ msgstr ""
"设置数据源成功后就可以和数据库进行对话了。你可以让它帮你生成SQL也可以和问它数据库元数据的相关信息。 ![db plugins "
"demonstration](../../../../assets/chat_data/chatdata_eg.png)"
#: ../../getting_started/application/chatdb/chatdb.md:47
#: 4d5c0465a01b4f5a964d0e803f9cbc89
#: ../../getting_started/application/chatdb/chatdb.md:53
#: 999c78e8b604493a8190b0e1258d0da4
msgid "Editor Mode"
msgstr "Editor Mode"
#: ../../getting_started/application/chatdb/chatdb.md:48
#: 79b088787e8f43258bcc4292c89ad1b0
#: ../../getting_started/application/chatdb/chatdb.md:54
#: e4a61d1e62c743f8b13dbed92ec265ba
msgid ""
"In Editor Mode, you can edit your sql and execute it. ![db plugins "
"demonstration](https://github.com/eosphoros-ai/DB-"
@@ -189,13 +210,13 @@ msgstr ""
"/eosphoros-ai/DB-"
"GPT/assets/13723926/1a896dc1-7c0e-4354-8629-30357ffd8d7f)"
#: ../../getting_started/application/chatdb/chatdb.md:52
#: 9efaf27749614cd4bea07146edddf558
#: ../../getting_started/application/chatdb/chatdb.md:58
#: b3a0d94083524d249f97dd426e1e1f26
msgid "3.ChatDB"
msgstr "3.ChatDB"
#: ../../getting_started/application/chatdb/chatdb.md:54
#: b2dc15f067064c60974e532c3e2f5893
#: ../../getting_started/application/chatdb/chatdb.md:60
#: 8f4bd453447f48019a597eb3e4a59875
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/e04bc1b1-2c58-4b33-af62-97e89098ace7)"
View File
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.6\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-08-29 21:14+0800\n"
"POT-Creation-Date: 2023-11-03 15:33+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -20,13 +20,13 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/application/chatexcel/chatexcel.md:1
#: ../../getting_started/application/chatexcel/chatexcel.md:9
#: 6efcbf4652954b27beb55f600cfe75c7 eefb0c3bc131439fb2dd4045761f1ae9
#: ../../getting_started/application/chatexcel/chatexcel.md:14
#: 5e9c1de21de240839a510b9e05afcba1 96556d6d1d734f67ab15e548c9fdce2f
msgid "ChatExcel"
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:3
#: 5fc4ddd2690f46658df1e09c601d81ad
#: 19590f67feea4f2580602538b79cd138
#, fuzzy
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
@@ -36,61 +36,75 @@ msgid ""
msgstr "使用自然语言进行Excel数据的分析处理"
#: ../../getting_started/application/chatexcel/chatexcel.md:3
#: ../../getting_started/application/chatexcel/chatexcel.md:13
#: ../../getting_started/application/chatexcel/chatexcel.md:17
#: ../../getting_started/application/chatexcel/chatexcel.md:21
#: 4c91baf5f0b244abb021f461851674cc 4eead9a4f81e4855a5c362774696999c
#: 5f309a06170946108ae70806dff157ea 790016c9c68f4a29a84b7ef8e14d6dc2
#: 93db1eb6af69452982f6028eff626a57 e758c8b320894e2b93f8db78459b7a1f
#: ea3c99f7eafc4ae0a19706a47e4c7bf6 f18d2b88de244173ab2673f2a5e828c0
#: ../../getting_started/application/chatexcel/chatexcel.md:18
#: ../../getting_started/application/chatexcel/chatexcel.md:22
#: ../../getting_started/application/chatexcel/chatexcel.md:26
#: 286462d6d79b4189bcd9d17937a96233 7009abd113244cc5badb8e90837dc9af
#: 7eb17d02534d4f77b7602f3756da0d5e f5c1cfd0f77d4199a45988114174c67f
msgid "db plugins demonstration"
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:6
#: 45f137031025409ba2ada9c8f7c5f1e4
#: 338f0b19314f4026b84a670d35056e6c
#, fuzzy
msgid "The LLM (Language Model) suitable for the ChatExcel scene is"
msgstr "ChatExcel场景适用的LLM 是 scene is * chatgpt3.5. * chatgpt4."
#: ../../getting_started/application/chatexcel/chatexcel.md:7
#: bc09e8af60b64a8fbeecedb927a5a854
msgid "chatgpt3.5."
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:8
#: e840c31d671946c190e27e1b7dd28647
msgid "chatgpt4."
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:11
#: 2a710e2650bb44ef9d4a1ee4b8225a35
msgid "1.Select And Upload Excel or CSV File"
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:7
#: cd282be2b4ef49ea8b0eaa3d53042f22
#: ../../getting_started/application/chatexcel/chatexcel.md:12
#: df48b1003f3640cfa621e416f0405e8d
msgid "Select your excel or csv file to upload and start the conversation."
msgstr "选择你的Excel或者CSV文件上传开始对话"
#: ../../getting_started/application/chatexcel/chatexcel.md:11
#: a5ebc8643eff4b44a951b28d85488143
#: ../../getting_started/application/chatexcel/chatexcel.md:16
#: 7ef5d5ebb634406ea4b566bbf5e30fd7
msgid ""
"The ChatExcel function supports Excel and CSV format files, select the "
"corresponding file to use."
msgstr "ChatExcel功能支持Excel和CSV格式的文件选择对应格式的文件开始使用"
#: ../../getting_started/application/chatexcel/chatexcel.md:13
#: d52927be09654c8daf29e2ef0c60a671
#: ../../getting_started/application/chatexcel/chatexcel.md:18
#: 40c79b71820f44439b1f541db2be9dd9
msgid ""
"![db plugins "
"demonstration](../../../../assets/chat_excel/chat_excel_2.png) ![db "
"plugins demonstration](../../../../assets/chat_excel/chat_excel_3.png)"
msgstr ""
#: ../../getting_started/application/chatexcel/chatexcel.md:16
#: d86202165fdc4da6be06024b45f9af55
#: ../../getting_started/application/chatexcel/chatexcel.md:21
#: 0dd469b6f56a442485392346065e345d
msgid "2.Wait for Data Processing"
msgstr "等待数据处理"
#: ../../getting_started/application/chatexcel/chatexcel.md:17
#: 3de7205fbdc741e2b79996d67264c058
#: ../../getting_started/application/chatexcel/chatexcel.md:22
#: 0e9213342664465187981d6fea41e7ba
msgid ""
"After the data is uploaded, it will first learn and process the data "
"structure and field meaning. ![db plugins "
"demonstration](../../../../assets/chat_excel/chat_excel_4.png)"
msgstr "等待数据上传完成,会自动进行数据结构的学习和处理"
#: ../../getting_started/application/chatexcel/chatexcel.md:20
#: fb0620dec5a24b469ceccf86e918fe54
#: ../../getting_started/application/chatexcel/chatexcel.md:25
#: dd2047d1199542f7abda4767b953cfac
msgid "3.Use Data Analysis Calculation"
msgstr "开始使用数据分析计算"
#: ../../getting_started/application/chatexcel/chatexcel.md:21
#: 221733f01fe04e38b19f191d4001c7a7
#: ../../getting_started/application/chatexcel/chatexcel.md:26
#: 4e168def205743c898586e99e34d3e18
msgid ""
"Now you can use natural language to analyze and query data in the dialog "
"box. ![db plugins "
View File
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-08-29 20:50+0800\n"
"POT-Creation-Date: 2023-11-03 15:33+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/application/dashboard/dashboard.md:1
#: 2a1224e675d144269e5cc3695d4d60b4
#: 3e4ef12c51894e5d93adcbf090a4783b
msgid "Dashboard"
msgstr "Dashboard"
#: ../../getting_started/application/dashboard/dashboard.md:3
#: 2b6d2f94f73d43e68806bf4c6d0d9269
#: df9eb8d62bbe460d94e448c23fffa529
msgid ""
"The purpose of the DB-GPT Dashboard is to empower data analysts with "
"efficiency. DB-GPT provides intelligent reporting technology, allowing "
@@ -33,38 +33,54 @@ msgid ""
" language and gain insights into their respective areas of business."
msgstr "DB-GPT Dashboard目的是赋能数据分析人员。DB-GPT通过提供智能报表技术使得业务分析人员可以直接使用简单的自然语言进行自助分析。"
#: ../../getting_started/application/dashboard/dashboard.md:7
#: 3ce90998ab9547fc95105f0000e232da
#, fuzzy
msgid "The LLM (Language Model) suitable for the Dashboard scene is"
msgstr "Dashboard场景适用的LLM是 * chatgpt3.5. * chatgpt4."
#: ../../getting_started/application/dashboard/dashboard.md:8
#: 9612fa76c4264bab8e629ac50959faa9
#: be8fe8f9d9344e64acf51e54f8a17d7c
msgid "chatgpt3.5."
msgstr ""
#: ../../getting_started/application/dashboard/dashboard.md:9
#: 82ef8f03db3f4741a92113a485a40599
msgid "chatgpt4."
msgstr ""
#: ../../getting_started/application/dashboard/dashboard.md:13
#: da3aaa82126942058e72c472e2495641
msgid "Dashboard now support Datasource Type"
msgstr "Dashboard目前支持的数据源类型"
#: ../../getting_started/application/dashboard/dashboard.md:9
#: bb0b15742ebe41628fb0d1fc38caabe2
#: ../../getting_started/application/dashboard/dashboard.md:14
#: d21cfc9594a24b69a5040f60cba00abb
msgid "Mysql"
msgstr "Mysql"
#: ../../getting_started/application/dashboard/dashboard.md:10
#: 35491581125b4bdd8422f35b11c7bc2c
#: ../../getting_started/application/dashboard/dashboard.md:15
#: fc0e345321e84de5b9a96107fef5467e
msgid "Sqlite"
msgstr "Sqlite"
#: ../../getting_started/application/dashboard/dashboard.md:11
#: 8c4389354e0344aa9a781bdfc94c2cac
#: ../../getting_started/application/dashboard/dashboard.md:16
#: f5cc4387997441b88a9f10ba9b35b621
msgid "DuckDB"
msgstr "DuckDB"
#: ../../getting_started/application/dashboard/dashboard.md:13
#: 18e8c60f5c2f4aa698cec1e8e8b354c8
#: ../../getting_started/application/dashboard/dashboard.md:18
#: d8a86c3d05f84df0aa63c88f5dbfbd78
msgid "Steps to Dashboard In DB-GPT"
msgstr "Dashboard使用步骤"
#: ../../getting_started/application/dashboard/dashboard.md:15
#: 94f98e0f5c2e451ba29b9b77c4139ed9
#: ../../getting_started/application/dashboard/dashboard.md:20
#: dbf7b3f73677411a8e660f8d90c11e05
msgid "1 add datasource"
msgstr "1.添加数据源"
#: ../../getting_started/application/dashboard/dashboard.md:17
#: 34e1211e65b940c3beb6234bcfa423a1
#: ../../getting_started/application/dashboard/dashboard.md:22
#: 70a599bc716a4af2b5a07a40de6fed9d
#, fuzzy
msgid ""
"If you are using Dashboard for the first time, you need to mock some data"
@@ -74,18 +90,18 @@ msgid ""
"8c36-213b1c6fb5dd)"
msgstr "如果你是第一次使用Dashboard需要构造测试数据DB-GPT在pilot/mock_datas/提供了测试数据,只需要将数据源进行添加即可"
#: ../../getting_started/application/dashboard/dashboard.md:17
#: f29905929b32442ba05833b6c52a11be
#: ../../getting_started/application/dashboard/dashboard.md:22
#: e38d15f091264563840cf15f0a2fe97c
msgid "add_datasource"
msgstr "添加数据源"
#: ../../getting_started/application/dashboard/dashboard.md:21
#: 367a487dd1d54681a6e83d8fdda5b793
#: ../../getting_started/application/dashboard/dashboard.md:26
#: c6e87a5b20bb439ab1b3dc88e764d20b
msgid "2.Choose Dashboard Mode"
msgstr "2.进入Dashboard"
#: ../../getting_started/application/dashboard/dashboard.md:23
#: 1ee1e980934e4a618591b7c43921c304
#: ../../getting_started/application/dashboard/dashboard.md:28
#: 54c25766650546998d3aaa3655f5275b
msgid ""
"![create_space](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/5e888880-0e97-4b60-8e5c-b7e7224197f0)"
@@ -93,18 +109,18 @@ msgstr ""
"![create_space](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/5e888880-0e97-4b60-8e5c-b7e7224197f0)"
#: ../../getting_started/application/dashboard/dashboard.md:23
#: 12c756afdad740a9afc9cb46cc834af8
#: ../../getting_started/application/dashboard/dashboard.md:28
#: ea1781528db04000ab4a72308c7be97e
msgid "create_space"
msgstr "create_space"
#: ../../getting_started/application/dashboard/dashboard.md:25
#: 5a575b17408c42fbacd32d8ff792d5a8
#: ../../getting_started/application/dashboard/dashboard.md:30
#: 5de9b0f0853443368d90e42114e99d6e
msgid "3.Select Datasource"
msgstr "3.选择数据源"
#: ../../getting_started/application/dashboard/dashboard.md:27
#: ae051f852a5a4044a147c853cc3fba60
#: ../../getting_started/application/dashboard/dashboard.md:32
#: 3d4c429c4660414a8d5c44dea0ea0192
msgid ""
"![document](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/da2ac8b5-eca4-48ef-938f-f9dc1ca711b3)"
@@ -112,19 +128,19 @@ msgstr ""
"![document](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/da2ac8b5-eca4-48ef-938f-f9dc1ca711b3)"
#: ../../getting_started/application/dashboard/dashboard.md:27
#: ../../getting_started/application/dashboard/dashboard.md:31
#: 94907bb0dc694bc3a4d2ee57a84b8242 ecc0666385904fce8bb1000735482f65
#: ../../getting_started/application/dashboard/dashboard.md:32
#: ../../getting_started/application/dashboard/dashboard.md:36
#: 338912391ae441328549accdb6d5522b
msgid "document"
msgstr "document"
#: ../../getting_started/application/dashboard/dashboard.md:29
#: c8697e93661c48b19674e63094ba7486
#: ../../getting_started/application/dashboard/dashboard.md:34
#: 2c0fd7e79393417aa218908c5cc89461
msgid "4.Input your analysis goals"
msgstr "4.输入分析目标"
#: ../../getting_started/application/dashboard/dashboard.md:31
#: 473fc0d00ab54ee6bc5c21e017591cc4
#: ../../getting_started/application/dashboard/dashboard.md:36
#: fb0bb655581a4109a5510240e54db006
#, fuzzy
msgid ""
"![document](https://github.com/eosphoros-ai/DB-"
@@ -136,19 +152,19 @@ msgstr ""
"![document](https://github.com/eosphoros-ai/DB-GPT/assets/13723926"
"/3d14a2da-165e-4b2f-a921-325c20fe5ae9)"
#: ../../getting_started/application/dashboard/dashboard.md:31
#: ../../getting_started/application/dashboard/dashboard.md:35
#: 00597e1268544d97a3de368b04d5dcf8 350d04e4b7204823b7a03c0a7606c951
#: ../../getting_started/application/dashboard/dashboard.md:36
#: ../../getting_started/application/dashboard/dashboard.md:40
#: 44680217a9794eddb97bcb98593a1071
msgid "db plugins demonstration"
msgstr ""
#: ../../getting_started/application/dashboard/dashboard.md:34
#: b48cc911c1614def9e4738d35e8b754c
#: ../../getting_started/application/dashboard/dashboard.md:39
#: 4a9a8eac8e77465a9519b532afdfd1b7
msgid "5.Adjust and modify your report"
msgstr "5.调整"
#: ../../getting_started/application/dashboard/dashboard.md:35
#: b0442bbc0f6c4c33914814ac92fc4b13
#: ../../getting_started/application/dashboard/dashboard.md:40
#: b56da5e50ced4085bb376caa26e50e78
msgid ""
"![db plugins "
"demonstration](../../../../assets/chat_dashboard/chat_dashboard_2.png)"
View File
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-20 22:29+0800\n"
"POT-Creation-Date: 2023-10-30 11:37+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -19,34 +19,36 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/faq/llm/llm_faq.md:1 54763acec7da4deb90669195c54ec3a1
#: ../../getting_started/faq/llm/llm_faq.md:1 98e23f85313c45169ff2ba7f80193356
msgid "LLM USE FAQ"
msgstr "LLM模型使用FAQ"
#: ../../getting_started/faq/llm/llm_faq.md:3 66f73fd2ee7b462e92d3f263792a5e33
msgid "Q1:how to use openai chatgpt service"
#: ../../getting_started/faq/llm/llm_faq.md:3 0d49acfb4af947cb969b249346b00d33
#, fuzzy
msgid "Q1: how to use openai chatgpt service"
msgstr "我怎么使用OPENAI服务"
#: ../../getting_started/faq/llm/llm_faq.md:4 9d178d8462b74cb188bbacf2ac2ac12b
#: ../../getting_started/faq/llm/llm_faq.md:4 7010fec33e264987a29de86c54da93e8
#, fuzzy
msgid "change your LLM_MODEL in `.env`"
msgstr "通过在.env文件设置LLM_MODEL"
#: ../../getting_started/faq/llm/llm_faq.md:9 f7ca82f257be4ac09639a7f8af5e83eb
#: ../../getting_started/faq/llm/llm_faq.md:9 0982d6d5d0b3434fb00698aaf675f3f3
msgid "set your OPENAPI KEY"
msgstr "set your OPENAPI KEY"
#: ../../getting_started/faq/llm/llm_faq.md:16 d6255b20dce34a2690df7e2af3505d97
#: ../../getting_started/faq/llm/llm_faq.md:16 63650494c1574de09c007e1d470dd53d
msgid "make sure your openapi API_KEY is available"
msgstr "确认openapi API_KEY是否可用"
#: ../../getting_started/faq/llm/llm_faq.md:18 6f1c6dbdb31f4210a6d21f0f3a6ae589
#: ../../getting_started/faq/llm/llm_faq.md:18 5721ec71e344499d96c55b7e531d7c08
#, fuzzy
msgid ""
"Q2 What difference between `python dbgpt_server --light` and `python "
"Q2: What difference between `python dbgpt_server --light` and `python "
"dbgpt_server`"
msgstr "Q2 `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"
msgstr "Q2: `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"
#: ../../getting_started/faq/llm/llm_faq.md:20 b839771ae9e34e998b0edf8d69deabdd
#: ../../getting_started/faq/llm/llm_faq.md:20 76a650f195dd40b6a3a3564030cdc040
msgid ""
"`python dbgpt_server --light` dbgpt_server does not start the llm "
"service. Users can deploy the llm service separately by using `python "
@@ -58,75 +60,75 @@ msgstr ""
"用户可以通过`python "
"llmserver`单独部署模型服务dbgpt_server通过LLM_SERVER环境变量来访问模型服务。目的是为了可以将dbgpt后台服务和大模型服务分离部署。"
#: ../../getting_started/faq/llm/llm_faq.md:22 aba39cef6fe84799bcd03e8f36c41296
#: ../../getting_started/faq/llm/llm_faq.md:22 8cd87e3504784d9e891e1fb96c79e143
msgid ""
"`python dbgpt_server` dbgpt_server service and the llm service are "
"deployed on the same instance. when dbgpt_server starts the service, it "
"also starts the llm service at the same time."
msgstr "`python dbgpt_server` 是将后台服务和模型服务部署在同一台实例上.dbgpt_server在启动服务的时候同时开启模型服务."
#: ../../getting_started/faq/llm/llm_faq.md:27 c65270d479af49e28e99b35a7932adbd
#: ../../getting_started/faq/llm/llm_faq.md:27 58a6eaf57e6d425685f67058b1a642d4
msgid ""
"If you want to access an external LLM service(deployed by DB-GPT), you "
"need to"
msgstr "如果模型服务部署(通过DB-GPT部署)在别的机器想通过dbgpt服务访问模型服务"
#: ../../getting_started/faq/llm/llm_faq.md:29 da153e6d18c543f28e0c4e85618e3d3d
#: ../../getting_started/faq/llm/llm_faq.md:29 67ac8823ca2e49ba9c833368e2cfb53c
msgid ""
"1.set the variables LLM_MODEL=YOUR_MODEL_NAME, "
"MODEL_SERVER=YOUR_MODEL_SERVEReg:http://localhost:5000 in the .env "
"file."
msgstr ""
#: ../../getting_started/faq/llm/llm_faq.md:31 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:31 e5c066bcdf0649a1b33bbfc7fd3b1a66
msgid "2.execute dbgpt_server.py in light mode"
msgstr "2.execute dbgpt_server.py light 模式"
#: ../../getting_started/faq/llm/llm_faq.md:33 8f4b9401ac4f4a25a7479bee9ef5e8c1
#: ../../getting_started/faq/llm/llm_faq.md:33 402ff01d7ee94d97be4a0eb964e39b97
msgid "python pilot/server/dbgpt_server.py --light"
msgstr ""
#: ../../getting_started/faq/llm/llm_faq.md:38 69e1064cd7554ce6b49da732f800eacc
#: ../../getting_started/faq/llm/llm_faq.md:38 86190c689d8f4d9a9b58d904e0b5867b
#, fuzzy
msgid "Q3 How to use MultiGPUs"
msgstr "Q2 怎么使用 MultiGPUs"
msgid "Q3: How to use MultiGPUs"
msgstr "Q3: 怎么使用 MultiGPUs"
#: ../../getting_started/faq/llm/llm_faq.md:40 6de3f105ce96430db5756f38bbd9ca12
#: ../../getting_started/faq/llm/llm_faq.md:40 6b08cff88750440b98956203d8b8a084
msgid ""
"DB-GPT will use all available gpu by default. And you can modify the "
"setting `CUDA_VISIBLE_DEVICES=0,1` in `.env` file to use the specific gpu"
" IDs."
msgstr "DB-GPT默认加载可利用的gpu你也可以通过修改 在`.env`文件 `CUDA_VISIBLE_DEVICES=0,1`来指定gpu IDs"
#: ../../getting_started/faq/llm/llm_faq.md:43 87cb9bfb20af4b259d719df797c42a7d
#: ../../getting_started/faq/llm/llm_faq.md:43 93b39089e5be4475b9e90e7813f5a7d9
msgid ""
"Optionally, you can also specify the gpu ID to use before the starting "
"command, as shown below:"
msgstr "你也可以指定gpu ID启动"
#: ../../getting_started/faq/llm/llm_faq.md:53 bcfa35cda6304ee5ab9a775a2d4eda63
#: ../../getting_started/faq/llm/llm_faq.md:53 62e3074c109d401fa4bf1ddbdc6c7be1
msgid ""
"You can modify the setting `MAX_GPU_MEMORY=xxGib` in `.env` file to "
"configure the maximum memory used by each GPU."
msgstr "同时你可以通过在.env文件设置`MAX_GPU_MEMORY=xxGib`修改每个GPU的最大使用内存"
#: ../../getting_started/faq/llm/llm_faq.md:55 a05c5484927844c8bb4791f0a9ccc82e
#: ../../getting_started/faq/llm/llm_faq.md:55 d235bd83545c476f8e12572658d1c723
#, fuzzy
msgid "Q4 Not Enough Memory"
msgstr "Q3 机器显存不够 "
msgid "Q4: Not Enough Memory"
msgstr "Q4: 机器显存不够 "
#: ../../getting_started/faq/llm/llm_faq.md:57 fe17a023b6eb4a92b1b927e1b94e3784
#: ../../getting_started/faq/llm/llm_faq.md:57 b3243ed9147f42bba987d7f9b778e66f
msgid "DB-GPT supported 8-bit quantization and 4-bit quantization."
msgstr "DB-GPT 支持 8-bit quantization 和 4-bit quantization."
#: ../../getting_started/faq/llm/llm_faq.md:59 76c3684c10864b8e87e5c2255b6c0b7f
#: ../../getting_started/faq/llm/llm_faq.md:59 1ddb9f94ab994bfebfee46d1c19888d4
msgid ""
"You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` "
"in `.env` file to use quantization(8-bit quantization is enabled by "
"default)."
msgstr "你可以通过在.env文件设置`QUANTIZE_8bit=True` or `QUANTIZE_4bit=True`"
#: ../../getting_started/faq/llm/llm_faq.md:61 c5d849a38f1a4f0687bbcffb6699dc39
#: ../../getting_started/faq/llm/llm_faq.md:61 54b85daa3fb24b17b67a6da31d2be8b0
msgid ""
"Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit"
" quantization can run with 48 GB of VRAM."
@@ -134,49 +136,77 @@ msgstr ""
"Llama-2-70b with 8-bit quantization 可以运行在 80 GB VRAM机器 4-bit "
"quantization可以运行在 48 GB VRAM"
#: ../../getting_started/faq/llm/llm_faq.md:63 867329a5e3b0403083e96f72b8747fb2
#: ../../getting_started/faq/llm/llm_faq.md:63 097d680aed184fee9eceebee55a47ac1
msgid ""
"Note: you need to install the latest dependencies according to "
"[requirements.txt](https://github.com/eosphoros-ai/DB-"
"GPT/blob/main/requirements.txt)."
"Note: you need to install the quantization dependencies with `pip install"
" -e \".[quantization]\"`"
msgstr ""
#: ../../getting_started/faq/llm/llm_faq.md:65 60ceee25e9fb4ddba40c5306bfb0a82f
#: ../../getting_started/faq/llm/llm_faq.md:65 f3a51056043c49eb84471040f2b364aa
#, fuzzy
msgid "Q5 How to Add LLM Service dynamic local mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q5: How to Add LLM Service dynamic local mode"
msgstr "Q5: 怎样动态新增模型服务"
#: ../../getting_started/faq/llm/llm_faq.md:67 c99eb7f7ae844884a8f0da94238ea7e0
#: ../../getting_started/faq/llm/llm_faq.md:67 43ee6b0f23814c94a4ddb2429801a5e1
msgid ""
"Now DB-GPT through multi-llm service switch, so how to add llm service "
"dynamic,"
msgstr "DB-GPT支持多个模型服务切换, 怎样添加一个模型服务呢"
#: ../../getting_started/faq/llm/llm_faq.md:78 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:78 c217bbf0d2b6425fa7a1c691b7704a8d
#, fuzzy
msgid "Q6 How to Add LLM Service dynamic in remote mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q6: How to Add LLM Service dynamic in remote mode"
msgstr "Q6: 怎样动态新增模型服务"
#: ../../getting_started/faq/llm/llm_faq.md:79 8833ce89465848259b08ef0a4fa68d96
#: ../../getting_started/faq/llm/llm_faq.md:79 195bdaa937a94c7aa0d8c6e1a5430d6e
msgid ""
"If you deploy llm service in remote machine instance, and you want to "
"add model service to dbgpt server to manage"
msgstr "如果你想在远程机器实例部署大模型服务并添加到本地dbgpt_server进行管理"
#: ../../getting_started/faq/llm/llm_faq.md:81 992eb37e3cca48829636c15ba3ec2ee8
#: ../../getting_started/faq/llm/llm_faq.md:81 c64098b838a94821963a1d16e56497ff
msgid "use dbgpt start worker and set --controller_addr."
msgstr "使用1`dbgpt start worker`命令并设置注册地址--controller_addr"
#: ../../getting_started/faq/llm/llm_faq.md:91 0d06d7d6dd3d4780894ecd914c89b5a2
#: ../../getting_started/faq/llm/llm_faq.md:91 cb12d5e9d9d24f14abc3ebea877a4b24
#, fuzzy
msgid "Q7 dbgpt command not found"
msgstr "Q6 dbgpt command not found"
msgid "Q7: dbgpt command not found"
msgstr "Q7: dbgpt command not found"
#: ../../getting_started/faq/llm/llm_faq.md:97 5d9beed0d95a4503a43d0e025664273b
#: ../../getting_started/faq/llm/llm_faq.md:97 f95cdccfa82d4b3eb2a23dd297131faa
#, fuzzy
msgid ""
"Q8 When starting the worker_manager on a cloud server and registering it "
"with the controller, it is noticed that the worker's exposed IP is a "
"Q8: When starting the worker_manager on a cloud server and registering it"
" with the controller, it is noticed that the worker's exposed IP is a "
"private IP instead of a public IP, which leads to the inability to access"
" the service."
msgstr "云服务器启动worker_manager注册到controller时发现worker暴露的ip是私网ip, 没有以公网ip暴露导致服务访问不到"
msgstr ""
"Q8: 云服务器启动worker_manager注册到controller时发现worker暴露的ip是私网ip, "
"没有以公网ip暴露导致服务访问不到"
#: ../../getting_started/faq/llm/llm_faq.md:106
#: 739a2983f3484acf98e877dc12f4ccda
msgid "Q9: How to customize model path and prompt template"
msgstr "Q9: 如何自定义模型路径和 prompt 模板"
#: ../../getting_started/faq/llm/llm_faq.md:108
#: 8b82a33a311649c7850c30c00c987c72
#, fuzzy
msgid ""
"DB-GPT will read the model path from "
"`pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`. "
"Of course, you can use the environment variable `LLM_MODEL_PATH` to "
"specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model "
"prompt template."
msgstr ""
"DB-GPT 会根据 `LLM_MODEL` 从 `pilot.configs.model_config.LLM_MODEL_CONFIG` "
"中读取模型路径。当然,你可以使用环境 `LLM_MODEL_PATH` 来指定模型路径,以及使用 `LLM_PROMPT_TEMPLATE` "
"来指定模型的 prompt 模板。"
#~ msgid ""
#~ "Note: you need to install the "
#~ "latest dependencies according to "
#~ "[requirements.txt](https://github.com/eosphoros-ai/DB-"
#~ "GPT/blob/main/requirements.txt)."
#~ msgstr ""
View File
@@ -0,0 +1,71 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2023, csunny
# This file is distributed under the same license as the DB-GPT package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.4.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-11-02 21:09+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/install/cluster/openai.md:1
#: 01f4e2bf853341198633b367efec1522
msgid "OpenAI-Compatible RESTful APIs"
msgstr "OpenAI RESTful 兼容接口"
#: ../../getting_started/install/cluster/openai.md:5
#: d8717e42335e4027bf4e76b3d28768ee
msgid "Install Prepare"
msgstr "安装准备"
#: ../../getting_started/install/cluster/openai.md:7
#: 9a48d8ee116942468de4c6faf9a64758
msgid ""
"You must [deploy DB-GPT cluster](https://db-"
"gpt.readthedocs.io/en/latest/getting_started/install/cluster/vms/index.html)"
" first."
msgstr "你必须先部署 [DB-GPT 集群]"
"(https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh-cn/latest/getting_started/install/cluster/vms/index.html)。"
#: ../../getting_started/install/cluster/openai.md:9
#: 7673a7121f004f7ca6b1a94a7e238fa3
msgid "Launch Model API Server"
msgstr "启动模型 API Server"
#: ../../getting_started/install/cluster/openai.md:14
#: 84a925c2cbcd4e4895a1d2d2fe8f720f
msgid "By default, the Model API Server starts on port 8100."
msgstr "默认情况下,模型 API Server 使用 8100 端口启动。"
#: ../../getting_started/install/cluster/openai.md:16
#: e53ed41977cd4721becd51eba05c6609
msgid "Validate with cURL"
msgstr "通过 cURL 验证"
#: ../../getting_started/install/cluster/openai.md:18
#: 7c883b410b5c4e53a256bf17c1ded80d
msgid "List models"
msgstr "列出模型"
#: ../../getting_started/install/cluster/openai.md:26
#: ../../getting_started/install/cluster/openai.md:37
#: 7cf0ed13f0754f149ec085cd6cf7a45a 990d5d5ed5d64ab49550e68495b9e7a0
msgid "Chat completions"
msgstr ""
#: ../../getting_started/install/cluster/openai.md:35
#: 81583edd22df44e091d18a0832278131
msgid "Validate with OpenAI Official SDK"
msgstr "通过 OpenAI 官方 SDK 验证"
View File
@@ -0,0 +1,652 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2023, csunny
# This file is distributed under the same license as the DB-GPT package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.4.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-11-06 19:38+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/install/deploy.rst:4 f3ea3305f122460aaa11999edc4b5de6
msgid "Installation From Source"
msgstr "源码安装"
#: ../../getting_started/install/deploy.rst:6 bb941f2bd56d4eb48f7c4f75ebd74176
msgid "To get started, install DB-GPT with the following steps."
msgstr "按照以下步骤进行安装"
#: ../../getting_started/install/deploy.rst:10 27a1e092c1f945ceb9946ebdaf89b600
msgid "1.Preparation"
msgstr "1.准备"
#: ../../getting_started/install/deploy.rst:11 5c5bfbdc74a14c3b9b1f1ed66617cac8
msgid "**Download DB-GPT**"
msgstr "**下载DB-GPT项目**"
#: ../../getting_started/install/deploy.rst:17 3065ee2f34f9417598a37fd699a4863e
msgid "**Install Miniconda**"
msgstr "**安装Miniconda**"
#: ../../getting_started/install/deploy.rst:19 f9f3a653ffb8447284686aa37a7bb79a
msgid ""
"We use Sqlite as default database, so there is no need for database "
"installation. If you choose to connect to other databases, you can "
"follow our tutorial for installation and configuration. For the entire "
"installation process of DB-GPT, we use the miniconda3 virtual "
"environment. Create a virtual environment and install the Python "
"dependencies. `How to install Miniconda "
"<https://docs.conda.io/en/latest/miniconda.html>`_"
msgstr ""
"目前使用Sqlite作为默认数据库因此DB-"
"GPT快速部署不需要部署相关数据库服务。如果你想使用其他数据库需要先部署相关数据库服务。我们目前使用Miniconda进行python环境和包依赖管理。`如何安装"
" Miniconda <https://docs.conda.io/en/latest/miniconda.html>`_ 。"
#: ../../getting_started/install/deploy.rst:36 a2cd2fdd1d16421f9cbe341040b153b6
msgid "2.Deploy LLM Service"
msgstr "2.部署LLM服务"
#: ../../getting_started/install/deploy.rst:37 180a121e3c994a92a917ace80bf12386
msgid ""
"DB-GPT can be deployed on servers with low hardware requirements or on "
"servers with high hardware requirements."
msgstr "DB-GPT可以部署在对硬件要求不高的服务器也可以部署在对硬件要求高的服务器"
#: ../../getting_started/install/deploy.rst:39 395608515c0348d5849030b58da7b659
msgid ""
"If you are low hardware requirements you can install DB-GPT by Using "
"third-part LLM REST API Service OpenAI, Azure, tongyi."
msgstr "低硬件要求模式适用于对接第三方模型服务的 API比如 OpenAI、通义千问、 文心一言等。"
#: ../../getting_started/install/deploy.rst:43 e29297e61e2e4d05ba88f0e1c2b1f365
msgid "As our project has the ability to achieve OpenAI performance of over 85%,"
msgstr "使用OpenAI服务可以让DB-GPT准确率达到85%"
#: ../../getting_started/install/deploy.rst:48 d0d70d51e8684c2891c58a6da4941a52
msgid "Notice make sure you have install git-lfs"
msgstr "确认是否已经安装git-lfs"
#: ../../getting_started/install/deploy.rst:50 0d2781fd38eb467ebad2a3c310a344e6
msgid "centos:yum install git-lfs"
msgstr ""
#: ../../getting_started/install/deploy.rst:52 1574ea24ad6443409070aa3a1f7abe87
msgid "ubuntu:apt-get install git-lfs"
msgstr ""
#: ../../getting_started/install/deploy.rst:54 ad86473d5c87447091c713f45cbfed0e
msgid "macos:brew install git-lfs"
msgstr ""
#: ../../getting_started/install/deploy.rst:58
#: ../../getting_started/install/deploy.rst:229
#: 3dd1e40f33924faab63634907a7f6511 dce32420face4ab2b99caf7f3900ede9
msgid "OpenAI"
msgstr "OpenAI"
#: ../../getting_started/install/deploy.rst:60 1f66400540114de2820761ef80137805
msgid "Installing Dependencies"
msgstr "安装依赖"
#: ../../getting_started/install/deploy.rst:66
#: ../../getting_started/install/deploy.rst:213
#: 31b856a6fc094334a37914c046cb1bb1 42b2f6d36ca4487f8e31d59bba123fca
msgid "Download embedding model"
msgstr "下载 embedding 模型"
#: ../../getting_started/install/deploy.rst:78 f970fb69e47c40d7bda381ec6f045829
msgid "Configure LLM_MODEL, PROXY_API_URL and API_KEY in `.env` file"
msgstr "在 `.env` 文件中设置 LLM_MODEL、PROXY_API_URL 和 API_KEY"
#: ../../getting_started/install/deploy.rst:88
#: ../../getting_started/install/deploy.rst:288
#: 6ca04c88fc60480db2ebdc9b234a0bbb 709cfe74c45c4eff83a7d77bb30b4a2b
msgid "Make sure your .env configuration is not overwritten"
msgstr "确保你的 .env 文件不会被覆盖"
#: ../../getting_started/install/deploy.rst:91 147aea0d753f44588f4a0c56002334ab
msgid "Vicuna"
msgstr "Vicuna"
#: ../../getting_started/install/deploy.rst:92 6a0bd60c4ca2478cb0f3d85aff70cd3b
msgid ""
"`Vicuna-v1.5 <https://huggingface.co/lmsys/vicuna-13b-v1.5>`_ based on "
"llama-2 has been released, we recommend you set `LLM_MODEL=vicuna-"
"13b-v1.5` to try this model)"
msgstr ""
"基于 llama-2 的模型 `Vicuna-v1.5 <https://huggingface.co/lmsys/vicuna-"
"13b-v1.5>`_ 已经发布,我们推荐你通过配置 `LLM_MODEL=vicuna-13b-v1.5` 来尝试这个模型"
#: ../../getting_started/install/deploy.rst:94 6a111c2ef31f41d4b737cf8b6f36fb16
msgid "vicuna-v1.5 hardware requirements"
msgstr "vicuna-v1.5 的硬件要求"
#: ../../getting_started/install/deploy.rst:98
#: ../../getting_started/install/deploy.rst:143
#: dc24c0238ce141df8bdce26cc0e2ddbb e04f1ea4b36940f3a28b66cdff7b702e
msgid "Model"
msgstr "模型"
#: ../../getting_started/install/deploy.rst:99
#: ../../getting_started/install/deploy.rst:144
#: b6473e65ca1a437a84226531be4da26d e0a2f7580685480aa13ca462418764d3
msgid "Quantize"
msgstr "量化"
#: ../../getting_started/install/deploy.rst:100
#: ../../getting_started/install/deploy.rst:145
#: 56471c3b174d4adf9e8cb5bebaa300a6 d82297b8b9c148c3906d8ee4ed10d8a0
msgid "VRAM Size"
msgstr "显存"
#: ../../getting_started/install/deploy.rst:101
#: ../../getting_started/install/deploy.rst:104
#: 1214432602fe47a28479ce3e21a7d88b 51838e72e42248f199653f1bf08c8155
msgid "vicuna-7b-v1.5"
msgstr ""
#: ../../getting_started/install/deploy.rst:102
#: ../../getting_started/install/deploy.rst:108
#: ../../getting_started/install/deploy.rst:147
#: ../../getting_started/install/deploy.rst:153
#: a64439f4e6f64c42bb76fbb819556784 ed95f498641e4a0f976318df608a1d67
#: fc400814509048b4a1cbe1e07c539285 ff7a8cb2cce8438cb6cb0d80dabfc2b5
msgid "4-bit"
msgstr ""
#: ../../getting_started/install/deploy.rst:103
#: ../../getting_started/install/deploy.rst:148
#: 2726e8a278c34e6db59147e9f66f2436 5feab5755a41403c9d641da697de4651
msgid "8 GB"
msgstr ""
#: ../../getting_started/install/deploy.rst:105
#: ../../getting_started/install/deploy.rst:111
#: ../../getting_started/install/deploy.rst:150
#: ../../getting_started/install/deploy.rst:156
#: 1984406682da4da3ad7b275e44085d07 2f027d838d0c46409e54c066d7983aae
#: 5c5878fe64944872b6769f075fedca05 e2507408a9c5423988e17b7029b487e4
msgid "8-bit"
msgstr ""
#: ../../getting_started/install/deploy.rst:106
#: ../../getting_started/install/deploy.rst:109
#: ../../getting_started/install/deploy.rst:151
#: ../../getting_started/install/deploy.rst:154
#: 332f50702c7b46e79ea0af5cbf86c6d5 381d23253cfd40109bacefca6a179f91
#: aafe2423c25546e789e4804e3fd91d1d cc56990a58e941d6ba023cbd4dca0357
msgid "12 GB"
msgstr ""
#: ../../getting_started/install/deploy.rst:107
#: ../../getting_started/install/deploy.rst:110
#: 1f14e2fa6d41493cb208f55eddff9773 6457f6307d8546beb5f2fb69c30922d8
msgid "vicuna-13b-v1.5"
msgstr ""
#: ../../getting_started/install/deploy.rst:112
#: ../../getting_started/install/deploy.rst:157
#: e24d3a36b5ce4cfe861dce2d1c4db592 f2e66b2da7954aaab0ee526b25a371f5
msgid "20 GB"
msgstr ""
#: ../../getting_started/install/deploy.rst:128
#: ../../getting_started/install/deploy.rst:175
#: ../../getting_started/install/deploy.rst:201
#: 1719c11f92874c47a87c00c634b9fad8 4596fcbe415d42fdbb29b92964fae070
#: e639ae6076a64b7b9de08527966e4550
msgid "The model files are large and will take a long time to download."
msgstr "这个模型权重文件比较大,需要花费较长时间来下载。"
#: ../../getting_started/install/deploy.rst:130
#: ../../getting_started/install/deploy.rst:177
#: ../../getting_started/install/deploy.rst:203
#: 4ec1492d389f403ebd9dd805fcaac68e ac6c68e2bf9b47c694ea8e0506014b10
#: e39be72282e64760903aaba45f8effb8
msgid "**Configure LLM_MODEL in `.env` file**"
msgstr "**在 `.env` 文件中配置 LLM_MODEL**"
#: ../../getting_started/install/deploy.rst:137
#: ../../getting_started/install/deploy.rst:234
#: 7ce4e2253ef24a7ea890ade04ce36682 b9d5bf4fa09649c4a098503132ce7c0c
msgid "Baichuan"
msgstr "百川"
#: ../../getting_started/install/deploy.rst:139
#: ffdad6a70558457fa825bad4d811100d
msgid "Baichuan hardware requirements"
msgstr "百川 的硬件要求"
#: ../../getting_started/install/deploy.rst:146
#: ../../getting_started/install/deploy.rst:149
#: 59d9b64f54d34971a68e93e3101def06 a66ce354d8f143ce920303241cd8947e
msgid "baichuan-7b"
msgstr ""
#: ../../getting_started/install/deploy.rst:152
#: ../../getting_started/install/deploy.rst:155
#: c530662259ca4ec5b03a18e4b690e17a fa3af65ecca54daab961f55729bbc40e
msgid "baichuan-13b"
msgstr ""
#: ../../getting_started/install/deploy.rst:179
#: efd73637994a4b7c97ef3557e1f3161c
msgid "please rename Baichuan path to \"baichuan2-13b\" or \"baichuan2-7b\""
msgstr "将Baichuan模型目录修改为\"baichuan2-13b\" 或 \"baichuan2-7b\""
#: ../../getting_started/install/deploy.rst:185
#: 435a3f0d0fe84b49a7305e2c0f51a5df
msgid "ChatGLM"
msgstr ""
#: ../../getting_started/install/deploy.rst:205
#: 165e23d3d40d4756b5a6a2580d015213
msgid "please rename chatglm model path to \"chatglm2-6b\""
msgstr "将 chatglm 模型目录修改为\"chatglm2-6b\""
#: ../../getting_started/install/deploy.rst:211
#: b651ebb5e0424b8992bc8b49d2280bee
msgid "Other LLM API"
msgstr "其它模型 API"
#: ../../getting_started/install/deploy.rst:225
#: 4eabdc25f4a34676b3ece620c88d866f
msgid "Now DB-GPT support LLM REST API TYPE:"
msgstr "目前DB-GPT支持的大模型 REST API 类型:"
#: ../../getting_started/install/deploy.rst:230
#: d361963cc3404e5ca55a823f1f1f545c
msgid "Azure"
msgstr ""
#: ../../getting_started/install/deploy.rst:231
#: 3b0f17c74aaa4bbd9db935973fa1c36b
msgid "Aliyun tongyi"
msgstr ""
#: ../../getting_started/install/deploy.rst:232
#: 7c4c457a499943b8804e31046551006d
msgid "Baidu wenxin"
msgstr ""
#: ../../getting_started/install/deploy.rst:233
#: ac1880a995184295acf07fff987d7c56
msgid "Zhipu"
msgstr ""
#: ../../getting_started/install/deploy.rst:235
#: 6927500d7d3445b7b1981da1df4e1666
msgid "Bard"
msgstr ""
#: ../../getting_started/install/deploy.rst:237
#: 419d564de18c485780d9336b852735b6
msgid "Configure LLM_MODEL and PROXY_API_URL and API_KEY in `.env` file"
msgstr "在`.env`文件设置 LLM_MODEL、PROXY_API_URL和 API_KEY"
#: ../../getting_started/install/deploy.rst:290
#: 71d5203682e24e2e896e4b9913471f78
msgid "llama.cpp"
msgstr ""
#: ../../getting_started/install/deploy.rst:292
#: 36a2b82f711a4c0f9491aca9c84d3c91
msgid ""
"DB-GPT already supports `llama.cpp "
"<https://github.com/ggerganov/llama.cpp>`_ via `llama-cpp-python "
"<https://github.com/abetlen/llama-cpp-python>`_ ."
msgstr ""
"DB-GPT 已经通过 `llama-cpp-python <https://github.com/abetlen/llama-cpp-"
"python>`_ 支持了 `llama.cpp <https://github.com/ggerganov/llama.cpp>`_ 。"
#: ../../getting_started/install/deploy.rst:294
#: 439064115dca4ae08d8e60041f2ffe17
msgid "**Preparing Model Files**"
msgstr "**准备模型文件**"
#: ../../getting_started/install/deploy.rst:296
#: 7291d6fa20b34942926e7765c01f25c9
msgid ""
"To use llama.cpp, you need to prepare a gguf format model file, and there"
" are two common ways to obtain it, you can choose either:"
msgstr "为了使用 llama.cpp你需要准备 gguf 格式的文件,你可以通过以下两种方法获取"
#: ../../getting_started/install/deploy.rst:298
#: 45752f3f5dd847469da0c5edddc530fa
msgid "**1. Download a pre-converted model file.**"
msgstr "**1.下载已转换的模型文件.**"
#: ../../getting_started/install/deploy.rst:300
#: c451db2157ff49b2b4992aed9907ddfa
msgid ""
"Suppose you want to use `Vicuna 13B v1.5 <https://huggingface.co/lmsys"
"/vicuna-13b-v1.5>`_ , you can download the file already converted from "
"`TheBloke/vicuna-13B-v1.5-GGUF <https://huggingface.co/TheBloke/vicuna-"
"13B-v1.5-GGUF>`_ , only one file is needed. Download it to the `models` "
"directory and rename it to `ggml-model-q4_0.gguf`."
msgstr ""
"假设您想使用 `Vicuna 13B v1.5 <https://huggingface.co/lmsys/vicuna-"
"13b-v1.5>`_ 您可以从 `TheBloke/vicuna-"
"13B-v1.5-GGUF <https://huggingface.co/TheBloke/vicuna-"
"13B-v1.5-GGUF>`_ 下载已转换的文件只需要一个文件。将其下载到models目录并将其重命名为 `ggml-"
"model-q4_0.gguf`。"
#: ../../getting_started/install/deploy.rst:306
#: f5b92b51622b43d398b3dc13a5892c29
msgid "**2. Convert It Yourself**"
msgstr "**2. 自行转换**"
#: ../../getting_started/install/deploy.rst:308
#: 8838ae6dcecf44ecad3fd963980c8eb3
msgid ""
"You can convert the model file yourself according to the instructions in "
"`llama.cpp#prepare-data--run <https://github.com/ggerganov/llama.cpp"
"#prepare-data--run>`_ , and put the converted file in the models "
"directory and rename it to `ggml-model-q4_0.gguf`."
msgstr ""
"您可以根据 `llama.cpp#prepare-data--run <https://github.com/ggerganov/llama.cpp"
"#prepare-data--run>`_ 中的说明自行转换模型文件并把转换后的文件放在models目录中并重命名为`ggml-"
"model-q4_0.gguf`。"
#: ../../getting_started/install/deploy.rst:310
#: 3fe28d6e5eaa4bdf9c5c44a914c3577c
msgid "**Installing Dependencies**"
msgstr "**安装依赖**"
#: ../../getting_started/install/deploy.rst:312
#: bdc10d2e88cc4c3f84a8c4a8dc2037a9
msgid ""
"llama.cpp is an optional dependency in DB-GPT, and you can manually "
"install it using the following command:"
msgstr "llama.cpp在DB-GPT中是可选安装项, 你可以通过以下命令进行安装"
#: ../../getting_started/install/deploy.rst:319
#: 9c136493448b43b5b27f66af74ff721e
msgid "**3.Modifying the Configuration File**"
msgstr "**3.修改配置文件**"
#: ../../getting_started/install/deploy.rst:321
#: c835a7dee1dd409fb861e7b886c6dc5b
msgid "Next, you can directly modify your `.env` file to enable llama.cpp."
msgstr "修改`.env`文件使用llama.cpp"
#: ../../getting_started/install/deploy.rst:328
#: ../../getting_started/install/deploy.rst:396
#: 296e6d08409544918fee0c31b1bf195c a81e5d882faf4722b0e10d53f635f53c
msgid ""
"Then you can run it according to `Run <https://db-"
"gpt.readthedocs.io/en/latest/getting_started/install/deploy/deploy.html#run>`_"
msgstr ""
"然后你可以根据 `运行 <https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-"
"cn/zh_CN/latest/getting_started/install/deploy/deploy.html#run>`_ 来运行。"
#: ../../getting_started/install/deploy.rst:331
#: 0f7f487ee11a4e01a95f7c504f0469ba
msgid "**More Configurations**"
msgstr "**更多配置文件**"
#: ../../getting_started/install/deploy.rst:333
#: b0f9964497f64fb5b3740099232cd72b
msgid ""
"In DB-GPT, the model configuration can be done through `{model "
"name}_{config key}`."
msgstr "在DB-GPT中模型配置可以通过`{模型名称}_{配置名}` 来配置。"
#: ../../getting_started/install/deploy.rst:335
#: 7c225de4fe9d4dd3a3c2b2a33802e656
msgid "More Configurations"
msgstr "**更多配置文件**"
#: ../../getting_started/install/deploy.rst:339
#: 5cc1671910314796a9ce0b5107d3c9fe
msgid "Environment Variable Key"
msgstr "环境变量Key"
#: ../../getting_started/install/deploy.rst:340
#: 4359ed4e11bb47ad89a605cbf9016cd5
msgid "Default"
msgstr "默认值"
#: ../../getting_started/install/deploy.rst:341
#: 5cf0efc6d1014665bb9dbdae96bf2726
msgid "Description"
msgstr "描述"
#: ../../getting_started/install/deploy.rst:342
#: e7c291f80a9a40fa90d642901eca02c6
msgid "llama_cpp_prompt_template"
msgstr ""
#: ../../getting_started/install/deploy.rst:343
#: ../../getting_started/install/deploy.rst:346
#: ../../getting_started/install/deploy.rst:352
#: ../../getting_started/install/deploy.rst:358
#: ../../getting_started/install/deploy.rst:364
#: 07dc7fc4e51e4d9faf8e5221bcf03ee0 549f3c57a2e9427880e457e653ce1182
#: 7ad961957f7b49d08e4aff347749b78d c1eab368175c4fa88fe0b471919523b2
#: e2e0bf9903484972b6d20e6837010029
msgid "None"
msgstr ""
#: ../../getting_started/install/deploy.rst:344
#: 6b5044a2009f432c92fcd65db42506d8
msgid ""
"Prompt template name, now support: zero_shot, vicuna_v1.1,alpaca,llama-2"
",baichuan-chat,internlm-chat, If None, the prompt template is "
"automatically determined from model path。"
msgstr ""
"Prompt template 现在可以支持`zero_shot, vicuna_v1.1,alpaca,llama-2,baichuan-"
"chat,internlm-chat`, 如果是None, 可以根据模型路径来自动获取模型 Prompt template"
#: ../../getting_started/install/deploy.rst:345
#: e01c860441ad43b88c0a8d012f97d2d8
msgid "llama_cpp_model_path"
msgstr ""
#: ../../getting_started/install/deploy.rst:347
#: 1cb68d772e454812a1a0c6de4950b8ce
msgid "Model path"
msgstr "模型路径"
#: ../../getting_started/install/deploy.rst:348
#: 6dac03820edb4fbd8a0856405e84c5bc
msgid "llama_cpp_n_gpu_layers"
msgstr ""
#: ../../getting_started/install/deploy.rst:349
#: 8cd5607b7941427f9a342ca7a00e5778
msgid "1000000000"
msgstr ""
#: ../../getting_started/install/deploy.rst:350
#: 61c9297656da434aa7ac2b49cf61ea9d
msgid ""
"Number of layers to offload to the GPU, Set this to 1000000000 to offload"
" all layers to the GPU. If your GPU VRAM is not enough, you can set a low"
" number, eg: 10"
msgstr "要将多少网络层转移到GPU上将其设置为1000000000以将所有层转移到GPU上。如果您的 GPU 内存不足可以设置较低的数字例如10。"
#: ../../getting_started/install/deploy.rst:351
#: 8c2d2182557a483aa2fda590c24faaf3
msgid "llama_cpp_n_threads"
msgstr ""
#: ../../getting_started/install/deploy.rst:353
#: cc442f61ffc442ecbd98c1e7f5598e1a
msgid ""
"Number of threads to use. If None, the number of threads is automatically"
" determined"
msgstr "要使用的线程数量。如果为None则线程数量将自动确定。"
#: ../../getting_started/install/deploy.rst:354
#: 8d5e917d86f048348106e6923638a0c2
msgid "llama_cpp_n_batch"
msgstr ""
#: ../../getting_started/install/deploy.rst:355
#: ee2719a0a8cd4a77846cffd8e675638f
msgid "512"
msgstr ""
#: ../../getting_started/install/deploy.rst:356
#: 845b354315384762a611ad2daa539d57
msgid "Maximum number of prompt tokens to batch together when calling llama_eval"
msgstr "在调用llama_eval时批处理在一起的prompt tokens的最大数量"
#: ../../getting_started/install/deploy.rst:357
#: a95e788bfa5f46f3bcd6356dfd9f87eb
msgid "llama_cpp_n_gqa"
msgstr ""
#: ../../getting_started/install/deploy.rst:359
#: 23ad9b5f34b5440bb90b2b21bab25763
msgid "Grouped-query attention. Must be 8 for llama-2 70b."
msgstr "对于 llama-2 70B 模型Grouped-query attention 必须为8。"
#: ../../getting_started/install/deploy.rst:360
#: 9ce25b7966fc40ec8be47ecfaf5f9994
msgid "llama_cpp_rms_norm_eps"
msgstr ""
#: ../../getting_started/install/deploy.rst:361
#: 58365f0d36af447ba976213646018431
msgid "5e-06"
msgstr ""
#: ../../getting_started/install/deploy.rst:362
#: d00b742a759140b795ba5949f1ce9a36
msgid "5e-6 is a good value for llama-2 models."
msgstr "对于llama-2模型来说5e-6是一个不错的值。"
#: ../../getting_started/install/deploy.rst:363
#: b9972e9b19354f55a5e6d9c50513a620
msgid "llama_cpp_cache_capacity"
msgstr ""
#: ../../getting_started/install/deploy.rst:365
#: 3c98c5396dd74db8b6d70fc50fa0754f
msgid "Maximum cache capacity. Examples: 2000MiB, 2GiB"
msgstr "模型缓存最大值. 例如: 2000MiB, 2GiB"
#: ../../getting_started/install/deploy.rst:366
#: 4277e155992c4442b69d665d6269bed6
msgid "llama_cpp_prefer_cpu"
msgstr ""
#: ../../getting_started/install/deploy.rst:367
#: 049169c1210a4ecabb25702ed813ea0a
msgid "False"
msgstr ""
#: ../../getting_started/install/deploy.rst:368
#: 60a39e93e7874491a93893de78b7d37e
msgid ""
"If a GPU is available, it will be preferred by default, unless "
"prefer_cpu=False is configured."
msgstr "如果有可用的GPU默认情况下会优先使用GPU除非配置了 prefer_cpu=False。"
#: ../../getting_started/install/deploy.rst:371
#: 7c86780fbf634de8873afd439389cf89
msgid "vllm"
msgstr ""
#: ../../getting_started/install/deploy.rst:373
#: e2827892e43d420c85b8b83c4855d197
msgid "vLLM is a fast and easy-to-use library for LLM inference and serving."
msgstr "vLLM 是一个快速且易于使用的 LLM 推理和服务的库。"
#: ../../getting_started/install/deploy.rst:375
#: 81bbfa3876a74244acc82d295803fdd4
msgid "**Running vLLM**"
msgstr "**运行vLLM**"
#: ../../getting_started/install/deploy.rst:377
#: 75bc518b444c417ba4d9c15246549327
msgid "**1.Installing Dependencies**"
msgstr "**1.安装依赖**"
#: ../../getting_started/install/deploy.rst:379
#: 725c620b0a5045c1a64a3b2a2e9b48f3
msgid ""
"vLLM is an optional dependency in DB-GPT, and you can manually install it"
" using the following command:"
msgstr "vLLM 在 DB-GPT 是一个可选依赖, 你可以使用下面的命令手动安装它:"
#: ../../getting_started/install/deploy.rst:385
#: 6f4b540107764f3592cc07cf170e4911
msgid "**2.Modifying the Configuration File**"
msgstr "**2.修改配置文件**"
#: ../../getting_started/install/deploy.rst:387
#: b8576a1572674c4890e09b73e02cf0e8
msgid "Next, you can directly modify your .env file to enable vllm."
msgstr "你可以直接修改你的 `.env` 文件"
#: ../../getting_started/install/deploy.rst:394
#: b006745f3aee4651aaa0cf79081b5d7f
msgid ""
"You can view the models supported by vLLM `here "
"<https://vllm.readthedocs.io/en/latest/models/supported_models.html"
"#supported-models>`_"
msgstr ""
"你可以在 `这里 "
"<https://vllm.readthedocs.io/en/latest/models/supported_models.html"
"#supported-models>`_ 查看 vLLM 支持的模型。"
#: ../../getting_started/install/deploy.rst:403
#: bc8057ee75e14737bf8fca3ceb555dac
msgid "3.Prepare sql example(Optional)"
msgstr "3.准备 sql example(可选)"
#: ../../getting_started/install/deploy.rst:404
#: 9b0b9112237c4b3aaa1dd5d704ea32e6
msgid "**(Optional) load examples into SQLite**"
msgstr "**(可选) 加载样例数据到 SQLite 数据库中**"
#: ../../getting_started/install/deploy.rst:411
#: 0815e13b96264ffcba1526c82ba2e7c8
msgid "On windows platform:"
msgstr "在 Windows 平台:"
#: ../../getting_started/install/deploy.rst:418
#: 577a4167ecac4fa88586961f225f0487
msgid "4.Run db-gpt server"
msgstr "4.运行db-gpt server"
#: ../../getting_started/install/deploy.rst:424
#: a9f96b064b674f80824257b4b0a18e2a
msgid "**Open http://localhost:5000 with your browser to see the product.**"
msgstr "打开浏览器访问http://localhost:5000"
#~ msgid ""
#~ "DB-GPT can be deployed on servers"
#~ " with low hardware requirements or on"
#~ " servers with high hardware requirements."
#~ " You can install DB-GPT by "
#~ "Using third-part LLM REST API "
#~ "Service OpenAI, Azure."
#~ msgstr ""
#~ msgid ""
#~ "And you can also install DB-GPT"
#~ " by deploy LLM Service by download"
#~ " LLM model."
#~ msgstr ""
#~ msgid "百川"
#~ msgstr ""
#~ msgid "百川 硬件要求"
#~ msgstr ""

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-08-17 13:07+0800\n"
"POT-Creation-Date: 2023-11-02 21:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -20,290 +20,292 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/install/environment/environment.md:1
#: be341d16f7b24bf4ad123ab78a6d855a
#: a17719d2f4374285a7beb4d1db470146
#, fuzzy
msgid "Environment Parameter"
msgstr "环境变量说明"
#: ../../getting_started/install/environment/environment.md:4
#: 46eddb27c90f41548ea9a724bbcebd37
#: 9a62e6fff7914eeaa2d195ddef4fcb61
msgid "LLM MODEL Config"
msgstr "模型配置"
#: ../../getting_started/install/environment/environment.md:5
#: 7deaa85df4a04fb098f5994547a8724f
#: 90e3991538324ecfac8cac7ef2103ac2
msgid "LLM Model Name, see /pilot/configs/model_config.LLM_MODEL_CONFIG"
msgstr "LLM Model Name, see /pilot/configs/model_config.LLM_MODEL_CONFIG"
#: ../../getting_started/install/environment/environment.md:6
#: 3902801c546547b3a4009df681ef7d52
#: 1f45af01100c4586acbc05469e3006bc
msgid "LLM_MODEL=vicuna-13b"
msgstr "LLM_MODEL=vicuna-13b"
#: ../../getting_started/install/environment/environment.md:8
#: 84b0fdbfa1544ec28751e9b69b00cc02
#: bed14b704f154c2db525f7fafd3aa5a4
msgid "MODEL_SERVER_ADDRESS"
msgstr "MODEL_SERVER_ADDRESS"
#: ../../getting_started/install/environment/environment.md:9
#: 0b430bfab77d405989470d00ca3f6fe0
#: ea42946cfe4f4ad996bf82c1996e7344
msgid "MODEL_SERVER=http://127.0.0.1:8000 LIMIT_MODEL_CONCURRENCY"
msgstr "MODEL_SERVER=http://127.0.0.1:8000 LIMIT_MODEL_CONCURRENCY"
#: ../../getting_started/install/environment/environment.md:12
#: b477a25586c546729a93fb6785b7b2ec
#: 021c261231f342fdba34098b1baa06fd
msgid "LIMIT_MODEL_CONCURRENCY=5"
msgstr "LIMIT_MODEL_CONCURRENCY=5"
#: ../../getting_started/install/environment/environment.md:14
#: 1d6ea800af384fff9c265610f71cc94e
#: afaf0ba7fd09463d8ff74b514ed7264c
msgid "MAX_POSITION_EMBEDDINGS"
msgstr "MAX_POSITION_EMBEDDINGS"
#: ../../getting_started/install/environment/environment.md:16
#: 388e758ce4ea4692a4c34294cebce7f2
#: e4517a942bca4361a64a00408f993f5b
msgid "MAX_POSITION_EMBEDDINGS=4096"
msgstr "MAX_POSITION_EMBEDDINGS=4096"
#: ../../getting_started/install/environment/environment.md:18
#: 16a307dce1294ceba892ff93ae4e81c0
#: 78d2ef04ed4548b9b7b0fb8ae35c9d5c
msgid "QUANTIZE_QLORA"
msgstr "QUANTIZE_QLORA"
#: ../../getting_started/install/environment/environment.md:20
#: 93ceb2b2fcd5454b82eefb0ae8c7ae77
#: bfa65db03c6d46bba293331f03ab15ac
msgid "QUANTIZE_QLORA=True"
msgstr "QUANTIZE_QLORA=True"
#: ../../getting_started/install/environment/environment.md:22
#: 15ffa35d023a4530b02a85ee6168dd4b
#: 1947d45a7f184821910b4834ad5f1897
msgid "QUANTIZE_8bit"
msgstr "QUANTIZE_8bit"
#: ../../getting_started/install/environment/environment.md:24
#: 81df248ac5cb4ab0b13a711505f6a177
#: 4a2ee2919d0e4bdaa13c9d92eefd2aac
msgid "QUANTIZE_8bit=True"
msgstr "QUANTIZE_8bit=True"
#: ../../getting_started/install/environment/environment.md:27
#: 15cc7b7d41ad44f0891c1189709f00f1
#: 348dc1e411b54ab09414f40a20e934e4
msgid "LLM PROXY Settings"
msgstr "LLM PROXY Settings"
#: ../../getting_started/install/environment/environment.md:28
#: e6c1115a39404f11b193a1593bc51a22
#: a692e78425a040f5828ab54ff9a33f77
msgid "OPENAI Key"
msgstr "OPENAI Key"
#: ../../getting_started/install/environment/environment.md:30
#: 8157e0a831fe4506a426822b7565e4f6
#: 940d00e25a424acf92951a314a64e5ea
msgid "PROXY_API_KEY={your-openai-sk}"
msgstr "PROXY_API_KEY={your-openai-sk}"
#: ../../getting_started/install/environment/environment.md:31
#: 89b34d00bdb64e738bd9bc8c086b1f02
#: 4bd27547ae6041679e91f2a363cd1deb
msgid "PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions"
msgstr "PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions"
#: ../../getting_started/install/environment/environment.md:33
#: 7a97df730aeb484daf19c8172e61a290
#: cfa3071afb0b47baad6bd729d4a02cb9
msgid "from https://bard.google.com/ f12-> application-> __Secure-1PSID"
msgstr "from https://bard.google.com/ f12-> application-> __Secure-1PSID"
#: ../../getting_started/install/environment/environment.md:35
#: d430ddf726a049c0a9e0a9bfd5a6fe0e
#: a17efa03b10f47f68afac9e865982a75
msgid "BARD_PROXY_API_KEY={your-bard-token}"
msgstr "BARD_PROXY_API_KEY={your-bard-token}"
#: ../../getting_started/install/environment/environment.md:38
#: 23d6b0da3e7042abb55f6181c4a382d2
#: 6bcfe90574da4d82a459e8e11bf73cba
msgid "DATABASE SETTINGS"
msgstr "DATABASE SETTINGS"
#: ../../getting_started/install/environment/environment.md:39
#: dbae0a2d847f41f5be9396a160ef88d0
#: 2b1e62d9bf5d4af5a22f68c8248eaafb
msgid "SQLite database (Current default database)"
msgstr "SQLite database (Current default database)"
#: ../../getting_started/install/environment/environment.md:40
#: bdb55b7280c341a981e9d338cce53345
#: 8a909ac3b3c943da8dbc4e8dd596c80c
msgid "LOCAL_DB_PATH=data/default_sqlite.db"
msgstr "LOCAL_DB_PATH=data/default_sqlite.db"
#: ../../getting_started/install/environment/environment.md:41
#: 739d67927a9d46b28500deba1917916b
#: 90ae6507932f4815b6e180051738bb93
msgid "LOCAL_DB_TYPE=sqlite # Database Type default:sqlite"
msgstr "LOCAL_DB_TYPE=sqlite # Database Type default:sqlite"
#: ../../getting_started/install/environment/environment.md:43
#: eb4717bce6a6483b86d9780d924c5ff1
#: d2ce34e0dcf44ccf9e8007d548ba7b0a
msgid "MYSQL database"
msgstr "MYSQL database"
#: ../../getting_started/install/environment/environment.md:44
#: 0f4cdf0ff5dd4ff0b397dfa88541a2e1
#: c07159d63c334f6cbb95fcc30bfb7ea5
msgid "LOCAL_DB_TYPE=mysql"
msgstr "LOCAL_DB_TYPE=mysql"
#: ../../getting_started/install/environment/environment.md:45
#: c971ead492c34487bd766300730a9cba
#: e16700b2ea8d411e91d010c1cde7aecc
msgid "LOCAL_DB_USER=root"
msgstr "LOCAL_DB_USER=root"
#: ../../getting_started/install/environment/environment.md:46
#: 02828b29ad044eeab890a2f8af0e5907
#: bfc2dce1bf374121b6861e677b4e1ffa
msgid "LOCAL_DB_PASSWORD=aa12345678"
msgstr "LOCAL_DB_PASSWORD=aa12345678"
#: ../../getting_started/install/environment/environment.md:47
#: 53dc7f15b3934987b1f4c2e2d0b11299
#: bc384739f5b04e21a34d0d2b78e7906c
msgid "LOCAL_DB_HOST=127.0.0.1"
msgstr "LOCAL_DB_HOST=127.0.0.1"
#: ../../getting_started/install/environment/environment.md:48
#: 1ac95fc482934247a118bab8dcebeb57
#: e5253d452e0d42b7ac308fe6fbfb5017
msgid "LOCAL_DB_PORT=3306"
msgstr "LOCAL_DB_PORT=3306"
#: ../../getting_started/install/environment/environment.md:51
#: 34e46aa926844be19c7196759b03af63
#: 9ca8f6fe06ed4cbab390f94be252e165
msgid "EMBEDDING SETTINGS"
msgstr "EMBEDDING SETTINGS"
#: ../../getting_started/install/environment/environment.md:52
#: 2b5aa08cc995495e85a1f7dc4f97b5d7
#: 76c7c260293c4b49bae057143fd48377
msgid "EMBEDDING MODEL Name, see /pilot/configs/model_config.LLM_MODEL_CONFIG"
msgstr "EMBEDDING模型, 参考see /pilot/configs/model_config.LLM_MODEL_CONFIG"
#: ../../getting_started/install/environment/environment.md:53
#: 0de0ca551ed040248406f848feca541d
#: f1d63a0128ce493cae37d34f1976bcca
msgid "EMBEDDING_MODEL=text2vec"
msgstr "EMBEDDING_MODEL=text2vec"
#: ../../getting_started/install/environment/environment.md:55
#: 43019fb570904c9981eb68f33e64569c
#: b8fbb99109d04781b2dd5bc5d6efa5bd
msgid "Embedding Chunk size, default 500"
msgstr "Embedding 切片大小, 默认500"
#: ../../getting_started/install/environment/environment.md:57
#: 7e3f93854873461286e96887e04167aa
#: bf8256576ea34f6a9c5f261ab9aab676
msgid "KNOWLEDGE_CHUNK_SIZE=500"
msgstr "KNOWLEDGE_CHUNK_SIZE=500"
#: ../../getting_started/install/environment/environment.md:59
#: 9504f4a59ae74352a524b7741113e2d6
#: 9b156c6b599b4c02a58ce023b4ff25f2
msgid "Embedding Chunk Overlap, default 100"
msgstr "Embedding chunk Overlap, 文本块之间的最大重叠量。保留一些重叠可以保持文本块之间的连续性(例如使用滑动窗口),默认100"
#: ../../getting_started/install/environment/environment.md:60
#: 24e6119c2051479bbd9dba71a9c23dbe
#: dcafd903c36041ac85ac99a14dbee512
msgid "KNOWLEDGE_CHUNK_OVERLAP=100"
msgstr "KNOWLEDGE_CHUNK_OVERLAP=100"
#: ../../getting_started/install/environment/environment.md:62
#: 0d180d7f2230442abee901c19526e442
msgid "embeding recall top k,5"
#: 6c3244b7e5e24b0188c7af4bb52e9134
#, fuzzy
msgid "embedding recall top k,5"
msgstr "embedding 召回topk, 默认5"
#: ../../getting_started/install/environment/environment.md:64
#: a5bb9ab2ba50411cbbe87f7836bfbb6d
#: f4a2f30551cf4fe1a7ff3c7c74ec77be
msgid "KNOWLEDGE_SEARCH_TOP_SIZE=5"
msgstr "KNOWLEDGE_SEARCH_TOP_SIZE=5"
#: ../../getting_started/install/environment/environment.md:66
#: 183b8dd78cba4ae19bd2e08d69d21e0b
msgid "embeding recall max token ,2000"
#: 593f2512362f467e92fdaa60dd5903a0
#, fuzzy
msgid "embedding recall max token ,2000"
msgstr "embedding向量召回最大token, 默认2000"
#: ../../getting_started/install/environment/environment.md:68
#: ce0c711febcb44c18ae0fc858c3718d1
#: 83d6d28914be4d6282d457272e508ddc
msgid "KNOWLEDGE_SEARCH_MAX_TOKEN=5"
msgstr "KNOWLEDGE_SEARCH_MAX_TOKEN=5"
#: ../../getting_started/install/environment/environment.md:71
#: ../../getting_started/install/environment/environment.md:87
#: 4cab1f399cc245b4a1a1976d2c4fc926 ec9cec667a1c4473bf9a796a26e1ce20
#: 6bc1b9d995e74294a1c78e783c550db7 d33c77ded834438e9f4a2df06e7e041a
msgid "Vector Store SETTINGS"
msgstr "Vector Store SETTINGS"
#: ../../getting_started/install/environment/environment.md:72
#: ../../getting_started/install/environment/environment.md:88
#: 4dd04aadd46948a5b1dcf01fdb0ef074 bab7d512f33e40cf9e10f0da67e699c8
#: 9cafa06e2d584f70afd848184e0fa52a f01057251b8b4ffea806192dfe1048ed
msgid "Chroma"
msgstr "Chroma"
#: ../../getting_started/install/environment/environment.md:73
#: ../../getting_started/install/environment/environment.md:89
#: 13eec36741b14e028e2d3859a320826e ab3ffbcf9358401993af636ba9ab2e2d
#: e6c16fab37484769b819aeecbc13e6db faad299722e5400e95ec6ac3c1e018b8
msgid "VECTOR_STORE_TYPE=Chroma"
msgstr "VECTOR_STORE_TYPE=Chroma"
#: ../../getting_started/install/environment/environment.md:74
#: ../../getting_started/install/environment/environment.md:90
#: d15b91e2a2884f23a1dd2d54783b0638 d1f856d571b547098bb0c2a18f9f1979
#: 4eca3a51716d406f8ffd49c06550e871 581ee9dd38064b119660c44bdd00cbaa
msgid "MILVUS"
msgstr "MILVUS"
#: ../../getting_started/install/environment/environment.md:75
#: ../../getting_started/install/environment/environment.md:91
#: 1e165f6c934343c7808459cc7a65bc70 985dd60c2b7d4baaa6601a810a6522d7
#: 814c93048bed46589358a854d6c99683 b72b1269a2224f5f961214e41c019f21
msgid "VECTOR_STORE_TYPE=Milvus"
msgstr "VECTOR_STORE_TYPE=Milvus"
#: ../../getting_started/install/environment/environment.md:76
#: ../../getting_started/install/environment/environment.md:92
#: a1a53f051cee40ed886346a94babd75a d263e8eaee684935a58f0a4fe61c6f0e
#: 73ae665f1db9402883662734588fd02c c4da20319c994e83ba5a7706db967178
msgid "MILVUS_URL=127.0.0.1"
msgstr "MILVUS_URL=127.0.0.1"
#: ../../getting_started/install/environment/environment.md:77
#: ../../getting_started/install/environment/environment.md:93
#: 2741a312db1a4c6a8a1c1d62415c5fba d03bbf921ddd4f4bb715fe5610c3d0aa
#: e30c5288516d42aa858a485db50490c1 f843b2e58bcb4e4594e3c28499c341d0
msgid "MILVUS_PORT=19530"
msgstr "MILVUS_PORT=19530"
#: ../../getting_started/install/environment/environment.md:78
#: ../../getting_started/install/environment/environment.md:94
#: d0786490d38c4e4f971cc14f62fe1fc8 e9e0854873dc4c209861ee4eb77d25cd
#: 158669efcc7d4bcaac1c8dd01b499029 24e88ffd32f242f281c56c0ec3ad2639
msgid "MILVUS_USERNAME"
msgstr "MILVUS_USERNAME"
#: ../../getting_started/install/environment/environment.md:79
#: ../../getting_started/install/environment/environment.md:95
#: 9a82d07153cc432ebe754b5bc02fde0d a6485c1cfa7d4069a6894c43674c8c2b
#: 111a985297184c8aa5a0dd8e14a58445 6602093a6bb24d6792548e2392105c82
msgid "MILVUS_PASSWORD"
msgstr "MILVUS_PASSWORD"
#: ../../getting_started/install/environment/environment.md:80
#: ../../getting_started/install/environment/environment.md:96
#: 2f233f32b8ba408a9fbadb21fabb99ec 809b3219dd824485bc2cfc898530d708
#: 47bdfcd78fbe4ccdb5f49b717a6d01a6 b96c0545b2044926a8a8190caf94ad25
msgid "MILVUS_SECURE="
msgstr "MILVUS_SECURE="
#: ../../getting_started/install/environment/environment.md:82
#: ../../getting_started/install/environment/environment.md:98
#: f00603661f2b42e1bd2bca74ad1e3c31 f378e16fdec44c559e34c6929de812e8
#: 755c32b5d6c54607907a138b5474c0ec ff4f2a7ddaa14f089dda7a14e1062c36
msgid "WEAVIATE"
msgstr "WEAVIATE"
#: ../../getting_started/install/environment/environment.md:83
#: da2049ebc6874cf0a6b562e0e2fd9ec7
#: 23b2ce83385d40a589a004709f9864be
msgid "VECTOR_STORE_TYPE=Weaviate"
msgstr "VECTOR_STORE_TYPE=Weaviate"
#: ../../getting_started/install/environment/environment.md:84
#: ../../getting_started/install/environment/environment.md:99
#: 25f1246629934289aad7ef01c7304097 c9fe0e413d9a4fc8abf86b3ed99e0581
#: 9acef304d89a448a9e734346705ba872 cf5151b6c1594ccd8beb1c3f77769acb
msgid "WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network"
msgstr "WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network"
#: ../../getting_started/install/environment/environment.md:102
#: ba7c9e707f6a4cd6b99e52b58da3ab2d
#: c3003516b2364051bf34f8c3086e348a
msgid "Multi-GPU Setting"
msgstr "Multi-GPU Setting"
#: ../../getting_started/install/environment/environment.md:103
#: 5ca75fdf2c264b2c844d77f659b4f0b3
#: ade8fc381c5e438aa29d159c10041713
msgid ""
"See https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-"
"visibility-cuda_visible_devices/ If CUDA_VISIBLE_DEVICES is not "
@@ -313,49 +315,49 @@ msgstr ""
"cuda_visible_devices/ 如果 CUDA_VISIBLE_DEVICES没有设置, 会使用所有可用的gpu"
#: ../../getting_started/install/environment/environment.md:106
#: de92eb310aff43fbbbf3c5a116c3b2c6
#: e137bd19be5e410ba6709027dbf2923a
msgid "CUDA_VISIBLE_DEVICES=0"
msgstr "CUDA_VISIBLE_DEVICES=0"
#: ../../getting_started/install/environment/environment.md:108
#: d2641df6123a442b8e4444ad5f01a9aa
#: 7669947acbdc4b1d92bcc029a8353a5d
msgid ""
"Optionally, you can also specify the gpu ID to use before the starting "
"command"
msgstr "你也可以通过启动命令设置gpu ID"
#: ../../getting_started/install/environment/environment.md:110
#: 76c66179d11a4e5fa369421378609aae
#: 751743d1753b4051beea46371278d793
msgid "CUDA_VISIBLE_DEVICES=3,4,5,6"
msgstr "CUDA_VISIBLE_DEVICES=3,4,5,6"
#: ../../getting_started/install/environment/environment.md:112
#: 29bd0f01fdf540ad98385ea8473f7647
#: 3acc3de0af0d4df2bb575e161e377f85
msgid "You can configure the maximum memory used by each GPU."
msgstr "可以设置GPU的最大内存"
#: ../../getting_started/install/environment/environment.md:114
#: 31e5e23838734ba7a2810e2387e6d6a0
#: 67f1d9b172b84294a44ecace5436e6e0
msgid "MAX_GPU_MEMORY=16Gib"
msgstr "MAX_GPU_MEMORY=16Gib"
#: ../../getting_started/install/environment/environment.md:117
#: 99aa63ab1ae049d9b94536d6a96f3443
#: 3c69dfe48bcf46b89b76cac1e7849a66
msgid "Other Setting"
msgstr "Other Setting"
#: ../../getting_started/install/environment/environment.md:118
#: 3168732183874bffb59a3575d3473d62
#: d5015b70f4fe4d20a63de9d87f86957a
msgid "Language Settings(influence prompt language)"
msgstr "Language Settings(涉及prompt语言以及知识切片方式)"
#: ../../getting_started/install/environment/environment.md:119
#: 73eb0a96f29b4739bd456faa9cb5033d
#: 5543c28bb8e34c9fb3bb6b063c2b1750
msgid "LANGUAGE=en"
msgstr "LANGUAGE=en"
#: ../../getting_started/install/environment/environment.md:120
#: c6646b78c6cf4d25a13108232f5b2046
#: cb4ed5b892ee41068c1ca76cb29aa400
msgid "LANGUAGE=zh"
msgstr "LANGUAGE=zh"

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-25 23:56+0800\n"
"POT-Creation-Date: 2023-11-06 19:00+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -19,31 +19,27 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../index.rst:34 ../../index.rst:45 71dd3acc56354242aad5a920c2805328
#: ../../index.rst:34 ../../index.rst:45 8bc3a47457a34995816985436034e233
msgid "Getting Started"
msgstr "开始"
#: ../../index.rst:60 ../../index.rst:81 0f2fc16a44b043019556f5f3e0d0e2c0
#: ../../index.rst:60 ../../index.rst:81 1a4e8a5dc7754967a0af9fb3d2e53017
msgid "Modules"
msgstr "模块"
#: ../../index.rst:95 ../../index.rst:111 2624521b920a4b3b9eac3fec76635ab8
msgid "Use Cases"
msgstr "示例"
#: ../../index.rst:125 ../../index.rst:128 accec2bb9c5149f184a87e03955d6b22
#: ../../index.rst:96 ../../index.rst:99 c815772ae8514f0c9b26911b0dd73f54
msgid "Reference"
msgstr "参考"
#: ../../index.rst:138 ../../index.rst:144 26278dabd4944d1a9f14330e83935162
#: ../../index.rst:109 ../../index.rst:115 dabe4c3409df489f84e4ec588f2b34a5
msgid "Resources"
msgstr "资源"
#: ../../index.rst:7 9277d505dda74ae0862cd09d05cf5e63
#: ../../index.rst:7 7626b01b253546ac83ca0cf130dfa091
msgid "Welcome to DB-GPT!"
msgstr "欢迎来到DB-GPT中文文档"
#: ../../index.rst:8 9fa76a01965746978a00ac411fca13a8
#: ../../index.rst:8 6037e5e0d7f7428ba92315a91ccfd53f
msgid ""
"As large models are released and iterated upon, they are becoming "
"increasingly intelligent. However, in the process of using large models, "
@@ -61,7 +57,7 @@ msgstr ""
"我们启动了DB-"
"GPT项目为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署既可应用于“独立私有环境”又可根据业务模块进行“独立部署”和“隔离”确保“大模型”的能力绝对私有、安全、可控。"
#: ../../index.rst:10 b12b6f91c5664f61aa9e4d7cd500b922
#: ../../index.rst:10 ab2a181d517047e6992171786c83f8e3
msgid ""
"**DB-GPT** is an experimental open-source project that uses localized GPT"
" large models to interact with your data and environment. With this "
@@ -71,39 +67,39 @@ msgstr ""
"DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地化的GPT大模型与您的数据和环境进行交互无数据泄露风险100% 私密100%"
" 安全。"
#: ../../index.rst:12 7032e17191394f7090141927644fb512
#: ../../index.rst:12 9cfb7515430d49af8a1ca47f60264a58
msgid "**Features**"
msgstr "特性"
#: ../../index.rst:13 5a7a8e5eace34d5f9a4f779bf5122928
#: ../../index.rst:13 2a1f84e455c84d9ca66c65f92e5b0d78
msgid ""
"Currently, we have released multiple key features, which are listed below"
" to demonstrate our current capabilities:"
msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。"
#: ../../index.rst:15 5d0f67aacb8b4bc893a306ccbd6a3778
#: ../../index.rst:15 43de30ce92da4c3cbe43ae4e4c9f1869
msgid "SQL language capabilities - SQL generation - SQL diagnosis"
msgstr "SQL语言能力 - SQL生成 - SQL诊断"
#: ../../index.rst:19 556eaf756fec431ca5c453208292ab4f
#: ../../index.rst:19 edfeef5284e7426a9e551e782bc5702c
msgid ""
"Private domain Q&A and data processing - Database knowledge Q&A - Data "
"processing"
msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理"
#: ../../index.rst:23 5148ad898ec041858eddbeaa646d3f1b
#: ../../index.rst:23 7a42f17049b943f88dd8f17baa440144
msgid ""
"Plugins - Support custom plugin execution tasks and natively support the "
"Auto-GPT plugin, such as:"
msgstr "插件模型 - 支持自定义插件执行任务并原生支持Auto-GPT插件例如:* SQL自动执行获取查询结果 * 自动爬取学习知识"
#: ../../index.rst:26 34c7ff33bc1c401480603a5197ecb1c4
#: ../../index.rst:26 8b48d7b60bbc439da50a624c4048e6f6
msgid ""
"Unified vector storage/indexing of knowledge base - Support for "
"unstructured data such as PDF, Markdown, CSV, and WebURL"
msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL"
#: ../../index.rst:29 9d7095e5b08249e6bb5c724929537e6c
#: ../../index.rst:29 97df482893924bd18e9a101922e7c374
#, fuzzy
msgid ""
"Multi LLMs Support - Supports multiple large language models, currently "
@@ -111,63 +107,63 @@ msgid ""
"codet5p"
msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)"
#: ../../index.rst:35 caa368eab40e4efb953865740a3c9018
#: ../../index.rst:35 1ef26ead30ed4b7fb966c8a17307cdc5
msgid ""
"How to get started using DB-GPT to interact with your data and "
"environment."
msgstr "开始使用DB-GPT与您的数据环境进行交互。"
#: ../../index.rst:36 34cecad11f8b4a3e96bfa0a31814e3d2
#: ../../index.rst:36 3b44ab3576944bf6aa221f35bc051f4e
#, fuzzy
msgid "`Quickstart Guide <./getting_started/getting_started.html>`_"
msgstr "`使用指南 <./getting_started/getting_started.html>`_"
#: ../../index.rst:38 892598cdc16d45c68383033b08b7233f
#: ../../index.rst:38 430cb239cdce42a0b62db46aba3f3bdb
msgid "Concepts and terminology"
msgstr "相关概念"
#: ../../index.rst:40 887cc43a3a134aba96eb7ca11e5ca86f
#: ../../index.rst:40 ded4d9f80066498e90ba6214520013f7
#, fuzzy
msgid "`Concepts and Terminology <./getting_started/concepts.html>`_"
msgstr "`相关概念 <./getting_started/concepts.html>`_"
#: ../../index.rst:42 133e25c7dce046b1ab262489ecb60b4a
#: ../../index.rst:42 cd662e53621e474d901146813c750044
msgid "Coming soon..."
msgstr ""
#: ../../index.rst:44 a9e0812d32714a6f81ed75aa70f0c20e
#: ../../index.rst:44 15edba57f1de44af8aff76735a2593de
msgid "`Tutorials <.getting_started/tutorials.html>`_"
msgstr "`教程 <.getting_started/tutorials.html>`_"
#: ../../index.rst:62 4fccfd3082174f58926a9811f39e4d96
#: ../../index.rst:62 779454b29d8e4e6eb21497025922d1b8
msgid ""
"These modules are the core abstractions with which we can interact with "
"data and environment smoothly."
msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。"
#: ../../index.rst:63 ecac40207ada454e9a68356f575dbca9
#: ../../index.rst:63 bcd0e8c88c7b4807a91dd442416bec19
msgid ""
"It's very important for DB-GPT, DB-GPT also provide standard, extendable "
"interfaces."
msgstr "DB-GPT还提供了标准的、可扩展的接口。"
#: ../../index.rst:65 9d852bed582449e89dc13312ddf29eed
#: ../../index.rst:65 1e785dc6925045e8ba106cf4a3b17cac
msgid ""
"The docs for each module contain quickstart examples, how to guides, "
"reference docs, and conceptual guides."
msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。"
#: ../../index.rst:67 3167446539de449aba2de694fe901bcf
#: ../../index.rst:67 9c9fddd14bfd40339889f5d1f0b04163
msgid "The modules are as follows"
msgstr "组成模块如下:"
#: ../../index.rst:69 442683a5f154429da87f452e49bcbb5c
#: ../../index.rst:69 4a19083cadd04b8e8b649a622e0ceccd
msgid ""
"`LLMs <./modules/llms.html>`_: Supported multi models management and "
"integrations."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:71 3cf320b6199e4ce78235bce8b1be60a2
#: ../../index.rst:71 436a139225574aa5b066a1835d38238d
msgid ""
"`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and"
" serialization for multi database."
@@ -175,86 +171,41 @@ msgstr ""
"`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt "
",并进行优化,提高系统的响应效率"
#: ../../index.rst:73 4f29ed67ea2a4a3ca824ac8b8b33cae6
#: ../../index.rst:73 6c53edfb2e494c5fba6efb5ade48c310
msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler."
msgstr "`Agent与插件 <./modules/plugins.html>`_:提供Agent和插件机制使得用户可以自定义并增强系统的行为。"
#: ../../index.rst:75 d651f9d93bb54b898ef97407501cc6cf
#: ../../index.rst:75 6328760e8faf4e8296f3e1edd486316c
#, fuzzy
msgid ""
"`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding,"
" and search."
msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, "
#: ../../index.rst:77 fd6dd2adcd844baa84602b650d89e507
#: ../../index.rst:77 da272ccf56e3498d92009ac7101b0c45
msgid ""
"`Connections <./modules/connections.html>`_: Supported multi databases "
"connection. management connections and interact with this."
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 "
#: ../../index.rst:79 7e388a9d8c044169923508ccdeb2d9a5
#: ../../index.rst:79 1a0551f62d9d418a9dec267fbcb49af0
#, fuzzy
msgid "`Vector <./modules/vector.html>`_: Supported multi vector database."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:97 9d37bf061a784d5ca92d1de33b0834f3
msgid "Best Practices and built-in implementations for common DB-GPT use cases:"
msgstr "DB-GPT用例的最佳实践和内置方法:"
#: ../../index.rst:99 ba264bbe31d24c7887a30cbd5442e157
msgid ""
"`Sql generation and diagnosis "
"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and "
"diagnosis."
msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
#: ../../index.rst:101 1166f6aeba064a3990d4b0caa87db274
msgid ""
"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A "
"important scene for user to chat with database documents, codes, bugs and"
" schemas."
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\""
#: ../../index.rst:103 b32610ada3a0440e9b029b8dffe7c79e
msgid ""
"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use"
" multi models to chat."
msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
#: ../../index.rst:105 d9348b8112df4839ab14a74a42b63715
msgid ""
"`Querying Database Data <./use_cases/query_database_data.html>`_: Query "
"and Analysis data from databases and give charts."
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
#: ../../index.rst:107 ef978dc1f4254e5eb4ca487c31c03f7c
msgid ""
"`Interacting with apis <./use_cases/interacting_with_api.html>`_: "
"Interact with apis, such as create a table, deploy a database cluster, "
"create a database and so on."
msgstr ""
"`API交互 <./use_cases/interacting_with_api.html>`_: "
"与API交互例如创建表、部署数据库集群、创建数据库等。"
#: ../../index.rst:109 49a549d7d38f493ba48e162785b4ac5d
msgid ""
"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to"
" Plugin use tools to manage databases autonomoly."
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"
#: ../../index.rst:126 db86500484fa4f14918b0ad4e5a7326d
#: ../../index.rst:97 9aceee0dbe1e4f7da499ac6aab23aea2
msgid ""
"Full documentation on all methods, classes, installation methods, and "
"integration setups for DB-GPT."
msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。"
#: ../../index.rst:140 a44abbb370a841658801bb2729fa62c9
#: ../../index.rst:111 c9a729f4e1964894bae215793647ab75
msgid ""
"Additional resources we think may be useful as you develop your "
"application!"
msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”"
#: ../../index.rst:142 4d0da8471db240dba842949b6796be7a
#: ../../index.rst:113 06e6e4b7776c405fa94ae7b59253162d
msgid ""
"`Discord <https://discord.gg/eZHE94MN>`_: if your have some problem or "
"ideas, you can talk from discord."
@@ -272,3 +223,58 @@ msgstr "`Discord <https://discord.gg/eZHE94MN>`_:如果您有任何问题,可
#~ msgid "Guides for how other companies/products can be used with DB-GPT"
#~ msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南"
#~ msgid "Use Cases"
#~ msgstr "示例"
#~ msgid ""
#~ "Best Practices and built-in "
#~ "implementations for common DB-GPT use"
#~ " cases:"
#~ msgstr "DB-GPT用例的最佳实践和内置方法:"
#~ msgid ""
#~ "`Sql generation and diagnosis "
#~ "<./use_cases/sql_generation_and_diagnosis.html>`_: SQL "
#~ "generation and diagnosis."
#~ msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
#~ msgid ""
#~ "`knownledge Based QA "
#~ "<./use_cases/knownledge_based_qa.html>`_: A important "
#~ "scene for user to chat with "
#~ "database documents, codes, bugs and "
#~ "schemas."
#~ msgstr ""
#~ "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: "
#~ "用户与数据库文档、代码和bug聊天的重要场景\""
#~ msgid ""
#~ "`Chatbots <./use_cases/chatbots.html>`_: Language "
#~ "model love to chat, use multi "
#~ "models to chat."
#~ msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
#~ msgid ""
#~ "`Querying Database Data "
#~ "<./use_cases/query_database_data.html>`_: Query and "
#~ "Analysis data from databases and give"
#~ " charts."
#~ msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
#~ msgid ""
#~ "`Interacting with apis "
#~ "<./use_cases/interacting_with_api.html>`_: Interact with"
#~ " apis, such as create a table, "
#~ "deploy a database cluster, create a "
#~ "database and so on."
#~ msgstr ""
#~ "`API交互 <./use_cases/interacting_with_api.html>`_: "
#~ "与API交互例如创建表、部署数据库集群、创建数据库等。"
#~ msgid ""
#~ "`Tool use with plugins "
#~ "<./use_cases/tool_use_with_plugin>`_: According to "
#~ "Plugin use tools to manage databases "
#~ "autonomoly."
#~ msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-07-13 15:39+0800\n"
"POT-Creation-Date: 2023-11-02 21:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -19,103 +19,84 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:136
#: 3cc8fa6e9fbd4d889603d99424e9529a
#: ../../modules/knowledge.md:1 b94b3b15cb2441ed9d78abd222a717b7
msgid "Knowledge"
msgstr "知识"
#: ../../modules/knowledge.rst:4 0465a393d9d541958c39c1d07c885d1f
#: ../../modules/knowledge.md:3 c6d6e308a6ce42948d29e928136ef561
#, fuzzy
msgid ""
"As the knowledge base is currently the most significant user demand "
"scenario, we natively support the construction and processing of "
"knowledge bases. At the same time, we also provide multiple knowledge "
"base management strategies in this project, such as pdf knowledge,md "
"knowledge, txt knowledge, word knowledge, ppt knowledge:"
"base management strategies in this project, such as:"
msgstr ""
"由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。同时,我们还在本项目中提供了多种知识库管理策略,如:pdf,md , "
"txt, word, ppt"
#: ../../modules/knowledge.rst:6 e670cbe14d8e4da88ba935e4120c31e0
msgid ""
"We currently support many document formats: raw text, txt, pdf, md, html,"
" doc, ppt, and url. In the future, we will continue to support more types"
" of knowledge, including audio, video, various databases, and big data "
"sources. Of course, we look forward to your active participation in "
"contributing code."
#: ../../modules/knowledge.md:4 268abc408d40410ba90cf5f121dc5270
msgid "Default built-in knowledge base"
msgstr ""
#: ../../modules/knowledge.rst:9 e0bf601a1a0c458297306db6ff79f931
msgid "**Create your own knowledge repository**"
#: ../../modules/knowledge.md:5 558c3364c38b458a8ebf81030efc2a48
msgid "Custom addition of knowledge bases"
msgstr ""
#: ../../modules/knowledge.md:6 9cb3ce62da1440579c095848c7aef88c
msgid ""
"Various usage scenarios such as constructing knowledge bases through "
"plugin capabilities and web crawling. Users only need to organize the "
"knowledge documents, and they can use our existing capabilities to build "
"the knowledge base required for the large model."
msgstr ""
#: ../../modules/knowledge.md:9 b8ca6bc4dd9845baa56e36eea7fac2a2
#, fuzzy
msgid "Create your own knowledge repository"
msgstr "创建你自己的知识库"
#: ../../modules/knowledge.rst:11 bb26708135d44615be3c1824668010f6
msgid "1.prepare"
msgstr "准备"
#: ../../modules/knowledge.md:11 17d7178a67924f43aa5b6293707ef041
msgid ""
"1.Place personal knowledge files or folders in the pilot/datasets "
"directory."
msgstr ""
#: ../../modules/knowledge.rst:13 c150a0378f3e4625908fa0d8a25860e9
#: ../../modules/knowledge.md:13 31c31f14bf444981939689f9a9fb038a
#, fuzzy
msgid ""
"We currently support many document formats: TEXT(raw text), "
"DOCUMENT(.txt, .pdf, .md, .doc, .ppt, .html), and URL."
"We currently support many document formats: txt, pdf, md, html, doc, ppt,"
" and url."
msgstr "当前支持txt, pdf, md, html, doc, ppt, url文档格式"
#: ../../modules/knowledge.rst:15 7f9f02a93d5d4325b3d2d976f4bb28a0
#: ../../modules/knowledge.md:15 9ad2f2e05f8842a9b9d8469a3704df23
msgid "before execution:"
msgstr "开始前"
#: ../../modules/knowledge.rst:24 59699a8385e04982a992cf0d71f6dcd5
#, fuzzy
#: ../../modules/knowledge.md:22 6fd2775914b641c4b8e486417b558ea6
msgid ""
"2.prepare embedding model, you can download from https://huggingface.co/."
" Notice you have installed git-lfs."
"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
"(now only support Chroma and Milvus, if you set Milvus, please set "
"MILVUS_URL and MILVUS_PORT)"
msgstr ""
"提前准备Embedding Model, 你可以在https://huggingface.co/进行下载注意你需要先安装git-lfs.eg:"
" git clone https://huggingface.co/THUDM/chatglm2-6b"
#: ../../modules/knowledge.rst:27 2be1a17d0b54476b9dea080d244fd747
msgid ""
"eg: git clone https://huggingface.co/sentence-transformers/all-"
"MiniLM-L6-v2"
msgstr "eg: git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
#: ../../modules/knowledge.rst:33 d328f6e243624c9488ebd27c9324621b
msgid ""
"3.prepare vector_store instance and vector store config, now we support "
"Chroma, Milvus and Weaviate."
msgstr "提前准备向量数据库环境目前支持Chroma, Milvus and Weaviate向量数据库"
#: ../../modules/knowledge.rst:63 44f97154eff647d399fd30b6f9e3b867
msgid ""
"3.init Url Type EmbeddingEngine api and embedding your document into "
"vector store in your code."
msgstr "初始化 Url类型 EmbeddingEngine api 将url文档embedding向量化到向量数据库 "
#: ../../modules/knowledge.rst:75 e2581b414f0148bca88253c7af9cd591
msgid "If you want to add your source_reader or text_splitter, do this:"
msgstr "如果你想手动添加你自定义的source_reader和text_splitter, 请参考:"
#: ../../modules/knowledge.rst:95 74c110414f924bbfa3d512e45ba2f30f
#, fuzzy
msgid ""
"4.init Document Type EmbeddingEngine api and embedding your document into"
" vector store in your code. Document type can be .txt, .pdf, .md, .doc, "
".ppt."
#: ../../modules/knowledge.md:25 131c5f58898a4682940910980edb2043
msgid "2.Run the knowledge repository initialization command"
msgstr ""
"初始化 文档型类型 EmbeddingEngine api 将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, "
".md, .html, .doc, .ppt)"
#: ../../modules/knowledge.rst:108 0afd40098d5f4dfd9e44fe1d8004da25
#: ../../modules/knowledge.md:31 2cf550f17881497bb881b19efcc18c23
msgid ""
"5.init TEXT Type EmbeddingEngine api and embedding your document into "
"vector store in your code."
msgstr "初始化TEXT类型 EmbeddingEngine api 将文档embedding向量化到向量数据库"
"Optionally, you can run `dbgpt knowledge load --help` command to see more"
" usage."
msgstr ""
#: ../../modules/knowledge.rst:120 a66961bf3efd41fa8ea938129446f5a5
msgid "4.similar search based on your knowledge base. ::"
msgstr "在知识库进行相似性搜索"
#: ../../modules/knowledge.md:33 c8a2ea571b944bdfbcad48fa8b54fcc9
msgid ""
"3.Add the knowledge repository in the interface by entering the name of "
"your knowledge repository (if not specified, enter \"default\") so you "
"can use it for Q&A based on your knowledge base."
msgstr ""
#: ../../modules/knowledge.rst:126 b7066f408378450db26770f83fbd2716
#: ../../modules/knowledge.md:35 b701170ad75e49dea7d7734c15681e0f
msgid ""
"Note that the default vector model used is text2vec-large-chinese (which "
"is a large model, so if your personal computer configuration is not "
@@ -125,48 +106,6 @@ msgstr ""
"注意这里默认向量模型是text2vec-large-chinese(模型比较大如果个人电脑配置不够建议采用text2vec-base-"
"chinese),因此确保需要将模型download下来放到models目录中。"
#: ../../modules/knowledge.rst:128 58481d55cab74936b6e84b24c39b1674
#, fuzzy
msgid ""
"`pdf_embedding <./knowledge/pdf/pdf_embedding.html>`_: supported pdf "
"embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#: ../../modules/knowledge.rst:129 fbb013c4f1bc46af910c91292f6690cf
#, fuzzy
msgid ""
"`markdown_embedding <./knowledge/markdown/markdown_embedding.html>`_: "
"supported markdown embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#: ../../modules/knowledge.rst:130 59d45732f4914d16b4e01aee0992edf7
#, fuzzy
msgid ""
"`word_embedding <./knowledge/word/word_embedding.html>`_: supported word "
"embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#: ../../modules/knowledge.rst:131 df0e6f311861423e885b38e020a7c0f0
#, fuzzy
msgid ""
"`url_embedding <./knowledge/url/url_embedding.html>`_: supported url "
"embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#: ../../modules/knowledge.rst:132 7c550c1f5bc34fe9986731fb465e12cd
#, fuzzy
msgid ""
"`ppt_embedding <./knowledge/ppt/ppt_embedding.html>`_: supported ppt "
"embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#: ../../modules/knowledge.rst:133 8648684cb191476faeeb548389f79050
#, fuzzy
msgid ""
"`string_embedding <./knowledge/string/string_embedding.html>`_: supported"
" raw text embedding."
msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
#~ msgid "before execution: python -m spacy download zh_core_web_sm"
#~ msgstr "在执行之前请先执行python -m spacy download zh_core_web_sm"
@@ -201,3 +140,112 @@ msgstr "pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embeddin
#~ "and MILVUS_PORT)"
#~ msgstr "2.更新你的.env设置你的向量存储类型VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus如果你设置了Milvus请设置MILVUS_URL和MILVUS_PORT)"
#~ msgid ""
#~ "We currently support many document "
#~ "formats: raw text, txt, pdf, md, "
#~ "html, doc, ppt, and url. In the"
#~ " future, we will continue to support"
#~ " more types of knowledge, including "
#~ "audio, video, various databases, and big"
#~ " data sources. Of course, we look "
#~ "forward to your active participation in"
#~ " contributing code."
#~ msgstr ""
#~ msgid "1.prepare"
#~ msgstr "准备"
#~ msgid ""
#~ "2.prepare embedding model, you can "
#~ "download from https://huggingface.co/. Notice "
#~ "you have installed git-lfs."
#~ msgstr ""
#~ "提前准备Embedding Model, 你可以在https://huggingface.co/进行下载,注意"
#~ "你需要先安装git-lfs.eg: git clone "
#~ "https://huggingface.co/THUDM/chatglm2-6b"
#~ msgid ""
#~ "eg: git clone https://huggingface.co/sentence-"
#~ "transformers/all-MiniLM-L6-v2"
#~ msgstr ""
#~ "eg: git clone https://huggingface.co/sentence-"
#~ "transformers/all-MiniLM-L6-v2"
#~ msgid ""
#~ "3.prepare vector_store instance and vector "
#~ "store config, now we support Chroma, "
#~ "Milvus and Weaviate."
#~ msgstr "提前准备向量数据库环境目前支持Chroma, Milvus and Weaviate向量数据库"
#~ msgid ""
#~ "3.init Url Type EmbeddingEngine api and"
#~ " embedding your document into vector "
#~ "store in your code."
#~ msgstr "初始化 Url类型 EmbeddingEngine api 将url文档embedding向量化到向量数据库 "
#~ msgid "If you want to add your source_reader or text_splitter, do this:"
#~ msgstr "如果你想手动添加你自定义的source_reader和text_splitter, 请参考:"
#~ msgid ""
#~ "4.init Document Type EmbeddingEngine api "
#~ "and embedding your document into vector"
#~ " store in your code. Document type"
#~ " can be .txt, .pdf, .md, .doc, "
#~ ".ppt."
#~ msgstr ""
#~ "初始化 文档型类型 EmbeddingEngine api "
#~ "将文档embedding向量化到向量数据库(文档可以是.txt, .pdf, .md, .html,"
#~ " .doc, .ppt)"
#~ msgid ""
#~ "5.init TEXT Type EmbeddingEngine api and"
#~ " embedding your document into vector "
#~ "store in your code."
#~ msgstr "初始化TEXT类型 EmbeddingEngine api 将文档embedding向量化到向量数据库"
#~ msgid "4.similar search based on your knowledge base. ::"
#~ msgstr "在知识库进行相似性搜索"
#~ msgid ""
#~ "`pdf_embedding <./knowledge/pdf/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgid ""
#~ "`markdown_embedding "
#~ "<./knowledge/markdown/markdown_embedding.html>`_: supported "
#~ "markdown embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgid ""
#~ "`word_embedding <./knowledge/word/word_embedding.html>`_: "
#~ "supported word embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgid ""
#~ "`url_embedding <./knowledge/url/url_embedding.html>`_: "
#~ "supported url embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgid ""
#~ "`ppt_embedding <./knowledge/ppt/ppt_embedding.html>`_: "
#~ "supported ppt embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."
#~ msgid ""
#~ "`string_embedding <./knowledge/string/string_embedding.html>`_:"
#~ " supported raw text embedding."
#~ msgstr ""
#~ "pdf_embedding <./knowledge/pdf_embedding.html>`_: "
#~ "supported pdf embedding."

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-14 21:47+0800\n"
"POT-Creation-Date: 2023-11-03 15:33+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -19,11 +19,11 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../modules/plugins.md:1 8e0200134cca45b6aead6d05b60ca95a
#: ../../modules/plugins.md:1 e8c539b65ccd459793e8ed3812903578
msgid "Plugins"
msgstr "插件"
#: ../../modules/plugins.md:3 d0d532cfe9b44fa0916d7d5b912a744a
#: ../../modules/plugins.md:3 0d6f6bdcf843416fb35d9f51df52bead
msgid ""
"The ability of Agent and Plugin is the core of whether large models can "
"be automated. In this project, we natively support the plugin mode, and "
@@ -35,49 +35,62 @@ msgstr ""
"Agent与插件能力是大模型能否自动化的核心在本的项目中原生支持插件模式大模型可以自动化完成目标。 同时为了充分发挥社区的优势"
"本项目中所用的插件原生支持Auto-GPT插件生态即Auto-GPT的插件可以直接在我们的项目中运行。"
#: ../../modules/plugins.md:5 2f78a6b397a24f34b0d5771ca93efb0b
#: ../../modules/plugins.md:5 625763bc41fe417c8e4ea03ab2f8fdfc
#, fuzzy
msgid "The LLM (Language Model) suitable for the Plugin scene is"
msgstr "Plugin场景适用的LLM是 * chatgpt3.5. * chatgpt4."
#: ../../modules/plugins.md:6 b3bd64693a4f4bf8b64b9224d3e1532e
msgid "chatgpt3.5."
msgstr ""
#: ../../modules/plugins.md:7 46d9220e63384594b54c2c176077d962
msgid "chatgpt4."
msgstr ""
#: ../../modules/plugins.md:10 8c539e139f6648b2bef5dc683b8e093c
#, fuzzy
msgid "Local Plugins"
msgstr "插件"
#: ../../modules/plugins.md:7 54a817a638c3440989191b3bffaca6de
#: ../../modules/plugins.md:12 2cc7ba992d524913b3377cad3bf747d3
msgid "1.1 How to write local plugins."
msgstr ""
#: ../../modules/plugins.md:9 fbdc0a9d327f432aa6a380117dfb2f11
#: ../../modules/plugins.md:14 eddffc1d2c434e45890a9befa1bb5160
msgid ""
"Local plugins use the Auto-GPT plugin template. A simple example is as "
"follows: first write a plugin file called \"sql_executor.py\"."
msgstr ""
#: ../../modules/plugins.md:39 dc398ab427bd4d15b3b7c8cb1ff032b3
#: ../../modules/plugins.md:44 06efbea552bb4dc7828d842b779e41d4
msgid ""
"Then set the \"can_handle_post_prompt\" method of the plugin template to "
"True. In the \"post_prompt\" method, write the prompt information and the"
" mapped plugin function."
msgstr ""
#: ../../modules/plugins.md:81 c9d4019392bf452e906057cbe9271005
#: ../../modules/plugins.md:86 afd3cfb379bb463e97e515ae65790830
msgid "1.2 How to use local plugins"
msgstr ""
#: ../../modules/plugins.md:83 9beaed4a71124ecf9544a1dba0d1e722
#: ../../modules/plugins.md:88 f43a70e4cb5c4846a5bb8df3853021ba
msgid ""
"Pack your plugin project into `your-plugin.zip` and place it in the "
"`/plugins/` directory of the DB-GPT project. After starting the "
"webserver, you can select and use it in the `Plugin Model` section."
msgstr ""
#: ../../modules/plugins.md:86 9a1439c883a947d7acac3fd1196b3c1e
#: ../../modules/plugins.md:91 8269458bd7f5480dbc56100865eb1eb0
#, fuzzy
msgid "Public Plugins"
msgstr "插件"
#: ../../modules/plugins.md:88 2ed4c509bf5848adb3fa163752a1cfa1
#: ../../modules/plugins.md:93 ec5bb7b6b2cf464d8b8400f3dfd9a50e
msgid "1.1 How to use public plugins"
msgstr ""
#: ../../modules/plugins.md:90 dd5ba8d582204b2f89ce802a1232b11d
#: ../../modules/plugins.md:95 3025a85c905c49b6b2ac3f5c39c84c93
msgid ""
"By default, after launching the webserver, plugins from the public plugin"
" library `DB-GPT-Plugins` will be automatically loaded. For more details,"
@@ -85,17 +98,17 @@ msgid ""
"Plugins)"
msgstr ""
#: ../../modules/plugins.md:92 244f0591bc5045eab175754521b414c4
#: ../../modules/plugins.md:97 e73d7779df254ba49fe7123ce06353aa
msgid "1.2 Contribute to the DB-GPT-Plugins repository"
msgstr ""
#: ../../modules/plugins.md:94 e00bac1a299b46caa19b9cf16709d6ba
#: ../../modules/plugins.md:99 3297fb00dfc940e8a614c3858640cfe5
msgid ""
"Please refer to the plugin development process in the public plugin "
"library, and put the configuration parameters in `.plugin_env`"
msgstr ""
#: ../../modules/plugins.md:96 315fbf576ea24158adc7b564f53940e0
#: ../../modules/plugins.md:101 13280b270b304e139ed67e5b0dafa5b4
msgid ""
"We warmly welcome everyone to contribute plugins to the public plugin "
"library!"

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.4.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-26 19:57+0800\n"
"POT-Creation-Date: 2023-11-03 11:47+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -17,13 +17,13 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.13.1\n"
"Generated-By: Babel 2.12.1\n"
#: ../../modules/prompts.md:1 00907f941b7743b99278d7fd0b67e5d3
#: ../../modules/prompts.md:1 3c5bdc61dc4a4301acdc9775c854a896
msgid "Prompts"
msgstr "Prompts"
#: ../../modules/prompts.md:3 f1c7042cba32483b90ab22db28e369a9
#: ../../modules/prompts.md:3 118fc2b85e8b4e02a6868b3bc2a7892c
msgid ""
"**Prompt** is a very important part of the interaction between the large "
"model and the user, and to a certain extent, it determines the quality "
@@ -33,96 +33,116 @@ msgid ""
"users to use large language models."
msgstr "**Prompt**是大模型与用户交互中非常重要的一环,在一定程度上决定了大模型生成答案的质量和准确性。在这个项目中,我们会根据用户输入和使用场景自动地优化相应提示,让用户更轻松、更高效地使用大语言模型。"
#: ../../modules/prompts.md:5 695c05f96a2c4077a0a25f5f6dc22d31
#: ../../modules/prompts.md:5 41614effa0a445b7b5a119311b902305
msgid "Prompt Management"
msgstr "Prompt 管理"
#: ../../modules/prompts.md:7 325254a0ceba48b8aa19a4cf66d18c12
#: ../../modules/prompts.md:7 a8ed0a7b3d1243ffa1ed80c24d1ab518
msgid ""
"Here, you can choose to create a Prompt in **Public Prompts** space or "
"**Private Prompts** space."
msgstr "该页面允许用户选择**公共Prompts**或者**私有Prompts**空间来创建相应的 Prompt。"
#: ../../modules/prompts.md:11 4528a48a14d246d8874c1b31819d3f4f
#: ../../modules/prompts.md:9 ../../modules/prompts.md:17
#: ../../modules/prompts.md:31 ../../modules/prompts.md:45
#: 68db272acc6b4572aa275940da4b788b 92d46d647bbb4035add92f750511a840
#: af1789fae8cb47b8a81e68520086f35e d7c2f6f43b5c406d82b7dc5bd92d183c
#: e2f91ca11e784fe5943d0738671f68bf
msgid "image"
msgstr ""
#: ../../modules/prompts.md:11 102220bf95f04f81acc9a0093458f297
msgid ""
"The difference between **Public Prompts** and **Private Prompts** is that"
" Prompts in **Public Prompts** space can be viewed and used by all users,"
" while prompts in **Private Prompts** space can only be viewed and used "
"by the owner."
msgstr "**公共 Prompts**和**私有 Prompts**空间的区别在于,**公共 Prompts**空间下的 Prompt 可供所有的用户查看和使用,而**私有 Prompts**空间下的 Prompt 只能被所有者查看和使用。"
msgstr ""
"**公共 Prompts**和**私有 Prompts**空间的区别在于,**公共 Prompts**空间下的 Prompt "
"可供所有的用户查看和使用,而**私有 Prompts**空间下的 Prompt 只能被所有者查看和使用。"
#: ../../modules/prompts.md:13 45fdbc12758c4cb3aa71f5616c75a6e6
#: ../../modules/prompts.md:13 2e0d2f6b335a4aacbdc83b7b7042a701
msgid "Create Prompt"
msgstr "创建 Prompt"
#: ../../modules/prompts.md:15 850b9c136d5949939c3966b6bff8204e
#: ../../modules/prompts.md:15 c9f8c3d1698941e08b90a35fffb2fce1
msgid "Click the \"Add Prompts\" button to pop up the following subpage:"
msgstr "点击 \"新增Prompts\"按钮可以弹出如下的子页面:"
#: ../../modules/prompts.md:19 9db6614fcc454bddbf2e3257232e7c93
#: ../../modules/prompts.md:19 23ed81a83ab2458f826f2b5d9c55a89a
msgid ""
"**Scene**: It is assumed here that when we have a lot of Prompts, we "
"often classify the Prompts according to scene, such as Prompts in the "
"chat knowledge scene, Prompts in the chat data scene, Prompts in the chat"
" normal scene, etc."
msgstr "**场景**:这里假设,当我们有很多 Prompts 时,往往会根据场景对 Prompts 进行分类,比如在 DB-GPT 项目中chat knowledge 场景的 Prompts、chat data 场景的 Prompts、chat normal 场景的 Prompts 等等。"
msgstr ""
"**场景**:这里假设,当我们有很多 Prompts 时,往往会根据场景对 Prompts 进行分类,比如在 DB-GPT 项目中chat "
"knowledge 场景的 Prompts、chat data 场景的 Prompts、chat normal 场景的 Prompts 等等。"
#: ../../modules/prompts.md:21 45a0a38ceafe4575a5957b2de8c0661f
#: ../../modules/prompts.md:21 11299da493e741869fe67237f1cb1794
msgid ""
"**Sub Scene**: Continuing with the above, assuming that we have a lot of "
"Prompts, scene classification alone is not enough. For example, in the "
"chat data scenario, there can be many types of sub-scene: anomaly "
"recognition sub scene, attribution analysis sub scene, etc. sub scene is "
"used to distinguish subcategories under each scene."
msgstr "**次级场景**:接着上面的内容,如果我们的 Prompt 很多时,仅使用场景一级分类是不够的。例如,在 chat data 场景中,还可以细分为很多的次级场景:异常识别次级场景、归因分析次级场景等等。次级场景是用于区分每个场景下的子类别。"
msgstr ""
"**次级场景**:接着上面的内容,如果我们的 Prompt 很多时,仅使用场景一级分类是不够的。例如,在 chat data "
"场景中,还可以细分为很多的次级场景:异常识别次级场景、归因分析次级场景等等。次级场景是用于区分每个场景下的子类别。"
#: ../../modules/prompts.md:23 78544fc530114bb0bc0a753811e9ce58
#: ../../modules/prompts.md:23 c15d62af27094d14acb6428c0e3e1a1d
msgid ""
"**Name**: Considering that a Prompt generally contains a lot of content, "
"for ease of use and easy search, we need to name the Prompt. Note: The "
"name of the Prompt is not allowed to be repeated. Name is the unique key "
"that identifies a Prompt."
msgstr "**名称**:考虑到每个 Prompt 的内容会非常多,为了方便用户使用和搜索,我们需要给每个 Prompt 命名。注意Prompt 的名称不允许重复,名称是一个 Prompt 的唯一键。"
msgstr ""
"**名称**:考虑到每个 Prompt 的内容会非常多,为了方便用户使用和搜索,我们需要给每个 Prompt 命名。注意Prompt "
"的名称不允许重复,名称是一个 Prompt 的唯一键。"
#: ../../modules/prompts.md:25 f44598d2f2a4429d8c1624feca65c867
#: ../../modules/prompts.md:25 621fe9c729c94e9bbde637b5a1856284
msgid "**Content**: Here is the actual Prompt content that will be input to LLM."
msgstr "**内容**:这里是实际要输入 LLM 的提示内容。"
#: ../../modules/prompts.md:27 2384fc6006e74e9f94966f6112e6bb06
#: ../../modules/prompts.md:27 ac2f153f704c4841a044daaf6548262b
msgid "Edit Prompt"
msgstr "编辑 Prompt"
#: ../../modules/prompts.md:29 efe8e16acc584f258645b1b20d891f71
#: ../../modules/prompts.md:29 3d6238ea482842e0968f691f3fd0c947
msgid ""
"Existing Prompts can be edited. Note that except **name**, other items "
"can be modified."
msgstr "已有的 Prompts 可以被编辑,除了名称不可修改,其余的内容均可修改。"
#: ../../modules/prompts.md:33 b1a82b4d88584275955087114cdcf574
#: ../../modules/prompts.md:33 7cbe985fd9534471bce5f93a93da82fd
msgid "Delete Prompt"
msgstr "删除 Prompt"
#: ../../modules/prompts.md:35 e4e55367b8574324937f8b3006e7d3cd
#: ../../modules/prompts.md:35 849ab9ef2a2c4a29bb827eb373f37b7d
msgid ""
"Ordinary users can only delete Prompts created by themselves in the "
"private Prompts space. Administrator users can delete Prompts in public "
"Prompts spaces and private Prompts spaces."
msgstr "普通用户只能删除他们自己在私有 Prompts 空间中创建的 Prompts管理员可以删除 公共 Prompts 空间下的 Prompts也可以删除私有 Prompts 空间下的 Prompts即使 Prompts 的创建者不是管理员)。"
msgstr ""
"普通用户只能删除他们自己在私有 Prompts 空间中创建的 Prompts管理员可以删除 公共 Prompts 空间下的 "
"Prompts也可以删除私有 Prompts 空间下的 Prompts即使 Prompts 的创建者不是管理员)。"
#: ../../modules/prompts.md:38 ad8210816c524908bd131ffa9adae07c
#: ../../modules/prompts.md:38 191921e5664d4326b01f0c45dc88a1e5
msgid "Use Prompt"
msgstr "使用 Prompt"
#: ../../modules/prompts.md:40 e7dd1e88f47647ecbfaf02a0d068b3f9
#: ../../modules/prompts.md:40 87ad58641f834f30bce178e748d75284
msgid ""
"Users can find and use Prompts next to the input boxes in each scene. "
"Click to view all contents of Prompts library."
msgstr "用户可以在每个场景中的输入框旁边找到并使用 Prompts。 点击悬浮图标可以查看当前用户能使用的全部 Prompts。"
#: ../../modules/prompts.md:42 38b6c173235c4f8499da14496a5a78b3
#: ../../modules/prompts.md:42 60458c7980174c73bc0d56e9e27cd2b3
msgid ""
"✓ Hover the mouse over each Prompt to preview the Prompt content. ✓ "
"Click Prompt to automatically fill in the Prompt content in the input "
"box."
msgstr "✓ 将鼠标悬停在每个 Prompt 上,可预览 Prompt 的内容。 ✓ 单击对应的 Prompt可自动将 Prompt 的内容填充到输入框中。"
msgstr ""
"✓ 将鼠标悬停在每个 Prompt 上,可预览 Prompt 的内容。 ✓ 单击对应的 Prompt可自动将 Prompt "
"的内容填充到输入框中。"

View File

@@ -1,4 +1,4 @@
# Knownledge
# Knowledge
As the knowledge base is currently the most significant user demand scenario, we natively support the construction and processing of knowledge bases. At the same time, we also provide multiple knowledge base management strategies in this project, such as:
1. Default built-in knowledge base

View File

@@ -2,6 +2,11 @@
The ability of Agent and Plugin is the core of whether large models can be automated. In this project, we natively support the plugin mode, and large models can automatically achieve their goals. At the same time, in order to give full play to the advantages of the community, the plugins used in this project natively support the Auto-GPT plugin ecology, that is, Auto-GPT plugins can directly run in our project.
```{admonition} The LLM (Language Model) suitable for the Plugin scene is
* chatgpt3.5.
* chatgpt4.
```
## Local Plugins
### 1.1 How to write local plugins.

View File

@@ -9,9 +9,9 @@ sphinx_book_theme
sphinx_rtd_theme==1.0.0
sphinx-typlog-theme==0.8.0
sphinx-panels
sphinx-tabs==3.4.0
toml
myst_nb
sphinx_copybutton
pydata-sphinx-theme==0.13.1
pydantic-settings
furo

View File

View File

@@ -111,7 +111,7 @@ def load_native_plugins(cfg: Config):
print("save file")
cfg.set_plugins(scan_plugins(cfg.debug_mode))
else:
print("get file faildresponse code", response.status_code)
print("get file failedresponse code", response.status_code)
except Exception as e:
print("load plugin from git exception!" + str(e))

55
pilot/common/chat_util.py Normal file
View File

@@ -0,0 +1,55 @@
import asyncio
from typing import Coroutine, List, Any
from starlette.responses import StreamingResponse
from pilot.scene.base_chat import BaseChat
from pilot.scene.chat_factory import ChatFactory
chat_factory = ChatFactory()
async def llm_chat_response_nostream(chat_scene: str, **chat_param):
"""llm_chat_response_nostream"""
chat: BaseChat = chat_factory.get_implementation(chat_scene, **chat_param)
res = await chat.get_llm_response()
return res
async def llm_chat_response(chat_scene: str, **chat_param):
chat: BaseChat = chat_factory.get_implementation(chat_scene, **chat_param)
return chat.stream_call()
def run_async_tasks(
tasks: List[Coroutine],
show_progress: bool = False,
progress_bar_desc: str = "Running async tasks",
) -> List[Any]:
"""Run a list of async tasks."""
tasks_to_execute: List[Any] = tasks
if show_progress:
try:
import nest_asyncio
from tqdm.asyncio import tqdm
nest_asyncio.apply()
loop = asyncio.get_event_loop()
async def _tqdm_gather() -> List[Any]:
return await tqdm.gather(*tasks_to_execute, desc=progress_bar_desc)
tqdm_outputs: List[Any] = loop.run_until_complete(_tqdm_gather())
return tqdm_outputs
# run the operation w/o tqdm on hitting a fatal
# may occur in some environments where tqdm.asyncio
# is not supported
except Exception:
pass
async def _gather() -> List[Any]:
return await asyncio.gather(*tasks_to_execute)
outputs: List[Any] = asyncio.run(_gather())
return outputs
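A minimal usage sketch for these helpers (the scene name and the contents of `chat_param` below are illustrative assumptions, not values defined in this commit):
```python
# Hedged sketch: drive the non-streaming chat helper from synchronous code.
# "chat_normal" and the chat_param fields are assumptions for illustration.
from pilot.common.chat_util import llm_chat_response_nostream, run_async_tasks

chat_param = {
    "chat_session_id": "demo-session",
    "current_user_input": "Summarize the sales table",
    "select_param": "",
    "model_name": "vicuna-13b-v1.5",
}

# Build the coroutines first; run_async_tasks executes them in one event loop
# and can optionally show a tqdm progress bar.
tasks = [llm_chat_response_nostream("chat_normal", chat_param=chat_param)]
results = run_async_tasks(tasks, show_progress=False)
```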

View File

@@ -46,10 +46,13 @@ class ComponentType(str, Enum):
WORKER_MANAGER = "dbgpt_worker_manager"
WORKER_MANAGER_FACTORY = "dbgpt_worker_manager_factory"
MODEL_CONTROLLER = "dbgpt_model_controller"
MODEL_REGISTRY = "dbgpt_model_registry"
MODEL_API_SERVER = "dbgpt_model_api_server"
AGENT_HUB = "dbgpt_agent_hub"
EXECUTOR_DEFAULT = "dbgpt_thread_pool_default"
TRACER = "dbgpt_tracer"
TRACER_SPAN_STORAGE = "dbgpt_tracer_span_storage"
RAG_GRAPH_DEFAULT = "dbgpt_rag_engine_default"
class BaseComponent(LifeCycle, ABC):
@@ -68,7 +71,6 @@ class BaseComponent(LifeCycle, ABC):
This method needs to be implemented by every component to define how it integrates
with the main system app.
"""
pass
T = TypeVar("T", bound=BaseComponent)
@@ -90,13 +92,28 @@ class SystemApp(LifeCycle):
"""Returns the internal ASGI app."""
return self._asgi_app
def register(self, component: Type[BaseComponent], *args, **kwargs):
"""Register a new component by its type."""
def register(self, component: Type[BaseComponent], *args, **kwargs) -> T:
"""Register a new component by its type.
Args:
component (Type[BaseComponent]): The component class to register
Returns:
T: The instance of registered component
"""
instance = component(self, *args, **kwargs)
self.register_instance(instance)
return instance
def register_instance(self, instance: T):
"""Register an already initialized component."""
def register_instance(self, instance: T) -> T:
"""Register an already initialized component.
Args:
instance (T): The component instance to register
Returns:
T: The instance of registered component
"""
name = instance.name
if isinstance(name, ComponentType):
name = name.value
@@ -107,18 +124,34 @@ class SystemApp(LifeCycle):
logger.info(f"Register component with name {name} and instance: {instance}")
self.components[name] = instance
instance.init_app(self)
return instance
def get_component(
self,
name: Union[str, ComponentType],
component_type: Type[T],
default_component=_EMPTY_DEFAULT_COMPONENT,
or_register_component: Type[BaseComponent] = None,
*args,
**kwargs,
) -> T:
"""Retrieve a registered component by its name and type."""
"""Retrieve a registered component by its name and type.
Args:
name (Union[str, ComponentType]): Component name
component_type (Type[T]): The type of current retrieve component
default_component : The default component instance if not retrieve by name
or_register_component (Type[BaseComponent]): The new component to register if not retrieve by name
Returns:
T: The instance retrieved by component name
"""
if isinstance(name, ComponentType):
name = name.value
component = self.components.get(name)
if not component:
if or_register_component:
return self.register(or_register_component, *args, **kwargs)
if default_component != _EMPTY_DEFAULT_COMPONENT:
return default_component
raise ValueError(f"No component found with name {name}")

View File

@@ -194,6 +194,8 @@ class Config(metaclass=Singleton):
### LLM Model Service Configuration
self.LLM_MODEL = os.getenv("LLM_MODEL", "vicuna-13b-v1.5")
self.LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH")
### Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
### When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
### We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.

View File

@@ -78,6 +78,10 @@ LLM_MODEL_CONFIG = {
"internlm-7b": os.path.join(MODEL_PATH, "internlm-chat-7b"),
"internlm-7b-8k": os.path.join(MODEL_PATH, "internlm-chat-7b-8k"),
"internlm-20b": os.path.join(MODEL_PATH, "internlm-chat-20b"),
"codellama-7b": os.path.join(MODEL_PATH, "CodeLlama-7b-Instruct-hf"),
"codellama-7b-sql-sft": os.path.join(MODEL_PATH, "codellama-7b-sql-sft"),
"codellama-13b": os.path.join(MODEL_PATH, "CodeLlama-13b-Instruct-hf"),
"codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"),
# For test now
"opt-125m": os.path.join(MODEL_PATH, "opt-125m"),
}

View File

@@ -106,3 +106,13 @@ class ClickhouseConnect(RDBMSDatabase):
return [
(table_comment[0], table_comment[1]) for table_comment in table_comments
]
def table_simple_info(self):
# group_concat() not supported in clickhouse, use arrayStringConcat+groupArray instead; and quotes need to be escaped
_sql = f"""
select concat(TABLE_NAME, \'(\' , arrayStringConcat(groupArray(column_name),\'-\'), \')\') as schema_info
from information_schema.COLUMNS where table_schema=\'{self.get_current_db_name()}\' group by TABLE_NAME; """
cursor = self.session.execute(text(_sql))
results = cursor.fetchall()
return results
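For reference, the query folds each table's columns into a single string per row; a hedged sketch of calling it (the connection object, table and column names are invented for illustration):
```python
# Hedged sketch: each returned row carries one "schema_info" string of the
# form "table(col1-col2-...)"; clickhouse_connect is an assumed
# ClickhouseConnect instance.
rows = clickhouse_connect.table_simple_info()
for (schema_info,) in rows:
    print(schema_info)  # e.g. "users(id-name-email)"
```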

View File

@@ -41,7 +41,11 @@ class KnowledgeType(Enum):
def get_knowledge_embedding(
knowledge_type, knowledge_source, vector_store_config, source_reader, text_splitter
knowledge_type,
knowledge_source,
vector_store_config=None,
source_reader=None,
text_splitter=None,
):
match knowledge_type:
case KnowledgeType.DOCUMENT.value:

View File

@@ -38,11 +38,11 @@ class SourceEmbedding(ABC):
- embedding_args: Optional
"""
self.file_path = file_path
self.vector_store_config = vector_store_config
self.vector_store_config = vector_store_config or {}
self.source_reader = source_reader or None
self.text_splitter = text_splitter or None
self.embedding_args = embedding_args
self.embeddings = vector_store_config["embeddings"]
self.embeddings = self.vector_store_config.get("embeddings", None)
@abstractmethod
@register

View File

@@ -33,7 +33,7 @@ class URLEmbedding(SourceEmbedding):
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.vector_store_config = vector_store_config or None
self.source_reader = source_reader or None
self.text_splitter = text_splitter or None

View File

View File

@@ -0,0 +1,207 @@
import logging
from typing import Any, Optional, Callable, Tuple, List
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pilot.embedding_engine import KnowledgeType
from pilot.embedding_engine.knowledge_type import get_knowledge_embedding
from pilot.graph_engine.index_struct import KG
from pilot.graph_engine.node import TextNode
from pilot.utils import utils
logger = logging.getLogger(__name__)
class RAGGraphEngine:
"""Knowledge RAG Graph Engine.
Build a RAG Graph Client can extract triplets and insert into graph store.
Args:
knowledge_type (Optional[str]): The knowledge source type. Defaults to KnowledgeType.DOCUMENT.value.
knowledge_source (Optional[str]):
model_name (Optional[str]): llm model name
graph_store (Optional[GraphStore]): The graph store to use. Reference: llama-index.
include_embeddings (bool): Whether to include embeddings in the index.
Defaults to False.
max_object_length (int): The maximum length of the object in a triplet.
Defaults to 128.
extract_triplet_fn (Optional[Callable]): The function to use for
extracting triplets. Defaults to None.
"""
index_struct_cls = KG
def __init__(
self,
knowledge_type: Optional[str] = KnowledgeType.DOCUMENT.value,
knowledge_source: Optional[str] = None,
text_splitter=None,
graph_store=None,
index_struct: Optional[KG] = None,
model_name: Optional[str] = None,
max_triplets_per_chunk: int = 10,
include_embeddings: bool = False,
max_object_length: int = 128,
extract_triplet_fn: Optional[Callable] = None,
**kwargs: Any,
) -> None:
"""Initialize params."""
from llama_index.graph_stores import SimpleGraphStore
# need to set parameters before building index in base class.
self.knowledge_source = knowledge_source
self.knowledge_type = knowledge_type
self.model_name = model_name
self.text_splitter = text_splitter
self.index_struct = index_struct
self.include_embeddings = include_embeddings
self.graph_store = graph_store or SimpleGraphStore()
# self.graph_store = graph_store
self.max_triplets_per_chunk = max_triplets_per_chunk
self._max_object_length = max_object_length
self._extract_triplet_fn = extract_triplet_fn
def knowledge_graph(self, docs=None):
"""knowledge docs into graph store"""
if not docs:
if not self.text_splitter:
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=2000, chunk_overlap=100
)
knowledge_source = get_knowledge_embedding(
knowledge_type=self.knowledge_type,
knowledge_source=self.knowledge_source,
text_splitter=self.text_splitter,
)
docs = knowledge_source.read()
if self.index_struct is None:
self.index_struct = self._build_index_from_docs(docs)
def _extract_triplets(self, text: str) -> List[Tuple[str, str, str]]:
"""Extract triplets from text by function or llm"""
if self._extract_triplet_fn is not None:
return self._extract_triplet_fn(text)
else:
return self._llm_extract_triplets(text)
def _llm_extract_triplets(self, text: str) -> List[Tuple[str, str, str]]:
"""Extract triplets from text by llm"""
from pilot.scene.base import ChatScene
from pilot.common.chat_util import llm_chat_response_nostream
import uuid
chat_param = {
"chat_session_id": uuid.uuid1(),
"current_user_input": text,
"select_param": "triplet",
"model_name": self.model_name,
}
loop = utils.get_or_create_event_loop()
triplets = loop.run_until_complete(
llm_chat_response_nostream(
ChatScene.ExtractTriplet.value(), **{"chat_param": chat_param}
)
)
return triplets
def _build_index_from_docs(self, documents: List[Document]) -> KG:
"""Build the index from nodes.
Args:documents:List[Document]
"""
index_struct = self.index_struct_cls()
triplets = []
for doc in documents:
trips = self._extract_triplets_task([doc], index_struct)
triplets.extend(trips)
print(triplets)
text_node = TextNode(text=doc.page_content, metadata=doc.metadata)
for triplet in triplets:
subj, _, obj = triplet
self.graph_store.upsert_triplet(*triplet)
index_struct.add_node([subj, obj], text_node)
return index_struct
# num_threads = 5
# chunk_size = (
# len(documents)
# if (len(documents) < num_threads)
# else len(documents) // num_threads
# )
#
# import concurrent
# triples = []
# future_tasks = []
# with concurrent.futures.ThreadPoolExecutor() as executor:
# for i in range(num_threads):
# start = i * chunk_size
# end = start + chunk_size if i < num_threads - 1 else None
# # doc = documents[start:end]
# future_tasks.append(
# executor.submit(
# self._extract_triplets_task,
# documents[start:end],
# index_struct,
# )
# )
# # for doc in documents[start:end]:
# # future_tasks.append(
# # executor.submit(
# # self._extract_triplets_task,
# # doc,
# # index_struct,
# # )
# # )
#
# # result = [future.result() for future in future_tasks]
# completed_futures, _ = concurrent.futures.wait(future_tasks, return_when=concurrent.futures.ALL_COMPLETED)
# for future in completed_futures:
# # 获取已完成的future的结果并添加到results列表中
# result = future.result()
# triplets.extend(result)
# print(f"total triplets-{triples}")
# for triplet in triplets:
# subj, _, obj = triplet
# self.graph_store.upsert_triplet(*triplet)
# # index_struct.add_node([subj, obj], text_node)
# return index_struct
# for doc in documents:
# triplets = self._extract_triplets(doc.page_content)
# if len(triplets) == 0:
# continue
# text_node = TextNode(text=doc.page_content, metadata=doc.metadata)
# logger.info(f"extracted knowledge triplets: {triplets}")
# for triplet in triplets:
# subj, _, obj = triplet
# self.graph_store.upsert_triplet(*triplet)
# index_struct.add_node([subj, obj], text_node)
#
# return index_struct
def search(self, query):
from pilot.graph_engine.graph_search import RAGGraphSearch
graph_search = RAGGraphSearch(graph_engine=self)
return graph_search.search(query)
def _extract_triplets_task(self, docs, index_struct):
triple_results = []
for doc in docs:
import threading
thread_id = threading.get_ident()
print(f"current thread-{thread_id} begin extract triplets task")
triplets = self._extract_triplets(doc.page_content)
if len(triplets) == 0:
triplets = []
text_node = TextNode(text=doc.page_content, metadata=doc.metadata)
logger.info(f"extracted knowledge triplets: {triplets}")
print(
f"current thread-{thread_id} end extract triplets tasks, triplets-{triplets}"
)
triple_results.extend(triplets)
return triple_results
# for triplet in triplets:
# subj, _, obj = triplet
# self.graph_store.upsert_triplet(*triplet)
# self.graph_store.upsert_triplet(*triplet)
# index_struct.add_node([subj, obj], text_node)
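A hedged end-to-end sketch of the engine (the document path and model name are illustrative assumptions; building the graph issues LLM calls, so a reachable model service is assumed):
```python
# Hedged sketch: build a knowledge graph from one document, then query it.
# The path and model name are assumptions, not values from this commit.
import asyncio

from pilot.embedding_engine import KnowledgeType
from pilot.graph_engine.graph_engine import RAGGraphEngine

engine = RAGGraphEngine(
    knowledge_type=KnowledgeType.DOCUMENT.value,
    knowledge_source="/data/docs/db_gpt_intro.md",
    model_name="proxyllm",
)
# Extract triplets with the LLM and upsert them into the graph store.
engine.knowledge_graph()

# search() delegates to RAGGraphSearch and returns a coroutine.
docs = asyncio.run(engine.search("What does DB-GPT support?"))
```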

View File

@@ -0,0 +1,34 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Type
from pilot.component import BaseComponent, ComponentType
class RAGGraphFactory(BaseComponent, ABC):
name = ComponentType.RAG_GRAPH_DEFAULT.value
@abstractmethod
def create(self, model_name: str = None, embedding_cls: Type = None):
"""Create RAG Graph Engine"""
class DefaultRAGGraphFactory(RAGGraphFactory):
def __init__(
self, system_app=None, default_model_name: str = None, **kwargs: Any
) -> None:
super().__init__(system_app=system_app)
self._default_model_name = default_model_name
self.kwargs = kwargs
from pilot.graph_engine.graph_engine import RAGGraphEngine
self.rag_engine = RAGGraphEngine(model_name="proxyllm")
def init_app(self, system_app):
pass
def create(self, model_name: str = None, rag_cls: Type = None):
if not model_name:
model_name = self._default_model_name
return self.rag_engine

View File

@@ -0,0 +1,197 @@
import logging
import os
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional, Dict, Any, Set, Callable
from langchain.schema import Document
from pilot.graph_engine.node import BaseNode, TextNode, NodeWithScore
from pilot.graph_engine.search import BaseSearch, SearchMode
logger = logging.getLogger(__name__)
DEFAULT_NODE_SCORE = 1000.0
GLOBAL_EXPLORE_NODE_LIMIT = 3
REL_TEXT_LIMIT = 30
class RAGGraphSearch(BaseSearch):
"""RAG Graph Search.
args:
graph_engine RAGGraphEngine.
model_name (str): model name
text_qa_template (Optional[BasePromptTemplate]): A Question Answering Prompt
(see :ref:`Prompt-Templates`).
max_keywords_per_query (int): Maximum number of keywords to extract from query.
num_chunks_per_query (int): Maximum number of text chunks to query.
search_mode (Optional[SearchMode]): Specifies whether to use keywords, default SearchMode.KEYWORD
embeddings, or both to find relevant triplets. Should be one of "keyword",
"embedding", or "hybrid".
graph_store_query_depth (int): The depth of the graph store query.
extract_subject_entities_fn (Optional[Callable]): Callback used to extract subject entities.
"""
def __init__(
self,
graph_engine,
model_name: str = None,
max_keywords_per_query: int = 10,
num_chunks_per_query: int = 10,
search_mode: Optional[SearchMode] = SearchMode.KEYWORD,
graph_store_query_depth: int = 2,
extract_subject_entities_fn: Optional[Callable] = None,
**kwargs: Any,
) -> None:
"""Initialize params."""
from pilot.graph_engine.graph_engine import RAGGraphEngine
self.graph_engine: RAGGraphEngine = graph_engine
self.model_name = model_name or self.graph_engine.model_name
self._index_struct = self.graph_engine.index_struct
self.max_keywords_per_query = max_keywords_per_query
self.num_chunks_per_query = num_chunks_per_query
self._search_mode = search_mode
self._graph_store = self.graph_engine.graph_store
self.graph_store_query_depth = graph_store_query_depth
self._verbose = kwargs.get("verbose", False)
refresh_schema = kwargs.get("refresh_schema", False)
self.extract_subject_entities_fn = extract_subject_entities_fn
self.executor = ThreadPoolExecutor(max_workers=os.cpu_count() * 5)
try:
self._graph_schema = self._graph_store.get_schema(refresh=refresh_schema)
except NotImplementedError:
self._graph_schema = ""
except Exception as e:
logger.warn(f"can not to find graph schema: {e}")
self._graph_schema = ""
async def _extract_subject_entities(self, query_str: str) -> Set[str]:
"""extract subject entities."""
if self.extract_subject_entities_fn is not None:
return await self.extract_subject_entities_fn(query_str)
else:
return await self._extract_entities_by_llm(query_str)
async def _extract_entities_by_llm(self, text: str) -> Set[str]:
"""extract subject entities from text by llm"""
from pilot.scene.base import ChatScene
from pilot.common.chat_util import llm_chat_response_nostream
import uuid
chat_param = {
"chat_session_id": uuid.uuid1(),
"current_user_input": text,
"select_param": "entity",
"model_name": self.model_name,
}
# loop = utils.get_or_create_event_loop()
# entities = loop.run_until_complete(
# llm_chat_response_nostream(
# ChatScene.ExtractEntity.value(), **{"chat_param": chat_param}
# )
# )
return await llm_chat_response_nostream(
ChatScene.ExtractEntity.value(), **{"chat_param": chat_param}
)
async def _search(
self,
query_str: str,
) -> List[Document]:
"""Get nodes for response."""
node_visited = set()
keywords = await self._extract_subject_entities(query_str)
print(f"extract entities: {keywords}\n")
rel_texts = []
cur_rel_map = {}
chunk_indices_count: Dict[str, int] = defaultdict(int)
if self._search_mode != SearchMode.EMBEDDING:
for keyword in keywords:
keyword = keyword.lower()
subjs = set((keyword,))
# node_ids = self._index_struct.search_node_by_keyword(keyword)
# for node_id in node_ids[:GLOBAL_EXPLORE_NODE_LIMIT]:
# if node_id in node_visited:
# continue
#
# # if self._include_text:
# # chunk_indices_count[node_id] += 1
#
# node_visited.add(node_id)
rel_map = self._graph_store.get_rel_map(
list(subjs), self.graph_store_query_depth
)
logger.debug(f"rel_map: {rel_map}")
if not rel_map:
continue
rel_texts.extend(
[
str(rel_obj)
for rel_objs in rel_map.values()
for rel_obj in rel_objs
]
)
cur_rel_map.update(rel_map)
sorted_nodes_with_scores = []
if not rel_texts:
logger.info("> No relationships found, returning nodes found by keywords.")
if len(sorted_nodes_with_scores) == 0:
logger.info("> No nodes found by keywords, returning empty response.")
return [Document(page_content="No relationships found.")]
# add relationships as Node
# TODO: make initial text customizable
rel_initial_text = (
f"The following are knowledge sequence in max depth"
f" {self.graph_store_query_depth} "
f"in the form of directed graph like:\n"
f"`subject -[predicate]->, object, <-[predicate_next_hop]-,"
f" object_next_hop ...`"
)
rel_info = [rel_initial_text] + rel_texts
rel_node_info = {
"kg_rel_texts": rel_texts,
"kg_rel_map": cur_rel_map,
}
if self._graph_schema != "":
rel_node_info["kg_schema"] = {"schema": self._graph_schema}
rel_info_text = "\n".join(
[
str(item)
for sublist in rel_info
for item in (sublist if isinstance(sublist, list) else [sublist])
]
)
if self._verbose:
print(f"KG context:\n{rel_info_text}\n", color="blue")
rel_text_node = TextNode(
text=rel_info_text,
metadata=rel_node_info,
excluded_embed_metadata_keys=["kg_rel_map", "kg_rel_texts"],
excluded_llm_metadata_keys=["kg_rel_map", "kg_rel_texts"],
)
# this node is constructed from rel_texts, give high confidence to avoid cutoff
sorted_nodes_with_scores.append(
NodeWithScore(node=rel_text_node, score=DEFAULT_NODE_SCORE)
)
docs = [
Document(page_content=node.text, metadata=node.metadata)
for node in sorted_nodes_with_scores
]
return docs
def _get_metadata_for_response(
self, nodes: List[BaseNode]
) -> Optional[Dict[str, Any]]:
"""Get metadata for response."""
for node in nodes:
if node.metadata is None or "kg_rel_map" not in node.metadata:
continue
return node.metadata
raise ValueError("kg_rel_map must be found in at least one Node.")

View File

@@ -0,0 +1,259 @@
"""Data structures.
Nodes are decoupled from the indices.
"""
import uuid
from abc import abstractmethod
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Sequence, Set
from dataclasses_json import DataClassJsonMixin
from pilot.graph_engine.index_type import IndexStructType
from pilot.graph_engine.node import TextNode, BaseNode
# TODO: legacy backport of old Node class
Node = TextNode
@dataclass
class IndexStruct(DataClassJsonMixin):
"""A base data struct for a LlamaIndex."""
index_id: str = field(default_factory=lambda: str(uuid.uuid4()))
summary: Optional[str] = None
def get_summary(self) -> str:
"""Get text summary."""
if self.summary is None:
raise ValueError("summary field of the index_struct not set.")
return self.summary
@classmethod
@abstractmethod
def get_type(cls):
"""Get index struct type."""
@dataclass
class IndexGraph(IndexStruct):
"""A graph representing the tree-structured index."""
# mapping from index in tree to Node doc id.
all_nodes: Dict[int, str] = field(default_factory=dict)
root_nodes: Dict[int, str] = field(default_factory=dict)
node_id_to_children_ids: Dict[str, List[str]] = field(default_factory=dict)
@property
def node_id_to_index(self) -> Dict[str, int]:
"""Map from node id to index."""
return {node_id: index for index, node_id in self.all_nodes.items()}
@property
def size(self) -> int:
"""Get the size of the graph."""
return len(self.all_nodes)
def get_index(self, node: BaseNode) -> int:
"""Get index of node."""
return self.node_id_to_index[node.node_id]
def insert(
self,
node: BaseNode,
index: Optional[int] = None,
children_nodes: Optional[Sequence[BaseNode]] = None,
) -> None:
"""Insert node."""
index = index or self.size
node_id = node.node_id
self.all_nodes[index] = node_id
if children_nodes is None:
children_nodes = []
children_ids = [n.node_id for n in children_nodes]
self.node_id_to_children_ids[node_id] = children_ids
def get_children(self, parent_node: Optional[BaseNode]) -> Dict[int, str]:
"""Get children nodes."""
if parent_node is None:
return self.root_nodes
else:
parent_id = parent_node.node_id
children_ids = self.node_id_to_children_ids[parent_id]
return {
self.node_id_to_index[child_id]: child_id for child_id in children_ids
}
def insert_under_parent(
self,
node: BaseNode,
parent_node: Optional[BaseNode],
new_index: Optional[int] = None,
) -> None:
"""Insert under parent node."""
new_index = new_index or self.size
if parent_node is None:
self.root_nodes[new_index] = node.node_id
self.node_id_to_children_ids[node.node_id] = []
else:
if parent_node.node_id not in self.node_id_to_children_ids:
self.node_id_to_children_ids[parent_node.node_id] = []
self.node_id_to_children_ids[parent_node.node_id].append(node.node_id)
self.all_nodes[new_index] = node.node_id
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.TREE
@dataclass
class KeywordTable(IndexStruct):
"""A table of keywords mapping keywords to text chunks."""
table: Dict[str, Set[str]] = field(default_factory=dict)
def add_node(self, keywords: List[str], node: BaseNode) -> None:
"""Add text to table."""
for keyword in keywords:
if keyword not in self.table:
self.table[keyword] = set()
self.table[keyword].add(node.node_id)
@property
def node_ids(self) -> Set[str]:
"""Get all node ids."""
return set.union(*self.table.values())
@property
def keywords(self) -> Set[str]:
"""Get all keywords in the table."""
return set(self.table.keys())
@property
def size(self) -> int:
"""Get the size of the table."""
return len(self.table)
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.KEYWORD_TABLE
@dataclass
class IndexList(IndexStruct):
"""A list of documents."""
nodes: List[str] = field(default_factory=list)
def add_node(self, node: BaseNode) -> None:
"""Add text to table, return current position in list."""
# don't worry about child indices for now, nodes are all in order
self.nodes.append(node.node_id)
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.LIST
@dataclass
class IndexDict(IndexStruct):
"""A simple dictionary of documents."""
# TODO: slightly deprecated, should likely be a list or set now
# mapping from vector store id to node doc_id
nodes_dict: Dict[str, str] = field(default_factory=dict)
# TODO: deprecated, not used
# mapping from node doc_id to vector store id
doc_id_dict: Dict[str, List[str]] = field(default_factory=dict)
# TODO: deprecated, not used
# this should be empty for all other indices
embeddings_dict: Dict[str, List[float]] = field(default_factory=dict)
def add_node(
self,
node: BaseNode,
text_id: Optional[str] = None,
) -> str:
"""Add text to table, return current position in list."""
# # don't worry about child indices for now, nodes are all in order
# self.nodes_dict[int_id] = node
vector_id = text_id if text_id is not None else node.node_id
self.nodes_dict[vector_id] = node.node_id
return vector_id
def delete(self, doc_id: str) -> None:
"""Delete a Node."""
del self.nodes_dict[doc_id]
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.VECTOR_STORE
@dataclass
class KG(IndexStruct):
"""A table of keywords mapping keywords to text chunks."""
# Unidirectional
# table of keywords to node ids
table: Dict[str, Set[str]] = field(default_factory=dict)
# TODO: legacy attribute, remove in future releases
rel_map: Dict[str, List[List[str]]] = field(default_factory=dict)
# TBD, should support vector store, now we just persist the embedding memory
# maybe chainable abstractions for *_stores could be designed
embedding_dict: Dict[str, List[float]] = field(default_factory=dict)
@property
def node_ids(self) -> Set[str]:
"""Get all node ids."""
return set.union(*self.table.values())
def add_to_embedding_dict(self, triplet_str: str, embedding: List[float]) -> None:
"""Add embedding to dict."""
self.embedding_dict[triplet_str] = embedding
def add_node(self, keywords: List[str], node: BaseNode) -> None:
"""Add text to table."""
node_id = node.node_id
for keyword in keywords:
keyword = keyword.lower()
if keyword not in self.table:
self.table[keyword] = set()
self.table[keyword].add(node_id)
def search_node_by_keyword(self, keyword: str) -> List[str]:
"""Search for nodes by keyword."""
if keyword not in self.table:
return []
return list(self.table[keyword])
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.KG
@dataclass
class EmptyIndexStruct(IndexStruct):
"""Empty index."""
@classmethod
def get_type(cls) -> IndexStructType:
"""Get type."""
return IndexStructType.EMPTY
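A small hedged sketch of how the `KG` keyword table pairs with `TextNode` (keywords and text are invented for illustration):
```python
# Hedged sketch of the KG index struct; the content is illustrative only.
from pilot.graph_engine.index_struct import KG
from pilot.graph_engine.node import TextNode

kg = KG()
node = TextNode(text="DB-GPT natively supports an Auto-GPT compatible plugin mode.")

# Keywords are lower-cased when indexed, then looked up the same way.
kg.add_node(["DB-GPT", "plugin"], node)
assert kg.search_node_by_keyword("db-gpt") == [node.node_id]
assert kg.search_node_by_keyword("missing") == []
```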

View File

@@ -0,0 +1,48 @@
"""IndexStructType class."""
from enum import Enum
class IndexStructType(str, Enum):
"""Index struct type. Identifier for a "type" of index.
Attributes:
TREE ("tree"): Tree index. See :ref:`Ref-Indices-Tree` for tree indices.
LIST ("list"): Summary index. See :ref:`Ref-Indices-List` for summary indices.
KEYWORD_TABLE ("keyword_table"): Keyword table index. See
:ref:`Ref-Indices-Table`
for keyword table indices.
DICT ("dict"): Faiss Vector Store Index. See
:ref:`Ref-Indices-VectorStore`
for more information on the faiss vector store index.
SIMPLE_DICT ("simple_dict"): Simple Vector Store Index. See
:ref:`Ref-Indices-VectorStore`
for more information on the simple vector store index.
KG ("kg"): Knowledge Graph index.
See :ref:`Ref-Indices-Knowledge-Graph` for KG indices.
DOCUMENT_SUMMARY ("document_summary"): Document Summary Index.
See :ref:`Ref-Indices-Document-Summary` for Summary Indices.
"""
# TODO: refactor so these are properties on the base class
NODE = "node"
TREE = "tree"
LIST = "list"
KEYWORD_TABLE = "keyword_table"
DICT = "dict"
# simple
SIMPLE_DICT = "simple_dict"
# for KG index
KG = "kg"
SIMPLE_KG = "simple_kg"
NEBULAGRAPH = "nebulagraph"
FALKORDB = "falkordb"
# EMPTY
EMPTY = "empty"
COMPOSITE = "composite"
DOCUMENT_SUMMARY = "document_summary"

View File

@@ -0,0 +1,74 @@
from typing import List, Optional
from llama_index.data_structs.data_structs import IndexStruct
from llama_index.storage.index_store.utils import (
index_struct_to_json,
json_to_index_struct,
)
from llama_index.storage.kvstore.types import BaseKVStore
DEFAULT_NAMESPACE = "index_store"
class KVIndexStore:
"""Key-Value Index store.
Args:
kvstore (BaseKVStore): key-value store
namespace (str): namespace for the index store
"""
def __init__(self, kvstore: BaseKVStore, namespace: Optional[str] = None) -> None:
"""Init a KVIndexStore."""
self._kvstore = kvstore
self._namespace = namespace or DEFAULT_NAMESPACE
self._collection = f"{self._namespace}/data"
def add_index_struct(self, index_struct: IndexStruct) -> None:
"""Add an index struct.
Args:
index_struct (IndexStruct): index struct
"""
key = index_struct.index_id
data = index_struct_to_json(index_struct)
self._kvstore.put(key, data, collection=self._collection)
def delete_index_struct(self, key: str) -> None:
"""Delete an index struct.
Args:
key (str): index struct key
"""
self._kvstore.delete(key, collection=self._collection)
def get_index_struct(
self, struct_id: Optional[str] = None
) -> Optional[IndexStruct]:
"""Get an index struct.
Args:
struct_id (Optional[str]): index struct id
"""
if struct_id is None:
structs = self.index_structs()
assert len(structs) == 1
return structs[0]
else:
json = self._kvstore.get(struct_id, collection=self._collection)
if json is None:
return None
return json_to_index_struct(json)
def index_structs(self) -> List[IndexStruct]:
"""Get all index structs.
Returns:
List[IndexStruct]: index structs
"""
jsons = self._kvstore.get_all(collection=self._collection)
return [json_to_index_struct(json) for json in jsons.values()]

570
pilot/graph_engine/node.py Normal file
View File

@@ -0,0 +1,570 @@
"""Base schema for data structures."""
import json
import textwrap
import uuid
from abc import abstractmethod
from enum import Enum, auto
from hashlib import sha256
from typing import Any, Dict, List, Optional, Union
from langchain.schema import Document
from pydantic import BaseModel, Field, root_validator
from typing_extensions import Self
DEFAULT_TEXT_NODE_TMPL = "{metadata_str}\n\n{content}"
DEFAULT_METADATA_TMPL = "{key}: {value}"
# NOTE: for pretty printing
TRUNCATE_LENGTH = 350
WRAP_WIDTH = 70
class BaseComponent(BaseModel):
"""Base component object to caputure class names."""
"""reference llama-index"""
@classmethod
@abstractmethod
def class_name(cls) -> str:
"""Get class name."""
def to_dict(self, **kwargs: Any) -> Dict[str, Any]:
data = self.dict(**kwargs)
data["class_name"] = self.class_name()
return data
def to_json(self, **kwargs: Any) -> str:
data = self.to_dict(**kwargs)
return json.dumps(data)
# TODO: return type here not supported by current mypy version
@classmethod
def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self: # type: ignore
if isinstance(kwargs, dict):
data.update(kwargs)
data.pop("class_name", None)
return cls(**data)
@classmethod
def from_json(cls, data_str: str, **kwargs: Any) -> Self: # type: ignore
data = json.loads(data_str)
return cls.from_dict(data, **kwargs)
class NodeRelationship(str, Enum):
"""Node relationships used in `BaseNode` class.
Attributes:
SOURCE: The node is the source document.
PREVIOUS: The node is the previous node in the document.
NEXT: The node is the next node in the document.
PARENT: The node is the parent node in the document.
CHILD: The node is a child node in the document.
"""
SOURCE = auto()
PREVIOUS = auto()
NEXT = auto()
PARENT = auto()
CHILD = auto()
class ObjectType(str, Enum):
TEXT = auto()
IMAGE = auto()
INDEX = auto()
DOCUMENT = auto()
class MetadataMode(str, Enum):
ALL = auto()
EMBED = auto()
LLM = auto()
NONE = auto()
class RelatedNodeInfo(BaseComponent):
node_id: str
node_type: Optional[ObjectType] = None
metadata: Dict[str, Any] = Field(default_factory=dict)
hash: Optional[str] = None
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "RelatedNodeInfo"
RelatedNodeType = Union[RelatedNodeInfo, List[RelatedNodeInfo]]
# Node classes for indexes
class BaseNode(BaseComponent):
"""Base node Object.
Generic abstract interface for retrievable nodes
"""
class Config:
allow_population_by_field_name = True
id_: str = Field(
default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the node."
)
embedding: Optional[List[float]] = Field(
default=None, description="Embedding of the node."
)
""""
metadata fields
- injected as part of the text shown to LLMs as context
- injected as part of the text for generating embeddings
- used by vector DBs for metadata filtering
"""
metadata: Dict[str, Any] = Field(
default_factory=dict,
description="A flat dictionary of metadata fields",
alias="extra_info",
)
excluded_embed_metadata_keys: List[str] = Field(
default_factory=list,
description="Metadata keys that are exluded from text for the embed model.",
)
excluded_llm_metadata_keys: List[str] = Field(
default_factory=list,
description="Metadata keys that are exluded from text for the LLM.",
)
relationships: Dict[NodeRelationship, RelatedNodeType] = Field(
default_factory=dict,
description="A mapping of relationships to other node information.",
)
hash: str = Field(default="", description="Hash of the node content.")
@classmethod
@abstractmethod
def get_type(cls) -> str:
"""Get Object type."""
@abstractmethod
def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
"""Get object content."""
@abstractmethod
def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
"""Metadata string."""
@abstractmethod
def set_content(self, value: Any) -> None:
"""Set the content of the node."""
@property
def node_id(self) -> str:
return self.id_
@node_id.setter
def node_id(self, value: str) -> None:
self.id_ = value
@property
def source_node(self) -> Optional[RelatedNodeInfo]:
"""Source object node.
Extracted from the relationships field.
"""
if NodeRelationship.SOURCE not in self.relationships:
return None
relation = self.relationships[NodeRelationship.SOURCE]
if isinstance(relation, list):
raise ValueError("Source object must be a single RelatedNodeInfo object")
return relation
@property
def prev_node(self) -> Optional[RelatedNodeInfo]:
"""Prev node."""
if NodeRelationship.PREVIOUS not in self.relationships:
return None
relation = self.relationships[NodeRelationship.PREVIOUS]
if not isinstance(relation, RelatedNodeInfo):
raise ValueError("Previous object must be a single RelatedNodeInfo object")
return relation
@property
def next_node(self) -> Optional[RelatedNodeInfo]:
"""Next node."""
if NodeRelationship.NEXT not in self.relationships:
return None
relation = self.relationships[NodeRelationship.NEXT]
if not isinstance(relation, RelatedNodeInfo):
raise ValueError("Next object must be a single RelatedNodeInfo object")
return relation
@property
def parent_node(self) -> Optional[RelatedNodeInfo]:
"""Parent node."""
if NodeRelationship.PARENT not in self.relationships:
return None
relation = self.relationships[NodeRelationship.PARENT]
if not isinstance(relation, RelatedNodeInfo):
raise ValueError("Parent object must be a single RelatedNodeInfo object")
return relation
@property
def child_nodes(self) -> Optional[List[RelatedNodeInfo]]:
"""Child nodes."""
if NodeRelationship.CHILD not in self.relationships:
return None
relation = self.relationships[NodeRelationship.CHILD]
if not isinstance(relation, list):
raise ValueError("Child objects must be a list of RelatedNodeInfo objects.")
return relation
@property
def ref_doc_id(self) -> Optional[str]:
"""Deprecated: Get ref doc id."""
source_node = self.source_node
if source_node is None:
return None
return source_node.node_id
@property
def extra_info(self) -> Dict[str, Any]:
"""TODO: DEPRECATED: Extra info."""
return self.metadata
def __str__(self) -> str:
source_text_truncated = truncate_text(
self.get_content().strip(), TRUNCATE_LENGTH
)
source_text_wrapped = textwrap.fill(
f"Text: {source_text_truncated}\n", width=WRAP_WIDTH
)
return f"Node ID: {self.node_id}\n{source_text_wrapped}"
def truncate_text(text: str, max_length: int) -> str:
"""Truncate text to a maximum length."""
if len(text) <= max_length:
return text
return text[: max_length - 3] + "..."
def get_embedding(self) -> List[float]:
"""Get embedding.
Errors if embedding is None.
"""
if self.embedding is None:
raise ValueError("embedding not set.")
return self.embedding
def as_related_node_info(self) -> RelatedNodeInfo:
"""Get node as RelatedNodeInfo."""
return RelatedNodeInfo(
node_id=self.node_id, metadata=self.metadata, hash=self.hash
)
class TextNode(BaseNode):
text: str = Field(default="", description="Text content of the node.")
start_char_idx: Optional[int] = Field(
default=None, description="Start char index of the node."
)
end_char_idx: Optional[int] = Field(
default=None, description="End char index of the node."
)
text_template: str = Field(
default=DEFAULT_TEXT_NODE_TMPL,
description=(
"Template for how text is formatted, with {content} and "
"{metadata_str} placeholders."
),
)
metadata_template: str = Field(
default=DEFAULT_METADATA_TMPL,
description=(
"Template for how metadata is formatted, with {key} and "
"{value} placeholders."
),
)
metadata_seperator: str = Field(
default="\n",
description="Seperator between metadata fields when converting to string.",
)
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "TextNode"
@root_validator
def _check_hash(cls, values: dict) -> dict:
"""Generate a hash to represent the node."""
text = values.get("text", "")
metadata = values.get("metadata", {})
doc_identity = str(text) + str(metadata)
values["hash"] = str(
sha256(doc_identity.encode("utf-8", "surrogatepass")).hexdigest()
)
return values
@classmethod
def get_type(cls) -> str:
"""Get Object type."""
return ObjectType.TEXT
def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
"""Get object content."""
metadata_str = self.get_metadata_str(mode=metadata_mode).strip()
if not metadata_str:
return self.text
return self.text_template.format(
content=self.text, metadata_str=metadata_str
).strip()
def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
"""metadata info string."""
if mode == MetadataMode.NONE:
return ""
usable_metadata_keys = set(self.metadata.keys())
if mode == MetadataMode.LLM:
for key in self.excluded_llm_metadata_keys:
if key in usable_metadata_keys:
usable_metadata_keys.remove(key)
elif mode == MetadataMode.EMBED:
for key in self.excluded_embed_metadata_keys:
if key in usable_metadata_keys:
usable_metadata_keys.remove(key)
return self.metadata_seperator.join(
[
self.metadata_template.format(key=key, value=str(value))
for key, value in self.metadata.items()
if key in usable_metadata_keys
]
)
def set_content(self, value: str) -> None:
"""Set the content of the node."""
self.text = value
def get_node_info(self) -> Dict[str, Any]:
"""Get node info."""
return {"start": self.start_char_idx, "end": self.end_char_idx}
def get_text(self) -> str:
return self.get_content(metadata_mode=MetadataMode.NONE)
@property
def node_info(self) -> Dict[str, Any]:
"""Deprecated: Get node info."""
return self.get_node_info()
# TODO: legacy backport of old Node class
Node = TextNode
class ImageNode(TextNode):
"""Node with image."""
# TODO: store reference instead of actual image
# base64 encoded image str
image: Optional[str] = None
@classmethod
def get_type(cls) -> str:
return ObjectType.IMAGE
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "ImageNode"
class IndexNode(TextNode):
"""Node with reference to any object.
This can include other indices, query engines, retrievers.
This can also include other nodes (though this is overlapping with `relationships`
on the Node class).
"""
index_id: str
@classmethod
def from_text_node(
cls,
node: TextNode,
index_id: str,
) -> "IndexNode":
"""Create index node from text node."""
# copy all attributes from text node, add index id
return cls(
**node.dict(),
index_id=index_id,
)
@classmethod
def get_type(cls) -> str:
return ObjectType.INDEX
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "IndexNode"
class NodeWithScore(BaseComponent):
node: BaseNode
score: Optional[float] = None
def __str__(self) -> str:
return f"{self.node}\nScore: {self.score: 0.3f}\n"
def get_score(self, raise_error: bool = False) -> float:
"""Get score."""
if self.score is None:
if raise_error:
raise ValueError("Score not set.")
else:
return 0.0
else:
return self.score
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "NodeWithScore"
##### pass through methods to BaseNode #####
@property
def node_id(self) -> str:
return self.node.node_id
@property
def id_(self) -> str:
return self.node.id_
@property
def text(self) -> str:
if isinstance(self.node, TextNode):
return self.node.text
else:
raise ValueError("Node must be a TextNode to get text.")
@property
def metadata(self) -> Dict[str, Any]:
return self.node.metadata
@property
def embedding(self) -> Optional[List[float]]:
return self.node.embedding
def get_text(self) -> str:
if isinstance(self.node, TextNode):
return self.node.get_text()
else:
raise ValueError("Node must be a TextNode to get text.")
def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
return self.node.get_content(metadata_mode=metadata_mode)
def get_embedding(self) -> List[float]:
return self.node.get_embedding()
# Document Classes for Readers
class Document(TextNode):
"""Generic interface for a data document.
This document connects to data sources.
"""
# TODO: A lot of backwards compatibility logic here, clean up
id_: str = Field(
default_factory=lambda: str(uuid.uuid4()),
description="Unique ID of the node.",
alias="doc_id",
)
_compat_fields = {"doc_id": "id_", "extra_info": "metadata"}
@classmethod
def get_type(cls) -> str:
"""Get Document type."""
return ObjectType.DOCUMENT
@property
def doc_id(self) -> str:
"""Get document ID."""
return self.id_
def __str__(self) -> str:
source_text_truncated = truncate_text(
self.get_content().strip(), TRUNCATE_LENGTH
)
source_text_wrapped = textwrap.fill(
f"Text: {source_text_truncated}\n", width=WRAP_WIDTH
)
return f"Doc ID: {self.doc_id}\n{source_text_wrapped}"
def get_doc_id(self) -> str:
"""TODO: Deprecated: Get document ID."""
return self.id_
def __setattr__(self, name: str, value: object) -> None:
if name in self._compat_fields:
name = self._compat_fields[name]
super().__setattr__(name, value)
def to_langchain_format(self) -> Document:
"""Convert struct to LangChain document format."""
metadata = self.metadata or {}
return Document(page_content=self.text, metadata=metadata)
@classmethod
def from_langchain_format(cls, doc: Document) -> "Document":
"""Convert struct from LangChain document format."""
return cls(text=doc.page_content, metadata=doc.metadata)
@classmethod
def example(cls) -> "Document":
document = Document(
text="",
metadata={"filename": "README.md", "category": "codebase"},
)
return document
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "Document"
class ImageDocument(Document):
"""Data document containing an image."""
# base64 encoded image str
image: Optional[str] = None
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "ImageDocument"

View File

@@ -0,0 +1,44 @@
from abc import ABC, abstractmethod
from enum import Enum
class SearchMode(str, Enum):
"""Query mode enum for Knowledge Graphs.
Can be passed as the enum struct, or as the underlying string.
Attributes:
KEYWORD ("keyword"): Default query mode, using keywords to find triplets.
EMBEDDING ("embedding"): Embedding mode, using embeddings to find
similar triplets.
HYBRID ("hybrid"): Hyrbid mode, combining both keywords and embeddings
to find relevant triplets.
"""
KEYWORD = "keyword"
EMBEDDING = "embedding"
HYBRID = "hybrid"
class BaseSearch(ABC):
"""Base Search."""
async def search(self, query: str):
"""Retrieve nodes given query.
Args:
query (QueryType): Either a query string or
a QueryBundle object.
"""
# if isinstance(query, str):
return await self._search(query)
@abstractmethod
async def _search(self, query: str):
"""search nodes given query.
Implemented by the user.
"""
pass

View File

@@ -320,6 +320,19 @@ class Llama2Adapter(BaseLLMAdaper):
return model, tokenizer
class CodeLlamaAdapter(BaseLLMAdaper):
"""The model adapter for codellama"""
def match(self, model_path: str):
return "codellama" in model_path.lower()
def loader(self, model_path: str, from_pretrained_kwargs: dict):
model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id
return model, tokenizer
class BaichuanAdapter(BaseLLMAdaper):
"""The model adapter for Baichuan models (e.g., baichuan-inc/Baichuan-13B-Chat)"""
@@ -420,6 +433,7 @@ register_llm_model_adapters(FalconAdapater)
register_llm_model_adapters(GorillaAdapter)
register_llm_model_adapters(GPT4AllAdapter)
register_llm_model_adapters(Llama2Adapter)
register_llm_model_adapters(CodeLlamaAdapter)
register_llm_model_adapters(BaichuanAdapter)
register_llm_model_adapters(WizardLMAdapter)
register_llm_model_adapters(LlamaCppAdapater)

View File

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
from enum import Enum
from typing import TypedDict, Optional, Dict, List
from typing import TypedDict, Optional, Dict, List, Any
from dataclasses import dataclass, asdict
from datetime import datetime
from pilot.utils.parameter_utils import ParameterDescription
@@ -52,6 +52,8 @@ class ModelOutput:
text: str
error_code: int
model_context: Dict = None
finish_reason: str = None
usage: Dict[str, Any] = None
def to_dict(self) -> Dict:
return asdict(self)

View File

@@ -8,6 +8,7 @@ from pilot.configs.model_config import LOGDIR
from pilot.model.base import WorkerApplyType
from pilot.model.parameter import (
ModelControllerParameters,
ModelAPIServerParameters,
ModelWorkerParameters,
ModelParameters,
BaseParameters,
@@ -441,15 +442,27 @@ def stop_model_worker(port: int):
@click.command(name="apiserver")
@EnvArgumentParser.create_click_option(ModelAPIServerParameters)
def start_apiserver(**kwargs):
"""Start apiserver(TODO)"""
raise NotImplementedError
"""Start apiserver"""
if kwargs["daemon"]:
log_file = os.path.join(LOGDIR, "model_apiserver_uvicorn.log")
_run_current_with_daemon("ModelAPIServer", log_file)
else:
from pilot.model.cluster import run_apiserver
run_apiserver()
@click.command(name="apiserver")
def stop_apiserver(**kwargs):
"""Start apiserver(TODO)"""
raise NotImplementedError
@add_stop_server_options
def stop_apiserver(port: int):
"""Stop apiserver"""
name = "ModelAPIServer"
if port:
name = f"{name}-{port}"
_stop_service("apiserver", name, port=port)
def _stop_all_model_server(**kwargs):

View File

@@ -21,6 +21,7 @@ from pilot.model.cluster.controller.controller import (
run_model_controller,
BaseModelController,
)
from pilot.model.cluster.apiserver.api import run_apiserver
from pilot.model.cluster.worker.remote_manager import RemoteWorkerManager
@@ -40,4 +41,5 @@ __all__ = [
"ModelRegistryClient",
"RemoteWorkerManager",
"run_model_controller",
"run_apiserver",
]

View File

@@ -0,0 +1,443 @@
"""A server that provides OpenAI-compatible RESTful APIs. It supports:
- Chat Completions. (Reference: https://platform.openai.com/docs/api-reference/chat)
Adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/openai_api_server.py
"""
from typing import Optional, List, Dict, Any, Generator
import logging
import asyncio
import shortuuid
import json
from fastapi import APIRouter, FastAPI
from fastapi import Depends, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseSettings
from fastchat.protocol.openai_api_protocol import (
ChatCompletionResponse,
ChatCompletionResponseStreamChoice,
ChatCompletionStreamResponse,
ChatMessage,
ChatCompletionResponseChoice,
DeltaMessage,
EmbeddingsRequest,
EmbeddingsResponse,
ErrorResponse,
ModelCard,
ModelList,
ModelPermission,
UsageInfo,
)
from fastchat.protocol.api_protocol import (
APIChatCompletionRequest,
APITokenCheckRequest,
APITokenCheckResponse,
APITokenCheckResponseItem,
)
from fastchat.serve.openai_api_server import create_error_response, check_requests
from fastchat.constants import ErrorCode
from pilot.component import BaseComponent, ComponentType, SystemApp
from pilot.utils.parameter_utils import EnvArgumentParser
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
from pilot.model.base import ModelInstance, ModelOutput
from pilot.model.parameter import ModelAPIServerParameters, WorkerType
from pilot.model.cluster import ModelRegistry, ModelRegistryClient
from pilot.model.cluster.manager_base import WorkerManager, WorkerManagerFactory
from pilot.utils.utils import setup_logging
logger = logging.getLogger(__name__)
class APIServerException(Exception):
def __init__(self, code: int, message: str):
self.code = code
self.message = message
class APISettings(BaseSettings):
api_keys: Optional[List[str]] = None
api_settings = APISettings()
get_bearer_token = HTTPBearer(auto_error=False)
async def check_api_key(
auth: Optional[HTTPAuthorizationCredentials] = Depends(get_bearer_token),
) -> str:
if api_settings.api_keys:
if auth is None or (token := auth.credentials) not in api_settings.api_keys:
raise HTTPException(
status_code=401,
detail={
"error": {
"message": "",
"type": "invalid_request_error",
"param": None,
"code": "invalid_api_key",
}
},
)
return token
else:
# api_keys not set; allow all
return None
class APIServer(BaseComponent):
name = ComponentType.MODEL_API_SERVER
def init_app(self, system_app: SystemApp):
self.system_app = system_app
def get_worker_manager(self) -> WorkerManager:
"""Get the worker manager component instance
Raises:
APIServerException: If the worker manager component instance cannot be retrieved
"""
worker_manager = self.system_app.get_component(
ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory
).create()
if not worker_manager:
raise APIServerException(
ErrorCode.INTERNAL_ERROR,
f"Could not get component {ComponentType.WORKER_MANAGER_FACTORY} from system_app",
)
return worker_manager
def get_model_registry(self) -> ModelRegistry:
"""Get the model registry component instance
Raises:
APIServerException: If the model registry component instance cannot be retrieved
"""
controller = self.system_app.get_component(
ComponentType.MODEL_REGISTRY, ModelRegistry
)
if not controller:
raise APIServerException(
ErrorCode.INTERNAL_ERROR,
f"Could not get component {ComponentType.MODEL_REGISTRY} from system_app",
)
return controller
async def get_model_instances_or_raise(
self, model_name: str
) -> List[ModelInstance]:
"""Get healthy model instances with request model name
Args:
model_name (str): Model name
Raises:
APIServerException: If no healthy model instances exist for the requested model name
"""
registry = self.get_model_registry()
registry_model_name = f"{model_name}@llm"
model_instances = await registry.get_all_instances(
registry_model_name, healthy_only=True
)
if not model_instances:
all_instances = await registry.get_all_model_instances(healthy_only=True)
models = [
ins.model_name.split("@llm")[0]
for ins in all_instances
if ins.model_name.endswith("@llm")
]
if models:
models = "&&".join(models)
message = f"Only {models} allowed now, your model {model_name}"
else:
message = f"No models allowed now, your model {model_name}"
raise APIServerException(ErrorCode.INVALID_MODEL, message)
return model_instances
async def get_available_models(self) -> ModelList:
"""Return available models
Just include LLM and embedding models.
Returns:
List[ModelList]: The list of models.
"""
registry = self.get_model_registry()
model_instances = await registry.get_all_model_instances(healthy_only=True)
model_name_set = set()
for inst in model_instances:
name, worker_type = WorkerType.parse_worker_key(inst.model_name)
if worker_type == WorkerType.LLM or worker_type == WorkerType.TEXT2VEC:
model_name_set.add(name)
models = list(model_name_set)
models.sort()
# TODO: return real model permission details
model_cards = []
for m in models:
model_cards.append(
ModelCard(
id=m, root=m, owned_by="DB-GPT", permission=[ModelPermission()]
)
)
return ModelList(data=model_cards)
async def chat_completion_stream_generator(
self, model_name: str, params: Dict[str, Any], n: int
) -> Generator[str, Any, None]:
"""Chat stream completion generator
Args:
model_name (str): Model name
params (Dict[str, Any]): The parameters passed to the model worker
n (int): How many completions to generate for each prompt.
"""
worker_manager = self.get_worker_manager()
id = f"chatcmpl-{shortuuid.random()}"
finish_stream_events = []
for i in range(n):
# First chunk with role
choice_data = ChatCompletionResponseStreamChoice(
index=i,
delta=DeltaMessage(role="assistant"),
finish_reason=None,
)
chunk = ChatCompletionStreamResponse(
id=id, choices=[choice_data], model=model_name
)
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
previous_text = ""
async for model_output in worker_manager.generate_stream(params):
model_output: ModelOutput = model_output
if model_output.error_code != 0:
yield f"data: {json.dumps(model_output.to_dict(), ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
return
decoded_unicode = model_output.text.replace("\ufffd", "")
delta_text = decoded_unicode[len(previous_text) :]
previous_text = (
decoded_unicode
if len(decoded_unicode) > len(previous_text)
else previous_text
)
if len(delta_text) == 0:
delta_text = None
choice_data = ChatCompletionResponseStreamChoice(
index=i,
delta=DeltaMessage(content=delta_text),
finish_reason=model_output.finish_reason,
)
chunk = ChatCompletionStreamResponse(
id=id, choices=[choice_data], model=model_name
)
if delta_text is None:
if model_output.finish_reason is not None:
finish_stream_events.append(chunk)
continue
yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
# There is not "content" field in the last delta message, so exclude_none to exclude field "content".
for finish_chunk in finish_stream_events:
yield f"data: {finish_chunk.json(exclude_none=True, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
async def chat_completion_generate(
self, model_name: str, params: Dict[str, Any], n: int
) -> ChatCompletionResponse:
"""Generate completion
Args:
model_name (str): Model name
params (Dict[str, Any]): The parameters passed to the model worker
n (int): How many completions to generate for each prompt.
"""
worker_manager: WorkerManager = self.get_worker_manager()
choices = []
chat_completions = []
for i in range(n):
model_output = asyncio.create_task(worker_manager.generate(params))
chat_completions.append(model_output)
try:
all_tasks = await asyncio.gather(*chat_completions)
except Exception as e:
return create_error_response(ErrorCode.INTERNAL_ERROR, str(e))
usage = UsageInfo()
for i, model_output in enumerate(all_tasks):
model_output: ModelOutput = model_output
if model_output.error_code != 0:
return create_error_response(model_output.error_code, model_output.text)
choices.append(
ChatCompletionResponseChoice(
index=i,
message=ChatMessage(role="assistant", content=model_output.text),
finish_reason=model_output.finish_reason or "stop",
)
)
if model_output.usage:
task_usage = UsageInfo.parse_obj(model_output.usage)
for usage_key, usage_value in task_usage.dict().items():
setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
return ChatCompletionResponse(model=model_name, choices=choices, usage=usage)
def get_api_server() -> APIServer:
api_server = global_system_app.get_component(
ComponentType.MODEL_API_SERVER, APIServer, default_component=None
)
if not api_server:
global_system_app.register(APIServer)
return global_system_app.get_component(ComponentType.MODEL_API_SERVER, APIServer)
router = APIRouter()
@router.get("/v1/models", dependencies=[Depends(check_api_key)])
async def get_available_models(api_server: APIServer = Depends(get_api_server)):
return await api_server.get_available_models()
@router.post("/v1/chat/completions", dependencies=[Depends(check_api_key)])
async def create_chat_completion(
request: APIChatCompletionRequest, api_server: APIServer = Depends(get_api_server)
):
await api_server.get_model_instances_or_raise(request.model)
error_check_ret = check_requests(request)
if error_check_ret is not None:
return error_check_ret
params = {
"model": request.model,
"messages": ModelMessage.to_dict_list(
ModelMessage.from_openai_messages(request.messages)
),
"echo": False,
}
if request.temperature:
params["temperature"] = request.temperature
if request.top_p:
params["top_p"] = request.top_p
if request.max_tokens:
params["max_new_tokens"] = request.max_tokens
if request.stop:
params["stop"] = request.stop
if request.user:
params["user"] = request.user
# TODO check token length
if request.stream:
generator = api_server.chat_completion_stream_generator(
request.model, params, request.n
)
return StreamingResponse(generator, media_type="text/event-stream")
return await api_server.chat_completion_generate(request.model, params, request.n)
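For illustration, a sketch of calling this endpoint with the pre-1.0 openai client, mirroring the tests later in this commit; the base URL, key, and model name are assumptions.

import openai  # assumes the pre-1.0 openai client used elsewhere in this commit

openai.api_key = "my-secret-key"                    # illustrative
openai.api_base = "http://127.0.0.1:8100/api/v1"    # illustrative host/port

completion = openai.ChatCompletion.create(
    model="vicuna-13b-v1.5",
    messages=[{"role": "user", "content": "Hello"}],
)
print(completion.choices[0].message.content)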
def _initialize_all(controller_addr: str, system_app: SystemApp):
from pilot.model.cluster import RemoteWorkerManager, ModelRegistryClient
from pilot.model.cluster.worker.manager import _DefaultWorkerManagerFactory
if not system_app.get_component(
ComponentType.MODEL_REGISTRY, ModelRegistry, default_component=None
):
# Register model registry if not exist
registry = ModelRegistryClient(controller_addr)
registry.name = ComponentType.MODEL_REGISTRY.value
system_app.register_instance(registry)
registry = system_app.get_component(
ComponentType.MODEL_REGISTRY, ModelRegistry, default_component=None
)
worker_manager = RemoteWorkerManager(registry)
# Register worker manager component if not exist
system_app.get_component(
ComponentType.WORKER_MANAGER_FACTORY,
WorkerManagerFactory,
or_register_component=_DefaultWorkerManagerFactory,
worker_manager=worker_manager,
)
# Register api server component if not exist
system_app.get_component(
ComponentType.MODEL_API_SERVER, APIServer, or_register_component=APIServer
)
def initialize_apiserver(
controller_addr: str,
app=None,
system_app: SystemApp = None,
host: str = None,
port: int = None,
api_keys: List[str] = None,
):
global global_system_app
global api_settings
embedded_mod = True
if not app:
embedded_mod = False
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
allow_headers=["*"],
)
if not system_app:
system_app = SystemApp(app)
global_system_app = system_app
if api_keys:
api_settings.api_keys = api_keys
app.include_router(router, prefix="/api", tags=["APIServer"])
@app.exception_handler(APIServerException)
async def validation_apiserver_exception_handler(request, exc: APIServerException):
return create_error_response(exc.code, exc.message)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):
return create_error_response(ErrorCode.VALIDATION_TYPE_ERROR, str(exc))
_initialize_all(controller_addr, system_app)
if not embedded_mod:
import uvicorn
uvicorn.run(app, host=host, port=port, log_level="info")
def run_apiserver():
parser = EnvArgumentParser()
env_prefix = "apiserver_"
apiserver_params: ModelAPIServerParameters = parser.parse_args_into_dataclass(
ModelAPIServerParameters,
env_prefixes=[env_prefix],
)
setup_logging(
"pilot",
logging_level=apiserver_params.log_level,
logger_filename=apiserver_params.log_file,
)
api_keys = None
if apiserver_params.api_keys:
api_keys = apiserver_params.api_keys.strip().split(",")
initialize_apiserver(
apiserver_params.controller_addr,
host=apiserver_params.host,
port=apiserver_params.port,
api_keys=api_keys,
)
if __name__ == "__main__":
run_apiserver()
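As a hedged illustration only: run_apiserver reads its parameters with the "apiserver_" env prefix, so a configuration sketch might look like the following. The field names come from ModelAPIServerParameters later in this commit; the exact environment-variable casing expected by EnvArgumentParser is an assumption.

import os

os.environ["apiserver_host"] = "0.0.0.0"              # casing is an assumption
os.environ["apiserver_port"] = "8100"
os.environ["apiserver_controller_addr"] = "http://127.0.0.1:8000"
os.environ["apiserver_api_keys"] = "key1,key2"        # comma-separated, as split in run_apiserver

run_apiserver()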

View File

@@ -0,0 +1,248 @@
import pytest
import pytest_asyncio
from aioresponses import aioresponses
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from httpx import AsyncClient, HTTPError
from pilot.component import SystemApp
from pilot.utils.openai_utils import chat_completion_stream, chat_completion
from pilot.model.cluster.apiserver.api import (
api_settings,
initialize_apiserver,
ModelList,
UsageInfo,
ChatCompletionResponse,
ChatCompletionResponseStreamChoice,
ChatCompletionStreamResponse,
ChatMessage,
ChatCompletionResponseChoice,
DeltaMessage,
)
from pilot.model.cluster.tests.conftest import _new_cluster
from pilot.model.cluster.worker.manager import _DefaultWorkerManagerFactory
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
allow_headers=["*"],
)
@pytest_asyncio.fixture
async def system_app():
return SystemApp(app)
@pytest_asyncio.fixture
async def client(request, system_app: SystemApp):
param = getattr(request, "param", {})
api_keys = param.get("api_keys", [])
client_api_key = param.get("client_api_key")
if "num_workers" not in param:
param["num_workers"] = 2
if "api_keys" in param:
del param["api_keys"]
headers = {}
if client_api_key:
headers["Authorization"] = "Bearer " + client_api_key
print(f"param: {param}")
if api_settings:
# Clear global api keys
api_settings.api_keys = []
async with AsyncClient(app=app, base_url="http://test", headers=headers) as client:
async with _new_cluster(**param) as cluster:
worker_manager, model_registry = cluster
system_app.register(_DefaultWorkerManagerFactory, worker_manager)
system_app.register_instance(model_registry)
# print(f"Instances {model_registry.registry}")
initialize_apiserver(None, app, system_app, api_keys=api_keys)
yield client
@pytest.mark.asyncio
async def test_get_all_models(client: AsyncClient):
res = await client.get("/api/v1/models")
assert res.status_code == 200
model_lists = ModelList.parse_obj(res.json())
print(f"model list json: {res.json()}")
assert model_lists.object == "list"
assert len(model_lists.data) == 2
@pytest.mark.asyncio
@pytest.mark.parametrize(
"client, expected_messages",
[
({"stream_messags": ["Hello", " world."]}, "Hello world."),
({"stream_messags": ["你好,我是", "张三。"]}, "你好,我是张三。"),
],
indirect=["client"],
)
async def test_chat_completions(client: AsyncClient, expected_messages):
chat_data = {
"model": "test-model-name-0",
"messages": [{"role": "user", "content": "Hello"}],
"stream": True,
}
full_text = ""
async for text in chat_completion_stream(
"/api/v1/chat/completions", chat_data, client
):
full_text += text
assert full_text == expected_messages
assert (
await chat_completion("/api/v1/chat/completions", chat_data, client)
== expected_messages
)
@pytest.mark.asyncio
@pytest.mark.parametrize(
"client, expected_messages, client_api_key",
[
(
{"stream_messags": ["Hello", " world."], "api_keys": ["abc"]},
"Hello world.",
"abc",
),
({"stream_messags": ["你好,我是", "张三。"], "api_keys": ["abc"]}, "你好,我是张三。", "abc"),
],
indirect=["client"],
)
async def test_chat_completions_with_openai_lib_async_no_stream(
client: AsyncClient, expected_messages: str, client_api_key: str
):
import openai
openai.api_key = client_api_key
openai.api_base = "http://test/api/v1"
model_name = "test-model-name-0"
with aioresponses() as mocked:
mock_message = {"text": expected_messages}
one_res = ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role="assistant", content=expected_messages),
finish_reason="stop",
)
data = ChatCompletionResponse(
model=model_name, choices=[one_res], usage=UsageInfo()
)
mock_message = f"{data.json(exclude_unset=True, ensure_ascii=False)}\n\n"
# Mock http request
mocked.post(
"http://test/api/v1/chat/completions", status=200, body=mock_message
)
completion = await openai.ChatCompletion.acreate(
model=model_name,
messages=[{"role": "user", "content": "Hello! What is your name?"}],
)
assert completion.choices[0].message.content == expected_messages
@pytest.mark.asyncio
@pytest.mark.parametrize(
"client, expected_messages, client_api_key",
[
(
{"stream_messags": ["Hello", " world."], "api_keys": ["abc"]},
"Hello world.",
"abc",
),
({"stream_messags": ["你好,我是", "张三。"], "api_keys": ["abc"]}, "你好,我是张三。", "abc"),
],
indirect=["client"],
)
async def test_chat_completions_with_openai_lib_async_stream(
client: AsyncClient, expected_messages: str, client_api_key: str
):
import openai
openai.api_key = client_api_key
openai.api_base = "http://test/api/v1"
model_name = "test-model-name-0"
with aioresponses() as mocked:
mock_message = {"text": expected_messages}
choice_data = ChatCompletionResponseStreamChoice(
index=0,
delta=DeltaMessage(content=expected_messages),
finish_reason="stop",
)
chunk = ChatCompletionStreamResponse(
id=0, choices=[choice_data], model=model_name
)
mock_message = f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
mocked.post(
"http://test/api/v1/chat/completions",
status=200,
body=mock_message,
content_type="text/event-stream",
)
stream_stream_resp = ""
async for stream_resp in await openai.ChatCompletion.acreate(
model=model_name,
messages=[{"role": "user", "content": "Hello! What is your name?"}],
stream=True,
):
stream_stream_resp = stream_resp.choices[0]["delta"].get("content", "")
assert stream_stream_resp == expected_messages
@pytest.mark.asyncio
@pytest.mark.parametrize(
"client, expected_messages, api_key_is_error",
[
(
{
"stream_messags": ["Hello", " world."],
"api_keys": ["abc", "xx"],
"client_api_key": "abc",
},
"Hello world.",
False,
),
({"stream_messags": ["你好,我是", "张三。"]}, "你好,我是张三。", False),
(
{"stream_messags": ["你好,我是", "张三。"], "api_keys": ["abc", "xx"]},
"你好,我是张三。",
True,
),
(
{
"stream_messags": ["你好,我是", "张三。"],
"api_keys": ["abc", "xx"],
"client_api_key": "error_api_key",
},
"你好,我是张三。",
True,
),
],
indirect=["client"],
)
async def test_chat_completions_with_api_keys(
client: AsyncClient, expected_messages: str, api_key_is_error: bool
):
chat_data = {
"model": "test-model-name-0",
"messages": [{"role": "user", "content": "Hello"}],
"stream": True,
}
if api_key_is_error:
with pytest.raises(HTTPError):
await chat_completion("/api/v1/chat/completions", chat_data, client)
else:
assert (
await chat_completion("/api/v1/chat/completions", chat_data, client)
== expected_messages
)

View File

@@ -13,7 +13,7 @@ from pilot.utils.api_utils import (
_api_remote as api_remote,
_sync_api_remote as sync_api_remote,
)
from pilot.utils.utils import setup_logging
from pilot.utils.utils import setup_logging, setup_http_service_logging
logger = logging.getLogger(__name__)
@@ -66,7 +66,9 @@ class LocalModelController(BaseModelController):
f"Get all instances with {model_name}, healthy_only: {healthy_only}"
)
if not model_name:
return await self.registry.get_all_model_instances()
return await self.registry.get_all_model_instances(
healthy_only=healthy_only
)
else:
return await self.registry.get_all_instances(model_name, healthy_only)
@@ -98,8 +100,10 @@ class _RemoteModelController(BaseModelController):
class ModelRegistryClient(_RemoteModelController, ModelRegistry):
async def get_all_model_instances(self) -> List[ModelInstance]:
return await self.get_all_instances()
async def get_all_model_instances(
self, healthy_only: bool = False
) -> List[ModelInstance]:
return await self.get_all_instances(healthy_only=healthy_only)
@sync_api_remote(path="/api/controller/models")
def sync_get_all_instances(
@@ -149,6 +153,7 @@ def initialize_controller(
else:
import uvicorn
setup_http_service_logging()
app = FastAPI()
app.include_router(router, prefix="/api", tags=["Model"])
uvicorn.run(app, host=host, port=port, log_level="info")
@@ -179,7 +184,8 @@ def run_model_controller():
parser = EnvArgumentParser()
env_prefix = "controller_"
controller_params: ModelControllerParameters = parser.parse_args_into_dataclass(
ModelControllerParameters, env_prefix=env_prefix
ModelControllerParameters,
env_prefixes=[env_prefix],
)
setup_logging(

View File

@@ -1,22 +1,37 @@
import random
import threading
import time
import logging
from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple
import itertools
from pilot.component import BaseComponent, ComponentType, SystemApp
from pilot.model.base import ModelInstance
class ModelRegistry(ABC):
logger = logging.getLogger(__name__)
class ModelRegistry(BaseComponent, ABC):
"""
Abstract base class for a model registry. It provides an interface
for registering, deregistering, fetching instances, and sending heartbeats
for instances.
"""
name = ComponentType.MODEL_REGISTRY
def __init__(self, system_app: SystemApp | None = None):
self.system_app = system_app
super().__init__(system_app)
def init_app(self, system_app: SystemApp):
"""Initialize the component with the main application."""
self.system_app = system_app
@abstractmethod
async def register_instance(self, instance: ModelInstance) -> bool:
"""
@@ -65,9 +80,11 @@ class ModelRegistry(ABC):
"""Fetch all instances of a given model. Optionally, fetch only the healthy instances."""
@abstractmethod
async def get_all_model_instances(self) -> List[ModelInstance]:
async def get_all_model_instances(
self, healthy_only: bool = False
) -> List[ModelInstance]:
"""
Fetch all instances of all models
Fetch all instances of all models. Optionally, fetch only the healthy instances.
Returns:
- List[ModelInstance]: A list of instances across all models.
@@ -105,8 +122,12 @@ class ModelRegistry(ABC):
class EmbeddedModelRegistry(ModelRegistry):
def __init__(
self, heartbeat_interval_secs: int = 60, heartbeat_timeout_secs: int = 120
self,
system_app: SystemApp | None = None,
heartbeat_interval_secs: int = 60,
heartbeat_timeout_secs: int = 120,
):
super().__init__(system_app)
self.registry: Dict[str, List[ModelInstance]] = defaultdict(list)
self.heartbeat_interval_secs = heartbeat_interval_secs
self.heartbeat_timeout_secs = heartbeat_timeout_secs
@@ -180,9 +201,14 @@ class EmbeddedModelRegistry(ModelRegistry):
instances = [ins for ins in instances if ins.healthy == True]
return instances
async def get_all_model_instances(self) -> List[ModelInstance]:
print(self.registry)
return list(itertools.chain(*self.registry.values()))
async def get_all_model_instances(
self, healthy_only: bool = False
) -> List[ModelInstance]:
logger.debug("Current registry metadata:\n{self.registry}")
instances = list(itertools.chain(*self.registry.values()))
if healthy_only:
instances = [ins for ins in instances if ins.healthy == True]
return instances
async def send_heartbeat(self, instance: ModelInstance) -> bool:
_, exist_ins = self._get_instances(

View File

View File

@@ -6,6 +6,7 @@ from pilot.model.parameter import ModelParameters, ModelWorkerParameters, Worker
from pilot.model.base import ModelOutput
from pilot.model.cluster.worker_base import ModelWorker
from pilot.model.cluster.worker.manager import (
WorkerManager,
LocalWorkerManager,
RegisterFunc,
DeregisterFunc,
@@ -13,6 +14,23 @@ from pilot.model.cluster.worker.manager import (
ApplyFunction,
)
from pilot.model.base import ModelInstance
from pilot.model.cluster.registry import ModelRegistry, EmbeddedModelRegistry
@pytest.fixture
def model_registry(request):
return EmbeddedModelRegistry()
@pytest.fixture
def model_instance():
return ModelInstance(
model_name="test_model",
host="192.168.1.1",
port=5000,
)
class MockModelWorker(ModelWorker):
def __init__(
@@ -51,8 +69,10 @@ class MockModelWorker(ModelWorker):
raise Exception("Stop worker error for mock")
def generate_stream(self, params: Dict) -> Iterator[ModelOutput]:
full_text = ""
for msg in self.stream_messags:
yield ModelOutput(text=msg, error_code=0)
full_text += msg
yield ModelOutput(text=full_text, error_code=0)
def generate(self, params: Dict) -> ModelOutput:
output = None
@@ -67,6 +87,8 @@ class MockModelWorker(ModelWorker):
_TEST_MODEL_NAME = "vicuna-13b-v1.5"
_TEST_MODEL_PATH = "/app/models/vicuna-13b-v1.5"
ClusterType = Tuple[WorkerManager, ModelRegistry]
def _new_worker_params(
model_name: str = _TEST_MODEL_NAME,
@@ -85,7 +107,9 @@ def _create_workers(
worker_type: str = WorkerType.LLM.value,
stream_messags: List[str] = None,
embeddings: List[List[float]] = None,
) -> List[Tuple[ModelWorker, ModelWorkerParameters]]:
host: str = "127.0.0.1",
start_port=8001,
) -> List[Tuple[ModelWorker, ModelWorkerParameters, ModelInstance]]:
workers = []
for i in range(num_workers):
model_name = f"test-model-name-{i}"
@@ -98,10 +122,16 @@ def _create_workers(
stream_messags=stream_messags,
embeddings=embeddings,
)
model_instance = ModelInstance(
model_name=WorkerType.to_worker_key(model_name, worker_type),
host=host,
port=start_port + i,
healthy=True,
)
worker_params = _new_worker_params(
model_name, model_path, worker_type=worker_type
)
workers.append((worker, worker_params))
workers.append((worker, worker_params, model_instance))
return workers
@@ -127,12 +157,12 @@ async def _start_worker_manager(**kwargs):
model_registry=model_registry,
)
for worker, worker_params in _create_workers(
for worker, worker_params, model_instance in _create_workers(
num_workers, error_worker, stop_error, stream_messags, embeddings
):
worker_manager.add_worker(worker, worker_params)
if workers:
for worker, worker_params in workers:
for worker, worker_params, model_instance in workers:
worker_manager.add_worker(worker, worker_params)
if start:
@@ -143,6 +173,15 @@ async def _start_worker_manager(**kwargs):
await worker_manager.stop()
async def _create_model_registry(
workers: List[Tuple[ModelWorker, ModelWorkerParameters, ModelInstance]]
) -> ModelRegistry:
registry = EmbeddedModelRegistry()
for _, _, inst in workers:
assert await registry.register_instance(inst) == True
return registry
@pytest_asyncio.fixture
async def manager_2_workers(request):
param = getattr(request, "param", {})
@@ -166,3 +205,27 @@ async def manager_2_embedding_workers(request):
)
async with _start_worker_manager(workers=workers, **param) as worker_manager:
yield (worker_manager, workers)
@asynccontextmanager
async def _new_cluster(**kwargs) -> ClusterType:
num_workers = kwargs.get("num_workers", 0)
workers = _create_workers(
num_workers, stream_messags=kwargs.get("stream_messags", [])
)
if "num_workers" in kwargs:
del kwargs["num_workers"]
registry = await _create_model_registry(
workers,
)
async with _start_worker_manager(workers=workers, **kwargs) as worker_manager:
yield (worker_manager, registry)
@pytest_asyncio.fixture
async def cluster_2_workers(request):
param = getattr(request, "param", {})
workers = _create_workers(2)
registry = await _create_model_registry(workers)
async with _start_worker_manager(workers=workers, **param) as worker_manager:
yield (worker_manager, registry)

View File

@@ -76,7 +76,7 @@ class DefaultModelWorker(ModelWorker):
model_type = self.llm_adapter.model_type()
model_params: ModelParameters = model_args.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix, "LLM_"],
command_args=command_args,
model_name=self.model_name,
model_path=self.model_path,
@@ -256,15 +256,22 @@ class DefaultModelWorker(ModelWorker):
return params, model_context, generate_stream_func, model_span
def _handle_output(self, output, previous_response, model_context):
finish_reason = None
usage = None
if isinstance(output, dict):
finish_reason = output.get("finish_reason")
usage = output.get("usage")
output = output["text"]
if finish_reason is not None:
logger.info(f"finish_reason: {finish_reason}")
incremental_output = output[len(previous_response) :]
print(incremental_output, end="", flush=True)
model_output = ModelOutput(
text=output, error_code=0, model_context=model_context
text=output,
error_code=0,
model_context=model_context,
finish_reason=finish_reason,
usage=usage,
)
return model_output, incremental_output, output

View File

@@ -106,7 +106,7 @@ def _parse_embedding_params(
env_prefix = EnvArgumentParser.get_env_prefix(model_name)
model_params: BaseEmbeddingModelParameters = model_args.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
command_args=command_args,
model_name=model_name,
model_path=model_path,

View File

@@ -38,7 +38,7 @@ from pilot.utils.parameter_utils import (
_dict_to_command_args,
_get_dict_from_obj,
)
from pilot.utils.utils import setup_logging
from pilot.utils.utils import setup_logging, setup_http_service_logging
from pilot.utils.tracer import initialize_tracer, root_tracer, SpanType, SpanTypeRunName
from pilot.utils.system_utils import get_system_info
@@ -99,9 +99,7 @@ class LocalWorkerManager(WorkerManager):
)
def _worker_key(self, worker_type: str, model_name: str) -> str:
if isinstance(worker_type, WorkerType):
worker_type = worker_type.value
return f"{model_name}@{worker_type}"
return WorkerType.to_worker_key(model_name, worker_type)
async def run_blocking_func(self, func, *args):
if asyncio.iscoroutinefunction(func):
@@ -735,6 +733,8 @@ def _setup_fastapi(
):
if not app:
app = FastAPI()
setup_http_service_logging()
if worker_params.standalone:
from pilot.model.cluster.controller.controller import initialize_controller
from pilot.model.cluster.controller.controller import (
@@ -781,7 +781,7 @@ def _parse_worker_params(
env_prefix = EnvArgumentParser.get_env_prefix(model_name)
worker_params: ModelWorkerParameters = worker_args.parse_args_into_dataclass(
ModelWorkerParameters,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
model_name=model_name,
model_path=model_path,
**kwargs,
@@ -790,7 +790,7 @@ def _parse_worker_params(
# Read parameters again with the model name prefix.
new_worker_params = worker_args.parse_args_into_dataclass(
ModelWorkerParameters,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
model_name=worker_params.model_name,
model_path=worker_params.model_path,
**kwargs,
@@ -1021,6 +1021,7 @@ def run_worker_manager(
system_app,
os.path.join(LOGDIR, worker_params.tracer_file),
root_operation_name="DB-GPT-WorkerManager-Entry",
tracer_storage_cls=worker_params.tracer_storage_cls,
)
_start_local_worker(worker_manager, worker_params)

View File

@@ -13,7 +13,7 @@ class RemoteModelWorker(ModelWorker):
def __init__(self) -> None:
self.headers = {}
# TODO Configured by ModelParameters
self.timeout = 180
self.timeout = 360
self.host = None
self.port = None

View File

@@ -3,7 +3,7 @@ import pytest
from typing import List, Iterator, Dict, Tuple
from dataclasses import asdict
from pilot.model.parameter import ModelParameters, ModelWorkerParameters, WorkerType
from pilot.model.base import ModelOutput, WorkerApplyType
from pilot.model.base import ModelOutput, WorkerApplyType, ModelInstance
from pilot.model.cluster.base import WorkerApplyRequest, WorkerStartupRequest
from pilot.model.cluster.worker_base import ModelWorker
from pilot.model.cluster.manager_base import WorkerRunData
@@ -14,7 +14,7 @@ from pilot.model.cluster.worker.manager import (
SendHeartbeatFunc,
ApplyFunction,
)
from pilot.model.cluster.worker.tests.base_tests import (
from pilot.model.cluster.tests.conftest import (
MockModelWorker,
manager_2_workers,
manager_with_2_workers,
@@ -216,7 +216,7 @@ async def test__remove_worker():
workers = _create_workers(3)
async with _start_worker_manager(workers=workers, stop=False) as manager:
assert len(manager.workers) == 3
for _, worker_params in workers:
for _, worker_params, _ in workers:
manager._remove_worker(worker_params)
not_exist_parmas = _new_worker_params(
model_name="this is a not exist worker params"
@@ -229,7 +229,7 @@ async def test__remove_worker():
async def test_model_startup(mock_build_worker):
async with _start_worker_manager() as manager:
workers = _create_workers(1)
worker, worker_params = workers[0]
worker, worker_params, model_instance = workers[0]
mock_build_worker.return_value = worker
req = WorkerStartupRequest(
@@ -245,7 +245,7 @@ async def test_model_startup(mock_build_worker):
async with _start_worker_manager() as manager:
workers = _create_workers(1, error_worker=True)
worker, worker_params = workers[0]
worker, worker_params, model_instance = workers[0]
mock_build_worker.return_value = worker
req = WorkerStartupRequest(
host="127.0.0.1",
@@ -263,7 +263,7 @@ async def test_model_startup(mock_build_worker):
async def test_model_shutdown(mock_build_worker):
async with _start_worker_manager(start=False, stop=False) as manager:
workers = _create_workers(1)
worker, worker_params = workers[0]
worker, worker_params, model_instance = workers[0]
mock_build_worker.return_value = worker
req = WorkerStartupRequest(
@@ -298,7 +298,7 @@ async def test_get_model_instances(is_async):
workers = _create_workers(3)
async with _start_worker_manager(workers=workers, stop=False) as manager:
assert len(manager.workers) == 3
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
if is_async:
@@ -326,7 +326,7 @@ async def test__simple_select(
]
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
instances = await manager.get_model_instances(worker_type, model_name)
@@ -351,7 +351,7 @@ async def test_select_one_instance(
],
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
if is_async:
@@ -376,7 +376,7 @@ async def test__get_model(
],
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
params = {"model": model_name}
@@ -403,13 +403,13 @@ async def test_generate_stream(
expected_messages: str,
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
params = {"model": model_name}
text = ""
async for out in manager.generate_stream(params):
text += out.text
text = out.text
assert text == expected_messages
@@ -417,8 +417,8 @@ async def test_generate_stream(
@pytest.mark.parametrize(
"manager_with_2_workers, expected_messages",
[
({"stream_messags": ["Hello", " world."]}, " world."),
({"stream_messags": ["你好,我是", "张三。"]}, "张三。"),
({"stream_messags": ["Hello", " world."]}, "Hello world."),
({"stream_messags": ["你好,我是", "张三。"]}, "你好,我是张三。"),
],
indirect=["manager_with_2_workers"],
)
@@ -429,7 +429,7 @@ async def test_generate(
expected_messages: str,
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
params = {"model": model_name}
@@ -454,7 +454,7 @@ async def test_embeddings(
is_async: bool,
):
manager, workers = manager_2_embedding_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
params = {"model": model_name, "input": ["hello", "world"]}
@@ -472,7 +472,7 @@ async def test_parameter_descriptions(
]
):
manager, workers = manager_with_2_workers
for _, worker_params in workers:
for _, worker_params, _ in workers:
model_name = worker_params.model_name
worker_type = worker_params.worker_type
params = await manager.parameter_descriptions(worker_type, model_name)

View File

@@ -339,6 +339,27 @@ register_conv_template(
)
)
# codellama template
# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
# reference2 : https://github.com/eosphoros-ai/DB-GPT-Hub/blob/main/README.zh.md
register_conv_template(
Conversation(
name="codellama",
system="<s>[INST] <<SYS>>\nI want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request."
"If you don't know the answer to the request, please don't share false information.\n<</SYS>>\n\n",
roles=("[INST]", "[/INST]"),
messages=(),
offset=0,
sep_style=SeparatorStyle.LLAMA2,
sep=" ",
sep2=" </s><s>",
stop_token_ids=[2],
system_formatter=lambda msg: f"<s>[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
)
)
# Alpaca default template
register_conv_template(
Conversation(

View File

@@ -95,7 +95,7 @@ class ModelLoader:
env_prefix = env_prefix.replace("-", "_")
model_params = args_parser.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
device=self.device,
model_path=self.model_path,
model_name=self.model_name,

View File

@@ -45,6 +45,10 @@ _OLD_MODELS = [
"llama-cpp",
"proxyllm",
"gptj-6b",
"codellama-13b-sql-sft",
"codellama-7b",
"codellama-7b-sql-sft",
"codellama-13b",
]
@@ -148,8 +152,12 @@ class LLMModelAdaper:
conv.append_message(conv.roles[1], content)
else:
raise ValueError(f"Unknown role: {role}")
if system_messages:
conv.set_system_message("".join(system_messages))
if isinstance(conv, Conversation):
conv.set_system_message("".join(system_messages))
else:
conv.update_system_message("".join(system_messages))
# Add a blank message for the assistant.
conv.append_message(conv.roles[1], None)
@@ -445,17 +453,50 @@ class VLLMModelAdaperWrapper(LLMModelAdaper):
# This overrides fastchat's configuration; we will regularly feed the changes here back to fastchat.
# We also recommend that you modify it directly in the fastchat repository.
# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L212
register_conv_template(
Conversation(
name="internlm-chat",
system_message="A chat between a curious <|User|> and an <|Bot|>. The <|Bot|> gives helpful, detailed, and polite answers to the <|User|>'s questions.\n\n",
roles=("<|User|>", "<|Bot|>"),
sep_style=SeparatorStyle.CHATINTERN,
sep="<eoh>",
sep2="<eoa>",
stop_token_ids=[1, 103028],
# TODO feedback stop_str to fastchat
stop_str="<eoa>",
name="aquila-legacy",
system_message="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
roles=("### Human: ", "### Assistant: ", "System"),
messages=(),
offset=0,
sep_style=SeparatorStyle.NO_COLON_TWO,
sep="\n",
sep2="</s>",
stop_str=["</s>", "[UNK]"],
),
override=True,
)
# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L227
register_conv_template(
Conversation(
name="aquila",
system_message="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant", "System"),
messages=(),
offset=0,
sep_style=SeparatorStyle.ADD_COLON_TWO,
sep="###",
sep2="</s>",
stop_str=["</s>", "[UNK]"],
),
override=True,
)
# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L242
register_conv_template(
Conversation(
name="aquila-v1",
roles=("<|startofpiece|>", "<|endofpiece|>", ""),
messages=(),
offset=0,
sep_style=SeparatorStyle.NO_COLON_TWO,
sep="",
sep2="</s>",
stop_str=["</s>", "<|endoftext|>"],
),
override=True,
)

View File

@@ -1,9 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Optional
from typing import Dict, Optional, Union, Tuple
from pilot.model.conversation import conv_templates
from pilot.utils.parameter_utils import BaseParameters
@@ -19,6 +20,35 @@ class WorkerType(str, Enum):
def values():
return [item.value for item in WorkerType]
@staticmethod
def to_worker_key(worker_name, worker_type: Union[str, "WorkerType"]) -> str:
"""Generate worker key from worker name and worker type
Args:
worker_name (str): Worker name (e.g., chatglm2-6b)
worker_type (Union[str, "WorkerType"]): Worker type (e.g., 'llm' or [`WorkerType.LLM`])
Returns:
str: Generated worker key
"""
if "@" in worker_name:
raise ValueError(f"Invaild symbol '@' in your worker name {worker_name}")
if isinstance(worker_type, WorkerType):
worker_type = worker_type.value
return f"{worker_name}@{worker_type}"
@staticmethod
def parse_worker_key(worker_key: str) -> Tuple[str, str]:
"""Parse worker name and worker type from worker key
Args:
worker_key (str): Worker key generated by [`WorkerType.to_worker_key`]
Returns:
Tuple[str, str]: Worker name and worker type
"""
return tuple(worker_key.split("@"))
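A minimal usage sketch of the two helpers above; the model name is a placeholder.

key = WorkerType.to_worker_key("chatglm2-6b", WorkerType.LLM)
# key == "chatglm2-6b@llm"
name, worker_type = WorkerType.parse_worker_key(key)
# name == "chatglm2-6b", worker_type == "llm"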
@dataclass
class ModelControllerParameters(BaseParameters):
@@ -58,6 +88,68 @@ class ModelControllerParameters(BaseParameters):
"help": "The filename to store tracer span records",
},
)
tracer_storage_cls: Optional[str] = field(
default=None,
metadata={
"help": "The storage class to storage tracer span records",
},
)
@dataclass
class ModelAPIServerParameters(BaseParameters):
host: Optional[str] = field(
default="0.0.0.0", metadata={"help": "Model API server deploy host"}
)
port: Optional[int] = field(
default=8100, metadata={"help": "Model API server deploy port"}
)
daemon: Optional[bool] = field(
default=False, metadata={"help": "Run Model API server in background"}
)
controller_addr: Optional[str] = field(
default="http://127.0.0.1:8000",
metadata={"help": "The Model controller address to connect"},
)
api_keys: Optional[str] = field(
default=None,
metadata={"help": "Optional list of comma separated API keys"},
)
log_level: Optional[str] = field(
default=None,
metadata={
"help": "Logging level",
"valid_values": [
"FATAL",
"ERROR",
"WARNING",
"WARNING",
"INFO",
"DEBUG",
"NOTSET",
],
},
)
log_file: Optional[str] = field(
default="dbgpt_model_apiserver.log",
metadata={
"help": "The filename to store log",
},
)
tracer_file: Optional[str] = field(
default="dbgpt_model_apiserver_tracer.jsonl",
metadata={
"help": "The filename to store tracer span records",
},
)
tracer_storage_cls: Optional[str] = field(
default=None,
metadata={
"help": "The storage class to storage tracer span records",
},
)
@dataclass
@@ -146,6 +238,12 @@ class ModelWorkerParameters(BaseModelParameters):
"help": "The filename to store tracer span records",
},
)
tracer_storage_cls: Optional[str] = field(
default=None,
metadata={
"help": "The storage class to storage tracer span records",
},
)
@dataclass

View File

@@ -5,8 +5,6 @@ import os
from typing import List
import logging
import openai
from pilot.model.proxy.llms.proxy_model import ProxyModel
from pilot.model.parameter import ProxyModelParameters
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
@@ -15,6 +13,14 @@ logger = logging.getLogger(__name__)
def _initialize_openai(params: ProxyModelParameters):
try:
import openai
except ImportError as exc:
raise ValueError(
"Could not import python package: openai "
"Please install openai by command `pip install openai` "
) from exc
api_type = params.proxy_api_type or os.getenv("OPENAI_API_TYPE", "open_ai")
api_base = params.proxy_api_base or os.getenv(
@@ -106,6 +112,8 @@ def _build_request(model: ProxyModel, params):
def chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
import openai
history, payloads = _build_request(model, params)
res = openai.ChatCompletion.create(messages=history, **payloads)
@@ -121,6 +129,8 @@ def chatgpt_generate_stream(
async def async_chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
import openai
history, payloads = _build_request(model, params)
res = await openai.ChatCompletion.acreate(messages=history, **payloads)

View File

@@ -172,7 +172,7 @@ async def test_connect(db_config: DBConfig = Body()):
CFG.LOCAL_DB_MANAGE.test_connect(db_config)
return Result.succ(True)
except Exception as e:
return Result.faild(code="E1001", msg=str(e))
return Result.failed(code="E1001", msg=str(e))
@router.post("/v1/chat/db/summary", response_model=Result[bool])
@@ -305,7 +305,7 @@ async def params_load(
return Result.succ(get_hist_messages(conv_uid))
except Exception as e:
logger.error("excel load error!", e)
return Result.faild(code="E000X", msg=f"File Load Error {e}")
return Result.failed(code="E000X", msg=f"File Load Error {e}")
@router.post("/v1/chat/dialogue/delete")
@@ -352,7 +352,7 @@ async def get_chat_instance(dialogue: ConversationVo = Body()) -> BaseChat:
if not ChatScene.is_valid_mode(dialogue.chat_mode):
raise StopAsyncIteration(
Result.faild("Unsupported Chat Mode," + dialogue.chat_mode + "!")
Result.failed("Unsupported Chat Mode," + dialogue.chat_mode + "!")
)
chat_param = {
@@ -430,7 +430,7 @@ async def model_types(controller: BaseModelController = Depends(get_model_contro
return Result.succ(list(types))
except Exception as e:
return Result.faild(code="E000X", msg=f"controller model types error {e}")
return Result.failed(code="E000X", msg=f"controller model types error {e}")
@router.get("/v1/model/supports")
@@ -440,7 +440,7 @@ async def model_supports(worker_manager: WorkerManager = Depends(get_worker_mana
models = await worker_manager.supported_models()
return Result.succ(FlatSupportedModel.from_supports(models))
except Exception as e:
return Result.faild(code="E000X", msg=f"Fetch supportd models error {e}")
return Result.failed(code="E000X", msg=f"Fetch supportd models error {e}")
async def no_stream_generator(chat):

View File

@@ -107,7 +107,7 @@ async def get_editor_sql(con_uid: str, round: int):
.replace("\n", " ")
)
return Result.succ(json.loads(context))
return Result.faild(msg="not have sql!")
return Result.failed(msg="not have sql!")
@router.post("/v1/editor/sql/run", response_model=Result[SqlRunData])
@@ -116,7 +116,7 @@ async def editor_sql_run(run_param: dict = Body()):
db_name = run_param["db_name"]
sql = run_param["sql"]
if not db_name and not sql:
return Result.faild("SQL run param error")
return Result.failed("SQL run param error")
conn = CFG.LOCAL_DB_MANAGE.get_connect(db_name)
try:
@@ -170,7 +170,7 @@ async def sql_editor_submit(sql_edit_context: ChatSqlEditContext = Body()):
)
history_mem.update(history_messages)
return Result.succ(None)
return Result.faild(msg="Edit Faild!")
return Result.failed(msg="Edit Failed!")
@router.get("/v1/editor/chart/list", response_model=Result[ChartList])
@@ -192,7 +192,7 @@ async def get_editor_chart_list(con_uid: str):
charts=json.loads(element["data"]["content"]),
)
return Result.succ(chart_list)
return Result.faild(msg="Not have charts!")
return Result.failed(msg="Not have charts!")
@router.post("/v1/editor/chart/info", response_model=Result[ChartDetail])
@@ -211,7 +211,7 @@ async def get_editor_chart_info(param: dict = Body()):
logger.error(
"this dashboard dialogue version too old, can't support editor!"
)
return Result.faild(
return Result.failed(
msg="this dashboard dialogue version too old, can't support editor!"
)
for element in last_round["messages"]:
@@ -235,7 +235,7 @@ async def get_editor_chart_info(param: dict = Body()):
)
return Result.succ(detail)
return Result.faild(msg="Can't Find Chart Detail Info!")
return Result.failed(msg="Can't Find Chart Detail Info!")
@router.post("/v1/editor/chart/run", response_model=Result[ChartRunData])
@@ -245,7 +245,7 @@ async def editor_chart_run(run_param: dict = Body()):
sql = run_param["sql"]
chart_type = run_param["chart_type"]
if not db_name and not sql:
return Result.faild("SQL run param error")
return Result.failed("SQL run param error")
try:
dashboard_data_loader: DashboardDataLoader = DashboardDataLoader()
db_conn = CFG.LOCAL_DB_MANAGE.get_connect(db_name)
@@ -335,7 +335,7 @@ async def chart_editor_submit(chart_edit_context: ChatChartEditContext = Body())
)
except Exception as e:
logger.error(f"edit chart exception!{str(e)}", e)
return Result.faild(msg=f"Edit chart exception!{str(e)}")
return Result.failed(msg=f"Edit chart exception!{str(e)}")
history_mem.update(history_messages)
return Result.succ(None)
return Result.faild(msg="Edit Faild!")
return Result.failed(msg="Edit Failed!")

View File

@@ -17,11 +17,11 @@ class Result(Generic[T], BaseModel):
return Result(success=True, err_code=None, err_msg=None, data=data)
@classmethod
def faild(cls, msg):
def failed(cls, msg):
return Result(success=False, err_code="E000X", err_msg=msg, data=None)
@classmethod
def faild(cls, code, msg):
def failed(cls, code, msg):
return Result(success=False, err_code=code, err_msg=msg, data=None)

View File

@@ -7,4 +7,4 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
message = ""
for error in exc.errors():
message += ".".join(error.get("loc")) + ":" + error.get("msg") + ";"
return Result.faild(code="E0001", msg=message)
return Result.failed(code="E0001", msg=message)

View File

@@ -82,6 +82,30 @@ class ChatScene(Enum):
"Dialogue through natural language and private documents and knowledge bases.",
["Knowledge Space Select"],
)
ExtractTriplet = Scene(
"extract_triplet",
"Extract Triplet",
"Extract Triplet",
["Extract Select"],
True,
)
ExtractSummary = Scene(
"extract_summary",
"Extract Summary",
"Extract Summary",
["Extract Select"],
True,
)
ExtractRefineSummary = Scene(
"extract_refine_summary",
"Extract Summary",
"Extract Summary",
["Extract Select"],
True,
)
ExtractEntity = Scene(
"extract_entity", "Extract Entity", "Extract Entity", ["Extract Select"], True
)
@staticmethod
def of_mode(mode):

View File

@@ -13,6 +13,7 @@ from pilot.scene.base_message import ModelMessage, ModelMessageRoleType
from pilot.scene.message import OnceConversation
from pilot.utils import get_or_create_event_loop
from pilot.utils.executor_utils import ExecutorFactory, blocking_func_to_async
from pilot.utils.tracer import root_tracer, trace
from pydantic import Extra
from pilot.memory.chat_history.chat_hisotry_factory import ChatHistory
@@ -38,6 +39,7 @@ class BaseChat(ABC):
arbitrary_types_allowed = True
@trace("BaseChat.__init__")
def __init__(self, chat_param: Dict):
"""Chat Module Initialization
Args:
@@ -128,17 +130,29 @@ class BaseChat(ABC):
return speak_to_user
async def __call_base(self):
input_values = await self.generate_input_values()
import inspect

maybe_values = self.generate_input_values()
input_values = (
await maybe_values if inspect.isawaitable(maybe_values) else maybe_values
)
### Chat sequence advance
self.current_message.chat_order = len(self.history_message) + 1
self.current_message.add_user_message(self.current_user_input)
self.current_message.start_date = datetime.datetime.now().strftime(
"%Y-%m-%d %H:%M:%S"
)
self.current_message.tokens = 0
if self.prompt_template.template:
current_prompt = self.prompt_template.format(**input_values)
metadata = {
"template_scene": self.prompt_template.template_scene,
"input_values": input_values,
}
with root_tracer.start_span(
"BaseChat.__call_base.prompt_template.format", metadata=metadata
):
current_prompt = self.prompt_template.format(**input_values)
self.current_message.add_system_message(current_prompt)
llm_messages = self.generate_llm_messages()
@@ -146,7 +160,6 @@ class BaseChat(ABC):
# Not new server mode, we convert the message format(List[ModelMessage]) to list of dict
# fix the error of "Object of type ModelMessage is not JSON serializable" when passing the payload to request.post
llm_messages = list(map(lambda m: m.dict(), llm_messages))
payload = {
"model": self.llm_model,
"prompt": self.generate_llm_text(),
@@ -161,6 +174,9 @@ class BaseChat(ABC):
def stream_plugin_call(self, text):
return text
def knowledge_reference_call(self, text):
return text
async def check_iterator_end(iterator):
try:
await iterator.__anext__()
@@ -168,6 +184,14 @@ class BaseChat(ABC):
except StopAsyncIteration:
return True  # the iterator has finished
def _get_span_metadata(self, payload: Dict) -> Dict:
metadata = {k: v for k, v in payload.items()}
del metadata["prompt"]
metadata["messages"] = list(
map(lambda m: m if isinstance(m, dict) else m.dict(), metadata["messages"])
)
return metadata
async def stream_call(self):
# TODO Retry when server connection error
payload = await self.__call_base()
@@ -175,6 +199,10 @@ class BaseChat(ABC):
self.skip_echo_len = len(payload.get("prompt").replace("</s>", " ")) + 11
logger.info(f"Requert: \n{payload}")
ai_response_text = ""
span = root_tracer.start_span(
"BaseChat.stream_call", metadata=self._get_span_metadata(payload)
)
payload["span_id"] = span.span_id
try:
from pilot.model.cluster import WorkerManagerFactory
@@ -190,17 +218,93 @@ class BaseChat(ABC):
view_msg = view_msg.replace("\n", "\\n")
yield view_msg
self.current_message.add_ai_message(msg)
view_msg = self.knowledge_reference_call(msg)
self.current_message.add_view_message(view_msg)
span.end()
except Exception as e:
print(traceback.format_exc())
logger.error("model response parase failed" + str(e))
self.current_message.add_view_message(
f"""<span style=\"color:red\">ERROR!</span>{str(e)}\n {ai_response_text} """
)
### store current conversation
span.end(metadata={"error": str(e)})
self.memory.append(self.current_message)
async def nostream_call(self):
payload = await self.__call_base()
logger.info(f"Request: \n{payload}")
ai_response_text = ""
span = root_tracer.start_span(
"BaseChat.nostream_call", metadata=self._get_span_metadata(payload)
)
payload["span_id"] = span.span_id
try:
from pilot.model.cluster import WorkerManagerFactory
worker_manager = CFG.SYSTEM_APP.get_component(
ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory
).create()
with root_tracer.start_span("BaseChat.invoke_worker_manager.generate"):
model_output = await worker_manager.generate(payload)
### output parse
ai_response_text = (
self.prompt_template.output_parser.parse_model_nostream_resp(
model_output, self.prompt_template.sep
)
)
### model result deal
self.current_message.add_ai_message(ai_response_text)
prompt_define_response = (
self.prompt_template.output_parser.parse_prompt_response(
ai_response_text
)
)
metadata = {
"model_output": model_output.to_dict(),
"ai_response_text": ai_response_text,
"prompt_define_response": self._parse_prompt_define_response(
prompt_define_response
),
}
with root_tracer.start_span("BaseChat.do_action", metadata=metadata):
### run
result = await blocking_func_to_async(
self._executor, self.do_action, prompt_define_response
)
### llm speaker
speak_to_user = self.get_llm_speak(prompt_define_response)
# view_message = self.prompt_template.output_parser.parse_view_response(
# speak_to_user, result
# )
view_message = await blocking_func_to_async(
self._executor,
self.prompt_template.output_parser.parse_view_response,
speak_to_user,
result,
prompt_define_response
)
view_message = view_message.replace("\n", "\\n")
self.current_message.add_view_message(view_message)
span.end()
except Exception as e:
print(traceback.format_exc())
logger.error("model response parase faild" + str(e))
self.current_message.add_view_message(
f"""<span style=\"color:red\">ERROR!</span>{str(e)}\n {ai_response_text} """
)
### store current conversation
span.end(metadata={"error": str(e)})
### store dialogue
self.memory.append(self.current_message)
return self.current_ai_response()
async def nostream_call(self):
async def get_llm_response(self):
payload = await self.__call_base()
logger.info(f"Request: \n{payload}")
ai_response_text = ""
@@ -221,41 +325,19 @@ class BaseChat(ABC):
)
### model result deal
self.current_message.add_ai_message(ai_response_text)
prompt_define_response = None
prompt_define_response = (
self.prompt_template.output_parser.parse_prompt_response(
ai_response_text
)
)
### run
result = await blocking_func_to_async(
self._executor, self.do_action, prompt_define_response
)
### llm speaker
speak_to_user = self.get_llm_speak(prompt_define_response)
# view_message = self.prompt_template.output_parser.parse_view_response(
# speak_to_user, result
# )
view_message = await blocking_func_to_async(
self._executor,
self.prompt_template.output_parser.parse_view_response,
speak_to_user,
result,
prompt_define_response
)
view_message = view_message.replace("\n", "\\n")
self.current_message.add_view_message(view_message)
except Exception as e:
print(traceback.format_exc())
logger.error("model response parase faild" + str(e))
logger.error("model response parse failed" + str(e))
self.current_message.add_view_message(
f"""<span style=\"color:red\">ERROR!</span>{str(e)}\n {ai_response_text} """
f"""model response parse failed{str(e)}\n {ai_response_text} """
)
### store dialogue
self.memory.append(self.current_message)
return self.current_ai_response()
return prompt_define_response
def _blocking_stream_call(self):
logger.warn(
@@ -302,7 +384,7 @@ class BaseChat(ABC):
text += self.__load_example_messages()
### Load History
text += self.__load_histroy_messages()
text += self.__load_history_messages()
### Load User Input
text += self.__load_user_message()
@@ -328,7 +410,7 @@ class BaseChat(ABC):
messages += self.__load_example_messages(str_message=False)
### Load History
messages += self.__load_histroy_messages(str_message=False)
messages += self.__load_history_messages(str_message=False)
### Load User Input
messages += self.__load_user_message(str_message=False)
@@ -384,7 +466,7 @@ class BaseChat(ABC):
)
return example_text if str_message else example_messages
def __load_histroy_messages(self, str_message: bool = True):
def __load_history_messages(self, str_message: bool = True):
history_text = ""
history_messages = []
if self.prompt_template.need_historical_messages:
@@ -470,3 +552,21 @@ class BaseChat(ABC):
"""
pass
def _parse_prompt_define_response(self, prompt_define_response: Any) -> Any:
if not prompt_define_response:
return ""
if isinstance(prompt_define_response, str) or isinstance(
prompt_define_response, dict
):
return prompt_define_response
if isinstance(prompt_define_response, tuple):
if hasattr(prompt_define_response, "_asdict"):
# namedtuple
return prompt_define_response._asdict()
else:
return dict(
zip(range(len(prompt_define_response)), prompt_define_response)
)
else:
return prompt_define_response
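An illustrative note on _parse_prompt_define_response; the namedtuple below is hypothetical and only shows how each branch behaves.

from collections import namedtuple

SqlAction = namedtuple("SqlAction", ["sql", "thoughts"])  # hypothetical parser output
resp = SqlAction(sql="SELECT 1", thoughts="demo")
# _parse_prompt_define_response(resp) -> {"sql": "SELECT 1", "thoughts": "demo"} via _asdict();
# a plain tuple becomes {0: ..., 1: ...}; str/dict values pass through; falsy values become "".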

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Tuple, Optional
from typing import Any, Dict, List, Tuple, Optional, Union
from pydantic import BaseModel, Field, root_validator
@@ -70,14 +70,6 @@ class SystemMessage(BaseMessage):
return "system"
class ModelMessage(BaseModel):
"""Type of message that interaction between dbgpt-server and llm-server"""
"""Similar to openai's message format"""
role: str
content: str
class ModelMessageRoleType:
""" "Type of ModelMessage role"""
@@ -87,6 +79,45 @@ class ModelMessageRoleType:
VIEW = "view"
class ModelMessage(BaseModel):
"""Type of message that interaction between dbgpt-server and llm-server"""
"""Similar to openai's message format"""
role: str
content: str
@staticmethod
def from_openai_messages(
messages: Union[str, List[Dict[str, str]]]
) -> List["ModelMessage"]:
"""Openai message format to current ModelMessage format"""
if isinstance(messages, str):
return [ModelMessage(role=ModelMessageRoleType.HUMAN, content=messages)]
result = []
for message in messages:
msg_role = message["role"]
content = message["content"]
if msg_role == "system":
result.append(
ModelMessage(role=ModelMessageRoleType.SYSTEM, content=content)
)
elif msg_role == "user":
result.append(
ModelMessage(role=ModelMessageRoleType.HUMAN, content=content)
)
elif msg_role == "assistant":
result.append(
ModelMessage(role=ModelMessageRoleType.AI, content=content)
)
else:
raise ValueError(f"Unknown role: {msg_role}")
return result
@staticmethod
def to_dict_list(messages: List["ModelMessage"]) -> List[Dict[str, str]]:
return list(map(lambda m: m.dict(), messages))
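A short usage sketch of the converters above; the message content is illustrative.

openai_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello"},
]
model_messages = ModelMessage.from_openai_messages(openai_messages)
payload_messages = ModelMessage.to_dict_list(model_messages)
# payload_messages is a list of plain dicts with ModelMessageRoleType roles,
# ready to be passed as the "messages" parameter to the worker manager.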
class Generation(BaseModel):
"""Output of a single generation."""

View File

@@ -11,6 +11,7 @@ from pilot.common.string_utils import extract_content
from .prompt import prompt
from pilot.component import ComponentType
from pilot.base_modules.agent.controller import ModuleAgent
from pilot.utils.tracer import root_tracer, trace
CFG = Config()
@@ -51,6 +52,7 @@ class ChatAgent(BaseChat):
self.api_call = ApiCall(plugin_generator=self.plugins_prompt_generator)
@trace()
async def generate_input_values(self) -> Dict[str, str]:
input_values = {
"user_goal": self.current_user_input,
@@ -63,7 +65,10 @@ class ChatAgent(BaseChat):
def stream_plugin_call(self, text):
text = text.replace("\n", " ")
return self.api_call.run(text)
with root_tracer.start_span(
"ChatAgent.stream_plugin_call.api_call", metadata={"text": text}
):
return self.api_call.run(text)
def __list_to_prompt_str(self, list: List) -> str:
return "\n".join(f"{i + 1 + 1}. {item}" for i, item in enumerate(list))

View File

@@ -13,6 +13,7 @@ from pilot.scene.chat_dashboard.data_preparation.report_schma import (
from pilot.scene.chat_dashboard.prompt import prompt
from pilot.scene.chat_dashboard.data_loader import DashboardDataLoader
from pilot.utils.executor_utils import blocking_func_to_async
from pilot.utils.tracer import root_tracer, trace
CFG = Config()
@@ -53,6 +54,7 @@ class ChatDashboard(BaseChat):
data = f.read()
return json.loads(data)
@trace()
async def generate_input_values(self) -> Dict:
try:
from pilot.summary.db_summary_client import DBSummaryClient

View File

@@ -52,8 +52,8 @@ class DashboardDataLoader:
values.append(value_item)
return field_names, values
except Exception as e:
logger.debug("Prepare Chart Data Faild!" + str(e))
raise ValueError("Prepare Chart Data Faild!")
logger.debug("Prepare Chart Data Failed!" + str(e))
raise ValueError("Prepare Chart Data Failed!")
def get_chart_values_by_db(self, db_name: str, chart_sql: str):
logger.info(f"get_chart_values_by_db:{db_name},{chart_sql}")

View File

@@ -42,7 +42,7 @@ RESPONSE_FORMAT = [
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = False
PROMPT_NEED_STREAM_OUT = False
prompt = PromptTemplate(
template_scene=ChatScene.ChatDashboard.value(),
@@ -50,9 +50,9 @@ prompt = PromptTemplate(
response_format=json.dumps(RESPONSE_FORMAT, indent=4),
template_define=PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=ChatDashboardOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
)
CFG.prompt_template_registry.register(prompt, is_default=True)

View File

@@ -73,6 +73,7 @@ class ChatExcel(BaseChat):
# ]
return "\n".join(f"{key}:{value}" for dict_item in antv_charts for key, value in dict_item.items())
@trace()
async def generate_input_values(self) -> Dict:
input_values = {
"user_input": self.current_user_input,
@@ -87,7 +88,7 @@ class ChatExcel(BaseChat):
return None
chat_param = {
"chat_session_id": self.chat_session_id,
"user_input": f"{self.excel_reader.excel_file_name} analyze",
"user_input": "[" + self.excel_reader.excel_file_name + "]" + " Analyze",
"parent_mode": self.chat_mode,
"select_param": self.excel_reader.excel_file_name,
"excel_reader": self.excel_reader,
@@ -99,4 +100,9 @@ class ChatExcel(BaseChat):
def stream_plugin_call(self, text):
text = text.replace("\n", " ")
return self.api_call.display_sql_llmvis(text, self.excel_reader.get_df_by_sql_ex)
with root_tracer.start_span(
"ChatExcel.stream_plugin_call.run_display_sql", metadata={"text": text}
):
return self.api_call.run_display_sql(
text, self.excel_reader.get_df_by_sql_ex
)
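
The chart-description join earlier in this file flattens a list of single-entry dicts into name:description lines for the prompt. A small sketch of that transformation with hypothetical chart entries (the real list comes from the AntV chart registry, which is not shown in this hunk):

# Hypothetical chart descriptions, for illustration only.
antv_charts = [
    {"response_line_chart": "used to display comparative trend analysis data"},
    {"response_pie_chart": "suitable for proportion and distribution statistics"},
]

prompt_block = "\n".join(
    f"{key}:{value}"
    for dict_item in antv_charts
    for key, value in dict_item.items()
)
# response_line_chart:used to display comparative trend analysis data
# response_pie_chart:suitable for proportion and distribution statistics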

View File

@@ -54,7 +54,7 @@ _PROMPT_SCENE_DEFINE = (
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = True
PROMPT_NEED_STREAM_OUT = True
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
@@ -66,9 +66,9 @@ prompt = PromptTemplate(
input_variables=["user_input", "table_name", "disply_type"],
template_define=_PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=ChatExcelOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
need_historical_messages=True,
# example_selector=sql_data_example,
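
The temperature comment repeated in these prompt modules describes standard logit scaling before sampling; it is not DB-GPT-specific code. A generic illustration of the effect in plain Python (this is the usual softmax-with-temperature arithmetic, not part of the commit):

import math

def softmax_with_temperature(logits, temperature):
    # lower temperature sharpens the distribution (more deterministic output),
    # higher temperature flattens it (more varied / creative output)
    scaled = [value / temperature for value in logits]
    peak = max(scaled)
    exps = [math.exp(value - peak) for value in scaled]
    total = sum(exps)
    return [value / total for value in exps]

logits = [2.0, 1.0, 0.1]
print(softmax_with_temperature(logits, 0.3))  # peaked: the first token dominates
print(softmax_with_temperature(logits, 1.5))  # flatter: probability mass spreads out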

View File

@@ -13,6 +13,7 @@ from pilot.scene.chat_data.chat_excel.excel_learning.prompt import prompt
from pilot.scene.chat_data.chat_excel.excel_reader import ExcelReader
from pilot.json_utils.utilities import DateTimeEncoder
from pilot.utils.executor_utils import blocking_func_to_async
from pilot.utils.tracer import root_tracer, trace
CFG = Config()
@@ -44,7 +45,7 @@ class ExcelLearning(BaseChat):
if parent_mode:
self.current_message.chat_mode = parent_mode.value()
@trace()
async def generate_input_values(self) -> Dict:
# colunms, datas = self.excel_reader.get_sample_data()
colunms, datas = await blocking_func_to_async(

View File

@@ -61,7 +61,7 @@ PROMPT_SCENE_DEFINE = (
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = False
PROMPT_NEED_STREAM_OUT = False
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
@@ -74,9 +74,9 @@ prompt = PromptTemplate(
response_format=json.dumps(RESPONSE_FORMAT_SIMPLE, ensure_ascii=False, indent=4),
template_define=PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=LearningExcelOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
# example_selector=sql_data_example,
temperature=PROMPT_TEMPERATURE,

View File

@@ -5,6 +5,8 @@ from pilot.scene.base import ChatScene
from pilot.common.sql_database import Database
from pilot.configs.config import Config
from pilot.scene.chat_db.auto_execute.prompt import prompt
from pilot.utils.executor_utils import blocking_func_to_async
from pilot.utils.tracer import root_tracer, trace
from pilot.base_modules.agent.commands.command_mange import ApiCall
CFG = Config()
@@ -35,12 +37,16 @@ class ChatWithDbAutoExecute(BaseChat):
raise ValueError(
f"{ChatScene.ChatWithDbExecute.value} mode should chose db!"
)
with root_tracer.start_span(
"ChatWithDbAutoExecute.get_connect", metadata={"db_name": self.db_name}
):
self.database = CFG.LOCAL_DB_MANAGE.get_connect(self.db_name)
self.database = CFG.LOCAL_DB_MANAGE.get_connect(self.db_name)
self.top_k: int = 50
self.api_call = ApiCall(display_registry=CFG.command_disply)
async def generate_input_values(self):
@trace()
async def generate_input_values(self) -> Dict:
"""
generate input values
"""
@@ -50,18 +56,20 @@ class ChatWithDbAutoExecute(BaseChat):
raise ValueError("Could not import DBSummaryClient. ")
client = DBSummaryClient(system_app=CFG.SYSTEM_APP)
try:
table_infos = client.get_db_summary(
dbname=self.db_name,
query=self.current_user_input,
topk=CFG.KNOWLEDGE_SEARCH_TOP_SIZE,
)
with root_tracer.start_span("ChatWithDbAutoExecute.get_db_summary"):
table_infos = await blocking_func_to_async(
self._executor,
client.get_db_summary,
self.db_name,
self.current_user_input,
CFG.KNOWLEDGE_SEARCH_TOP_SIZE,
)
except Exception as e:
print("db summary find error!" + str(e))
table_infos = self.database.table_simple_info()
if not table_infos:
table_infos = self.database.table_simple_info()
# table_infos = self.database.table_simple_info()
table_infos = await blocking_func_to_async(
self._executor, self.database.table_simple_info
)
input_values = {
# "input": self.current_user_input,

View File

@@ -14,6 +14,9 @@ class SqlAction(NamedTuple):
sql: str
thoughts: Dict
def to_dict(self) -> Dict[str, Dict]:
return {"sql": self.sql, "thoughts": self.thoughts}
logger = logging.getLogger(__name__)

View File

@@ -59,7 +59,7 @@ RESPONSE_FORMAT_SIMPLE = {
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = False
PROMPT_NEED_STREAM_OUT = False
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
@@ -72,9 +72,9 @@ prompt = PromptTemplate(
response_format=json.dumps(RESPONSE_FORMAT_SIMPLE, ensure_ascii=False, indent=4),
template_define=PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=DbChatOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
# example_selector=sql_data_example,
temperature=PROMPT_TEMPERATURE,

View File

@@ -36,7 +36,7 @@ RESPONSE_FORMAT_SIMPLE = {
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = False
PROMPT_NEED_STREAM_OUT = False
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
@@ -50,9 +50,9 @@ prompt = PromptTemplate(
template_is_strict=False,
template_define=PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=DbChatOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
# example_selector=sql_data_example,
temperature=PROMPT_TEMPERATURE,

View File

@@ -6,6 +6,7 @@ from pilot.common.sql_database import Database
from pilot.configs.config import Config
from pilot.scene.chat_db.professional_qa.prompt import prompt
from pilot.utils.executor_utils import blocking_func_to_async
from pilot.utils.tracer import root_tracer, trace
CFG = Config()
@@ -39,6 +40,7 @@ class ChatWithDbQA(BaseChat):
else len(self.tables)
)
@trace()
async def generate_input_values(self) -> Dict:
table_info = ""
dialect = "mysql"

View File

@@ -54,7 +54,7 @@ _DEFAULT_TEMPLATE = (
PROMPT_SEP = SeparatorStyle.SINGLE.value
PROMPT_NEED_NEED_STREAM_OUT = True
PROMPT_NEED_STREAM_OUT = True
prompt = PromptTemplate(
template_scene=ChatScene.ChatWithDbQA.value(),
@@ -62,9 +62,9 @@ prompt = PromptTemplate(
response_format=None,
template_define=PROMPT_SCENE_DEFINE,
template=_DEFAULT_TEMPLATE,
stream_out=PROMPT_NEED_NEED_STREAM_OUT,
stream_out=PROMPT_NEED_STREAM_OUT,
output_parser=NormalChatOutputParser(
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT
sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT
),
)

View File

@@ -6,6 +6,7 @@ from pilot.configs.config import Config
from pilot.base_modules.agent.commands.command import execute_command
from pilot.base_modules.agent import PluginPromptGenerator
from .prompt import prompt
from pilot.utils.tracer import root_tracer, trace
CFG = Config()
@@ -50,6 +51,7 @@ class ChatWithPlugin(BaseChat):
self.plugins_prompt_generator
)
@trace()
async def generate_input_values(self) -> Dict:
input_values = {
"input": self.current_user_input,

Some files were not shown because too many files have changed in this diff.