diff --git a/assets/schema/knowledge_management.sql b/assets/schema/knowledge_management.sql
index e38f731d6..a6f1bc478 100644
--- a/assets/schema/knowledge_management.sql
+++ b/assets/schema/knowledge_management.sql
@@ -34,6 +34,7 @@ CREATE TABLE `knowledge_document` (
 `content` LONGTEXT NOT NULL COMMENT 'knowledge embedding sync result',
 `result` TEXT NULL COMMENT 'knowledge content',
 `vector_ids` LONGTEXT NULL COMMENT 'vector_ids',
+ `summary` LONGTEXT NULL COMMENT 'knowledge summary',
 `gmt_created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'created time',
 `gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
 PRIMARY KEY (`id`),
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 63486e260..e4fad9697 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -28,7 +28,9 @@ WORKDIR /app
 # RUN pip3 install -i $PIP_INDEX_URL ".[all]"
 RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
- && pip3 install -i $PIP_INDEX_URL ".[$DB_GPT_INSTALL_MODEL]"
+ && pip3 install -i $PIP_INDEX_URL ".[$DB_GPT_INSTALL_MODEL]" \
+ # install openai for proxyllm
+ && pip3 install -i $PIP_INDEX_URL ".[openai]"
 RUN (if [ "${LANGUAGE}" = "zh" ]; \
 # language is zh, download zh_core_web_sm from github
diff --git a/docs/getting_started/install/deploy.rst b/docs/getting_started/install/deploy.rst
index 4d7d0d211..075a07403 100644
--- a/docs/getting_started/install/deploy.rst
+++ b/docs/getting_started/install/deploy.rst
@@ -57,6 +57,12 @@ If you are low hardware requirements you can install DB-GPT by Using third-part
 .. tab:: OpenAI
+ Installing Dependencies
+
+ .. code-block::
+
+ pip install -e ".[openai]"
+
 Download embedding model
 .. code-block:: shell
@@ -69,7 +75,7 @@ If you are low hardware requirements you can install DB-GPT by Using third-part
 or
 git clone https://huggingface.co/moka-ai/m3e-large
- Configure LLM_MODEL and PROXY_API_URL and API_KEY in `.env` file
+ Configure LLM_MODEL, PROXY_API_URL and API_KEY in `.env` file
 .. code-block:: shell
@@ -291,7 +297,7 @@ If you are low hardware requirements you can install DB-GPT by Using third-part
 **1. Download a pre-converted model file.**
- Suppose you want to use [Vicuna 13B v1.5](https://huggingface.co/lmsys/vicuna-13b-v1.5), you can download the file already converted from [TheBloke/vicuna-13B-v1.5-GGUF](https://huggingface.co/TheBloke/vicuna-13B-v1.5-GGUF), only one file is needed. Download it to the `models` directory and rename it to `ggml-model-q4_0.gguf`.
+ Suppose you want to use `Vicuna 13B v1.5 <https://huggingface.co/lmsys/vicuna-13b-v1.5>`_ , you can download the file already converted from `TheBloke/vicuna-13B-v1.5-GGUF <https://huggingface.co/TheBloke/vicuna-13B-v1.5-GGUF>`_ , only one file is needed. Download it to the `models` directory and rename it to `ggml-model-q4_0.gguf`.
 .. code-block::
@@ -299,7 +305,7 @@ If you are low hardware requirements you can install DB-GPT by Using third-part
 **2. Convert It Yourself**
- You can convert the model file yourself according to the instructions in [llama.cpp#prepare-data--run](https://github.com/ggerganov/llama.cpp#prepare-data--run), and put the converted file in the models directory and rename it to `ggml-model-q4_0.gguf`.
+ You can convert the model file yourself according to the instructions in `llama.cpp#prepare-data--run <https://github.com/ggerganov/llama.cpp#prepare-data--run>`_ , and put the converted file in the models directory and rename it to `ggml-model-q4_0.gguf`.
**Installing Dependencies** diff --git a/docs/getting_started/install/environment/environment.md b/docs/getting_started/install/environment/environment.md index 021bbe861..11aec8d40 100644 --- a/docs/getting_started/install/environment/environment.md +++ b/docs/getting_started/install/environment/environment.md @@ -6,7 +6,9 @@ LLM Model Name, see /pilot/configs/model_config.LLM_MODEL_CONFIG * LLM_MODEL=vicuna-13b MODEL_SERVER_ADDRESS + * MODEL_SERVER=http://127.0.0.1:8000 + LIMIT_MODEL_CONCURRENCY * LIMIT_MODEL_CONCURRENCY=5 @@ -84,21 +86,6 @@ embedding recall max token ,2000 * WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network ``` -```{admonition} Vector Store SETTINGS -#### Chroma -* VECTOR_STORE_TYPE=Chroma -#### MILVUS -* VECTOR_STORE_TYPE=Milvus -* MILVUS_URL=127.0.0.1 -* MILVUS_PORT=19530 -* MILVUS_USERNAME -* MILVUS_PASSWORD -* MILVUS_SECURE= - -#### WEAVIATE -* WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network -``` - ```{admonition} Multi-GPU Setting See https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/ If CUDA_VISIBLE_DEVICES is not configured, all available gpus will be used diff --git a/docs/locales/zh_CN/LC_MESSAGES/getting_started/install/deploy.po b/docs/locales/zh_CN/LC_MESSAGES/getting_started/install/deploy.po index 2947c34c8..f4c85b8b4 100644 --- a/docs/locales/zh_CN/LC_MESSAGES/getting_started/install/deploy.po +++ b/docs/locales/zh_CN/LC_MESSAGES/getting_started/install/deploy.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: DB-GPT 👏👏 0.4.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-11-04 15:08+0800\n" +"POT-Creation-Date: 2023-11-06 19:38+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -19,27 +19,27 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.12.1\n" -#: ../../getting_started/install/deploy.rst:4 bb7e1a7b70624a3d91804c5ce2d5216b +#: ../../getting_started/install/deploy.rst:4 f3ea3305f122460aaa11999edc4b5de6 msgid "Installation From Source" msgstr "源码安装" -#: ../../getting_started/install/deploy.rst:6 7524477e8b8a456b8e2b942621a6b225 +#: ../../getting_started/install/deploy.rst:6 bb941f2bd56d4eb48f7c4f75ebd74176 msgid "To get started, install DB-GPT with the following steps." msgstr "按照以下步骤进行安装" -#: ../../getting_started/install/deploy.rst:10 871cdb0a7e004668b518168461d76945 +#: ../../getting_started/install/deploy.rst:10 27a1e092c1f945ceb9946ebdaf89b600 msgid "1.Preparation" msgstr "1.准备" -#: ../../getting_started/install/deploy.rst:11 d845f48ec05a4bb7a5b0f95a0644029d +#: ../../getting_started/install/deploy.rst:11 5c5bfbdc74a14c3b9b1f1ed66617cac8 msgid "**Download DB-GPT**" msgstr "**下载DB-GPT项目**" -#: ../../getting_started/install/deploy.rst:17 85df7bb2b73141058ba0ca5a73e46701 +#: ../../getting_started/install/deploy.rst:17 3065ee2f34f9417598a37fd699a4863e msgid "**Install Miniconda**" msgstr "**安装Miniconda**" -#: ../../getting_started/install/deploy.rst:19 b7b3e120ded344d09fb2d7a3a7903497 +#: ../../getting_started/install/deploy.rst:19 f9f3a653ffb8447284686aa37a7bb79a msgid "" "We use Sqlite as default database, so there is no need for database " "installation. 
If you choose to connect to other databases, you can " @@ -50,386 +50,397 @@ msgid "" "`_" msgstr "" "目前使用Sqlite作为默认数据库,因此DB-" -"GPT快速部署不需要部署相关数据库服务。如果你想使用其他数据库,需要先部署相关数据库服务。我们目前使用Miniconda进行python环境和包依赖管理[安装" -" Miniconda](https://docs.conda.io/en/latest/miniconda.html)" +"GPT快速部署不需要部署相关数据库服务。如果你想使用其他数据库,需要先部署相关数据库服务。我们目前使用Miniconda进行python环境和包依赖管理。`如何安装" +" Miniconda `_ 。" -#: ../../getting_started/install/deploy.rst:36 a5e34db6a6b8450c8a1f35377a1dc93b +#: ../../getting_started/install/deploy.rst:36 a2cd2fdd1d16421f9cbe341040b153b6 msgid "2.Deploy LLM Service" msgstr "2.部署LLM服务" -#: ../../getting_started/install/deploy.rst:37 5c49829b086e4d5fbcb5a971a34fa3b5 +#: ../../getting_started/install/deploy.rst:37 180a121e3c994a92a917ace80bf12386 msgid "" "DB-GPT can be deployed on servers with low hardware requirements or on " "servers with high hardware requirements." msgstr "DB-GPT可以部署在对硬件要求不高的服务器,也可以部署在对硬件要求高的服务器" -#: ../../getting_started/install/deploy.rst:39 7c2f7e2b49054d17850942bef9570c45 +#: ../../getting_started/install/deploy.rst:39 395608515c0348d5849030b58da7b659 msgid "" "If you are low hardware requirements you can install DB-GPT by Using " "third-part LLM REST API Service OpenAI, Azure, tongyi." -msgstr "Low hardware requirements模式适用于对接第三方模型服务的api,比如OpenAI, 通义千问, 文心.cpp。" +msgstr "低硬件要求模式适用于对接第三方模型服务的 API,比如 OpenAI、通义千问、 文心一言等。" -#: ../../getting_started/install/deploy.rst:43 8c392e63073a47698e38456c5994ff25 +#: ../../getting_started/install/deploy.rst:43 e29297e61e2e4d05ba88f0e1c2b1f365 msgid "As our project has the ability to achieve OpenAI performance of over 85%," msgstr "使用OpenAI服务可以让DB-GPT准确率达到85%" -#: ../../getting_started/install/deploy.rst:48 148f8d85c02345219ac535003fe05fca +#: ../../getting_started/install/deploy.rst:48 d0d70d51e8684c2891c58a6da4941a52 msgid "Notice make sure you have install git-lfs" msgstr "确认是否已经安装git-lfs" -#: ../../getting_started/install/deploy.rst:50 7f719273d1ff4d888376030531a560c4 +#: ../../getting_started/install/deploy.rst:50 0d2781fd38eb467ebad2a3c310a344e6 msgid "centos:yum install git-lfs" msgstr "" -#: ../../getting_started/install/deploy.rst:52 f5c2035bca214e3fbef2b70be0e76247 +#: ../../getting_started/install/deploy.rst:52 1574ea24ad6443409070aa3a1f7abe87 msgid "ubuntu:apt-get install git-lfs" msgstr "" -#: ../../getting_started/install/deploy.rst:54 cb1b52b9323e4e9fbccfd3c42067a568 +#: ../../getting_started/install/deploy.rst:54 ad86473d5c87447091c713f45cbfed0e msgid "macos:brew install git-lfs" msgstr "" #: ../../getting_started/install/deploy.rst:58 -#: ../../getting_started/install/deploy.rst:223 -#: 9c601ee0c06646d58cbd2c2fff342cb5 d3fe6c712b4145d0856487c65057c2a0 +#: ../../getting_started/install/deploy.rst:229 +#: 3dd1e40f33924faab63634907a7f6511 dce32420face4ab2b99caf7f3900ede9 msgid "OpenAI" msgstr "OpenAI" -#: ../../getting_started/install/deploy.rst:60 -#: ../../getting_started/install/deploy.rst:207 -#: 5e0e84bd11084d158e18b702022aaa96 6a9c9830e5184eac8b6b7425780f4a3e +#: ../../getting_started/install/deploy.rst:60 1f66400540114de2820761ef80137805 +msgid "Installing Dependencies" +msgstr "安装依赖" + +#: ../../getting_started/install/deploy.rst:66 +#: ../../getting_started/install/deploy.rst:213 +#: 31b856a6fc094334a37914c046cb1bb1 42b2f6d36ca4487f8e31d59bba123fca msgid "Download embedding model" -msgstr "下载embedding model" +msgstr "下载 embedding 模型" -#: ../../getting_started/install/deploy.rst:72 -#: ../../getting_started/install/deploy.rst:231 -#: 953d6931627641b985588abf8b6ad9fd d52e287164ca44f79709f4645a1bb774 -msgid 
"Configure LLM_MODEL and PROXY_API_URL and API_KEY in `.env` file" -msgstr "在`.env`文件设置LLM_MODEL and PROXY_API_URL and API_KEY" +#: ../../getting_started/install/deploy.rst:78 f970fb69e47c40d7bda381ec6f045829 +msgid "Configure LLM_MODEL, PROXY_API_URL and API_KEY in `.env` file" +msgstr "在 `.env` 文件中设置 LLM_MODEL、PROXY_API_URL 和 API_KEY" -#: ../../getting_started/install/deploy.rst:82 -#: ../../getting_started/install/deploy.rst:282 -#: 6dc89616c0bf46a898a49606d1349039 e30b7458fc0d46e8bcf0ec61a2be2e19 +#: ../../getting_started/install/deploy.rst:88 +#: ../../getting_started/install/deploy.rst:288 +#: 6ca04c88fc60480db2ebdc9b234a0bbb 709cfe74c45c4eff83a7d77bb30b4a2b msgid "Make sure your .env configuration is not overwritten" -msgstr "认.env文件不会被覆盖\"" +msgstr "确保你的 .env 文件不会被覆盖" -#: ../../getting_started/install/deploy.rst:85 7adce7ac878d4b1ab9249d840d302520 +#: ../../getting_started/install/deploy.rst:91 147aea0d753f44588f4a0c56002334ab msgid "Vicuna" msgstr "Vicuna" -#: ../../getting_started/install/deploy.rst:86 44a32c4cd0b140838b49f739d68ff42d +#: ../../getting_started/install/deploy.rst:92 6a0bd60c4ca2478cb0f3d85aff70cd3b msgid "" "`Vicuna-v1.5 `_ based on " "llama-2 has been released, we recommend you set `LLM_MODEL=vicuna-" "13b-v1.5` to try this model)" msgstr "" +"基于 llama-2 的模型 `Vicuna-v1.5 `_ 已经发布,我们推荐你通过配置 `LLM_MODEL=vicuna-13b-v1.5` 来尝试这个模型" -#: ../../getting_started/install/deploy.rst:88 f4991de79c434961b0ee777cf42dc491 +#: ../../getting_started/install/deploy.rst:94 6a111c2ef31f41d4b737cf8b6f36fb16 msgid "vicuna-v1.5 hardware requirements" -msgstr "" +msgstr "vicuna-v1.5 的硬件要求" -#: ../../getting_started/install/deploy.rst:92 -#: ../../getting_started/install/deploy.rst:137 -#: 596405ba21d046c993c18d0bcaf95048 ca993a5f1b194707bbe3d5d6e9d670d8 +#: ../../getting_started/install/deploy.rst:98 +#: ../../getting_started/install/deploy.rst:143 +#: dc24c0238ce141df8bdce26cc0e2ddbb e04f1ea4b36940f3a28b66cdff7b702e msgid "Model" -msgstr "" - -#: ../../getting_started/install/deploy.rst:93 -#: ../../getting_started/install/deploy.rst:138 -#: 8437bb47daa94ca8a43b0eadc1286842 d040d964198d4c95afa6dd0d86586cc2 -msgid "Quantize" -msgstr "" - -#: ../../getting_started/install/deploy.rst:94 -#: ../../getting_started/install/deploy.rst:139 -#: 453bc4416d89421699d90d940498b9ac 5de996bb955f4c6ca8d5651985ced255 -msgid "VRAM Size" -msgstr "" - -#: ../../getting_started/install/deploy.rst:95 -#: ../../getting_started/install/deploy.rst:98 077d52b8588e4ddd956acb7ff86bc458 -#: fcf0ca75422343d19b12502fb7746f08 -msgid "vicuna-7b-v1.5" -msgstr "" - -#: ../../getting_started/install/deploy.rst:96 -#: ../../getting_started/install/deploy.rst:102 -#: ../../getting_started/install/deploy.rst:141 -#: ../../getting_started/install/deploy.rst:147 -#: 799b310b76454f79a261300253a9bb75 bd98df81ec8941b69c7d1de107154b1b -#: ca71731754f545d589e506cb53e6f042 d4023553e4384cc4955ef5bf4dc4d05b -msgid "4-bit" -msgstr "" - -#: ../../getting_started/install/deploy.rst:97 -#: ../../getting_started/install/deploy.rst:142 -#: adfc22d9c6394046bb632831a4a3b066 e4e7762c714a49828da630a5f9d641dc -msgid "8 GB" -msgstr "" +msgstr "模型" #: ../../getting_started/install/deploy.rst:99 -#: ../../getting_started/install/deploy.rst:105 #: ../../getting_started/install/deploy.rst:144 -#: ../../getting_started/install/deploy.rst:150 -#: 0d6bd1c4a6464df38723de978c09e8bc 106e20f7727f43d280b7560dc0f19bbc -#: 5a95691d1d134638bffa478daf2577a0 f8add03506244306b1ac6e10c5a2bc1f -msgid "8-bit" -msgstr "" +#: b6473e65ca1a437a84226531be4da26d 
e0a2f7580685480aa13ca462418764d3 +msgid "Quantize" +msgstr "量化" #: ../../getting_started/install/deploy.rst:100 -#: ../../getting_started/install/deploy.rst:103 #: ../../getting_started/install/deploy.rst:145 -#: ../../getting_started/install/deploy.rst:148 -#: 03222e6a682e406bb56c21025412fbe5 2eb88af3efb74fb79899d7818a79d177 -#: 9705b9d7d2084f0b8867f6aeec66251b bca6e5151e244a318f4c2078c3f82118 -msgid "12 GB" -msgstr "" +#: 56471c3b174d4adf9e8cb5bebaa300a6 d82297b8b9c148c3906d8ee4ed10d8a0 +msgid "VRAM Size" +msgstr "显存" #: ../../getting_started/install/deploy.rst:101 #: ../../getting_started/install/deploy.rst:104 -#: 1596cbb77c954d1db00e2fa283d66a9a c6e8dc1600d14ee5a47f45f75ec709f4 -msgid "vicuna-13b-v1.5" +#: 1214432602fe47a28479ce3e21a7d88b 51838e72e42248f199653f1bf08c8155 +msgid "vicuna-7b-v1.5" +msgstr "" + +#: ../../getting_started/install/deploy.rst:102 +#: ../../getting_started/install/deploy.rst:108 +#: ../../getting_started/install/deploy.rst:147 +#: ../../getting_started/install/deploy.rst:153 +#: a64439f4e6f64c42bb76fbb819556784 ed95f498641e4a0f976318df608a1d67 +#: fc400814509048b4a1cbe1e07c539285 ff7a8cb2cce8438cb6cb0d80dabfc2b5 +msgid "4-bit" +msgstr "" + +#: ../../getting_started/install/deploy.rst:103 +#: ../../getting_started/install/deploy.rst:148 +#: 2726e8a278c34e6db59147e9f66f2436 5feab5755a41403c9d641da697de4651 +msgid "8 GB" +msgstr "" + +#: ../../getting_started/install/deploy.rst:105 +#: ../../getting_started/install/deploy.rst:111 +#: ../../getting_started/install/deploy.rst:150 +#: ../../getting_started/install/deploy.rst:156 +#: 1984406682da4da3ad7b275e44085d07 2f027d838d0c46409e54c066d7983aae +#: 5c5878fe64944872b6769f075fedca05 e2507408a9c5423988e17b7029b487e4 +msgid "8-bit" msgstr "" #: ../../getting_started/install/deploy.rst:106 +#: ../../getting_started/install/deploy.rst:109 #: ../../getting_started/install/deploy.rst:151 -#: 00ee98593b7f4b29b5d97fa5a323bfa1 0169214dcf8d46c791b3482c81283fa0 +#: ../../getting_started/install/deploy.rst:154 +#: 332f50702c7b46e79ea0af5cbf86c6d5 381d23253cfd40109bacefca6a179f91 +#: aafe2423c25546e789e4804e3fd91d1d cc56990a58e941d6ba023cbd4dca0357 +msgid "12 GB" +msgstr "" + +#: ../../getting_started/install/deploy.rst:107 +#: ../../getting_started/install/deploy.rst:110 +#: 1f14e2fa6d41493cb208f55eddff9773 6457f6307d8546beb5f2fb69c30922d8 +msgid "vicuna-13b-v1.5" +msgstr "" + +#: ../../getting_started/install/deploy.rst:112 +#: ../../getting_started/install/deploy.rst:157 +#: e24d3a36b5ce4cfe861dce2d1c4db592 f2e66b2da7954aaab0ee526b25a371f5 msgid "20 GB" msgstr "" -#: ../../getting_started/install/deploy.rst:122 -#: ../../getting_started/install/deploy.rst:169 -#: ../../getting_started/install/deploy.rst:195 -#: 852f800e380c44d3b1b5bdaa881406b7 89ddd6c4a4874f4f8b7f051d797e364a -#: a2b4436f8fcb44ada3b5bf5c1099e646 +#: ../../getting_started/install/deploy.rst:128 +#: ../../getting_started/install/deploy.rst:175 +#: ../../getting_started/install/deploy.rst:201 +#: 1719c11f92874c47a87c00c634b9fad8 4596fcbe415d42fdbb29b92964fae070 +#: e639ae6076a64b7b9de08527966e4550 msgid "The model files are large and will take a long time to download." 
-msgstr "" +msgstr "这个模型权重文件比较大,需要花费较长时间来下载。" -#: ../../getting_started/install/deploy.rst:124 -#: ../../getting_started/install/deploy.rst:171 -#: ../../getting_started/install/deploy.rst:197 -#: 18ccf7acdf3045588a19866c0161f44b 3d3d93649ec943c8b2b725807e969cbf -#: 6205975466f54d8fbf8e30b58df6b9c2 +#: ../../getting_started/install/deploy.rst:130 +#: ../../getting_started/install/deploy.rst:177 +#: ../../getting_started/install/deploy.rst:203 +#: 4ec1492d389f403ebd9dd805fcaac68e ac6c68e2bf9b47c694ea8e0506014b10 +#: e39be72282e64760903aaba45f8effb8 msgid "**Configure LLM_MODEL in `.env` file**" -msgstr "" +msgstr "**在 `.env` 文件中配置 LLM_MODEL**" -#: ../../getting_started/install/deploy.rst:131 -#: ../../getting_started/install/deploy.rst:228 -#: 022addaea4ea432584b8a27ea83a9dc4 7a95d25c58d2491e855dff42f6d3a686 +#: ../../getting_started/install/deploy.rst:137 +#: ../../getting_started/install/deploy.rst:234 +#: 7ce4e2253ef24a7ea890ade04ce36682 b9d5bf4fa09649c4a098503132ce7c0c msgid "Baichuan" -msgstr "" +msgstr "百川" -#: ../../getting_started/install/deploy.rst:133 -#: b5374f6d0ca44ee9bd984c0198515861 +#: ../../getting_started/install/deploy.rst:139 +#: ffdad6a70558457fa825bad4d811100d msgid "Baichuan hardware requirements" -msgstr "" - -#: ../../getting_started/install/deploy.rst:140 -#: ../../getting_started/install/deploy.rst:143 -#: 6a5e6034c185455d81e8ba5349dbc191 9d0109817c524747909417924bb0bd31 -msgid "baichuan-7b" -msgstr "" +msgstr "百川 的硬件要求" #: ../../getting_started/install/deploy.rst:146 #: ../../getting_started/install/deploy.rst:149 -#: b3300ed57f0c4ed98816652ea4a5c3c8 d57e98b46fb14f649d6ac8df27edcfd8 +#: 59d9b64f54d34971a68e93e3101def06 a66ce354d8f143ce920303241cd8947e +msgid "baichuan-7b" +msgstr "" + +#: ../../getting_started/install/deploy.rst:152 +#: ../../getting_started/install/deploy.rst:155 +#: c530662259ca4ec5b03a18e4b690e17a fa3af65ecca54daab961f55729bbc40e msgid "baichuan-13b" msgstr "" -#: ../../getting_started/install/deploy.rst:173 -#: ac684ccbea7d4f5380c43ad02a1b7231 +#: ../../getting_started/install/deploy.rst:179 +#: efd73637994a4b7c97ef3557e1f3161c msgid "please rename Baichuan path to \"baichuan2-13b\" or \"baichuan2-7b\"" msgstr "将Baichuan模型目录修改为\"baichuan2-13b\" 或 \"baichuan2-7b\"" -#: ../../getting_started/install/deploy.rst:179 -#: d2e283112ed1440b93510c331922b37c +#: ../../getting_started/install/deploy.rst:185 +#: 435a3f0d0fe84b49a7305e2c0f51a5df msgid "ChatGLM" msgstr "" -#: ../../getting_started/install/deploy.rst:199 -#: ae30d065461e446e86b02d3903108a02 -msgid "please rename chatglm model path to \"chatglm2-6b\"" -msgstr "将chatglm模型目录修改为\"chatglm2-6b\"" - #: ../../getting_started/install/deploy.rst:205 -#: 81a8983e89c4400abc20080c86865c02 +#: 165e23d3d40d4756b5a6a2580d015213 +msgid "please rename chatglm model path to \"chatglm2-6b\"" +msgstr "将 chatglm 模型目录修改为\"chatglm2-6b\"" + +#: ../../getting_started/install/deploy.rst:211 +#: b651ebb5e0424b8992bc8b49d2280bee msgid "Other LLM API" -msgstr "" +msgstr "其它模型 API" -#: ../../getting_started/install/deploy.rst:219 -#: 3df3d5e0e4f64813a7ca2adccd056c4a +#: ../../getting_started/install/deploy.rst:225 +#: 4eabdc25f4a34676b3ece620c88d866f msgid "Now DB-GPT support LLM REST API TYPE:" -msgstr "目前DB-GPT支持的大模型REST API类型:" +msgstr "目前DB-GPT支持的大模型 REST API 类型:" -#: ../../getting_started/install/deploy.rst:224 -#: 4f7cb7a3f324410d8a34ba96d7c9536a +#: ../../getting_started/install/deploy.rst:230 +#: d361963cc3404e5ca55a823f1f1f545c msgid "Azure" msgstr "" -#: ../../getting_started/install/deploy.rst:225 -#: 
ee6cbe48a8094637b6b8a049446042c6 +#: ../../getting_started/install/deploy.rst:231 +#: 3b0f17c74aaa4bbd9db935973fa1c36b msgid "Aliyun tongyi" msgstr "" -#: ../../getting_started/install/deploy.rst:226 -#: a3d9c061b0f440258be88cc3143b1887 +#: ../../getting_started/install/deploy.rst:232 +#: 7c4c457a499943b8804e31046551006d msgid "Baidu wenxin" msgstr "" -#: ../../getting_started/install/deploy.rst:227 -#: 176001fb6eaf448f9eca1abd256b3ef4 +#: ../../getting_started/install/deploy.rst:233 +#: ac1880a995184295acf07fff987d7c56 msgid "Zhipu" msgstr "" -#: ../../getting_started/install/deploy.rst:229 -#: 17fecc140be2474ea1566d44641a27c0 +#: ../../getting_started/install/deploy.rst:235 +#: 6927500d7d3445b7b1981da1df4e1666 msgid "Bard" msgstr "" -#: ../../getting_started/install/deploy.rst:284 -#: bc34832cec754984a40c4d01526f0f83 +#: ../../getting_started/install/deploy.rst:237 +#: 419d564de18c485780d9336b852735b6 +msgid "Configure LLM_MODEL and PROXY_API_URL and API_KEY in `.env` file" +msgstr "在`.env`文件设置 LLM_MODEL、PROXY_API_URL和 API_KEY" + +#: ../../getting_started/install/deploy.rst:290 +#: 71d5203682e24e2e896e4b9913471f78 msgid "llama.cpp" msgstr "" -#: ../../getting_started/install/deploy.rst:286 -#: aa4b14e726dc447f92dec48f38a3d770 +#: ../../getting_started/install/deploy.rst:292 +#: 36a2b82f711a4c0f9491aca9c84d3c91 msgid "" "DB-GPT already supports `llama.cpp " "`_ via `llama-cpp-python " "`_ ." msgstr "" -"DB-GPT 已经支持了 `llama.cpp `_ via " -"`llama-cpp-python `_ ." +"DB-GPT 已经通过 `llama-cpp-python `_ 支持了 `llama.cpp `_ 。" -#: ../../getting_started/install/deploy.rst:288 -#: e97a868ad05b48ee8d25017457c1b7ee +#: ../../getting_started/install/deploy.rst:294 +#: 439064115dca4ae08d8e60041f2ffe17 msgid "**Preparing Model Files**" -msgstr "**准备Model文件**" +msgstr "**准备模型文件**" -#: ../../getting_started/install/deploy.rst:290 -#: 8efc7824082545a4a98ebbd6b12fa00f +#: ../../getting_started/install/deploy.rst:296 +#: 7291d6fa20b34942926e7765c01f25c9 msgid "" "To use llama.cpp, you need to prepare a gguf format model file, and there" " are two common ways to obtain it, you can choose either:" -msgstr "使用 llama.cpp,你需要准备 gguf 格式的文件,你可以通过以下两种方法获取" +msgstr "为了使用 llama.cpp,你需要准备 gguf 格式的文件,你可以通过以下两种方法获取" -#: ../../getting_started/install/deploy.rst:292 -#: 5b9f3a35779c4f89a4b9051904c5f7d8 +#: ../../getting_started/install/deploy.rst:298 +#: 45752f3f5dd847469da0c5edddc530fa msgid "**1. Download a pre-converted model file.**" msgstr "**1.下载已转换的模型文件.**" -#: ../../getting_started/install/deploy.rst:294 -#: e15f33a2e5fc4caaa9ad2e10cabf9d32 +#: ../../getting_started/install/deploy.rst:300 +#: c451db2157ff49b2b4992aed9907ddfa msgid "" -"Suppose you want to use [Vicuna 13B v1.5](https://huggingface.co/lmsys" -"/vicuna-13b-v1.5), you can download the file already converted from " -"[TheBloke/vicuna-13B-v1.5-GGUF](https://huggingface.co/TheBloke/vicuna-" -"13B-v1.5-GGUF), only one file is needed. Download it to the `models` " +"Suppose you want to use `Vicuna 13B v1.5 `_ , you can download the file already converted from " +"`TheBloke/vicuna-13B-v1.5-GGUF `_ , only one file is needed. Download it to the `models` " "directory and rename it to `ggml-model-q4_0.gguf`." 
msgstr "" -"假设您想使用[Vicuna 13B v1.5](https://huggingface.co/lmsys/vicuna-" -"13b-v1.5)您可以从[TheBloke/vicuna-" -"13B-v1.5-GGUF](https://huggingface.co/TheBloke/vicuna-" -"13B-v1.5-GGUF)下载已转换的文件,只需要一个文件。将其下载到models目录并将其重命名为 `ggml-" +"假设您想使用 `Vicuna 13B v1.5 `_ 您可以从 `TheBloke/vicuna-" +"13B-v1.5-GGUF `_ 下载已转换的文件,只需要一个文件。将其下载到models目录并将其重命名为 `ggml-" "model-q4_0.gguf`。" -#: ../../getting_started/install/deploy.rst:300 -#: 81ab603cd9b94cabb48add9541590b8b +#: ../../getting_started/install/deploy.rst:306 +#: f5b92b51622b43d398b3dc13a5892c29 msgid "**2. Convert It Yourself**" msgstr "**2. 自行转换**" -#: ../../getting_started/install/deploy.rst:302 -#: f9b46aa70a0842d6acc7e837c8b27778 +#: ../../getting_started/install/deploy.rst:308 +#: 8838ae6dcecf44ecad3fd963980c8eb3 msgid "" "You can convert the model file yourself according to the instructions in " -"[llama.cpp#prepare-data--run](https://github.com/ggerganov/llama.cpp" -"#prepare-data--run), and put the converted file in the models directory " -"and rename it to `ggml-model-q4_0.gguf`." +"`llama.cpp#prepare-data--run `_ , and put the converted file in the models " +"directory and rename it to `ggml-model-q4_0.gguf`." msgstr "" -"您可以根据[llama.cpp#prepare-data--run](https://github.com/ggerganov/llama.cpp" -"#prepare-data--run)中的说明自行转换模型文件,并把转换后的文件放在models目录中,并重命名为`ggml-" +"您可以根据 `llama.cpp#prepare-data--run `_ 中的说明自行转换模型文件,并把转换后的文件放在models目录中,并重命名为`ggml-" "model-q4_0.gguf`。" -#: ../../getting_started/install/deploy.rst:304 -#: 5278c05568dd4ad688bcc181e45caa18 +#: ../../getting_started/install/deploy.rst:310 +#: 3fe28d6e5eaa4bdf9c5c44a914c3577c msgid "**Installing Dependencies**" msgstr "**安装依赖**" -#: ../../getting_started/install/deploy.rst:306 -#: bd67bf651a9a4096a37e73851b5fad98 +#: ../../getting_started/install/deploy.rst:312 +#: bdc10d2e88cc4c3f84a8c4a8dc2037a9 msgid "" "llama.cpp is an optional dependency in DB-GPT, and you can manually " "install it using the following command:" msgstr "llama.cpp在DB-GPT中是可选安装项, 你可以通过以下命令进行安装" -#: ../../getting_started/install/deploy.rst:313 -#: 71ed6ae4d0aa4d3ca27ebfffc42a5095 +#: ../../getting_started/install/deploy.rst:319 +#: 9c136493448b43b5b27f66af74ff721e msgid "**3.Modifying the Configuration File**" msgstr "**3.修改配置文件**" -#: ../../getting_started/install/deploy.rst:315 -#: a0a2fdb799524ef4938df2c306536e2a +#: ../../getting_started/install/deploy.rst:321 +#: c835a7dee1dd409fb861e7b886c6dc5b msgid "Next, you can directly modify your `.env` file to enable llama.cpp." msgstr "修改`.env`文件使用llama.cpp" -#: ../../getting_started/install/deploy.rst:322 -#: ../../getting_started/install/deploy.rst:390 -#: 03694cbb7dfa432cb32116925357f008 +#: ../../getting_started/install/deploy.rst:328 +#: ../../getting_started/install/deploy.rst:396 +#: 296e6d08409544918fee0c31b1bf195c a81e5d882faf4722b0e10d53f635f53c msgid "" "Then you can run it according to `Run `_" msgstr "" -"然后你可以根据[运行](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-" -"cn/zh_CN/latest/getting_started/install/deploy/deploy.html#run)来运行" +"然后你可以根据 `运行 `_ 来运行。" -#: ../../getting_started/install/deploy.rst:325 -#: c9bbc15ea2fc4562bdf87a4e099600a0 +#: ../../getting_started/install/deploy.rst:331 +#: 0f7f487ee11a4e01a95f7c504f0469ba msgid "**More Configurations**" msgstr "**更多配置文件**" -#: ../../getting_started/install/deploy.rst:327 -#: 9beff887d8554d1dac169de93625c1b2 +#: ../../getting_started/install/deploy.rst:333 +#: b0f9964497f64fb5b3740099232cd72b msgid "" "In DB-GPT, the model configuration can be done through `{model " "name}_{config key}`." 
msgstr "在DB-GPT中,模型配置可以通过`{模型名称}_{配置名}` 来配置。" -#: ../../getting_started/install/deploy.rst:329 -#: 6cad64bcc9f141de85479203b21f7f9e +#: ../../getting_started/install/deploy.rst:335 +#: 7c225de4fe9d4dd3a3c2b2a33802e656 msgid "More Configurations" msgstr "**更多配置文件**" -#: ../../getting_started/install/deploy.rst:333 -#: de97b0d5b59c4bfabbd47b7e1766cc70 +#: ../../getting_started/install/deploy.rst:339 +#: 5cc1671910314796a9ce0b5107d3c9fe msgid "Environment Variable Key" msgstr "环境变量Key" -#: ../../getting_started/install/deploy.rst:334 -#: 76afa3655f644576b54b4422bad3fcad +#: ../../getting_started/install/deploy.rst:340 +#: 4359ed4e11bb47ad89a605cbf9016cd5 msgid "Default" msgstr "默认值" -#: ../../getting_started/install/deploy.rst:335 -#: 0dcabfeeb4874e08ba53d4b4aa6a1d73 +#: ../../getting_started/install/deploy.rst:341 +#: 5cf0efc6d1014665bb9dbdae96bf2726 msgid "Description" msgstr "描述" -#: ../../getting_started/install/deploy.rst:336 -#: c1670aec27cf4c068d825456817d7667 +#: ../../getting_started/install/deploy.rst:342 +#: e7c291f80a9a40fa90d642901eca02c6 msgid "llama_cpp_prompt_template" msgstr "" -#: ../../getting_started/install/deploy.rst:337 -#: ../../getting_started/install/deploy.rst:340 +#: ../../getting_started/install/deploy.rst:343 #: ../../getting_started/install/deploy.rst:346 #: ../../getting_started/install/deploy.rst:352 #: ../../getting_started/install/deploy.rst:358 -#: 49c8f875066f49c6aac195805f88e4a9 +#: ../../getting_started/install/deploy.rst:364 +#: 07dc7fc4e51e4d9faf8e5221bcf03ee0 549f3c57a2e9427880e457e653ce1182 +#: 7ad961957f7b49d08e4aff347749b78d c1eab368175c4fa88fe0b471919523b2 +#: e2e0bf9903484972b6d20e6837010029 msgid "None" msgstr "" -#: ../../getting_started/install/deploy.rst:338 -#: bb31e19533e5434b832d5975bf579dc3 +#: ../../getting_started/install/deploy.rst:344 +#: 6b5044a2009f432c92fcd65db42506d8 msgid "" "Prompt template name, now support: zero_shot, vicuna_v1.1,alpaca,llama-2" ",baichuan-chat,internlm-chat, If None, the prompt template is " @@ -438,183 +449,183 @@ msgstr "" "Prompt template 现在可以支持`zero_shot, vicuna_v1.1,alpaca,llama-2,baichuan-" "chat,internlm-chat`, 如果是None, 可以根据模型路径来自动获取模型 Prompt template" -#: ../../getting_started/install/deploy.rst:339 -#: 38ed23e19e2a47cf8fbacb035cfe1292 +#: ../../getting_started/install/deploy.rst:345 +#: e01c860441ad43b88c0a8d012f97d2d8 msgid "llama_cpp_model_path" msgstr "" -#: ../../getting_started/install/deploy.rst:341 -#: eb069fdfdfeb4b17b723ee6733ba50c2 +#: ../../getting_started/install/deploy.rst:347 +#: 1cb68d772e454812a1a0c6de4950b8ce msgid "Model path" msgstr "模型路径" -#: ../../getting_started/install/deploy.rst:342 -#: 348d3ff3e4f44ceb99aadead11f5cca5 +#: ../../getting_started/install/deploy.rst:348 +#: 6dac03820edb4fbd8a0856405e84c5bc msgid "llama_cpp_n_gpu_layers" msgstr "" -#: ../../getting_started/install/deploy.rst:343 -#: 007deecf593b4553b6ca8df3e9240a28 +#: ../../getting_started/install/deploy.rst:349 +#: 8cd5607b7941427f9a342ca7a00e5778 msgid "1000000000" msgstr "" -#: ../../getting_started/install/deploy.rst:344 -#: 4da435c417f0482581895bf4d052a6a1 +#: ../../getting_started/install/deploy.rst:350 +#: 61c9297656da434aa7ac2b49cf61ea9d msgid "" "Number of layers to offload to the GPU, Set this to 1000000000 to offload" " all layers to the GPU. 
If your GPU VRAM is not enough, you can set a low" " number, eg: 10" msgstr "要将多少网络层转移到GPU上,将其设置为1000000000以将所有层转移到GPU上。如果您的 GPU 内存不足,可以设置较低的数字,例如:10。" -#: ../../getting_started/install/deploy.rst:345 -#: 4f186505ea81467590cf817d116e6879 +#: ../../getting_started/install/deploy.rst:351 +#: 8c2d2182557a483aa2fda590c24faaf3 msgid "llama_cpp_n_threads" msgstr "" -#: ../../getting_started/install/deploy.rst:347 -#: b94dc2fb2cd14a4a8f38ba95b05fbb5b +#: ../../getting_started/install/deploy.rst:353 +#: cc442f61ffc442ecbd98c1e7f5598e1a msgid "" "Number of threads to use. If None, the number of threads is automatically" " determined" msgstr "要使用的线程数量。如果为None,则线程数量将自动确定。" -#: ../../getting_started/install/deploy.rst:348 -#: 867f2357430440eba4e749a8a39bff18 +#: ../../getting_started/install/deploy.rst:354 +#: 8d5e917d86f048348106e6923638a0c2 msgid "llama_cpp_n_batch" msgstr "" -#: ../../getting_started/install/deploy.rst:349 -#: 78516f7b23264147bae11e13426097eb +#: ../../getting_started/install/deploy.rst:355 +#: ee2719a0a8cd4a77846cffd8e675638f msgid "512" msgstr "" -#: ../../getting_started/install/deploy.rst:350 -#: c2ec97ffef3e4a70afef6634e78801a2 +#: ../../getting_started/install/deploy.rst:356 +#: 845b354315384762a611ad2daa539d57 msgid "Maximum number of prompt tokens to batch together when calling llama_eval" msgstr "在调用llama_eval时,批处理在一起的prompt tokens的最大数量" -#: ../../getting_started/install/deploy.rst:351 -#: 29ad3c5ab7134da49d6fca3b42d734d6 +#: ../../getting_started/install/deploy.rst:357 +#: a95e788bfa5f46f3bcd6356dfd9f87eb msgid "llama_cpp_n_gqa" msgstr "" -#: ../../getting_started/install/deploy.rst:353 -#: e5bc322b0c824465b1adbd162838a3b7 +#: ../../getting_started/install/deploy.rst:359 +#: 23ad9b5f34b5440bb90b2b21bab25763 msgid "Grouped-query attention. Must be 8 for llama-2 70b." msgstr "对于 llama-2 70B 模型,Grouped-query attention 必须为8。" -#: ../../getting_started/install/deploy.rst:354 -#: 43e7d412238a4f0ba8227938d9fa4172 +#: ../../getting_started/install/deploy.rst:360 +#: 9ce25b7966fc40ec8be47ecfaf5f9994 msgid "llama_cpp_rms_norm_eps" msgstr "" -#: ../../getting_started/install/deploy.rst:355 -#: 6328db04645b4b089593291e2ca13f79 +#: ../../getting_started/install/deploy.rst:361 +#: 58365f0d36af447ba976213646018431 msgid "5e-06" msgstr "" -#: ../../getting_started/install/deploy.rst:356 -#: 6c8bf631cece42fa86954f5cf2d75503 +#: ../../getting_started/install/deploy.rst:362 +#: d00b742a759140b795ba5949f1ce9a36 msgid "5e-6 is a good value for llama-2 models." msgstr "对于llama-2模型来说,5e-6是一个不错的值。" -#: ../../getting_started/install/deploy.rst:357 -#: 22ebee39fe8b4cc18378447cac67e631 +#: ../../getting_started/install/deploy.rst:363 +#: b9972e9b19354f55a5e6d9c50513a620 msgid "llama_cpp_cache_capacity" msgstr "" -#: ../../getting_started/install/deploy.rst:359 -#: 4dc98bede90f4237829f65513e4adf61 +#: ../../getting_started/install/deploy.rst:365 +#: 3c98c5396dd74db8b6d70fc50fa0754f msgid "Maximum cache capacity. Examples: 2000MiB, 2GiB" msgstr "模型缓存最大值. 
例如: 2000MiB, 2GiB" -#: ../../getting_started/install/deploy.rst:360 -#: bead617b5af943dab0bc1209823d3c22 +#: ../../getting_started/install/deploy.rst:366 +#: 4277e155992c4442b69d665d6269bed6 msgid "llama_cpp_prefer_cpu" msgstr "" -#: ../../getting_started/install/deploy.rst:361 -#: 8efb44f1cfc54b1a8b3ca8ea138113ee +#: ../../getting_started/install/deploy.rst:367 +#: 049169c1210a4ecabb25702ed813ea0a msgid "False" msgstr "" -#: ../../getting_started/install/deploy.rst:362 -#: b854e6a44c8348ceb72ac382b73ceec5 +#: ../../getting_started/install/deploy.rst:368 +#: 60a39e93e7874491a93893de78b7d37e msgid "" "If a GPU is available, it will be preferred by default, unless " "prefer_cpu=False is configured." msgstr "如果有可用的GPU,默认情况下会优先使用GPU,除非配置了 prefer_cpu=False。" -#: ../../getting_started/install/deploy.rst:365 -#: 41b0440eb0074e5fa8c87dd404efe0ce +#: ../../getting_started/install/deploy.rst:371 +#: 7c86780fbf634de8873afd439389cf89 msgid "vllm" msgstr "" -#: ../../getting_started/install/deploy.rst:367 -#: 6fc445675bf74b2ea2fe0c7f2a64db69 +#: ../../getting_started/install/deploy.rst:373 +#: e2827892e43d420c85b8b83c4855d197 msgid "vLLM is a fast and easy-to-use library for LLM inference and serving." -msgstr "\"vLLM 是一个快速且易于使用的 LLM 推理和服务的库。" +msgstr "vLLM 是一个快速且易于使用的 LLM 推理和服务的库。" -#: ../../getting_started/install/deploy.rst:369 -#: aab5f913d29445c4b345180b8793ebc7 +#: ../../getting_started/install/deploy.rst:375 +#: 81bbfa3876a74244acc82d295803fdd4 msgid "**Running vLLM**" msgstr "**运行vLLM**" -#: ../../getting_started/install/deploy.rst:371 -#: a9560d697821404cb0abdabf9f479645 +#: ../../getting_started/install/deploy.rst:377 +#: 75bc518b444c417ba4d9c15246549327 msgid "**1.Installing Dependencies**" msgstr "**1.安装依赖**" -#: ../../getting_started/install/deploy.rst:373 -#: b008224d4f314e5c988335262c95a42e +#: ../../getting_started/install/deploy.rst:379 +#: 725c620b0a5045c1a64a3b2a2e9b48f3 msgid "" "vLLM is an optional dependency in DB-GPT, and you can manually install it" " using the following command:" msgstr "vLLM 在 DB-GPT 是一个可选依赖, 你可以使用下面的命令手动安装它:" -#: ../../getting_started/install/deploy.rst:379 -#: 7dc0c8e996124177935a4e0d9ef19837 +#: ../../getting_started/install/deploy.rst:385 +#: 6f4b540107764f3592cc07cf170e4911 msgid "**2.Modifying the Configuration File**" msgstr "**2.修改配置文件**" -#: ../../getting_started/install/deploy.rst:381 -#: 49667576b44c46bf87d5bf4d207dd63a +#: ../../getting_started/install/deploy.rst:387 +#: b8576a1572674c4890e09b73e02cf0e8 msgid "Next, you can directly modify your .env file to enable vllm." 
msgstr "你可以直接修改你的 `.env` 文件" -#: ../../getting_started/install/deploy.rst:388 -#: f16a78f0ee6545babab0d66f12654c0a +#: ../../getting_started/install/deploy.rst:394 +#: b006745f3aee4651aaa0cf79081b5d7f msgid "" "You can view the models supported by vLLM `here " "`_" msgstr "" -"你可以在 " -"[这里](https://vllm.readthedocs.io/en/latest/models/supported_models.html" -"#supported-models) 查看 vLLM 支持的模型。" +"你可以在 `这里 " +"`_ 查看 vLLM 支持的模型。" -#: ../../getting_started/install/deploy.rst:397 -#: f7ae366723e9494d8177eeb963ba0ed9 +#: ../../getting_started/install/deploy.rst:403 +#: bc8057ee75e14737bf8fca3ceb555dac msgid "3.Prepare sql example(Optional)" msgstr "3.准备 sql example(可选)" -#: ../../getting_started/install/deploy.rst:398 -#: d302aac8ddb346598fc9d73e0f6c2cbc +#: ../../getting_started/install/deploy.rst:404 +#: 9b0b9112237c4b3aaa1dd5d704ea32e6 msgid "**(Optional) load examples into SQLite**" -msgstr "**(可选) load examples into SQLite**" +msgstr "**(可选) 加载样例数据到 SQLite 数据库中**" -#: ../../getting_started/install/deploy.rst:405 -#: ae2a315e40854f27a074f2c0f2506014 +#: ../../getting_started/install/deploy.rst:411 +#: 0815e13b96264ffcba1526c82ba2e7c8 msgid "On windows platform:" -msgstr "" +msgstr "在 Windows 平台:" -#: ../../getting_started/install/deploy.rst:412 -#: 36b4b7a813844af3a4ec8062b39059a3 +#: ../../getting_started/install/deploy.rst:418 +#: 577a4167ecac4fa88586961f225f0487 msgid "4.Run db-gpt server" msgstr "4.运行db-gpt server" -#: ../../getting_started/install/deploy.rst:418 -#: e56f15e563484027b7efb2147c9b71a7 +#: ../../getting_started/install/deploy.rst:424 +#: a9f96b064b674f80824257b4b0a18e2a msgid "**Open http://localhost:5000 with your browser to see the product.**" msgstr "打开浏览器访问http://localhost:5000" @@ -633,3 +644,9 @@ msgstr "打开浏览器访问http://localhost:5000" #~ " LLM model." #~ msgstr "" +#~ msgid "百川" +#~ msgstr "" + +#~ msgid "百川 硬件要求" +#~ msgstr "" + diff --git a/docs/locales/zh_CN/LC_MESSAGES/index.po b/docs/locales/zh_CN/LC_MESSAGES/index.po index d210535e7..1badfc0e4 100644 --- a/docs/locales/zh_CN/LC_MESSAGES/index.po +++ b/docs/locales/zh_CN/LC_MESSAGES/index.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: DB-GPT 0.3.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-10-25 23:56+0800\n" +"POT-Creation-Date: 2023-11-06 19:00+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -19,31 +19,27 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.12.1\n" -#: ../../index.rst:34 ../../index.rst:45 71dd3acc56354242aad5a920c2805328 +#: ../../index.rst:34 ../../index.rst:45 8bc3a47457a34995816985436034e233 msgid "Getting Started" msgstr "开始" -#: ../../index.rst:60 ../../index.rst:81 0f2fc16a44b043019556f5f3e0d0e2c0 +#: ../../index.rst:60 ../../index.rst:81 1a4e8a5dc7754967a0af9fb3d2e53017 msgid "Modules" msgstr "模块" -#: ../../index.rst:95 ../../index.rst:111 2624521b920a4b3b9eac3fec76635ab8 -msgid "Use Cases" -msgstr "示例" - -#: ../../index.rst:125 ../../index.rst:128 accec2bb9c5149f184a87e03955d6b22 +#: ../../index.rst:96 ../../index.rst:99 c815772ae8514f0c9b26911b0dd73f54 msgid "Reference" msgstr "参考" -#: ../../index.rst:138 ../../index.rst:144 26278dabd4944d1a9f14330e83935162 +#: ../../index.rst:109 ../../index.rst:115 dabe4c3409df489f84e4ec588f2b34a5 msgid "Resources" msgstr "资源" -#: ../../index.rst:7 9277d505dda74ae0862cd09d05cf5e63 +#: ../../index.rst:7 7626b01b253546ac83ca0cf130dfa091 msgid "Welcome to DB-GPT!" 
msgstr "欢迎来到DB-GPT中文文档" -#: ../../index.rst:8 9fa76a01965746978a00ac411fca13a8 +#: ../../index.rst:8 6037e5e0d7f7428ba92315a91ccfd53f msgid "" "As large models are released and iterated upon, they are becoming " "increasingly intelligent. However, in the process of using large models, " @@ -61,7 +57,7 @@ msgstr "" ",我们启动了DB-" "GPT项目,为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署,既可应用于“独立私有环境”,又可根据业务模块进行“独立部署”和“隔离”,确保“大模型”的能力绝对私有、安全、可控。" -#: ../../index.rst:10 b12b6f91c5664f61aa9e4d7cd500b922 +#: ../../index.rst:10 ab2a181d517047e6992171786c83f8e3 msgid "" "**DB-GPT** is an experimental open-source project that uses localized GPT" " large models to interact with your data and environment. With this " @@ -71,39 +67,39 @@ msgstr "" "DB-GPT 是一个开源的以数据库为基础的GPT实验项目,使用本地化的GPT大模型与您的数据和环境进行交互,无数据泄露风险100% 私密,100%" " 安全。" -#: ../../index.rst:12 7032e17191394f7090141927644fb512 +#: ../../index.rst:12 9cfb7515430d49af8a1ca47f60264a58 msgid "**Features**" msgstr "特性" -#: ../../index.rst:13 5a7a8e5eace34d5f9a4f779bf5122928 +#: ../../index.rst:13 2a1f84e455c84d9ca66c65f92e5b0d78 msgid "" "Currently, we have released multiple key features, which are listed below" " to demonstrate our current capabilities:" msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。" -#: ../../index.rst:15 5d0f67aacb8b4bc893a306ccbd6a3778 +#: ../../index.rst:15 43de30ce92da4c3cbe43ae4e4c9f1869 msgid "SQL language capabilities - SQL generation - SQL diagnosis" msgstr "SQL语言能力 - SQL生成 - SQL诊断" -#: ../../index.rst:19 556eaf756fec431ca5c453208292ab4f +#: ../../index.rst:19 edfeef5284e7426a9e551e782bc5702c msgid "" "Private domain Q&A and data processing - Database knowledge Q&A - Data " "processing" msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理" -#: ../../index.rst:23 5148ad898ec041858eddbeaa646d3f1b +#: ../../index.rst:23 7a42f17049b943f88dd8f17baa440144 msgid "" "Plugins - Support custom plugin execution tasks and natively support the " "Auto-GPT plugin, such as:" msgstr "插件模型 - 支持自定义插件执行任务,并原生支持Auto-GPT插件,例如:* SQL自动执行,获取查询结果 * 自动爬取学习知识" -#: ../../index.rst:26 34c7ff33bc1c401480603a5197ecb1c4 +#: ../../index.rst:26 8b48d7b60bbc439da50a624c4048e6f6 msgid "" "Unified vector storage/indexing of knowledge base - Support for " "unstructured data such as PDF, Markdown, CSV, and WebURL" msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL" -#: ../../index.rst:29 9d7095e5b08249e6bb5c724929537e6c +#: ../../index.rst:29 97df482893924bd18e9a101922e7c374 #, fuzzy msgid "" "Multi LLMs Support - Supports multiple large language models, currently " @@ -111,63 +107,63 @@ msgid "" "codet5p" msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)" -#: ../../index.rst:35 caa368eab40e4efb953865740a3c9018 +#: ../../index.rst:35 1ef26ead30ed4b7fb966c8a17307cdc5 msgid "" "How to get started using DB-GPT to interact with your data and " "environment." 
msgstr "开始使用DB-GPT与您的数据环境进行交互。" -#: ../../index.rst:36 34cecad11f8b4a3e96bfa0a31814e3d2 +#: ../../index.rst:36 3b44ab3576944bf6aa221f35bc051f4e #, fuzzy msgid "`Quickstart Guide <./getting_started/getting_started.html>`_" msgstr "`使用指南 <./getting_started/getting_started.html>`_" -#: ../../index.rst:38 892598cdc16d45c68383033b08b7233f +#: ../../index.rst:38 430cb239cdce42a0b62db46aba3f3bdb msgid "Concepts and terminology" msgstr "相关概念" -#: ../../index.rst:40 887cc43a3a134aba96eb7ca11e5ca86f +#: ../../index.rst:40 ded4d9f80066498e90ba6214520013f7 #, fuzzy msgid "`Concepts and Terminology <./getting_started/concepts.html>`_" msgstr "`相关概念 <./getting_started/concepts.html>`_" -#: ../../index.rst:42 133e25c7dce046b1ab262489ecb60b4a +#: ../../index.rst:42 cd662e53621e474d901146813c750044 msgid "Coming soon..." msgstr "" -#: ../../index.rst:44 a9e0812d32714a6f81ed75aa70f0c20e +#: ../../index.rst:44 15edba57f1de44af8aff76735a2593de msgid "`Tutorials <.getting_started/tutorials.html>`_" msgstr "`教程 <.getting_started/tutorials.html>`_" -#: ../../index.rst:62 4fccfd3082174f58926a9811f39e4d96 +#: ../../index.rst:62 779454b29d8e4e6eb21497025922d1b8 msgid "" "These modules are the core abstractions with which we can interact with " "data and environment smoothly." msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。" -#: ../../index.rst:63 ecac40207ada454e9a68356f575dbca9 +#: ../../index.rst:63 bcd0e8c88c7b4807a91dd442416bec19 msgid "" "It's very important for DB-GPT, DB-GPT also provide standard, extendable " "interfaces." msgstr "DB-GPT还提供了标准的、可扩展的接口。" -#: ../../index.rst:65 9d852bed582449e89dc13312ddf29eed +#: ../../index.rst:65 1e785dc6925045e8ba106cf4a3b17cac msgid "" "The docs for each module contain quickstart examples, how to guides, " "reference docs, and conceptual guides." msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。" -#: ../../index.rst:67 3167446539de449aba2de694fe901bcf +#: ../../index.rst:67 9c9fddd14bfd40339889f5d1f0b04163 msgid "The modules are as follows" msgstr "组成模块如下:" -#: ../../index.rst:69 442683a5f154429da87f452e49bcbb5c +#: ../../index.rst:69 4a19083cadd04b8e8b649a622e0ceccd msgid "" "`LLMs <./modules/llms.html>`_: Supported multi models management and " "integrations." msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " -#: ../../index.rst:71 3cf320b6199e4ce78235bce8b1be60a2 +#: ../../index.rst:71 436a139225574aa5b066a1835d38238d msgid "" "`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and" " serialization for multi database." @@ -175,86 +171,41 @@ msgstr "" "`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt " ",并进行优化,提高系统的响应效率" -#: ../../index.rst:73 4f29ed67ea2a4a3ca824ac8b8b33cae6 +#: ../../index.rst:73 6c53edfb2e494c5fba6efb5ade48c310 msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler." msgstr "`Agent与插件: <./modules/plugins.html>`_:提供Agent和插件机制,使得用户可以自定义并增强系统的行为。" -#: ../../index.rst:75 d651f9d93bb54b898ef97407501cc6cf +#: ../../index.rst:75 6328760e8faf4e8296f3e1edd486316c #, fuzzy msgid "" "`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding," " and search." msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, " -#: ../../index.rst:77 fd6dd2adcd844baa84602b650d89e507 +#: ../../index.rst:77 da272ccf56e3498d92009ac7101b0c45 msgid "" "`Connections <./modules/connections.html>`_: Supported multi databases " "connection. management connections and interact with this." 
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 " -#: ../../index.rst:79 7e388a9d8c044169923508ccdeb2d9a5 +#: ../../index.rst:79 1a0551f62d9d418a9dec267fbcb49af0 #, fuzzy msgid "`Vector <./modules/vector.html>`_: Supported multi vector database." msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " -#: ../../index.rst:97 9d37bf061a784d5ca92d1de33b0834f3 -msgid "Best Practices and built-in implementations for common DB-GPT use cases:" -msgstr "DB-GPT用例的最佳实践和内置方法:" - -#: ../../index.rst:99 ba264bbe31d24c7887a30cbd5442e157 -msgid "" -"`Sql generation and diagnosis " -"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and " -"diagnosis." -msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。" - -#: ../../index.rst:101 1166f6aeba064a3990d4b0caa87db274 -msgid "" -"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A " -"important scene for user to chat with database documents, codes, bugs and" -" schemas." -msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\"" - -#: ../../index.rst:103 b32610ada3a0440e9b029b8dffe7c79e -msgid "" -"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use" -" multi models to chat." -msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话" - -#: ../../index.rst:105 d9348b8112df4839ab14a74a42b63715 -msgid "" -"`Querying Database Data <./use_cases/query_database_data.html>`_: Query " -"and Analysis data from databases and give charts." -msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。" - -#: ../../index.rst:107 ef978dc1f4254e5eb4ca487c31c03f7c -msgid "" -"`Interacting with apis <./use_cases/interacting_with_api.html>`_: " -"Interact with apis, such as create a table, deploy a database cluster, " -"create a database and so on." -msgstr "" -"`API交互 <./use_cases/interacting_with_api.html>`_: " -"与API交互,例如创建表、部署数据库集群、创建数据库等。" - -#: ../../index.rst:109 49a549d7d38f493ba48e162785b4ac5d -msgid "" -"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to" -" Plugin use tools to manage databases autonomoly." -msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。" - -#: ../../index.rst:126 db86500484fa4f14918b0ad4e5a7326d +#: ../../index.rst:97 9aceee0dbe1e4f7da499ac6aab23aea2 msgid "" "Full documentation on all methods, classes, installation methods, and " "integration setups for DB-GPT." msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。" -#: ../../index.rst:140 a44abbb370a841658801bb2729fa62c9 +#: ../../index.rst:111 c9a729f4e1964894bae215793647ab75 msgid "" "Additional resources we think may be useful as you develop your " "application!" msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”" -#: ../../index.rst:142 4d0da8471db240dba842949b6796be7a +#: ../../index.rst:113 06e6e4b7776c405fa94ae7b59253162d msgid "" "`Discord `_: if your have some problem or " "ideas, you can talk from discord." @@ -272,3 +223,58 @@ msgstr "`Discord `_:如果您有任何问题,可 #~ msgid "Guides for how other companies/products can be used with DB-GPT" #~ msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南" +#~ msgid "Use Cases" +#~ msgstr "示例" + +#~ msgid "" +#~ "Best Practices and built-in " +#~ "implementations for common DB-GPT use" +#~ " cases:" +#~ msgstr "DB-GPT用例的最佳实践和内置方法:" + +#~ msgid "" +#~ "`Sql generation and diagnosis " +#~ "<./use_cases/sql_generation_and_diagnosis.html>`_: SQL " +#~ "generation and diagnosis." 
+#~ msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。" + +#~ msgid "" +#~ "`knownledge Based QA " +#~ "<./use_cases/knownledge_based_qa.html>`_: A important " +#~ "scene for user to chat with " +#~ "database documents, codes, bugs and " +#~ "schemas." +#~ msgstr "" +#~ "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: " +#~ "用户与数据库文档、代码和bug聊天的重要场景\"" + +#~ msgid "" +#~ "`Chatbots <./use_cases/chatbots.html>`_: Language " +#~ "model love to chat, use multi " +#~ "models to chat." +#~ msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话" + +#~ msgid "" +#~ "`Querying Database Data " +#~ "<./use_cases/query_database_data.html>`_: Query and " +#~ "Analysis data from databases and give" +#~ " charts." +#~ msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。" + +#~ msgid "" +#~ "`Interacting with apis " +#~ "<./use_cases/interacting_with_api.html>`_: Interact with" +#~ " apis, such as create a table, " +#~ "deploy a database cluster, create a " +#~ "database and so on." +#~ msgstr "" +#~ "`API交互 <./use_cases/interacting_with_api.html>`_: " +#~ "与API交互,例如创建表、部署数据库集群、创建数据库等。" + +#~ msgid "" +#~ "`Tool use with plugins " +#~ "<./use_cases/tool_use_with_plugin>`_: According to " +#~ "Plugin use tools to manage databases " +#~ "autonomoly." +#~ msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。" + diff --git a/pilot/base_modules/agent/plugins_util.py b/pilot/base_modules/agent/plugins_util.py index cf8fb8df3..b5f12c5eb 100644 --- a/pilot/base_modules/agent/plugins_util.py +++ b/pilot/base_modules/agent/plugins_util.py @@ -111,7 +111,7 @@ def load_native_plugins(cfg: Config): print("save file") cfg.set_plugins(scan_plugins(cfg.debug_mode)) else: - print("get file faild,response code:", response.status_code) + print("get file failed,response code:", response.status_code) except Exception as e: print("load plugin from git exception!" 
+ str(e)) diff --git a/pilot/connections/rdbms/conn_clickhouse.py b/pilot/connections/rdbms/conn_clickhouse.py index 0433b4b76..b2762556a 100644 --- a/pilot/connections/rdbms/conn_clickhouse.py +++ b/pilot/connections/rdbms/conn_clickhouse.py @@ -106,3 +106,13 @@ class ClickhouseConnect(RDBMSDatabase): return [ (table_comment[0], table_comment[1]) for table_comment in table_comments ] + + def table_simple_info(self): + # group_concat() not supported in clickhouse, use arrayStringConcat+groupArray instead; and quotes need to be escaped + _sql = f""" + select concat(TABLE_NAME, \'(\' , arrayStringConcat(groupArray(column_name),\'-\'), \')\') as schema_info + from information_schema.COLUMNS where table_schema=\'{self.get_current_db_name()}\' group by TABLE_NAME; """ + + cursor = self.session.execute(text(_sql)) + results = cursor.fetchall() + return results diff --git a/pilot/model/cluster/worker/manager.py b/pilot/model/cluster/worker/manager.py index 2dcfb086e..d67519f59 100644 --- a/pilot/model/cluster/worker/manager.py +++ b/pilot/model/cluster/worker/manager.py @@ -1021,6 +1021,7 @@ def run_worker_manager( system_app, os.path.join(LOGDIR, worker_params.tracer_file), root_operation_name="DB-GPT-WorkerManager-Entry", + tracer_storage_cls=worker_params.tracer_storage_cls, ) _start_local_worker(worker_manager, worker_params) diff --git a/pilot/model/parameter.py b/pilot/model/parameter.py index e21de1c42..79558e02c 100644 --- a/pilot/model/parameter.py +++ b/pilot/model/parameter.py @@ -88,6 +88,12 @@ class ModelControllerParameters(BaseParameters): "help": "The filename to store tracer span records", }, ) + tracer_storage_cls: Optional[str] = field( + default=None, + metadata={ + "help": "The storage class to storage tracer span records", + }, + ) @dataclass @@ -138,6 +144,12 @@ class ModelAPIServerParameters(BaseParameters): "help": "The filename to store tracer span records", }, ) + tracer_storage_cls: Optional[str] = field( + default=None, + metadata={ + "help": "The storage class to storage tracer span records", + }, + ) @dataclass @@ -226,6 +238,12 @@ class ModelWorkerParameters(BaseModelParameters): "help": "The filename to store tracer span records", }, ) + tracer_storage_cls: Optional[str] = field( + default=None, + metadata={ + "help": "The storage class to storage tracer span records", + }, + ) @dataclass diff --git a/pilot/model/proxy/llms/tongyi.py b/pilot/model/proxy/llms/tongyi.py index fb826e49c..e101db47e 100644 --- a/pilot/model/proxy/llms/tongyi.py +++ b/pilot/model/proxy/llms/tongyi.py @@ -36,7 +36,10 @@ def tongyi_generate_stream( if message.role == ModelMessageRoleType.HUMAN: history.append({"role": "user", "content": message.content}) for message in messages: - if message.role == ModelMessageRoleType.SYSTEM: + if ( + message.role == ModelMessageRoleType.SYSTEM + or message.role == ModelMessageRoleType.HUMAN + ): history.append({"role": "user", "content": message.content}) # elif message.role == ModelMessageRoleType.HUMAN: # history.append({"role": "user", "content": message.content}) @@ -45,17 +48,24 @@ def tongyi_generate_stream( else: pass - # temp_his = history[::-1] - temp_his = history + temp_his = history[::-1] last_user_input = None for m in temp_his: if m["role"] == "user": last_user_input = m break - if last_user_input: + temp_his = history + prompt_input = None + for m in temp_his: + if m["role"] == "user": + prompt_input = m + break + + if last_user_input and prompt_input and last_user_input != prompt_input: history.remove(last_user_input) - 
history.append(last_user_input) + history.remove(prompt_input) + history.append(prompt_input) gen = Generation() res = gen.call( diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py index 23cba7948..d33bd1df1 100644 --- a/pilot/openapi/api_v1/api_v1.py +++ b/pilot/openapi/api_v1/api_v1.py @@ -172,7 +172,7 @@ async def test_connect(db_config: DBConfig = Body()): CFG.LOCAL_DB_MANAGE.test_connect(db_config) return Result.succ(True) except Exception as e: - return Result.faild(code="E1001", msg=str(e)) + return Result.failed(code="E1001", msg=str(e)) @router.post("/v1/chat/db/summary", response_model=Result[bool]) @@ -305,7 +305,7 @@ async def params_load( return Result.succ(get_hist_messages(conv_uid)) except Exception as e: logger.error("excel load error!", e) - return Result.faild(code="E000X", msg=f"File Load Error {e}") + return Result.failed(code="E000X", msg=f"File Load Error {e}") @router.post("/v1/chat/dialogue/delete") @@ -352,7 +352,7 @@ async def get_chat_instance(dialogue: ConversationVo = Body()) -> BaseChat: if not ChatScene.is_valid_mode(dialogue.chat_mode): raise StopAsyncIteration( - Result.faild("Unsupported Chat Mode," + dialogue.chat_mode + "!") + Result.failed("Unsupported Chat Mode," + dialogue.chat_mode + "!") ) chat_param = { @@ -430,7 +430,7 @@ async def model_types(controller: BaseModelController = Depends(get_model_contro return Result.succ(list(types)) except Exception as e: - return Result.faild(code="E000X", msg=f"controller model types error {e}") + return Result.failed(code="E000X", msg=f"controller model types error {e}") @router.get("/v1/model/supports") @@ -440,7 +440,7 @@ async def model_supports(worker_manager: WorkerManager = Depends(get_worker_mana models = await worker_manager.supported_models() return Result.succ(FlatSupportedModel.from_supports(models)) except Exception as e: - return Result.faild(code="E000X", msg=f"Fetch supportd models error {e}") + return Result.failed(code="E000X", msg=f"Fetch supportd models error {e}") async def no_stream_generator(chat): diff --git a/pilot/openapi/api_v1/editor/api_editor_v1.py b/pilot/openapi/api_v1/editor/api_editor_v1.py index e41998942..86b98c9a0 100644 --- a/pilot/openapi/api_v1/editor/api_editor_v1.py +++ b/pilot/openapi/api_v1/editor/api_editor_v1.py @@ -107,7 +107,7 @@ async def get_editor_sql(con_uid: str, round: int): .replace("\n", " ") ) return Result.succ(json.loads(context)) - return Result.faild(msg="not have sql!") + return Result.failed(msg="not have sql!") @router.post("/v1/editor/sql/run", response_model=Result[SqlRunData]) @@ -116,7 +116,7 @@ async def editor_sql_run(run_param: dict = Body()): db_name = run_param["db_name"] sql = run_param["sql"] if not db_name and not sql: - return Result.faild("SQL run param error!") + return Result.failed("SQL run param error!") conn = CFG.LOCAL_DB_MANAGE.get_connect(db_name) try: @@ -169,7 +169,7 @@ async def sql_editor_submit(sql_edit_context: ChatSqlEditContext = Body()): ) history_mem.update(history_messages) return Result.succ(None) - return Result.faild(msg="Edit Faild!") + return Result.failed(msg="Edit Failed!") @router.get("/v1/editor/chart/list", response_model=Result[ChartList]) @@ -191,7 +191,7 @@ async def get_editor_chart_list(con_uid: str): charts=json.loads(element["data"]["content"]), ) return Result.succ(chart_list) - return Result.faild(msg="Not have charts!") + return Result.failed(msg="Not have charts!") @router.post("/v1/editor/chart/info", response_model=Result[ChartDetail]) @@ -210,7 +210,7 @@ async def 
get_editor_chart_info(param: dict = Body()): logger.error( "this dashboard dialogue version too old, can't support editor!" ) - return Result.faild( + return Result.failed( msg="this dashboard dialogue version too old, can't support editor!" ) for element in last_round["messages"]: @@ -234,7 +234,7 @@ async def get_editor_chart_info(param: dict = Body()): ) return Result.succ(detail) - return Result.faild(msg="Can't Find Chart Detail Info!") + return Result.failed(msg="Can't Find Chart Detail Info!") @router.post("/v1/editor/chart/run", response_model=Result[ChartRunData]) @@ -244,7 +244,7 @@ async def editor_chart_run(run_param: dict = Body()): sql = run_param["sql"] chart_type = run_param["chart_type"] if not db_name and not sql: - return Result.faild("SQL run param error!") + return Result.failed("SQL run param error!") try: dashboard_data_loader: DashboardDataLoader = DashboardDataLoader() db_conn = CFG.LOCAL_DB_MANAGE.get_connect(db_name) @@ -334,7 +334,7 @@ async def chart_editor_submit(chart_edit_context: ChatChartEditContext = Body()) ) except Exception as e: logger.error(f"edit chart exception!{str(e)}", e) - return Result.faild(msg=f"Edit chart exception!{str(e)}") + return Result.failed(msg=f"Edit chart exception!{str(e)}") history_mem.update(history_messages) return Result.succ(None) - return Result.faild(msg="Edit Faild!") + return Result.failed(msg="Edit Failed!") diff --git a/pilot/openapi/api_view_model.py b/pilot/openapi/api_view_model.py index 60065f2f2..af1aa4b9c 100644 --- a/pilot/openapi/api_view_model.py +++ b/pilot/openapi/api_view_model.py @@ -17,11 +17,11 @@ class Result(Generic[T], BaseModel): return Result(success=True, err_code=None, err_msg=None, data=data) @classmethod - def faild(cls, msg): + def failed(cls, msg): return Result(success=False, err_code="E000X", err_msg=msg, data=None) @classmethod - def faild(cls, code, msg): + def failed(cls, code, msg): return Result(success=False, err_code=code, err_msg=msg, data=None) diff --git a/pilot/openapi/base.py b/pilot/openapi/base.py index 506254ec7..d8c814787 100644 --- a/pilot/openapi/base.py +++ b/pilot/openapi/base.py @@ -7,4 +7,4 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE message = "" for error in exc.errors(): message += ".".join(error.get("loc")) + ":" + error.get("msg") + ";" - return Result.faild(code="E0001", msg=message) + return Result.failed(code="E0001", msg=message) diff --git a/pilot/scene/base_chat.py b/pilot/scene/base_chat.py index 34f294c31..24ec1c928 100644 --- a/pilot/scene/base_chat.py +++ b/pilot/scene/base_chat.py @@ -13,6 +13,7 @@ from pilot.scene.base_message import ModelMessage, ModelMessageRoleType from pilot.scene.message import OnceConversation from pilot.utils import get_or_create_event_loop from pilot.utils.executor_utils import ExecutorFactory, blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace from pydantic import Extra from pilot.memory.chat_history.chat_hisotry_factory import ChatHistory @@ -38,6 +39,7 @@ class BaseChat(ABC): arbitrary_types_allowed = True + @trace("BaseChat.__init__") def __init__(self, chat_param: Dict): """Chat Module Initialization Args: @@ -143,7 +145,14 @@ class BaseChat(ABC): ) self.current_message.tokens = 0 if self.prompt_template.template: - current_prompt = self.prompt_template.format(**input_values) + metadata = { + "template_scene": self.prompt_template.template_scene, + "input_values": input_values, + } + with root_tracer.start_span( + "BaseChat.__call_base.prompt_template.format", 
metadata=metadata + ): + current_prompt = self.prompt_template.format(**input_values) self.current_message.add_system_message(current_prompt) llm_messages = self.generate_llm_messages() @@ -175,6 +184,14 @@ class BaseChat(ABC): except StopAsyncIteration: return True # 迭代器已经执行结束 + def _get_span_metadata(self, payload: Dict) -> Dict: + metadata = {k: v for k, v in payload.items()} + del metadata["prompt"] + metadata["messages"] = list( + map(lambda m: m if isinstance(m, dict) else m.dict(), metadata["messages"]) + ) + return metadata + async def stream_call(self): # TODO Retry when server connection error payload = await self.__call_base() @@ -182,6 +199,10 @@ class BaseChat(ABC): self.skip_echo_len = len(payload.get("prompt").replace("", " ")) + 11 logger.info(f"Requert: \n{payload}") ai_response_text = "" + span = root_tracer.start_span( + "BaseChat.stream_call", metadata=self._get_span_metadata(payload) + ) + payload["span_id"] = span.span_id try: from pilot.model.cluster import WorkerManagerFactory @@ -199,19 +220,25 @@ class BaseChat(ABC): self.current_message.add_ai_message(msg) view_msg = self.knowledge_reference_call(msg) self.current_message.add_view_message(view_msg) + span.end() except Exception as e: print(traceback.format_exc()) - logger.error("model response parase faild!" + str(e)) + logger.error("model response parase failed!" + str(e)) self.current_message.add_view_message( f"""ERROR!{str(e)}\n {ai_response_text} """ ) ### store current conversation + span.end(metadata={"error": str(e)}) self.memory.append(self.current_message) async def nostream_call(self): payload = await self.__call_base() logger.info(f"Request: \n{payload}") ai_response_text = "" + span = root_tracer.start_span( + "BaseChat.nostream_call", metadata=self._get_span_metadata(payload) + ) + payload["span_id"] = span.span_id try: from pilot.model.cluster import WorkerManagerFactory @@ -219,7 +246,8 @@ class BaseChat(ABC): ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory ).create() - model_output = await worker_manager.generate(payload) + with root_tracer.start_span("BaseChat.invoke_worker_manager.generate"): + model_output = await worker_manager.generate(payload) ### output parse ai_response_text = ( @@ -234,11 +262,18 @@ class BaseChat(ABC): ai_response_text ) ) - ### run - # result = self.do_action(prompt_define_response) - result = await blocking_func_to_async( - self._executor, self.do_action, prompt_define_response - ) + metadata = { + "model_output": model_output.to_dict(), + "ai_response_text": ai_response_text, + "prompt_define_response": self._parse_prompt_define_response( + prompt_define_response + ), + } + with root_tracer.start_span("BaseChat.do_action", metadata=metadata): + ### run + result = await blocking_func_to_async( + self._executor, self.do_action, prompt_define_response + ) ### llm speaker speak_to_user = self.get_llm_speak(prompt_define_response) @@ -255,12 +290,14 @@ class BaseChat(ABC): view_message = view_message.replace("\n", "\\n") self.current_message.add_view_message(view_message) + span.end() except Exception as e: print(traceback.format_exc()) logger.error("model response parase faild!" 
+ str(e)) self.current_message.add_view_message( f"""ERROR!{str(e)}\n {ai_response_text} """ ) + span.end(metadata={"error": str(e)}) ### store dialogue self.memory.append(self.current_message) return self.current_ai_response() @@ -345,7 +382,7 @@ class BaseChat(ABC): text += self.__load_example_messages() ### Load History - text += self.__load_histroy_messages() + text += self.__load_history_messages() ### Load User Input text += self.__load_user_message() @@ -371,7 +408,7 @@ class BaseChat(ABC): messages += self.__load_example_messages(str_message=False) ### Load History - messages += self.__load_histroy_messages(str_message=False) + messages += self.__load_history_messages(str_message=False) ### Load User Input messages += self.__load_user_message(str_message=False) @@ -427,7 +464,7 @@ class BaseChat(ABC): ) return example_text if str_message else example_messages - def __load_histroy_messages(self, str_message: bool = True): + def __load_history_messages(self, str_message: bool = True): history_text = "" history_messages = [] if self.prompt_template.need_historical_messages: @@ -513,3 +550,21 @@ class BaseChat(ABC): """ pass + + def _parse_prompt_define_response(self, prompt_define_response: Any) -> Any: + if not prompt_define_response: + return "" + if isinstance(prompt_define_response, str) or isinstance( + prompt_define_response, dict + ): + return prompt_define_response + if isinstance(prompt_define_response, tuple): + if hasattr(prompt_define_response, "_asdict"): + # namedtuple + return prompt_define_response._asdict() + else: + return dict( + zip(range(len(prompt_define_response)), prompt_define_response) + ) + else: + return prompt_define_response diff --git a/pilot/scene/chat_agent/chat.py b/pilot/scene/chat_agent/chat.py index d9a8f60c1..81af1b3b1 100644 --- a/pilot/scene/chat_agent/chat.py +++ b/pilot/scene/chat_agent/chat.py @@ -11,6 +11,7 @@ from pilot.common.string_utils import extract_content from .prompt import prompt from pilot.component import ComponentType from pilot.base_modules.agent.controller import ModuleAgent +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -51,6 +52,7 @@ class ChatAgent(BaseChat): self.api_call = ApiCall(plugin_generator=self.plugins_prompt_generator) + @trace() async def generate_input_values(self) -> Dict[str, str]: input_values = { "user_goal": self.current_user_input, @@ -63,7 +65,10 @@ class ChatAgent(BaseChat): def stream_plugin_call(self, text): text = text.replace("\n", " ") - return self.api_call.run(text) + with root_tracer.start_span( + "ChatAgent.stream_plugin_call.api_call", metadata={"text": text} + ): + return self.api_call.run(text) def __list_to_prompt_str(self, list: List) -> str: return "\n".join(f"{i + 1 + 1}. 
{item}" for i, item in enumerate(list)) diff --git a/pilot/scene/chat_dashboard/chat.py b/pilot/scene/chat_dashboard/chat.py index 211aa7c04..6771fb3fc 100644 --- a/pilot/scene/chat_dashboard/chat.py +++ b/pilot/scene/chat_dashboard/chat.py @@ -13,6 +13,7 @@ from pilot.scene.chat_dashboard.data_preparation.report_schma import ( from pilot.scene.chat_dashboard.prompt import prompt from pilot.scene.chat_dashboard.data_loader import DashboardDataLoader from pilot.utils.executor_utils import blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -53,6 +54,7 @@ class ChatDashboard(BaseChat): data = f.read() return json.loads(data) + @trace() async def generate_input_values(self) -> Dict: try: from pilot.summary.db_summary_client import DBSummaryClient diff --git a/pilot/scene/chat_dashboard/data_loader.py b/pilot/scene/chat_dashboard/data_loader.py index faabe542a..970fc92dd 100644 --- a/pilot/scene/chat_dashboard/data_loader.py +++ b/pilot/scene/chat_dashboard/data_loader.py @@ -52,8 +52,8 @@ class DashboardDataLoader: values.append(value_item) return field_names, values except Exception as e: - logger.debug("Prepare Chart Data Faild!" + str(e)) - raise ValueError("Prepare Chart Data Faild!") + logger.debug("Prepare Chart Data Failed!" + str(e)) + raise ValueError("Prepare Chart Data Failed!") def get_chart_values_by_db(self, db_name: str, chart_sql: str): logger.info(f"get_chart_values_by_db:{db_name},{chart_sql}") diff --git a/pilot/scene/chat_dashboard/prompt.py b/pilot/scene/chat_dashboard/prompt.py index 9fed97f8f..7f1dd4090 100644 --- a/pilot/scene/chat_dashboard/prompt.py +++ b/pilot/scene/chat_dashboard/prompt.py @@ -42,7 +42,7 @@ RESPONSE_FORMAT = [ PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = False +PROMPT_NEED_STREAM_OUT = False prompt = PromptTemplate( template_scene=ChatScene.ChatDashboard.value(), @@ -50,9 +50,9 @@ prompt = PromptTemplate( response_format=json.dumps(RESPONSE_FORMAT, indent=4), template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=ChatDashboardOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) CFG.prompt_template_registry.register(prompt, is_default=True) diff --git a/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py b/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py index 064e7586c..fefc8142c 100644 --- a/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py +++ b/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py @@ -14,6 +14,7 @@ from pilot.scene.chat_data.chat_excel.excel_learning.chat import ExcelLearning from pilot.common.path_utils import has_path from pilot.configs.model_config import LLM_MODEL_CONFIG, KNOWLEDGE_UPLOAD_ROOT_PATH from pilot.base_modules.agent.common.schema import Status +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -62,6 +63,7 @@ class ChatExcel(BaseChat): # ] return "\n".join(f"{i+1}. 
{item}" for i, item in enumerate(command_strings)) + @trace() async def generate_input_values(self) -> Dict: input_values = { "user_input": self.current_user_input, @@ -88,4 +90,9 @@ class ChatExcel(BaseChat): def stream_plugin_call(self, text): text = text.replace("\n", " ") - return self.api_call.run_display_sql(text, self.excel_reader.get_df_by_sql_ex) + with root_tracer.start_span( + "ChatExcel.stream_plugin_call.run_display_sql", metadata={"text": text} + ): + return self.api_call.run_display_sql( + text, self.excel_reader.get_df_by_sql_ex + ) diff --git a/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py b/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py index 23c86bd4d..c1dfdfee3 100644 --- a/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py +++ b/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py @@ -51,7 +51,7 @@ _PROMPT_SCENE_DEFINE = ( PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = True +PROMPT_NEED_STREAM_OUT = True # Temperature is a configuration hyperparameter that controls the randomness of language model output. # A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output. @@ -63,9 +63,9 @@ prompt = PromptTemplate( input_variables=["user_input", "table_name", "disply_type"], template_define=_PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=ChatExcelOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), need_historical_messages=True, # example_selector=sql_data_example, diff --git a/pilot/scene/chat_data/chat_excel/excel_learning/chat.py b/pilot/scene/chat_data/chat_excel/excel_learning/chat.py index f05221eba..7d1730ad0 100644 --- a/pilot/scene/chat_data/chat_excel/excel_learning/chat.py +++ b/pilot/scene/chat_data/chat_excel/excel_learning/chat.py @@ -13,6 +13,7 @@ from pilot.scene.chat_data.chat_excel.excel_learning.prompt import prompt from pilot.scene.chat_data.chat_excel.excel_reader import ExcelReader from pilot.json_utils.utilities import DateTimeEncoder from pilot.utils.executor_utils import blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -44,6 +45,7 @@ class ExcelLearning(BaseChat): if parent_mode: self.current_message.chat_mode = parent_mode.value() + @trace() async def generate_input_values(self) -> Dict: # colunms, datas = self.excel_reader.get_sample_data() colunms, datas = await blocking_func_to_async( diff --git a/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py b/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py index aefd96a71..df17aec6b 100644 --- a/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py +++ b/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py @@ -67,7 +67,7 @@ PROMPT_SCENE_DEFINE = ( PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = False +PROMPT_NEED_STREAM_OUT = False # Temperature is a configuration hyperparameter that controls the randomness of language model output. # A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output. 
@@ -80,9 +80,9 @@ prompt = PromptTemplate( response_format=json.dumps(RESPONSE_FORMAT_SIMPLE, ensure_ascii=False, indent=4), template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=LearningExcelOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), # example_selector=sql_data_example, temperature=PROMPT_TEMPERATURE, diff --git a/pilot/scene/chat_db/auto_execute/chat.py b/pilot/scene/chat_db/auto_execute/chat.py index d9b901772..4d4bf3c0c 100644 --- a/pilot/scene/chat_db/auto_execute/chat.py +++ b/pilot/scene/chat_db/auto_execute/chat.py @@ -6,6 +6,7 @@ from pilot.common.sql_database import Database from pilot.configs.config import Config from pilot.scene.chat_db.auto_execute.prompt import prompt from pilot.utils.executor_utils import blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -35,10 +36,13 @@ class ChatWithDbAutoExecute(BaseChat): raise ValueError( f"{ChatScene.ChatWithDbExecute.value} mode should chose db!" ) - - self.database = CFG.LOCAL_DB_MANAGE.get_connect(self.db_name) + with root_tracer.start_span( + "ChatWithDbAutoExecute.get_connect", metadata={"db_name": self.db_name} + ): + self.database = CFG.LOCAL_DB_MANAGE.get_connect(self.db_name) self.top_k: int = 200 + @trace() async def generate_input_values(self) -> Dict: """ generate input values @@ -55,13 +59,14 @@ class ChatWithDbAutoExecute(BaseChat): # query=self.current_user_input, # topk=CFG.KNOWLEDGE_SEARCH_TOP_SIZE, # ) - table_infos = await blocking_func_to_async( - self._executor, - client.get_db_summary, - self.db_name, - self.current_user_input, - CFG.KNOWLEDGE_SEARCH_TOP_SIZE, - ) + with root_tracer.start_span("ChatWithDbAutoExecute.get_db_summary"): + table_infos = await blocking_func_to_async( + self._executor, + client.get_db_summary, + self.db_name, + self.current_user_input, + CFG.KNOWLEDGE_SEARCH_TOP_SIZE, + ) except Exception as e: print("db summary find error!" + str(e)) if not table_infos: @@ -80,4 +85,8 @@ class ChatWithDbAutoExecute(BaseChat): def do_action(self, prompt_response): print(f"do_action:{prompt_response}") - return self.database.run(prompt_response.sql) + with root_tracer.start_span( + "ChatWithDbAutoExecute.do_action.run_sql", + metadata=prompt_response.to_dict(), + ): + return self.database.run(prompt_response.sql) diff --git a/pilot/scene/chat_db/auto_execute/out_parser.py b/pilot/scene/chat_db/auto_execute/out_parser.py index 577cac1ef..e583d945a 100644 --- a/pilot/scene/chat_db/auto_execute/out_parser.py +++ b/pilot/scene/chat_db/auto_execute/out_parser.py @@ -12,6 +12,9 @@ class SqlAction(NamedTuple): sql: str thoughts: Dict + def to_dict(self) -> Dict[str, Dict]: + return {"sql": self.sql, "thoughts": self.thoughts} + logger = logging.getLogger(__name__) diff --git a/pilot/scene/chat_db/auto_execute/prompt.py b/pilot/scene/chat_db/auto_execute/prompt.py index abc889cec..9b4bcb6a5 100644 --- a/pilot/scene/chat_db/auto_execute/prompt.py +++ b/pilot/scene/chat_db/auto_execute/prompt.py @@ -33,7 +33,7 @@ RESPONSE_FORMAT_SIMPLE = { PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = False +PROMPT_NEED_STREAM_OUT = False # Temperature is a configuration hyperparameter that controls the randomness of language model output. 
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output. @@ -46,9 +46,9 @@ prompt = PromptTemplate( response_format=json.dumps(RESPONSE_FORMAT_SIMPLE, ensure_ascii=False, indent=4), template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=DbChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), # example_selector=sql_data_example, temperature=PROMPT_TEMPERATURE, diff --git a/pilot/scene/chat_db/auto_execute/prompt_baichuan.py b/pilot/scene/chat_db/auto_execute/prompt_baichuan.py index 95aa962fa..4888cbd7f 100644 --- a/pilot/scene/chat_db/auto_execute/prompt_baichuan.py +++ b/pilot/scene/chat_db/auto_execute/prompt_baichuan.py @@ -36,7 +36,7 @@ RESPONSE_FORMAT_SIMPLE = { PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = False +PROMPT_NEED_STREAM_OUT = False # Temperature is a configuration hyperparameter that controls the randomness of language model output. # A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output. @@ -50,9 +50,9 @@ prompt = PromptTemplate( template_is_strict=False, template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=DbChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), # example_selector=sql_data_example, temperature=PROMPT_TEMPERATURE, diff --git a/pilot/scene/chat_db/professional_qa/chat.py b/pilot/scene/chat_db/professional_qa/chat.py index 5ae76d37d..fde28d91b 100644 --- a/pilot/scene/chat_db/professional_qa/chat.py +++ b/pilot/scene/chat_db/professional_qa/chat.py @@ -6,6 +6,7 @@ from pilot.common.sql_database import Database from pilot.configs.config import Config from pilot.scene.chat_db.professional_qa.prompt import prompt from pilot.utils.executor_utils import blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -39,6 +40,7 @@ class ChatWithDbQA(BaseChat): else len(self.tables) ) + @trace() async def generate_input_values(self) -> Dict: table_info = "" dialect = "mysql" diff --git a/pilot/scene/chat_db/professional_qa/prompt.py b/pilot/scene/chat_db/professional_qa/prompt.py index ca4110398..c84f2eb7a 100644 --- a/pilot/scene/chat_db/professional_qa/prompt.py +++ b/pilot/scene/chat_db/professional_qa/prompt.py @@ -54,7 +54,7 @@ _DEFAULT_TEMPLATE = ( PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = True +PROMPT_NEED_STREAM_OUT = True prompt = PromptTemplate( template_scene=ChatScene.ChatWithDbQA.value(), @@ -62,9 +62,9 @@ prompt = PromptTemplate( response_format=None, template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=NormalChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) diff --git a/pilot/scene/chat_execution/chat.py b/pilot/scene/chat_execution/chat.py index bdd78d7b7..2615918ff 100644 --- a/pilot/scene/chat_execution/chat.py +++ b/pilot/scene/chat_execution/chat.py @@ -6,6 +6,7 @@ from pilot.configs.config import Config from 
pilot.base_modules.agent.commands.command import execute_command from pilot.base_modules.agent import PluginPromptGenerator from .prompt import prompt +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -50,6 +51,7 @@ class ChatWithPlugin(BaseChat): self.plugins_prompt_generator ) + @trace() async def generate_input_values(self) -> Dict: input_values = { "input": self.current_user_input, diff --git a/pilot/scene/chat_knowledge/inner_db_summary/chat.py b/pilot/scene/chat_knowledge/inner_db_summary/chat.py index 07a64aea9..f7c81bd77 100644 --- a/pilot/scene/chat_knowledge/inner_db_summary/chat.py +++ b/pilot/scene/chat_knowledge/inner_db_summary/chat.py @@ -4,6 +4,7 @@ from pilot.scene.base import ChatScene from pilot.configs.config import Config from pilot.scene.chat_knowledge.inner_db_summary.prompt import prompt +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -31,6 +32,7 @@ class InnerChatDBSummary(BaseChat): self.db_input = db_select self.db_summary = db_summary + @trace() async def generate_input_values(self) -> Dict: input_values = { "db_input": self.db_input, diff --git a/pilot/scene/chat_knowledge/inner_db_summary/prompt.py b/pilot/scene/chat_knowledge/inner_db_summary/prompt.py index 924fab2c6..3f81906c0 100644 --- a/pilot/scene/chat_knowledge/inner_db_summary/prompt.py +++ b/pilot/scene/chat_knowledge/inner_db_summary/prompt.py @@ -33,7 +33,7 @@ RESPONSE_FORMAT = {"table": ["orders", "products"]} PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = False +PROMPT_NEED_STREAM_OUT = False prompt = PromptTemplate( template_scene=ChatScene.InnerChatDBSummary.value(), @@ -41,9 +41,9 @@ prompt = PromptTemplate( response_format=json.dumps(RESPONSE_FORMAT, indent=4), template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE + PROMPT_RESPONSE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=NormalChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) diff --git a/pilot/scene/chat_knowledge/v1/chat.py b/pilot/scene/chat_knowledge/v1/chat.py index d57b32b25..a9c63b268 100644 --- a/pilot/scene/chat_knowledge/v1/chat.py +++ b/pilot/scene/chat_knowledge/v1/chat.py @@ -15,6 +15,7 @@ from pilot.configs.model_config import ( from pilot.scene.chat_knowledge.v1.prompt import prompt from pilot.server.knowledge.service import KnowledgeService from pilot.utils.executor_utils import blocking_func_to_async +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -92,6 +93,7 @@ class ChatKnowledge(BaseChat): """return reference""" return text + f"\n\n{self.parse_source_view(self.sources)}" + @trace() async def generate_input_values(self) -> Dict: if self.space_context: self.prompt_template.template_define = self.space_context["prompt"]["scene"] diff --git a/pilot/scene/chat_knowledge/v1/prompt.py b/pilot/scene/chat_knowledge/v1/prompt.py index 394906562..ea55fca5a 100644 --- a/pilot/scene/chat_knowledge/v1/prompt.py +++ b/pilot/scene/chat_knowledge/v1/prompt.py @@ -33,7 +33,7 @@ _DEFAULT_TEMPLATE = ( PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = True +PROMPT_NEED_STREAM_OUT = True prompt = PromptTemplate( template_scene=ChatScene.ChatKnowledge.value(), @@ -41,9 +41,9 @@ prompt = PromptTemplate( response_format=None, template_define=PROMPT_SCENE_DEFINE, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, 
output_parser=NormalChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) diff --git a/pilot/scene/chat_knowledge/v1/prompt_chatglm.py b/pilot/scene/chat_knowledge/v1/prompt_chatglm.py index 7f66c1e6f..898699e89 100644 --- a/pilot/scene/chat_knowledge/v1/prompt_chatglm.py +++ b/pilot/scene/chat_knowledge/v1/prompt_chatglm.py @@ -33,7 +33,7 @@ _DEFAULT_TEMPLATE = ( PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = True +PROMPT_NEED_STREAM_OUT = True prompt = PromptTemplate( template_scene=ChatScene.ChatKnowledge.value(), @@ -41,9 +41,9 @@ prompt = PromptTemplate( response_format=None, template_define=None, template=_DEFAULT_TEMPLATE, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=NormalChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) diff --git a/pilot/scene/chat_normal/chat.py b/pilot/scene/chat_normal/chat.py index 5999d5c3c..0191ef943 100644 --- a/pilot/scene/chat_normal/chat.py +++ b/pilot/scene/chat_normal/chat.py @@ -5,6 +5,7 @@ from pilot.scene.base import ChatScene from pilot.configs.config import Config from pilot.scene.chat_normal.prompt import prompt +from pilot.utils.tracer import root_tracer, trace CFG = Config() @@ -21,6 +22,7 @@ class ChatNormal(BaseChat): chat_param=chat_param, ) + @trace() async def generate_input_values(self) -> Dict: input_values = {"input": self.current_user_input} return input_values diff --git a/pilot/scene/chat_normal/prompt.py b/pilot/scene/chat_normal/prompt.py index ad0724874..dae412987 100644 --- a/pilot/scene/chat_normal/prompt.py +++ b/pilot/scene/chat_normal/prompt.py @@ -11,7 +11,7 @@ CFG = Config() PROMPT_SEP = SeparatorStyle.SINGLE.value -PROMPT_NEED_NEED_STREAM_OUT = True +PROMPT_NEED_STREAM_OUT = True prompt = PromptTemplate( template_scene=ChatScene.ChatNormal.value(), @@ -19,9 +19,9 @@ prompt = PromptTemplate( response_format=None, template_define=PROMPT_SCENE_DEFINE, template=None, - stream_out=PROMPT_NEED_NEED_STREAM_OUT, + stream_out=PROMPT_NEED_STREAM_OUT, output_parser=NormalChatOutputParser( - sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_NEED_STREAM_OUT + sep=PROMPT_SEP, is_stream_out=PROMPT_NEED_STREAM_OUT ), ) diff --git a/pilot/server/base.py b/pilot/server/base.py index 71faeb821..488c919c3 100644 --- a/pilot/server/base.py +++ b/pilot/server/base.py @@ -147,6 +147,12 @@ class WebWerverParameters(BaseParameters): "help": "The filename to store tracer span records", }, ) + tracer_storage_cls: Optional[str] = field( + default=None, + metadata={ + "help": "The storage class to storage tracer span records", + }, + ) disable_alembic_upgrade: Optional[bool] = field( default=False, metadata={ diff --git a/pilot/server/dbgpt_server.py b/pilot/server/dbgpt_server.py index e94526b9a..c26803cfd 100644 --- a/pilot/server/dbgpt_server.py +++ b/pilot/server/dbgpt_server.py @@ -195,7 +195,11 @@ def run_uvicorn(param: WebWerverParameters): def run_webserver(param: WebWerverParameters = None): if not param: param = _get_webserver_params() - initialize_tracer(system_app, os.path.join(LOGDIR, param.tracer_file)) + initialize_tracer( + system_app, + os.path.join(LOGDIR, param.tracer_file), + tracer_storage_cls=param.tracer_storage_cls, + ) with root_tracer.start_span( "run_webserver", diff --git a/pilot/server/knowledge/api.py b/pilot/server/knowledge/api.py index 164158694..16ee8926c 100644 --- 
a/pilot/server/knowledge/api.py +++ b/pilot/server/knowledge/api.py @@ -48,7 +48,7 @@ def space_add(request: KnowledgeSpaceRequest): knowledge_space_service.create_knowledge_space(request) return Result.succ([]) except Exception as e: - return Result.faild(code="E000X", msg=f"space add error {e}") + return Result.failed(code="E000X", msg=f"space add error {e}") @router.post("/knowledge/space/list") @@ -57,7 +57,7 @@ def space_list(request: KnowledgeSpaceRequest): try: return Result.succ(knowledge_space_service.get_knowledge_space(request)) except Exception as e: - return Result.faild(code="E000X", msg=f"space list error {e}") + return Result.failed(code="E000X", msg=f"space list error {e}") @router.post("/knowledge/space/delete") @@ -66,7 +66,7 @@ def space_delete(request: KnowledgeSpaceRequest): try: return Result.succ(knowledge_space_service.delete_space(request.name)) except Exception as e: - return Result.faild(code="E000X", msg=f"space list error {e}") + return Result.failed(code="E000X", msg=f"space list error {e}") @router.post("/knowledge/{space_name}/arguments") @@ -75,7 +75,7 @@ def arguments(space_name: str): try: return Result.succ(knowledge_space_service.arguments(space_name)) except Exception as e: - return Result.faild(code="E000X", msg=f"space list error {e}") + return Result.failed(code="E000X", msg=f"space list error {e}") @router.post("/knowledge/{space_name}/argument/save") @@ -86,7 +86,7 @@ def arguments_save(space_name: str, argument_request: SpaceArgumentRequest): knowledge_space_service.argument_save(space_name, argument_request) ) except Exception as e: - return Result.faild(code="E000X", msg=f"space list error {e}") + return Result.failed(code="E000X", msg=f"space list error {e}") @router.post("/knowledge/{space_name}/document/add") @@ -100,7 +100,7 @@ def document_add(space_name: str, request: KnowledgeDocumentRequest): ) # return Result.succ([]) except Exception as e: - return Result.faild(code="E000X", msg=f"document add error {e}") + return Result.failed(code="E000X", msg=f"document add error {e}") @router.post("/knowledge/{space_name}/document/list") @@ -111,7 +111,7 @@ def document_list(space_name: str, query_request: DocumentQueryRequest): knowledge_space_service.get_knowledge_documents(space_name, query_request) ) except Exception as e: - return Result.faild(code="E000X", msg=f"document list error {e}") + return Result.failed(code="E000X", msg=f"document list error {e}") @router.post("/knowledge/{space_name}/document/delete") @@ -122,7 +122,7 @@ def document_delete(space_name: str, query_request: DocumentQueryRequest): knowledge_space_service.delete_document(space_name, query_request.doc_name) ) except Exception as e: - return Result.faild(code="E000X", msg=f"document list error {e}") + return Result.failed(code="E000X", msg=f"document list error {e}") @router.post("/knowledge/{space_name}/document/upload") @@ -159,9 +159,9 @@ async def document_upload( ) ) # return Result.succ([]) - return Result.faild(code="E000X", msg=f"doc_file is None") + return Result.failed(code="E000X", msg=f"doc_file is None") except Exception as e: - return Result.faild(code="E000X", msg=f"document add error {e}") + return Result.failed(code="E000X", msg=f"document add error {e}") @router.post("/knowledge/{space_name}/document/sync") @@ -173,7 +173,7 @@ def document_sync(space_name: str, request: DocumentSyncRequest): ) return Result.succ([]) except Exception as e: - return Result.faild(code="E000X", msg=f"document sync error {e}") + return Result.failed(code="E000X", 
msg=f"document sync error {e}") @router.post("/knowledge/{space_name}/chunk/list") @@ -182,7 +182,7 @@ def document_list(space_name: str, query_request: ChunkQueryRequest): try: return Result.succ(knowledge_space_service.get_document_chunks(query_request)) except Exception as e: - return Result.faild(code="E000X", msg=f"document chunk list error {e}") + return Result.failed(code="E000X", msg=f"document chunk list error {e}") @router.post("/knowledge/{vector_name}/query") diff --git a/pilot/server/llm_manage/api.py b/pilot/server/llm_manage/api.py index f5602d3c4..617018642 100644 --- a/pilot/server/llm_manage/api.py +++ b/pilot/server/llm_manage/api.py @@ -33,9 +33,9 @@ async def model_params(): params.append(model_dict) return Result.succ(params) if not worker_instance: - return Result.faild(code="E000X", msg=f"can not find worker manager") + return Result.failed(code="E000X", msg=f"can not find worker manager") except Exception as e: - return Result.faild(code="E000X", msg=f"model stop failed {e}") + return Result.failed(code="E000X", msg=f"model stop failed {e}") @router.get("/v1/worker/model/list") @@ -78,7 +78,7 @@ async def model_list(): return Result.succ(responses) except Exception as e: - return Result.faild(code="E000X", msg=f"model list error {e}") + return Result.failed(code="E000X", msg=f"model list error {e}") @router.post("/v1/worker/model/stop") @@ -91,11 +91,11 @@ async def model_stop(request: WorkerStartupRequest): ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory ).create() if not worker_manager: - return Result.faild(code="E000X", msg=f"can not find worker manager") + return Result.failed(code="E000X", msg=f"can not find worker manager") request.params = {} return Result.succ(await worker_manager.model_shutdown(request)) except Exception as e: - return Result.faild(code="E000X", msg=f"model stop failed {e}") + return Result.failed(code="E000X", msg=f"model stop failed {e}") @router.post("/v1/worker/model/start") @@ -106,7 +106,7 @@ async def model_start(request: WorkerStartupRequest): ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory ).create() if not worker_manager: - return Result.faild(code="E000X", msg=f"can not find worker manager") + return Result.failed(code="E000X", msg=f"can not find worker manager") return Result.succ(await worker_manager.model_startup(request)) except Exception as e: - return Result.faild(code="E000X", msg=f"model start failed {e}") + return Result.failed(code="E000X", msg=f"model start failed {e}") diff --git a/pilot/server/prompt/api.py b/pilot/server/prompt/api.py index b94546891..0be4140a7 100644 --- a/pilot/server/prompt/api.py +++ b/pilot/server/prompt/api.py @@ -11,12 +11,12 @@ prompt_manage_service = PromptManageService() @router.post("/prompt/add") def prompt_add(request: PromptManageRequest): - print(f"/space/add params: {request}") + print(f"/prompt/add params: {request}") try: prompt_manage_service.create_prompt(request) return Result.succ([]) except Exception as e: - return Result.faild(code="E010X", msg=f"prompt add error {e}") + return Result.failed(code="E010X", msg=f"prompt add error {e}") @router.post("/prompt/list") @@ -25,7 +25,7 @@ def prompt_list(request: PromptManageRequest): try: return Result.succ(prompt_manage_service.get_prompts(request)) except Exception as e: - return Result.faild(code="E010X", msg=f"prompt list error {e}") + return Result.failed(code="E010X", msg=f"prompt list error {e}") @router.post("/prompt/update") @@ -34,7 +34,7 @@ def prompt_update(request: PromptManageRequest): try: 
return Result.succ(prompt_manage_service.update_prompt(request)) except Exception as e: - return Result.faild(code="E010X", msg=f"prompt update error {e}") + return Result.failed(code="E010X", msg=f"prompt update error {e}") @router.post("/prompt/delete") @@ -43,4 +43,4 @@ def prompt_delete(request: PromptManageRequest): try: return Result.succ(prompt_manage_service.delete_prompt(request.prompt_name)) except Exception as e: - return Result.faild(code="E010X", msg=f"prompt delete error {e}") + return Result.failed(code="E010X", msg=f"prompt delete error {e}") diff --git a/pilot/utils/executor_utils.py b/pilot/utils/executor_utils.py index 2aac0d04d..26ee3c66e 100644 --- a/pilot/utils/executor_utils.py +++ b/pilot/utils/executor_utils.py @@ -1,5 +1,6 @@ from typing import Callable, Awaitable, Any import asyncio +import contextvars from abc import ABC, abstractmethod from concurrent.futures import Executor, ThreadPoolExecutor from functools import partial @@ -55,6 +56,12 @@ async def blocking_func_to_async( """ if asyncio.iscoroutinefunction(func): raise ValueError(f"The function {func} is not blocking function") + + # This function will be called within the new thread, capturing the current context + ctx = contextvars.copy_context() + + def run_with_context(): + return ctx.run(partial(func, *args, **kwargs)) + loop = asyncio.get_event_loop() - sync_function_noargs = partial(func, *args, **kwargs) - return await loop.run_in_executor(executor, sync_function_noargs) + return await loop.run_in_executor(executor, run_with_context) diff --git a/pilot/utils/tracer/__init__.py b/pilot/utils/tracer/__init__.py index 16509ff43..6f77cfd6c 100644 --- a/pilot/utils/tracer/__init__.py +++ b/pilot/utils/tracer/__init__.py @@ -7,9 +7,14 @@ from pilot.utils.tracer.base import ( SpanStorageType, TracerContext, ) -from pilot.utils.tracer.span_storage import MemorySpanStorage, FileSpanStorage +from pilot.utils.tracer.span_storage import ( + MemorySpanStorage, + FileSpanStorage, + SpanStorageContainer, +) from pilot.utils.tracer.tracer_impl import ( root_tracer, + trace, initialize_tracer, DefaultTracer, TracerManager, @@ -25,7 +30,9 @@ __all__ = [ "TracerContext", "MemorySpanStorage", "FileSpanStorage", + "SpanStorageContainer", "root_tracer", + "trace", "initialize_tracer", "DefaultTracer", "TracerManager", diff --git a/pilot/utils/tracer/base.py b/pilot/utils/tracer/base.py index e227d6314..625e9aabd 100644 --- a/pilot/utils/tracer/base.py +++ b/pilot/utils/tracer/base.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Dict, Callable, Optional +from typing import Dict, Callable, Optional, List from dataclasses import dataclass from abc import ABC, abstractmethod from enum import Enum @@ -121,6 +121,11 @@ class SpanStorage(BaseComponent, ABC): def append_span(self, span: Span): """Store the given span. This needs to be implemented by subclasses.""" + def append_span_batch(self, spans: List[Span]): + """Store the span batch""" + for span in spans: + self.append_span(span) + class Tracer(BaseComponent, ABC): """Abstract base class for tracing operations. 
diff --git a/pilot/utils/tracer/span_storage.py b/pilot/utils/tracer/span_storage.py index 3070fb834..57321a316 100644 --- a/pilot/utils/tracer/span_storage.py +++ b/pilot/utils/tracer/span_storage.py @@ -5,11 +5,12 @@ import datetime import threading import queue import logging +from typing import Optional, List +from concurrent.futures import Executor, ThreadPoolExecutor from pilot.component import SystemApp from pilot.utils.tracer.base import Span, SpanStorage - logger = logging.getLogger(__name__) @@ -24,8 +25,81 @@ class MemorySpanStorage(SpanStorage): self.spans.append(span) +class SpanStorageContainer(SpanStorage): + def __init__( + self, + system_app: SystemApp | None = None, + batch_size=10, + flush_interval=10, + executor: Executor = None, + ): + super().__init__(system_app) + if not executor: + executor = ThreadPoolExecutor(thread_name_prefix="trace_storage_sync_") + self.executor = executor + self.storages: List[SpanStorage] = [] + self.last_date = ( + datetime.datetime.now().date() + ) # Store the current date for checking date changes + self.queue = queue.Queue() + self.batch_size = batch_size + self.flush_interval = flush_interval + self.last_flush_time = time.time() + self.flush_signal_queue = queue.Queue() + self.flush_thread = threading.Thread( + target=self._flush_to_storages, daemon=True + ) + self.flush_thread.start() + + def append_storage(self, storage: SpanStorage): + """Append sotrage to container + + Args: + storage ([`SpanStorage`]): The storage to be append to current container + """ + self.storages.append(storage) + + def append_span(self, span: Span): + self.queue.put(span) + if self.queue.qsize() >= self.batch_size: + try: + self.flush_signal_queue.put_nowait(True) + except queue.Full: + pass # If the signal queue is full, it's okay. The flush thread will handle it. 
+ + def _flush_to_storages(self): + while True: + interval = time.time() - self.last_flush_time + if interval < self.flush_interval: + try: + self.flush_signal_queue.get( + block=True, timeout=self.flush_interval - interval + ) + except Exception: + # Timeout + pass + + spans_to_write = [] + while not self.queue.empty(): + spans_to_write.append(self.queue.get()) + for s in self.storages: + + def append_and_ignore_error( + storage: SpanStorage, spans_to_write: List[SpanStorage] + ): + try: + storage.append_span_batch(spans_to_write) + except Exception as e: + logger.warn( + f"Append spans to storage {str(storage)} failed: {str(e)}, span_data: {spans_to_write}" + ) + + self.executor.submit(append_and_ignore_error, s, spans_to_write) + self.last_flush_time = time.time() + + class FileSpanStorage(SpanStorage): - def __init__(self, filename: str, batch_size=10, flush_interval=10): + def __init__(self, filename: str): super().__init__() self.filename = filename # Split filename into prefix and suffix @@ -36,29 +110,18 @@ class FileSpanStorage(SpanStorage): datetime.datetime.now().date() ) # Store the current date for checking date changes self.queue = queue.Queue() - self.batch_size = batch_size - self.flush_interval = flush_interval - self.last_flush_time = time.time() - self.flush_signal_queue = queue.Queue() if not os.path.exists(filename): # New file if not exist os.makedirs(os.path.dirname(filename), exist_ok=True) with open(filename, "a"): pass - self.flush_thread = threading.Thread(target=self._flush_to_file, daemon=True) - self.flush_thread.start() def append_span(self, span: Span): - span_data = span.to_dict() - logger.debug(f"append span: {span_data}") - self.queue.put(span_data) + self._write_to_file([span]) - if self.queue.qsize() >= self.batch_size: - try: - self.flush_signal_queue.put_nowait(True) - except queue.Full: - pass # If the signal queue is full, it's okay. The flush thread will handle it. 
+ def append_span_batch(self, spans: List[Span]): + self._write_to_file(spans) def _get_dated_filename(self, date: datetime.date) -> str: """Return the filename based on a specific date.""" @@ -73,31 +136,15 @@ class FileSpanStorage(SpanStorage): os.rename(self.filename, self._get_dated_filename(self.last_date)) self.last_date = current_date - def _write_to_file(self): + def _write_to_file(self, spans: List[Span]): self._roll_over_if_needed() - spans_to_write = [] - while not self.queue.empty(): - spans_to_write.append(self.queue.get()) with open(self.filename, "a") as file: - for span_data in spans_to_write: + for span in spans: + span_data = span.to_dict() try: file.write(json.dumps(span_data, ensure_ascii=False) + "\n") except Exception as e: logger.warning( f"Write span to file failed: {str(e)}, span_data: {span_data}" ) - - def _flush_to_file(self): - while True: - interval = time.time() - self.last_flush_time - if interval < self.flush_interval: - try: - self.flush_signal_queue.get( - block=True, timeout=self.flush_interval - interval - ) - except Exception: - # Timeout - pass - self._write_to_file() - self.last_flush_time = time.time() diff --git a/pilot/utils/tracer/tests/test_span_storage.py b/pilot/utils/tracer/tests/test_span_storage.py index 9ca727995..6da0797fc 100644 --- a/pilot/utils/tracer/tests/test_span_storage.py +++ b/pilot/utils/tracer/tests/test_span_storage.py @@ -7,44 +7,53 @@ import time from unittest.mock import patch from datetime import datetime, timedelta -from pilot.utils.tracer import SpanStorage, FileSpanStorage, Span, SpanType +from pilot.utils.tracer import ( + SpanStorage, + FileSpanStorage, + Span, + SpanType, + SpanStorageContainer, +) @pytest.fixture def storage(request): if not request or not hasattr(request, "param"): - batch_size = 10 - flush_interval = 10 file_does_not_exist = False else: - batch_size = request.param.get("batch_size", 10) - flush_interval = request.param.get("flush_interval", 10) file_does_not_exist = request.param.get("file_does_not_exist", False) if file_does_not_exist: with tempfile.TemporaryDirectory() as tmp_dir: filename = os.path.join(tmp_dir, "non_existent_file.jsonl") - storage_instance = FileSpanStorage( - filename, batch_size=batch_size, flush_interval=flush_interval - ) + storage_instance = FileSpanStorage(filename) yield storage_instance else: with tempfile.NamedTemporaryFile(delete=True) as tmp_file: filename = tmp_file.name - storage_instance = FileSpanStorage( - filename, batch_size=batch_size, flush_interval=flush_interval - ) + storage_instance = FileSpanStorage(filename) yield storage_instance +@pytest.fixture +def storage_container(request): + if not request or not hasattr(request, "param"): + batch_size = 10 + flush_interval = 10 + else: + batch_size = request.param.get("batch_size", 10) + flush_interval = request.param.get("flush_interval", 10) + storage_container = SpanStorageContainer( + batch_size=batch_size, flush_interval=flush_interval + ) + yield storage_container + + def read_spans_from_file(filename): with open(filename, "r") as f: return [json.loads(line) for line in f.readlines()] -@pytest.mark.parametrize( - "storage", [{"batch_size": 1, "flush_interval": 5}], indirect=True -) def test_write_span(storage: SpanStorage): span = Span("1", "a", SpanType.BASE, "b", "op1") storage.append_span(span) @@ -55,9 +64,6 @@ def test_write_span(storage: SpanStorage): assert spans_in_file[0]["trace_id"] == "1" -@pytest.mark.parametrize( - "storage", [{"batch_size": 1, "flush_interval": 5}], indirect=True -) def 
test_incremental_write(storage: SpanStorage): span1 = Span("1", "a", SpanType.BASE, "b", "op1") span2 = Span("2", "c", SpanType.BASE, "d", "op2") @@ -70,9 +76,6 @@ def test_incremental_write(storage: SpanStorage): assert len(spans_in_file) == 2 -@pytest.mark.parametrize( - "storage", [{"batch_size": 2, "flush_interval": 5}], indirect=True -) def test_sync_and_async_append(storage: SpanStorage): span = Span("1", "a", SpanType.BASE, "b", "op1") @@ -88,27 +91,7 @@ def test_sync_and_async_append(storage: SpanStorage): assert len(spans_in_file) == 2 -@pytest.mark.asyncio -async def test_flush_policy(storage: SpanStorage): - span = Span("1", "a", SpanType.BASE, "b", "op1") - - for _ in range(storage.batch_size - 1): - storage.append_span(span) - - spans_in_file = read_spans_from_file(storage.filename) - assert len(spans_in_file) == 0 - - # Trigger batch write - storage.append_span(span) - await asyncio.sleep(0.1) - - spans_in_file = read_spans_from_file(storage.filename) - assert len(spans_in_file) == storage.batch_size - - -@pytest.mark.parametrize( - "storage", [{"batch_size": 2, "file_does_not_exist": True}], indirect=True -) +@pytest.mark.parametrize("storage", [{"file_does_not_exist": True}], indirect=True) def test_non_existent_file(storage: SpanStorage): span = Span("1", "a", SpanType.BASE, "b", "op1") span2 = Span("2", "c", SpanType.BASE, "d", "op2") @@ -116,7 +99,7 @@ def test_non_existent_file(storage: SpanStorage): time.sleep(0.1) spans_in_file = read_spans_from_file(storage.filename) - assert len(spans_in_file) == 0 + assert len(spans_in_file) == 1 storage.append_span(span2) time.sleep(0.1) @@ -126,9 +109,7 @@ def test_non_existent_file(storage: SpanStorage): assert spans_in_file[1]["trace_id"] == "2" -@pytest.mark.parametrize( - "storage", [{"batch_size": 1, "file_does_not_exist": True}], indirect=True -) +@pytest.mark.parametrize("storage", [{"file_does_not_exist": True}], indirect=True) def test_log_rollover(storage: SpanStorage): # mock start date mock_start_date = datetime(2023, 10, 18, 23, 59) @@ -167,3 +148,27 @@ def test_log_rollover(storage: SpanStorage): spans_in_dated_file = read_spans_from_file(dated_filename) assert len(spans_in_dated_file) == 1 assert spans_in_dated_file[0]["trace_id"] == "1" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("storage_container", [{"batch_size": 5}], indirect=True) +async def test_container_flush_policy( + storage_container: SpanStorageContainer, storage: FileSpanStorage +): + storage_container.append_storage(storage) + span = Span("1", "a", SpanType.BASE, "b", "op1") + + filename = storage.filename + + for _ in range(storage_container.batch_size - 1): + storage_container.append_span(span) + + spans_in_file = read_spans_from_file(filename) + assert len(spans_in_file) == 0 + + # Trigger batch write + storage_container.append_span(span) + await asyncio.sleep(0.1) + + spans_in_file = read_spans_from_file(filename) + assert len(spans_in_file) == storage_container.batch_size diff --git a/pilot/utils/tracer/tracer_cli.py b/pilot/utils/tracer/tracer_cli.py index 7df18f516..3fb9cba31 100644 --- a/pilot/utils/tracer/tracer_cli.py +++ b/pilot/utils/tracer/tracer_cli.py @@ -303,8 +303,6 @@ def chat( print(table.get_formatted_string(out_format=output, **out_kwargs)) if sys_table: print(sys_table.get_formatted_string(out_format=output, **out_kwargs)) - if hide_conv: - return if not found_trace_id: print(f"Can't found conversation with trace_id: {trace_id}") @@ -315,9 +313,12 @@ def chat( trace_spans = [s for s in reversed(trace_spans)] hierarchy = 
_build_trace_hierarchy(trace_spans) if tree: - print("\nInvoke Trace Tree:\n") + print(f"\nInvoke Trace Tree(trace_id: {trace_id}):\n") _print_trace_hierarchy(hierarchy) + if hide_conv: + return + trace_spans = _get_ordered_trace_from(hierarchy) table = PrettyTable(["Key", "Value Value"], title="Chat Trace Details") split_long_text = output == "text" @@ -340,36 +341,43 @@ def chat( table.add_row(["echo", metadata.get("echo")]) elif "error" in metadata: table.add_row(["BaseChat Error", metadata.get("error")]) - if op == "BaseChat.nostream_call" and not sp["end_time"]: - if "model_output" in metadata: - table.add_row( - [ - "BaseChat model_output", - split_string_by_terminal_width( - metadata.get("model_output").get("text"), - split=split_long_text, - ), - ] - ) - if "ai_response_text" in metadata: - table.add_row( - [ - "BaseChat ai_response_text", - split_string_by_terminal_width( - metadata.get("ai_response_text"), split=split_long_text - ), - ] - ) - if "prompt_define_response" in metadata: - table.add_row( - [ - "BaseChat prompt_define_response", - split_string_by_terminal_width( - metadata.get("prompt_define_response"), - split=split_long_text, - ), - ] + if op == "BaseChat.do_action" and not sp["end_time"]: + if "model_output" in metadata: + table.add_row( + [ + "BaseChat model_output", + split_string_by_terminal_width( + metadata.get("model_output").get("text"), + split=split_long_text, + ), + ] + ) + if "ai_response_text" in metadata: + table.add_row( + [ + "BaseChat ai_response_text", + split_string_by_terminal_width( + metadata.get("ai_response_text"), split=split_long_text + ), + ] + ) + if "prompt_define_response" in metadata: + prompt_define_response = metadata.get("prompt_define_response") or "" + if isinstance(prompt_define_response, dict) or isinstance( + prompt_define_response, type([]) + ): + prompt_define_response = json.dumps( + prompt_define_response, ensure_ascii=False ) + table.add_row( + [ + "BaseChat prompt_define_response", + split_string_by_terminal_width( + prompt_define_response, + split=split_long_text, + ), + ] + ) if op == "DefaultModelWorker_call.generate_stream_func": if not sp["end_time"]: table.add_row(["llm_adapter", metadata.get("llm_adapter")]) diff --git a/pilot/utils/tracer/tracer_impl.py b/pilot/utils/tracer/tracer_impl.py index bda25ab4d..6bbad084c 100644 --- a/pilot/utils/tracer/tracer_impl.py +++ b/pilot/utils/tracer/tracer_impl.py @@ -1,6 +1,10 @@ from typing import Dict, Optional from contextvars import ContextVar from functools import wraps +import asyncio +import inspect +import logging + from pilot.component import SystemApp, ComponentType from pilot.utils.tracer.base import ( @@ -12,6 +16,9 @@ from pilot.utils.tracer.base import ( TracerContext, ) from pilot.utils.tracer.span_storage import MemorySpanStorage +from pilot.utils.module_utils import import_from_checked_string + +logger = logging.getLogger(__name__) class DefaultTracer(Tracer): @@ -154,26 +161,51 @@ class TracerManager: root_tracer: TracerManager = TracerManager() -def trace(operation_name: str, **trace_kwargs): +def trace(operation_name: Optional[str] = None, **trace_kwargs): def decorator(func): @wraps(func) - async def wrapper(*args, **kwargs): - with root_tracer.start_span(operation_name, **trace_kwargs): + def sync_wrapper(*args, **kwargs): + name = ( + operation_name if operation_name else _parse_operation_name(func, *args) + ) + with root_tracer.start_span(name, **trace_kwargs): + return func(*args, **kwargs) + + @wraps(func) + async def async_wrapper(*args, **kwargs): + 
name = ( + operation_name if operation_name else _parse_operation_name(func, *args) + ) + with root_tracer.start_span(name, **trace_kwargs): return await func(*args, **kwargs) - return wrapper + if asyncio.iscoroutinefunction(func): + return async_wrapper + else: + return sync_wrapper return decorator +def _parse_operation_name(func, *args): + self_name = None + if inspect.signature(func).parameters.get("self"): + self_name = args[0].__class__.__name__ + func_name = func.__name__ + if self_name: + return f"{self_name}.{func_name}" + return func_name + + def initialize_tracer( system_app: SystemApp, tracer_filename: str, root_operation_name: str = "DB-GPT-Web-Entry", + tracer_storage_cls: str = None, ): if not system_app: return - from pilot.utils.tracer.span_storage import FileSpanStorage + from pilot.utils.tracer.span_storage import FileSpanStorage, SpanStorageContainer trace_context_var = ContextVar( "trace_context", @@ -181,7 +213,15 @@ def initialize_tracer( ) tracer = DefaultTracer(system_app) - system_app.register_instance(FileSpanStorage(tracer_filename)) + storage_container = SpanStorageContainer(system_app) + storage_container.append_storage(FileSpanStorage(tracer_filename)) + + if tracer_storage_cls: + logger.info(f"Begin parse storage class {tracer_storage_cls}") + storage = import_from_checked_string(tracer_storage_cls, SpanStorage) + storage_container.append_storage(storage()) + + system_app.register_instance(storage_container) system_app.register_instance(tracer) root_tracer.initialize(system_app, trace_context_var) if system_app.app: