diff --git a/.github/workflows/run_colossalqa_unit_tests.yml b/.github/workflows/run_colossalqa_unit_tests.yml new file mode 100644 index 000000000..763db2772 --- /dev/null +++ b/.github/workflows/run_colossalqa_unit_tests.yml @@ -0,0 +1,54 @@ +name: Run colossalqa unit tests + +on: + pull_request: + types: [synchronize, opened, reopened] + paths: + - 'applications/ColossalQA/colossalqa/**' + - 'applications/ColossalQA/requirements.txt' + - 'applications/ColossalQA/setup.py' + - 'applications/ColossalQA/tests/**' + - 'applications/ColossalQA/pytest.ini' + +jobs: + tests: + name: Run colossalqa unit tests + if: | + github.event.pull_request.draft == false && + github.base_ref == 'main' && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + volumes: + - /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa + - /data/scratch/llama-tiny:/data/scratch/llama-tiny + options: --gpus all --rm + timeout-minutes: 30 + defaults: + run: + shell: bash + steps: + - name: Checkout ColossalAI + uses: actions/checkout@v2 + + - name: Install colossalqa + run: | + cd applications/ColossalQA + pip install -e . + + - name: Execute Unit Testing + run: | + cd applications/ColossalQA + pytest tests/ + env: + NCCL_SHM_DISABLE: 1 + MAX_JOBS: 8 + ZH_MODEL_PATH: bigscience/bloom-560m + ZH_MODEL_NAME: bloom + EN_MODEL_PATH: bigscience/bloom-560m + EN_MODEL_NAME: bloom + TEST_DATA_PATH_EN: /data/scratch/test_data_colossalqa/companies.txt + TEST_DATA_PATH_ZH: /data/scratch/test_data_colossalqa/companies_zh.txt + TEST_DOCUMENT_LOADER_DATA_PATH: /data/scratch/test_data_colossalqa/tests/* + SQL_FILE_PATH: /data/scratch/test_data_colossalqa/sql_file_path \ No newline at end of file diff --git a/LICENSE b/LICENSE index b3eb43520..bacb03e72 100644 --- a/LICENSE +++ b/LICENSE @@ -527,3 +527,28 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. 
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + ---------------- LICENSE FOR LangChain TEAM ---------------- + + The MIT License + + Copyright (c) Harrison Chase + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
diff --git a/applications/ColossalQA/.gitignore b/applications/ColossalQA/.gitignore new file mode 100644 index 000000000..5f5e159a2 --- /dev/null +++ b/applications/ColossalQA/.gitignore @@ -0,0 +1,152 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/.build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDE +.idea/ +.vscode/ + +# macos +*.DS_Store +#data/ + +docs/.build + +# pytorch checkpoint +*.pt + +# sql +*.db + +# wandb log +example/wandb/ +example/ui/gradio/ +example/vector_db_for_test +examples/awesome-chatgpt-prompts/ diff --git a/applications/ColossalQA/README.md b/applications/ColossalQA/README.md new file mode 100644 index 000000000..d9ffe5beb --- /dev/null +++ b/applications/ColossalQA/README.md @@ -0,0 +1,258 @@ +# ColossalQA - Langchain-based Document Retrieval Conversation System + +## Table of Contents + +- [Table of Contents](#table-of-contents) +- [Overall Implementation](#overall-implementation) +- [Install](#install) +- [How to Use](#how-to-use) +- Examples + - [A Simple Web UI Demo](examples/webui_demo/README.md) + - [Local Chinese Retrieval QA + Chat](examples/retrieval_conversation_zh.py) + - [Local English Retrieval QA + Chat](examples/retrieval_conversation_en.py) + - [Local Bi-lingual Retrieval QA + Chat](examples/retrieval_conversation_universal.py) + - [Experimental AI Agent Based on Chatgpt + Chat](examples/conversation_agent_chatgpt.py) +- Use cases + - [English customer service chatbot](examples/retrieval_conversation_en_customer_service.py) + - [Chinese customer service intent classification](examples/retrieval_intent_classification_zh_customer_service.py) + +**As Colossal-AI is undergoing some major updates, this project will be actively maintained to stay in line with the Colossal-AI project.** + +## Overall Implementation + +### Highlevel Design + + + +<p align="center"> +Fig.1. 
Design of the document retrieval conversation system +</p> + +Retrieval-based Question Answering (QA) is a crucial application of natural language processing that aims to find the most relevant answers based on the information from a corpus of text documents in response to user queries. Vector stores, which represent documents and queries as vectors in a high-dimensional space, have gained popularity for their effectiveness in retrieval QA tasks. + +#### Step 1: Collect Data + +A successful retrieval QA system starts with high-quality data. You need a collection of text documents that are related to your application. You may also need to manually design how your data will be presented to the language model. + +#### Step 2: Split Data + +Document data is usually too long to fit into the prompt due to the context length limitation of LLMs. Supporting documents need to be split into short chunks before constructing vector stores. In this demo, we use a neural text splitter for better performance. + +#### Step 3: Construct Vector Stores +Choose an embedding function and embed your text chunks into high-dimensional vectors. Once you have vectors for your documents, you need to create a vector store. The vector store should efficiently index and retrieve documents based on vector similarity. In this demo, we use [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma) and incrementally update indexes of vector stores. Through incremental update, one can update and maintain a vector store without recalculating every embedding. +You are free to choose any vectorstore from a variety of [vector stores](https://python.langchain.com/docs/integrations/vectorstores/) supported by LangChain.
However, the incremental update only works with LangChain vectorstores that support: +- Document addition by id (add_documents method with ids argument) +- Delete by id (delete method with ids argument) + +#### Step 4: Retrieve Relevant Text +Upon querying, we run reference resolution on the user's input; the goal of this step is to remove ambiguous references in the user's query such as "this company" or "him". We then embed the query with the same embedding function and query the vectorstore to retrieve the top-k most similar documents. + +#### Step 5: Format Prompt +The prompt carries essential information including task description, conversation history, retrieved documents, and the user's query for the LLM to generate a response. Please refer to this [README](./colossalqa/prompt/README.md) for more details. + +#### Step 6: Inference +Pass the prompt to the LLM with additional generation arguments to get the agent's response. You can control the generation with additional arguments such as temperature, top_k, top_p, max_new_tokens. You can also define when to stop by passing the stop substring to the retrieval QA chain. + +#### Step 7: Update Memory +We designed a memory module that automatically summarizes overlength conversations to fit the max context length of the LLM. In this step, we update the memory with the newly generated response. To fit into the context length of a given LLM, we summarize the overlength part of the historical conversation and present the rest in round-based conversation format. Fig.2 shows how the memory is updated. Please refer to this [README](./colossalqa/prompt/README.md) for dialogue format. + + +<p align="center"> +Fig.2. Design of the memory module +</p> + +### Supported Language Models (LLMs) and Embedding Models + +Our platform accommodates two kinds of LLMs: API-accessible and locally run models. For the API-style LLMs, we support ChatGPT, Pangu, and models deployed through the vLLM API Server.
For locally operated LLMs, we are compatible with any language model that can be initiated using [`transformers.AutoModel.from_pretrained`](https://huggingface.co/transformers/v3.0.2/model_doc/auto.html#transformers.AutoModel.from_pretrained). However, due to the dependence of retrieval-based QA on the language model's abilities in zero-shot learning, instruction following, and logical reasoning, smaller models are typically not advised. In our local demo, we utilize ChatGLM2 for Chinese and LLaMa2 for English. Modifying the base LLM requires corresponding adjustments to the prompts. + +Here are some sample codes to load different types of LLM. + +```python +# For locally-run LLM +from colossalqa.local.llm import ColossalAPI, ColossalLLM +api = ColossalAPI('chatglm2', 'path_to_chatglm2_checkpoint') +llm = ColossalLLM(n=1, api=api) + +# For LLMs running on the vLLM API Server +from colossalqa.local.llm import VllmAPI, VllmLLM +vllm_api = VllmAPI("Your_vLLM_Host", "Your_vLLM_Port") +llm = VllmLLM(n=1, api=vllm_api) + +# For ChatGPT LLM +from langchain.llms import OpenAI +llm = OpenAI(openai_api_key="YOUR_OPENAI_API_KEY") + +# For Pangu LLM +# set up your authentification info +from colossalqa.local.pangu_llm import Pangu +os.environ["URL"] = "" +os.environ["URLNAME"] = "" +os.environ["PASSWORD"] = "" +os.environ["DOMAIN_NAME"] = "" + +llm = Pangu(id=1) +llm.set_auth_config() +``` + +Regarding embedding models, we support all models that can be loaded via ["langchain.embeddings.HuggingFaceEmbeddings"](https://api.python.langchain.com/en/latest/embeddings/langchain.embeddings.huggingface.HuggingFaceEmbeddings.html). The default embedding model used in this demo is ["moka-ai/m3e-base"](https://huggingface.co/moka-ai/m3e-base), which enables consistent text similarity computations in both Chinese and English. + +In the future, supported LLM will also include models running on colossal inference and serving framework. 
+ +## Install + +Install colossalqa +```bash +# python==3.8.17 +cd ColossalAI/applications/ColossalQA +pip install -e . +``` + +To use vLLM to provide LLM services via an API, please consult the official guide [here](https://vllm.readthedocs.io/en/latest/getting_started/quickstart.html#api-server) to start the API server. It's important to set up a new virtual environment for installing vLLM, as there are currently some dependency conflicts between vLLM and ColossalQA when installed on the same machine. + +## How to Use + +### Collect Your Data + +For the ChatGPT-based agent, we support document retrieval and simple SQL search. +If you want to run the demo locally, we provide a document-retrieval-based conversation system built upon LangChain. It accepts a wide range of documents. After collecting your data, put your data under a folder. + +Read comments under ./colossalqa/data_loader for more details regarding supported data formats. + +### Run The Script + +We provide a simple Web UI demo of ColossalQA, enabling you to upload your files as a knowledge base and interact with them through a chat interface in your browser. More details can be found [here](examples/webui_demo/README.md). + + +We also provide scripts for a Chinese document-retrieval-based conversation system, an English document-retrieval-based conversation system, a bi-lingual document-retrieval-based conversation system, and an experimental AI agent with document retrieval and SQL query functionality. The bi-lingual one is a high-level wrapper for the other two classes. We write different scripts for different languages because retrieval QA requires different embedding models, LLMs, and prompts for different language settings. For now, we use LLaMa2 for English retrieval QA and ChatGLM2 for Chinese retrieval QA for better performance. + +To run the bi-lingual scripts.
+```bash +python retrieval_conversation_universal.py \ + --en_model_path /path/to/Llama-2-7b-hf \ + --zh_model_path /path/to/chatglm2-6b \ + --zh_model_name chatglm2 \ + --en_model_name llama \ + --sql_file_path /path/to/any/folder +``` + +To run retrieval_conversation_en.py. +```bash +python retrieval_conversation_en.py \ + --model_path /path/to/Llama-2-7b-hf \ + --model_name llama \ + --sql_file_path /path/to/any/folder +``` + +To run retrieval_conversation_zh.py. +```bash +python retrieval_conversation_zh.py \ + --model_path /path/to/chatglm2-6b \ + --model_name chatglm2 \ + --sql_file_path /path/to/any/folder +``` + +To run retrieval_conversation_chatgpt.py. +```bash +python retrieval_conversation_chatgpt.py \ + --open_ai_key_path /path/to/plain/text/openai/key/file \ + --sql_file_path /path/to/any/folder +``` + +To run conversation_agent_chatgpt.py. +```bash +python conversation_agent_chatgpt.py \ + --open_ai_key_path /path/to/plain/text/openai/key/file +``` + +After running the script, you will be asked to provide the path to your data during the execution of the script. You can also pass a glob path to load multiple files at once. Please read this [guide](https://docs.python.org/3/library/glob.html) on how to define a glob path. Follow the instructions and provide all files for your retrieval conversation system, then type "ESC" to finish loading documents. If csv files are provided, please use "," as the delimiter and "\"" as the quotation mark. For json and jsonl files, the default format is +``` +{ + "data":[ + {"content":"XXX"}, + {"content":"XXX"} + ... + ] +} +``` +For other formats, please refer to [this document](https://python.langchain.com/docs/modules/data_connection/document_loaders/json) on how to define a schema for data loading. There are no other formatting constraints for loading document-type files.
For loading table-type files, we use pandas; please refer to [Pandas-Input/Output](https://pandas.pydata.org/pandas-docs/stable/reference/io.html) for file format details. + +We also support another key-value mode that utilizes a user-defined key to calculate the embeddings of the vector store. If a query matches a specific key, the value corresponding to that key will be used to generate the prompt. For instance, in the document below, "My coupon isn't working." will be employed during indexing, whereas "Question: My coupon isn't working.\nAnswer: We apologize for ... apply it to?" will appear in the final prompt. This format is typically useful when the task involves carrying on a conversation with readily accessible conversation data, such as customer service or question answering. +```python +Document(page_content="My coupon isn't working.", metadata={'is_key_value_mapping': True, 'seq_num': 36, 'source': 'XXX.json', 'value': "Question: My coupon isn't working.\nAnswer:We apologize for the inconvenience. Can you please provide the coupon code and the product name or SKU you're trying to apply it to?"}) +``` + +For now, we only support the key-value mode for json data files. You can run the script retrieval_conversation_en_customer_service.py with the following command.
+ +```bash +python retrieval_conversation_en_customer_service.py \ + --model_path /path/to/Llama-2-7b-hf \ + --model_name llama \ + --sql_file_path /path/to/any/folder +``` + +## The Plan + +- [x] build document retrieval QA tool +- [x] Add memory +- [x] Add demo for AI agent with SQL query +- [x] Add customer retriever for fast construction and retrieving (with incremental update) + +## Reference + +```bibtex +@software{Chase_LangChain_2022, +author = {Chase, Harrison}, +month = oct, +title = {{LangChain}}, +url = {https://github.com/hwchase17/langchain}, +year = {2022} +} +``` +```bibtex +@inproceedings{DBLP:conf/asru/ZhangCLLW21, + author = {Qinglin Zhang and + Qian Chen and + Yali Li and + Jiaqing Liu and + Wen Wang}, + title = {Sequence Model with Self-Adaptive Sliding Window for Efficient Spoken + Document Segmentation}, + booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} + 2021, Cartagena, Colombia, December 13-17, 2021}, + pages = {411--418}, + publisher = {{IEEE}}, + year = {2021}, + url = {https://doi.org/10.1109/ASRU51503.2021.9688078}, + doi = {10.1109/ASRU51503.2021.9688078}, + timestamp = {Wed, 09 Feb 2022 09:03:04 +0100}, + biburl = {https://dblp.org/rec/conf/asru/ZhangCLLW21.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` +```bibtex +@misc{touvron2023llama, + title={Llama 2: Open Foundation and Fine-Tuned Chat Models}, + author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem 
Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom}, + year={2023}, + eprint={2307.09288}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` +```bibtex +@article{zeng2022glm, + title={Glm-130b: An open bilingual pre-trained model}, + author={Zeng, Aohan and Liu, Xiao and Du, Zhengxiao and Wang, Zihan and Lai, Hanyu and Ding, Ming and Yang, Zhuoyi and Xu, Yifan and Zheng, Wendi and Xia, Xiao and others}, + journal={arXiv preprint arXiv:2210.02414}, + year={2022} +} +``` +```bibtex +@inproceedings{du2022glm, + title={GLM: General Language Model Pretraining with Autoregressive Blank Infilling}, + author={Du, Zhengxiao and Qian, Yujie and Liu, Xiao and Ding, Ming and Qiu, Jiezhong and Yang, Zhilin and Tang, Jie}, + booktitle={Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + pages={320--335}, + year={2022} +} +``` diff --git a/applications/ColossalQA/colossalqa/__init__.py b/applications/ColossalQA/colossalqa/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/applications/ColossalQA/colossalqa/chain/__init__.py b/applications/ColossalQA/colossalqa/chain/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/applications/ColossalQA/colossalqa/chain/memory/__init__.py b/applications/ColossalQA/colossalqa/chain/memory/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/applications/ColossalQA/colossalqa/chain/memory/summary.py b/applications/ColossalQA/colossalqa/chain/memory/summary.py new file mode 100644 index 000000000..1d63bbc4a --- /dev/null +++ b/applications/ColossalQA/colossalqa/chain/memory/summary.py @@ -0,0 +1,103 @@ +""" +Custom SummarizerMixin base class and ConversationSummaryMemory class + +Modified from Original Source + +This code is based on LangChain Ai's langchain, which can be found at +https://github.com/langchain-ai/langchain +The original code is licensed under the MIT license. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Type + +from langchain.chains.llm import LLMChain +from langchain.memory.chat_memory import BaseChatMemory +from langchain.memory.prompt import SUMMARY_PROMPT +from langchain.pydantic_v1 import BaseModel, root_validator +from langchain.schema import BaseChatMessageHistory, BasePromptTemplate +from langchain.schema.language_model import BaseLanguageModel +from langchain.schema.messages import BaseMessage, SystemMessage, get_buffer_string + + +class SummarizerMixin(BaseModel): + """ + Mixin for summarizer. + """ + + human_prefix: str = "Human" + ai_prefix: str = "Assistant" + llm: BaseLanguageModel + prompt: BasePromptTemplate = SUMMARY_PROMPT + summary_message_cls: Type[BaseMessage] = SystemMessage + llm_kwargs: Dict = {} + + def predict_new_summary(self, messages: List[BaseMessage], existing_summary: str, stop: List = []) -> str: + """ + Recursively summarize a conversation by generating a new summary using + the last round of conversation and the existing summary. 
+ """ + new_lines = get_buffer_string( + messages, + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + + chain = LLMChain(llm=self.llm, prompt=self.prompt, llm_kwargs=self.llm_kwargs) + return chain.predict(summary=existing_summary, new_lines=new_lines, stop=stop) + + +class ConversationSummaryMemory(BaseChatMemory, SummarizerMixin): + """Conversation summarizer to chat memory.""" + + buffer: str = "" + memory_key: str = "history" + + @classmethod + def from_messages( + cls, + llm: BaseLanguageModel, + chat_memory: BaseChatMessageHistory, + summarize_step: int = 2, + **kwargs: Any, + ) -> ConversationSummaryMemory: + obj = cls(llm=llm, chat_memory=chat_memory, **kwargs) + for i in range(0, len(obj.chat_memory.messages), summarize_step): + obj.buffer = obj.predict_new_summary(obj.chat_memory.messages[i : i + summarize_step], obj.buffer) + return obj + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables.""" + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + if self.return_messages: + buffer: Any = [self.summary_message_cls(content=self.buffer)] + else: + buffer = self.buffer + return {self.memory_key: buffer} + + @root_validator() + def validate_prompt_input_variables(cls, values: Dict) -> Dict: + """Validate that prompt input variables are consistent.""" + prompt_variables = values["prompt"].input_variables + expected_keys = {"summary", "new_lines"} + if expected_keys != set(prompt_variables): + raise ValueError( + "Got unexpected prompt input variables. The prompt expects " + f"{prompt_variables}, but it should have {expected_keys}." 
+ ) + return values + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + super().save_context(inputs, outputs) + self.buffer = self.predict_new_summary(self.chat_memory.messages[-2:], self.buffer) + + def clear(self) -> None: + """Clear memory contents.""" + super().clear() + self.buffer = "" diff --git a/applications/ColossalQA/colossalqa/chain/retrieval_qa/__init__.py b/applications/ColossalQA/colossalqa/chain/retrieval_qa/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py b/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py new file mode 100644 index 000000000..e80befdac --- /dev/null +++ b/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py @@ -0,0 +1,214 @@ +""" +Chain for question-answering against a vector database. + +Modified from Original Source + +This code is based on LangChain Ai's langchain, which can be found at +https://github.com/langchain-ai/langchain +The original code is licensed under the MIT license. 
+""" +from __future__ import annotations + +import copy +import inspect +from typing import Any, Dict, List, Optional + +from colossalqa.chain.retrieval_qa.load_chain import load_qa_chain +from colossalqa.chain.retrieval_qa.stuff import CustomStuffDocumentsChain +from langchain.callbacks.manager import AsyncCallbackManagerForChainRun, CallbackManagerForChainRun, Callbacks +from langchain.chains.llm import LLMChain +from langchain.chains.question_answering.stuff_prompt import PROMPT_SELECTOR +from langchain.chains.retrieval_qa.base import BaseRetrievalQA +from langchain.prompts import PromptTemplate +from langchain.pydantic_v1 import Field +from langchain.schema import BaseRetriever, Document +from langchain.schema.language_model import BaseLanguageModel + +class CustomBaseRetrievalQA(BaseRetrievalQA): + """Base class for question-answering chains.""" + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + prompt: Optional[PromptTemplate] = None, + callbacks: Callbacks = None, + **kwargs: Any, + ) -> BaseRetrievalQA: + """Initialize from LLM.""" + llm_kwargs = kwargs.pop("llm_kwargs", {}) + _prompt = prompt or PROMPT_SELECTOR.get_prompt(llm) + llm_chain = LLMChain(llm=llm, prompt=_prompt, callbacks=callbacks, llm_kwargs=llm_kwargs) + document_prompt = kwargs.get( + "document_prompt", PromptTemplate(input_variables=["page_content"], template="Context:\n{page_content}") + ) + combine_documents_chain = CustomStuffDocumentsChain( + llm_chain=llm_chain, + document_variable_name="context", + document_prompt=document_prompt, + callbacks=callbacks, + ) + + return cls( + combine_documents_chain=combine_documents_chain, + callbacks=callbacks, + **kwargs, + ) + + @classmethod + def from_chain_type( + cls, + llm: BaseLanguageModel, + chain_type: str = "stuff", + chain_type_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> BaseRetrievalQA: + """Load chain from chain type.""" + llm_kwargs = kwargs.pop("llm_kwargs", {}) + _chain_type_kwargs = chain_type_kwargs or 
{} + combine_documents_chain = load_qa_chain(llm, chain_type=chain_type, **_chain_type_kwargs, llm_kwargs=llm_kwargs) + return cls(combine_documents_chain=combine_documents_chain, **kwargs) + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, Any]: + """Run get_relevant_text and llm on input query. + + If chain has 'return_source_documents' as 'True', returns + the retrieved documents as well under the key 'source_documents'. + + Example: + .. code-block:: python + + res = indexqa({'query': 'This is my query'}) + answer, docs = res['result'], res['source_documents'] + """ + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + question = inputs[self.input_key] + accepts_run_manager = "run_manager" in inspect.signature(self._get_docs).parameters + if accepts_run_manager: + docs = self._get_docs(question, run_manager=_run_manager) + else: + docs = self._get_docs(question) # type: ignore[call-arg] + + kwargs = { + k: v + for k, v in inputs.items() + if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"] + } + answers = [] + if self.combine_documents_chain.memory is not None: + buffered_history_backup, summarized_history_temp_backup = copy.deepcopy( + self.combine_documents_chain.memory.buffered_history + ), copy.deepcopy(self.combine_documents_chain.memory.summarized_history_temp) + else: + buffered_history_backup = None + summarized_history_temp_backup = None + + answer = self.combine_documents_chain.run( + input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs + ) + if summarized_history_temp_backup is not None and buffered_history_backup is not None: + ( + self.combine_documents_chain.memory.buffered_history, + self.combine_documents_chain.memory.summarized_history_temp, + ) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup) + + # if rejection_trigger_keywords is not given, 
async def _acall(
    self,
    inputs: Dict[str, Any],
    run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
    """Asynchronously retrieve documents for the query and answer it with the LLM.

    If the chain has 'return_source_documents' set to 'True', the retrieved
    documents are returned as well under the key 'source_documents'.

    Args:
        inputs: chain inputs. Must contain ``self.input_key`` (the question).
            May also contain generation options ("stop", "temperature", "top_k",
            "top_p", "max_new_tokens", "doc_prefix"), 'rejection_trigger_keywrods'
            (sic) and 'rejection_answer'.
        run_manager: optional async callback manager; a no-op manager is used
            when omitted.

    Returns:
        A dict holding the answer under ``self.output_key`` and, optionally,
        the retrieved documents under "source_documents".

    Example:
    .. code-block:: python

        res = await indexqa.acall({'query': 'This is my query'})
        answer, docs = res['result'], res['source_documents']
    """
    _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
    question = inputs[self.input_key]
    # Subclasses may implement _aget_docs without a run_manager parameter, so probe the signature.
    accepts_run_manager = "run_manager" in inspect.signature(self._aget_docs).parameters
    if accepts_run_manager:
        docs = await self._aget_docs(question, run_manager=_run_manager)
    else:
        docs = await self._aget_docs(question)  # type: ignore[call-arg]
    # Forward only the generation options understood by the combine-documents chain.
    kwargs = {
        k: v
        for k, v in inputs.items()
        if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
    }
    answer = await self.combine_documents_chain.arun(
        input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
    )
    # If no rejection trigger keywords are given, return the response from the LLM directly.
    # NOTE: the misspelled key is what callers pass today, so it is kept as-is.
    rejection_trigger_keywrods = inputs.get('rejection_trigger_keywrods', [])
    if answer is None or any(rej in answer for rej in rejection_trigger_keywrods):
        answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
    # Memory is optional; the synchronous _call guards it the same way.
    if self.combine_documents_chain.memory is not None:
        self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

    if self.return_source_documents:
        return {self.output_key: answer, "source_documents": docs}
    else:
        return {self.output_key: answer}
def load_qa_chain(
    llm: BaseLanguageModel,
    chain_type: str = "stuff",
    verbose: Optional[bool] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    **kwargs: Any,
) -> BaseCombineDocumentsChain:
    """Build the combine-documents chain used for question answering.

    Args:
        llm: Language model driving the chain.
        chain_type: Name of the document-combining strategy. Only "stuff" is
            registered in this module.
        verbose: Whether the chain (and the chains it is made of) run verbosely.
        callback_manager: Callback manager handed to the chain.
        **kwargs: Extra options forwarded unchanged to the selected loader.

    Returns:
        The chain produced by the loader registered for ``chain_type``.

    Raises:
        ValueError: if ``chain_type`` has no registered loader.
    """
    loader_mapping: Mapping[str, LoadingCallable] = {"stuff": _load_stuff_chain}
    build_chain = loader_mapping.get(chain_type)
    if build_chain is None:
        raise ValueError(f"Got unsupported chain type: {chain_type}. " f"Should be one of {loader_mapping.keys()}")
    return build_chain(llm, verbose=verbose, callback_manager=callback_manager, **kwargs)
def _get_inputs(self, docs: List[Document], **kwargs: Any) -> dict:
    """Construct the LLM chain inputs from kwargs and docs.

    Every document is copied, prefixed with ``"<doc_prefix><index>: "`` and
    formatted with ``self.document_prompt``; the results are joined with
    ``self.document_separator`` and stored under ``self.document_variable_name``.
    Generation options and prompt input variables are plucked from ``kwargs``.

    Args:
        docs: List of documents to format and then join into a single input.
        **kwargs: additional inputs to the chain. ``doc_prefix`` (default
            "Supporting Document") sets the label put in front of each document.

    Returns:
        dictionary of inputs to LLMChain
    """
    doc_prefix = kwargs.get("doc_prefix", "Supporting Document")
    docs_ = []
    for index, doc in enumerate(docs):
        # Work on a copy so the caller's documents are never modified.
        doc_ = copy.deepcopy(doc)
        # Key-value documents carry the text to show in metadata["value"]; use it instead of the key.
        if doc_.metadata.get("is_key_value_mapping", False) and "value" in doc_.metadata:
            doc_.page_content = str(doc_.metadata["value"])
        prefix = doc_prefix + str(index)
        # startswith() copes with empty content, where indexing [0] would raise IndexError.
        padding = "" if doc_.page_content.startswith(" ") else " "
        doc_.page_content = str(prefix + ":" + padding + doc_.page_content)
        docs_.append(doc_)

    # Format each document according to the document prompt.
    doc_strings = [format_document(doc, self.document_prompt) for doc in docs_]
    arg_list = ["stop", "temperature", "top_k", "top_p", "max_new_tokens"]
    arg_list.extend(self.llm_chain.prompt.input_variables)
    inputs = {k: v for k, v in kwargs.items() if k in arg_list}
    # Join the documents together to put them in the prompt.
    inputs[self.document_variable_name] = self.document_separator.join(doc_strings)
    return inputs
[".csv", ".json", ".html", ".md", ".pdf", ".txt", ".jsonl"] + + +class DocumentLoader: + """ + Load documents from different files into list of langchain Documents + """ + + def __init__(self, files: List, **kwargs) -> None: + """ + Args: + files: list of files (list[file path, name]) + **kwargs: keyword type arguments, useful for certain document types + """ + self.data = {} + self.kwargs = kwargs + + for item in files: + path = item[0] if isinstance(item, list) else item + logger.info(f"Loading data from {path}") + self.load_data(path) + logger.info("Data loaded") + + self.all_data = [] + for key in self.data: + if isinstance(self.data[key], list): + for item in self.data[key]: + if isinstance(item, list): + self.all_data.extend(item) + else: + self.all_data.append(item) + + def load_data(self, path: str) -> None: + """ + Load data. Please refer to https://python.langchain.com/docs/modules/data_connection/document_loaders/ + for sepcific format requirements. + Args: + path: path to a file + To load files with glob path, here are some examples. + Load all file from directory: folder1/folder2/* + Load all pdf file from directory: folder1/folder2/*.pdf + """ + files = [] + + # Handle glob expression + try: + files = glob.glob(path) + except Exception as e: + logger.error(e) + if len(files) == 0: + raise ValueError("Unsupported file/directory format. 
For directories, please use glob expression") + elif len(files) == 1: + path = files[0] + else: + for file in files: + self.load_data(file) + return + + # Load data if the path is a file + logger.info(f"load {path}", verbose=True) + if path.endswith(".csv"): + # Load csv + loader = CSVLoader(file_path=path, encoding="utf8") + data = loader.load() + self.data[path] = data + elif path.endswith(".txt"): + # Load txt + loader = TextLoader(path, encoding="utf8") + data = loader.load() + self.data[path] = data + elif path.endswith("html"): + # Load html + loader = UnstructuredHTMLLoader(path, encoding="utf8") + data = loader.load() + self.data[path] = data + elif path.endswith("json"): + # Load json + loader = JSONLoader( + file_path=path, + jq_schema=self.kwargs.get("jq_schema", ".data[]"), + content_key=self.kwargs.get("content_key", "content"), + metadata_func=self.kwargs.get("metadata_func", None), + ) + + data = loader.load() + self.data[path] = data + elif path.endswith("jsonl"): + # Load jsonl + loader = JSONLoader( + file_path=path, jq_schema=self.kwargs.get("jq_schema", ".data[].content"), json_lines=True + ) + data = loader.load() + self.data[path] = data + elif path.endswith(".md"): + # Load markdown + loader = UnstructuredMarkdownLoader(path) + data = loader.load() + self.data[path] = data + elif path.endswith(".pdf"): + # Load pdf + loader = PyPDFLoader(path) + data = loader.load_and_split() + self.data[path] = data + else: + if "." in path.split("/")[-1]: + raise ValueError(f"Unsupported file format {path}. 
class TableLoader:
    '''
    Load tables from different files and serve a sql database for database operations
    '''

    def __init__(self, files: list, sql_path: str = 'sqlite:///mydatabase.db', verbose=False, **kwargs) -> None:
        '''
        Args:
            files: list of files (list[file path, name]); ``name`` becomes the sql table name
            sql_path: how to serve the sql database
            verbose: forwarded to the logger calls
            **kwargs: keyword type arguments, useful for certain document types
                (e.g. ``key`` for hdf5 files)

        Raises:
            FileNotFoundError: if a listed path does not exist
            TypeError: if a listed path has an unsupported extension
        '''
        self.data = {}
        self.verbose = verbose
        self.sql_path = sql_path
        self.kwargs = kwargs
        # Start from an empty database: drop whatever tables a previous run left behind.
        self.sql_engine = create_engine(self.sql_path)
        drop_table(self.sql_engine)

        # NOTE(review): the engine is re-created after dropping, as before; presumably to
        # discard cached table metadata — confirm whether this is still required.
        self.sql_engine = create_engine(self.sql_path)
        for item in files:
            path = item[0]
            dataset_name = item[1]
            if not os.path.exists(path):
                raise FileNotFoundError(f"{path} doesn't exists")
            if not path.endswith(tuple(SUPPORTED_DATA_FORMAT)):
                raise TypeError(f"{path} not supported. Supported type {SUPPORTED_DATA_FORMAT}")

            logger.info("loading data", verbose=self.verbose)
            self.load_data(path)
            logger.info("data loaded", verbose=self.verbose)
            self.to_sql(path, dataset_name)

    def load_data(self, path):
        '''
        Load one file, or every file matched by a glob expression, into ``self.data``.
        Each table is stored as a pandas DataFrame keyed by its file path.

        Raises:
            ValueError: if nothing matches ``path`` or the extension is unsupported
        '''
        files = []
        # Handle glob expression
        try:
            files = glob.glob(path)
        except Exception as e:
            logger.error(e)
        if len(files) == 0:
            raise ValueError("Unsupported file/directory format. For directories, please use glob expression")
        if len(files) > 1:
            for file in files:
                self.load_data(file)
            # Every match has been loaded; the glob pattern itself is not a readable file.
            return
        path = files[0]

        if path.endswith('.csv'):
            # Load csv
            table = pd.read_csv(path)
        elif path.endswith(('.xlsx', '.xls')):
            # Load excel (first sheet)
            table = pd.read_excel(path)
        elif path.endswith('.json'):
            # Load json
            table = pd.read_json(path)
        elif path.endswith('.html'):
            # Load html: read_html returns one DataFrame per <table>; keep the first
            table = pd.read_html(path)[0]
        elif path.endswith(('.h5', '.hdf5')):
            # Load h5, the dataset key is configurable through kwargs
            table = pd.read_hdf(path, key=self.kwargs.get('key', 'data'))
        elif path.endswith('.parquet'):
            # Load parquet
            table = pd.read_parquet(path, engine='fastparquet')
        elif path.endswith('.feather'):
            # Load feather
            table = pd.read_feather(path)
        elif path.endswith('.dta'):
            # Load dta
            table = pd.read_stata(path)
        else:
            raise ValueError("Unsupported file format")
        self.data[path] = table

    def to_sql(self, path, table_name):
        '''
        Write the table loaded from ``path`` into the sql database as ``table_name``,
        replacing any existing table of that name. Returns the database url.
        '''
        self.data[path].to_sql(table_name, con=self.sql_engine, if_exists='replace', index=False)
        logger.info(f"Loaded to Sqlite3\nPath: {path}", verbose=self.verbose)
        return self.sql_path

    def get_sql_path(self):
        '''
        Return the url of the sql database
        '''
        return self.sql_path

    def __del__(self):
        # __init__ may have failed before the engine existed; never raise from a finalizer.
        engine = getattr(self, 'sql_engine', None)
        if engine is None:
            return
        drop_table(engine)
        engine.dispose()
        self.__dict__.pop('data', None)
        self.__dict__.pop('sql_engine', None)
class ColossalCloudLLM(LLM):
    """
    A custom LLM class that integrates LLMs running on the ColossalCloud Platform

    """
    n: int
    # Default generation parameters; per-call keyword arguments override them.
    gen_config: dict = None
    # Filled by set_auth_config(): 'endpoint' (request url) and 'Host' (request header).
    auth_config: dict = None
    valid_gen_para: list = ['max_new_tokens', 'top_k',
                            'top_p', 'temperature', 'repetition_penalty']

    def __init__(self, gen_config=None, **kwargs):
        """
        Args:
            gen_config: config for generation,
                max_new_tokens: 50 by default
                top_k: (1, vocab_size)
                top_p: (0, 1) if not None
                temperature: (0, inf) if not None
                repetition_penalty: (1, inf) if not None
        """
        super(ColossalCloudLLM, self).__init__(**kwargs)
        if gen_config is None:
            self.gen_config = {"max_new_tokens": 50}
        else:
            assert "max_new_tokens" in gen_config, "max_new_tokens is a compulsory key in the gen config"
            self.gen_config = gen_config

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"n": self.n}

    @property
    def _llm_type(self) -> str:
        return 'ColossalCloudLLM'

    def set_auth_config(self, **kwargs):
        """Read the service url and host from kwargs ("url", "host") or, failing that,
        from the environment variables URL and HOST, and store them in ``auth_config``."""
        url = get_from_dict_or_env(kwargs, "url", "URL")
        host = get_from_dict_or_env(kwargs, "host", "HOST")

        auth_config = {}
        auth_config['endpoint'] = url
        auth_config['Host'] = host
        self.auth_config = auth_config

    def _call(self, prompt: str, stop=None, **kwargs: Any) -> str:
        """
        Args:
            prompt: The prompt to pass into the model.
            stop: A list of strings to stop generation when encountered
            **kwargs: generation parameters for this call only; every key must be
                listed in ``valid_gen_para``

        Returns:
            The string generated by the model

        Raises:
            KeyError: if an unknown generation parameter is passed
        """
        for key in kwargs:
            if key not in self.valid_gen_para:
                raise KeyError(f"Invalid generation parameter: '{key}'. Valid keys are: {', '.join(self.valid_gen_para)}")
        # Overlay the call's arguments on the defaults. Building a fresh dict means every
        # validated argument takes effect and the stored defaults are left untouched.
        gen_config = {**self.gen_config, **kwargs}

        resp_text = self.text_completion(prompt, gen_config, self.auth_config)
        # TODO: This may cause excessive tokens count
        if stop is not None:
            for stopping_words in stop:
                if stopping_words in resp_text:
                    resp_text = resp_text.split(stopping_words)[0]
        return resp_text

    def text_completion(self, prompt, gen_config, auth_config):
        """POST the prompt to the configured endpoint and return the raw response text.

        Neither ``gen_config`` nor ``auth_config`` is modified, so the same dicts can
        be reused for later calls.

        Raises:
            ValueError: if ``auth_config`` is None (set_auth_config was not called)
            requests.HTTPError: if the service answers with an error status
        """
        if auth_config is None:
            raise ValueError("auth_config is not set, call set_auth_config() first")
        # Compulsory parameters. Work on a copy: popping from the caller's dict would
        # remove 'endpoint' for good and break the next request.
        extra_headers = dict(auth_config)
        endpoint = extra_headers.pop('endpoint')
        max_new_tokens = gen_config['max_new_tokens']
        # Optional parameters
        optional_params = ['top_k', 'top_p', 'temperature', 'repetition_penalty']
        sampling_config = {key: gen_config[key] for key in optional_params if key in gen_config}
        # Define the data payload
        data = {
            "max_new_tokens": max_new_tokens,
            "history": [
                {"instruction": prompt, "response": ""}
            ],
            **sampling_config
        }
        headers = {
            "Content-Type": "application/json",
            **extra_headers  # 'Host'
        }
        # Make the POST request
        response = requests.post(endpoint, headers=headers, data=json.dumps(data))
        response.raise_for_status()  # raise error if the return code signals a failure
        return response.text
class ColossalAPI:
    """
    API for calling LLM.generate

    One instance is kept per (model_type, model_path, ckpt_path) combination:
    constructing the class again with the same arguments returns the already
    loaded instance instead of loading the model a second time.
    """

    __instances = dict()

    @staticmethod
    def _cache_key(model_type: str, model_path: str, ckpt_path: str = None) -> tuple:
        """Build the cache key; a tuple keeps ("ab", "c") and ("a", "bc") distinct."""
        return (model_type, model_path, ckpt_path or "")

    def __new__(cls, model_type: str, model_path: str, ckpt_path: str = None):
        # Hand back the cached, fully initialized instance when there is one.
        cached = cls.__instances.get(cls._cache_key(model_type, model_path, ckpt_path))
        if cached is not None:
            return cached
        return super().__new__(cls)

    def __init__(self, model_type: str, model_path: str, ckpt_path: str = None) -> None:
        """
        Configurate model

        Args:
            model_type: model family name, e.g. "chatglm2"; selects how inputs are built in generate
            model_path: path or hub id passed to ``from_pretrained``
            ckpt_path: optional state-dict checkpoint loaded on top of the pretrained weights
        """
        key = self._cache_key(model_type, model_path, ckpt_path)
        if ColossalAPI.__instances.get(key) is self:
            # __new__ returned the cached instance; it is already set up.
            return
        self.model_type = model_type
        self.model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)

        if ckpt_path is not None:
            # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
            state_dict = torch.load(ckpt_path)
            self.model.load_state_dict(state_dict)
        self.model.to(torch.cuda.current_device())

        # Configurate tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

        self.model.eval()
        # Register only once loading succeeded, so a failed load never leaves a broken entry behind.
        ColossalAPI.__instances[key] = self

    @staticmethod
    def get_api(model_type: str, model_path: str, ckpt_path: str = None):
        """Return the cached instance for these arguments, creating it on first use."""
        return ColossalAPI(model_type, model_path, ckpt_path)

    def generate(self, input: str, **kwargs) -> str:
        """
        Generate response given the prompt
        Args:
            input: input string
            **kwargs: language model keyword type arguments, such as top_k, top_p, temperature, max_new_tokens...
        Returns:
            output: output string
        """
        if self.model_type in ["chatglm", "chatglm2"]:
            # These models are given every tensor the tokenizer returns.
            inputs = {
                k: v.to(torch.cuda.current_device()) for k, v in self.tokenizer(input, return_tensors="pt").items()
            }
        else:
            inputs = {
                "input_ids": self.tokenizer(input, return_tensors="pt")["input_ids"].to(torch.cuda.current_device())
            }

        output = self.model.generate(**inputs, **kwargs)
        output = output.cpu()
        # The generated sequence starts with the prompt tokens; keep only what follows them.
        prompt_len = inputs["input_ids"].size(1)
        response = output[0, prompt_len:]
        output = self.tokenizer.decode(response, skip_special_tokens=True)
        return output
def _call(
    self,
    prompt: str,
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> str:
    """Send the prompt to the served model and return the generated text.

    Args:
        prompt: the prompt to complete
        stop: optional list of strings; the output is cut at the first occurrence of each
        run_manager: accepted for LangChain compatibility, not used here
        **kwargs: per-call options; values missing here fall back to ``self.kwargs``

    Returns:
        The generated text, truncated at the stop strings.
    """
    # Fill in the instance-level defaults for options the caller did not give.
    for k in self.kwargs:
        kwargs.setdefault(k, self.kwargs[k])
    logger.info(f"kwargs:{kwargs}\nstop:{stop}\nprompt:{prompt}", verbose=self.verbose)
    # Only these options are forwarded to the API; anything else is dropped.
    generate_args = {k: kwargs[k] for k in kwargs if k in ["n", "max_tokens", "temperature", "stream"]}
    out = self.api.generate(prompt, **generate_args)
    # ``stop`` defaults to None, so test truthiness rather than calling len() on it.
    if stop:
        for stopping_words in stop:
            if stopping_words in out:
                out = out.split(stopping_words)[0]
    logger.info(f"{prompt}{out}", verbose=self.verbose)
    return out
def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
    """
    Complete ``prompt`` with the Pangu text-completion service.

    Args:
        prompt: The prompt to pass into the model.
        stop: A list of strings to stop generation when encountered
        **kwargs: generation arguments; only keys already present in
            ``self.gen_config`` are applied, and they stay applied afterwards

    Returns:
        The string generated by the model
    """
    # Fold the recognised overrides into the stored generation config.
    overrides = {name: val for name, val in kwargs.items() if name in self.gen_config}
    self.gen_config.update(overrides)

    completion = self.text_completion(prompt, self.gen_config, self.auth_config)
    generated = completion["choices"][0]["text"]
    # Keep only what precedes each stop string; a missing stop string leaves the text unchanged.
    for marker in stop or ():
        generated = generated.split(marker)[0]
    return generated
def text_completion(self, text, gen_config, auth_config):
    """POST a text-completion request and return the decoded JSON response.

    Args:
        text: the prompt
        gen_config: dict providing "user", "max_tokens", "temperature" and "n"
        auth_config: dict providing "endpoint" (host), "resource_path" and "auth_token"

    Returns:
        The response body parsed from JSON.
    """
    payload = json.dumps(
        {
            "prompt": text,
            "user": gen_config["user"],
            "max_tokens": gen_config["max_tokens"],
            "temperature": gen_config["temperature"],
            "n": gen_config["n"],
        }
    )
    headers = {
        "X-Auth-Token": auth_config["auth_token"],
        "Content-Type": "application/json",
    }
    conn = http.client.HTTPSConnection(auth_config["endpoint"])
    try:
        conn.request("POST", auth_config["resource_path"], payload, headers)
        res = conn.getresponse()
        data = res.read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
    return json.loads(data.decode("utf-8"))
def post_http_request(
    prompt: str,
    api_url: str,
    n: int = 1,
    max_tokens: int = 100,
    temperature: float = 0.0,
    stream: bool = False,
    timeout: float = 3,
) -> requests.Response:
    """Send a generation request to a model served over HTTP.

    Args:
        prompt: the prompt to complete
        api_url: url of the generate endpoint
        n: number of completions to request
        max_tokens: maximum number of tokens to generate
        temperature: sampling temperature
        stream: value of the "stream" field in the request payload
        timeout: seconds to wait for the server (3 by default)

    Returns:
        The HTTP response; its body is read lazily because the request is made
        with ``stream=True``.
    """
    headers = {"User-Agent": "Test Client"}
    pload = {
        "prompt": prompt,
        # Forward the caller's value; it used to be hard-coded to 1.
        "n": n,
        "use_beam_search": False,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": stream,
    }
    response = requests.post(api_url, headers=headers, json=pload, stream=True, timeout=timeout)
    return response
def clear(self):
    """Clear all the memory.

    Empties the recent-conversation buffer and the buffer of messages waiting to be
    summarized, and resets the running summary of stale conversation.
    """
    self.buffered_history.clear()
    self.summarized_history_temp.clear()
    # format_dialogue() prepends existing_summary whenever it is non-empty, so a cleared
    # memory must drop it as well; otherwise old history leaks into the next prompt.
    self.existing_summary = ""
def format_dialogue(self, lang: str = "en") -> str:
    """Render the memory as text: summary of stale turns first, then the recent turns.

    Every complete pair of messages waiting in ``summarized_history_temp`` is folded into
    ``existing_summary`` via ``predict_new_summary`` (only the first line of its output is
    kept) and then removed from that buffer.

    Args:
        lang: language of the section headers, ``"en"`` or ``"zh"``; only consulted when a
            summary exists.

    Returns:
        The recent conversation alone when there is no summary, otherwise the summary and
        the recent conversation under language-specific headers.

    Raises:
        ValueError: if a summary exists and ``lang`` is neither ``"en"`` nor ``"zh"``.
    """
    pending = self.summarized_history_temp.messages
    pair_count = int(len(pending) / 2)

    # Fold each stale pair of messages into the running summary, oldest pair first.
    for pair_index in range(pair_count):
        start = pair_index * 2
        raw_summary = self.predict_new_summary(pending[start : start + 2], self.existing_summary, stop=["\n\n"])
        first_line = raw_summary.strip().split("\n")[0]
        self.existing_summary = first_line.strip()

    # Drop the pairs that were just summarized; an unpaired trailing message stays queued.
    del pending[: pair_count * 2]

    recent_turns = "\n".join(
        (self.human_prefix if turn.type == "human" else self.ai_prefix) + ": " + turn.content
        for turn in self.buffered_history.messages
    )

    if len(self.existing_summary) == 0:
        return recent_turns
    if lang == "en":
        return f"A summarization of historical conversation:\n{self.existing_summary}\nMost recent conversation:\n{recent_turns}"
    if lang == "zh":
        return f"历史对话概要:\n{self.existing_summary}\n最近的对话:\n{recent_turns}"
    raise ValueError("Unsupported language")
class ColossalQALogger:
    """Named wrapper around :mod:`logging` with a per-call ``verbose`` switch.

    At most one wrapper exists per name; use :meth:`get_instance` to obtain it.

    Args:
        name (str): The name of the logger.

    Note:
        Logging types: ``info``, ``warning``, ``debug`` and ``error``.
        ``info``, ``warning`` and ``debug`` emit nothing unless ``verbose`` is True;
        ``error`` always emits.
    """

    __instances = {}

    def __init__(self, name):
        registry = ColossalQALogger.__instances
        if name in registry:
            raise ValueError("Logger with the same name has been created")
        self._name = name
        self._logger = logging.getLogger(name)
        registry[name] = self

    @staticmethod
    def get_instance(name: str):
        """Return the single logger registered under ``name``, creating it on first use.

        Args:
            name (str): The name of the logger.

        Returns:
            ColossalQALogger: the wrapper registered for ``name``.
        """
        try:
            return ColossalQALogger.__instances[name]
        except KeyError:
            return ColossalQALogger(name=name)

    def info(self, message: str, verbose: bool = False) -> None:
        """Log an info message when ``verbose`` is set.

        Args:
            message (str): The message to be logged.
            verbose (bool): When False the message is dropped. When True,
                ``logging.basicConfig(level=logging.INFO)`` is called before logging.
        """
        if not verbose:
            return
        logging.basicConfig(level=logging.INFO)
        self._logger.info(message)

    def warning(self, message: str, verbose: bool = False) -> None:
        """Log a warning message when ``verbose`` is set.

        Args:
            message (str): The message to be logged.
            verbose (bool): When False the message is dropped.
        """
        if not verbose:
            return
        self._logger.warning(message)

    def debug(self, message: str, verbose: bool = False) -> None:
        """Log a debug message when ``verbose`` is set.

        Args:
            message (str): The message to be logged.
            verbose (bool): When False the message is dropped.
        """
        if not verbose:
            return
        self._logger.debug(message)

    def error(self, message: str) -> None:
        """Log an error message unconditionally.

        Args:
            message (str): The message to be logged.
        """
        self._logger.error(message)
+If the answer cannot be infered based on the given context, please don't share false information.<</SYS>> +Use the context and chat history to respond to the human's input at the end or carry on the conversation. You should generate one response only. No following up is needed. + +context: +[retrieved documents] + +chat history +[historical conversation, overlength chat history will be summarized] + +Human: {question} +Assistant: +``` + +## Summarization Prompt +This prompt is used by the memory module to recursively summarize overlength conversations and thereby shrink the length of the prompt. + +## Disambiguation Prompt +This prompt is used to perform zero-shot reference resolution to disambiguate entity references within the user's questions. + +## Final Prompt Examples +Assume k=3 for the retriever. + +### English +Note that the "[INST] <<SYS>>...<</SYS>>" template is the specific prompt format used in LLaMA2. +#### Normal Length +``` +[INST] <<SYS>>Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +If the answer cannot be infered based on the given context, please don't share false information.<</SYS>> +Use the context and chat history to respond to the human's input at the end or carry on the conversation. You should generate one response only. No following up is needed. + +context: +[document 1] + +[document 2] + +[document 3] + +chat history +Human: XXX +Assistant: XXX +... + +Human: {question} +Assistant: +``` + +#### Overlength +``` +[INST] <<SYS>>Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+If the answer cannot be infered based on the given context, please don't share false information.<</SYS>> +Use the context and chat history to respond to the human's input at the end or carry on the conversation. You should generate one response only. No following up is needed. + +context: +[document 1] + +[document 2] + +[document 3] + +chat history +A summarization of historical conversation: +[one line summary of historical conversation] +Most recent conversation: +Human: XXX +Assistant: XXX +... + +Human: {question} +Assistant: +``` + +### Chinese +#### Normal Length +``` +你是一个善于解答用户问题的AI助手。在保证安全的前提下,回答问题要尽可能有帮助。你的答案不应该包含任何有害的、不道德的、种族主义的、性别歧视的、危险的或非法的内容。请确保你的回答是公正和积极的。 +如果不能根据给定的上下文推断出答案,请不要分享虚假、不确定的信息。 +使用提供的背景信息和聊天记录对用户的输入作出回应或继续对话。您应该只生成一个回复。不需要跟进回答。请使用中文作答。 + +背景信息: +[document 1] + +[document 2] + +[document 3] + +聊天记录: +用户: XXX +Assistant: XXX +... + +用户: [question] +Assistant: +``` + +#### Overlength +``` +你是一个善于解答用户问题的AI助手。在保证安全的前提下,回答问题要尽可能有帮助。你的答案不应该包含任何有害的、不道德的、种族主义的、性别歧视的、危险的或非法的内容。请确保你的回答是公正和积极的。 +如果不能根据给定的上下文推断出答案,请不要分享虚假、不确定的信息。 +使用提供的背景信息和聊天记录对用户的输入作出回应或继续对话。您应该只生成一个回复。不需要跟进回答。请使用中文作答。 + +背景信息: +[document 1] + +[document 2] + +[document 3] + +聊天记录: +历史对话概要: +[one line summary of historical conversation] +最近的对话: +用户: XXX +Assistant: XXX +... + +用户: [question] +Assistant: +``` diff --git a/applications/ColossalQA/colossalqa/prompt/prompt.py b/applications/ColossalQA/colossalqa/prompt/prompt.py new file mode 100644 index 000000000..a77230786 --- /dev/null +++ b/applications/ColossalQA/colossalqa/prompt/prompt.py @@ -0,0 +1,124 @@ +""" +All custom prompt templates are defined here. +""" + +from langchain.prompts.prompt import PromptTemplate + +_CUSTOM_SUMMARIZER_TEMPLATE_ZH = """请递进式地总结所提供的当前对话,将当前对话的摘要内容添加到先前已有的摘要上,返回一个融合了当前对话的新的摘要。 + +例1: +已有的摘要: +人类问Assistant对人工智能的看法。人工智能认为人工智能是一种善的力量。 + +新的对话内容: +人类: 为什么你认为人工智能是一种好的力量? 
+Assistant: 因为人工智能将帮助人类充分发挥潜力。 + +新的摘要: +人类问Assistant对人工智能的看法。人工智能认为人工智能是一种积极的力量,因为它将帮助人类充分发挥潜力。 +示例结束 + +已有的摘要: +{summary} + +新的对话内容: +{new_lines} + +新的摘要:""" + + +# Chinese retrieval qa prompt + +_ZH_RETRIEVAL_QA_PROMPT = """<指令>根据下列支持文档和对话历史,简洁和专业地来回答问题。如果无法从支持文档中得到答案,请说 “根据已知信息无法回答该问题”。回答中请不要涉及支持文档中没有提及的信息,答案请使用中文。 </指令> + +{context} + +<对话历史> +{chat_history} +</对话历史> + +<问题>{question}</问题> +答案:""" + +ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS = ["无法回答该问题"] +ZH_RETRIEVAL_QA_REJECTION_ANSWER = "抱歉,根据提供的信息无法回答该问题。" + + +_ZH_RETRIEVAL_CLASSIFICATION_USE_CASE = """使用提供的参考案例判断客户遇到的故障所属的故障原因分类。 + +背景信息: +{context} + +客服记录: +{question} +故障原因分类:""" + +_ZH_DISAMBIGUATION_PROMPT = """你是一个乐于助人、恭敬而诚实的助手。你总是按照指示去做。 +请用聊天记录中提到的具体名称或实体名称替换给定句子中的任何模糊或有歧义的指代,如果没有提供聊天记录或句子中不包含模糊或有歧义的指代,则只输出原始句子。您的输出应该是消除歧义的句子本身(与“消除歧义的句子:”在同一行中),并且不包含任何其他内容。 + +下面是一个例子: +聊天记录: +用户: 我有一个朋友,张三。你认识他吗? +Assistant: 我认识一个叫张三的人 + +句子: 他最喜欢的食物是什么? +消除歧义的句子: 张三最喜欢的食物是什么? + +聊天记录: +{chat_history} + +句子: {input} +消除歧义的句子:""" + +# English retrieval qa prompt + +_EN_RETRIEVAL_QA_PROMPT = """[INST] <<SYS>>Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist content. +If the answer cannot be infered based on the given context, please say "I cannot answer the question based on the information given.".<</SYS>> +Use the context and chat history to answer the question. + +context: +{context} + +chat history +{chat_history} + +question: {question} +answer:""" +EN_RETRIEVAL_QA_TRIGGER_KEYWORDS = ["cannot answer the question"] +EN_RETRIEVAL_QA_REJECTION_ANSWER = "Sorry, this question cannot be answered based on the information provided." + +_EN_DISAMBIGUATION_PROMPT = """[INST] <<SYS>>You are a helpful, respectful and honest assistant. 
You always follow the instruction.<</SYS>> +Please replace any ambiguous references in the given sentence with the specific names or entities mentioned in the chat history or just output the original sentence if no chat history is provided or if the sentence doesn't contain ambiguous references. Your output should be the disambiguated sentence itself (in the same line as "disambiguated sentence:") and contain nothing else. + +Here is an example: +Chat history: +Human: I have a friend, Mike. Do you know him? +Assistant: Yes, I know a person named Mike + +sentence: What's his favorate food? +disambiguated sentence: What's Mike's favorate food? +[/INST] +Chat history: +{chat_history} + +sentence: {input} +disambiguated sentence:""" + + +PROMPT_RETRIEVAL_QA_EN = PromptTemplate( + template=_EN_RETRIEVAL_QA_PROMPT, input_variables=["question", "chat_history", "context"] +) + +PROMPT_DISAMBIGUATE_EN = PromptTemplate(template=_EN_DISAMBIGUATION_PROMPT, input_variables=["chat_history", "input"]) + +SUMMARY_PROMPT_ZH = PromptTemplate(input_variables=["summary", "new_lines"], template=_CUSTOM_SUMMARIZER_TEMPLATE_ZH) + +PROMPT_DISAMBIGUATE_ZH = PromptTemplate(template=_ZH_DISAMBIGUATION_PROMPT, input_variables=["chat_history", "input"]) + +PROMPT_RETRIEVAL_QA_ZH = PromptTemplate( + template=_ZH_RETRIEVAL_QA_PROMPT, input_variables=["question", "chat_history", "context"] +) + +PROMPT_RETRIEVAL_CLASSIFICATION_USE_CASE_ZH = PromptTemplate( + template=_ZH_RETRIEVAL_CLASSIFICATION_USE_CASE, input_variables=["question", "context"] +) diff --git a/applications/ColossalQA/colossalqa/retrieval_conversation_en.py b/applications/ColossalQA/colossalqa/retrieval_conversation_en.py new file mode 100644 index 000000000..d2626321d --- /dev/null +++ b/applications/ColossalQA/colossalqa/retrieval_conversation_en.py @@ -0,0 +1,87 @@ +""" +Script for Chinese retrieval based conversation system backed by ChatGLM +""" +from typing import Tuple + +from colossalqa.chain.retrieval_qa.base import 
class EnglishRetrievalConversation:
    """
    Wrapper class for English retrieval conversation system
    """

    def __init__(self, retriever: CustomRetriever, model_path: str, model_name: str) -> None:
        """
        Setup retrieval qa chain for English retrieval based QA

        Args:
            retriever: retriever that supplies the supporting documents
            model_path: path of the model, passed to ``ColossalAPI.get_api``
            model_name: name of the model, passed to ``ColossalAPI.get_api``
        """
        logger.info(f"model_name: {model_name}; model_path: {model_path}", verbose=True)
        self.llm = ColossalLLM(n=1, api=ColossalAPI.get_api(model_name, model_path))

        # Define the retriever
        self.retriever = retriever

        # Chain that preprocesses the input by resolving ambiguous references,
        # e.g. "What is the capital of that country?" -> "What is the capital of France?"
        disambiguation_kwargs = {"max_new_tokens": 30, "temperature": 0.6, "do_sample": True}
        self.llm_chain_disambiguate = LLMChain(
            llm=self.llm,
            prompt=PROMPT_DISAMBIGUATE_EN,
            llm_kwargs=disambiguation_kwargs,
        )
        # The retriever rephrases every query through the disambiguation chain
        self.retriever.set_rephrase_handler(self.disambiguity)

        # Memory with summarization ability
        summary_kwargs = {"max_new_tokens": 50, "temperature": 0.6, "do_sample": True}
        self.memory = ConversationBufferWithSummary(llm=self.llm, llm_kwargs=summary_kwargs)
        self.memory.initiate_document_retrieval_chain(
            self.llm,
            PROMPT_RETRIEVAL_QA_EN,
            self.retriever,
            chain_type_kwargs={"chat_history": ""},
        )

        # Retrieval QA chain that produces the final answer
        answer_kwargs = {"max_new_tokens": 50, "temperature": 0.75, "do_sample": True}
        self.retrieval_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            verbose=False,
            chain_type="stuff",
            retriever=self.retriever,
            chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_QA_EN, "memory": self.memory},
            llm_kwargs=answer_kwargs,
        )

    def disambiguity(self, input: str):
        """Rewrite ``input`` with the disambiguation chain and return the first line of its output."""
        rewritten = self.llm_chain_disambiguate.run(input=input, chat_history=self.memory.buffer, stop=["\n"])
        return rewritten.partition("\n")[0]

    @classmethod
    def from_retriever(
        cls, retriever: CustomRetriever, model_path: str, model_name: str
    ) -> "EnglishRetrievalConversation":
        """Alternate constructor taking the same arguments as ``__init__``."""
        return cls(retriever, model_path, model_name)

    def run(self, user_input: str, memory: ConversationBufferWithSummary) -> Tuple[str, ConversationBufferWithSummary]:
        """Answer ``user_input`` and return ``(first line of the answer, memory of this conversation)``.

        When ``memory`` is given, its buffered and to-be-summarized messages replace the ones
        held by this conversation before the chain runs.
        """
        if memory:
            # TODO add translation chain here
            own_memory = self.memory
            own_memory.buffered_history.messages = memory.buffered_history.messages
            own_memory.summarized_history_temp.messages = memory.summarized_history_temp.messages
        answer = self.retrieval_chain.run(
            query=user_input,
            stop=[self.memory.human_prefix + ": "],
            rejection_trigger_keywrods=["cannot answer the question"],
            rejection_answer="Sorry, this question cannot be answered based on the information provided.",
        )
        return answer.split("\n")[0], self.memory
class UniversalRetrievalConversation:
    """
    Wrapper class for bilingual retrieval conversation system
    """

    def __init__(
        self,
        embedding_model_path: str = "moka-ai/m3e-base",
        embedding_model_device: str = "cpu",
        zh_model_path: str = None,
        zh_model_name: str = None,
        en_model_path: str = None,
        en_model_name: str = None,
        sql_file_path: str = None,
        files_zh: List[dict] = None,
        files_en: List[dict] = None,
        text_splitter_chunk_size=100,
        text_splitter_chunk_overlap=10,
    ) -> None:
        """
        Wrapper for multilingual retrieval qa class (Chinese + English)
        Args:
            embedding_model_path: local or huggingface embedding model
            embedding_model_device: device the embedding model runs on
            zh_model_path / zh_model_name: model used for the Chinese conversation
            en_model_path / en_model_name: model used for the English conversation
            sql_file_path: path for the retrievers' SQL index storage; ".db" in it is
                rewritten to "_zh.db" / "_en.db" for the two retrievers. Required.
            files_zh: [{"data_path": ..., "name": ...}, ...] supporting documents for Chinese
                retrieval QA; when empty or None the files are asked for interactively
            files_en: same as files_zh, for English retrieval QA
            text_splitter_chunk_size: chunk size of both text splitters
            text_splitter_chunk_overlap: chunk overlap of both text splitters
        Raises:
            ValueError: if sql_file_path is None
        """
        # Fail before any model or document is loaded; previously a missing path surfaced
        # later as an AttributeError on None.replace.
        if sql_file_path is None:
            raise ValueError("sql_file_path must be provided")

        self.embedding = HuggingFaceEmbeddings(
            model_name=embedding_model_path,
            model_kwargs={"device": embedding_model_device},
            encode_kwargs={"normalize_embeddings": False},
        )
        print("Select files for constructing Chinese retriever")
        docs_zh = self.load_supporting_docs(
            files=files_zh,
            text_splitter=ChineseTextSplitter(
                chunk_size=text_splitter_chunk_size, chunk_overlap=text_splitter_chunk_overlap
            ),
        )
        # Create retriever
        self.information_retriever_zh = CustomRetriever(
            k=3, sql_file_path=sql_file_path.replace(".db", "_zh.db"), verbose=True
        )
        self.information_retriever_zh.add_documents(
            docs=docs_zh, cleanup="incremental", mode="by_source", embedding=self.embedding
        )

        print("Select files for constructing English retriever")
        docs_en = self.load_supporting_docs(
            files=files_en,
            text_splitter=RecursiveCharacterTextSplitter(
                chunk_size=text_splitter_chunk_size, chunk_overlap=text_splitter_chunk_overlap
            ),
        )
        # Create retriever
        self.information_retriever_en = CustomRetriever(
            k=3, sql_file_path=sql_file_path.replace(".db", "_en.db"), verbose=True
        )
        self.information_retriever_en.add_documents(
            docs=docs_en, cleanup="incremental", mode="by_source", embedding=self.embedding
        )

        self.chinese_retrieval_conversation = ChineseRetrievalConversation.from_retriever(
            self.information_retriever_zh, model_path=zh_model_path, model_name=zh_model_name
        )
        self.english_retrieval_conversation = EnglishRetrievalConversation.from_retriever(
            self.information_retriever_en, model_path=en_model_path, model_name=en_model_name
        )
        # Conversation memory shared by both languages; set by the first call to run()
        self.memory = None

    def load_supporting_docs(self, files: List[dict] = None, text_splitter: TextSplitter = None):
        """
        Load supporting documents, currently, all documents will be stored in one vector store
        Args:
            files: entries with the keys "data_path" and "name"; when empty or None the user
                is prompted on stdin for files until an empty line is entered
            text_splitter: splitter applied to every loaded document
        Returns:
            the list of document chunks produced by the splitter
        """
        documents = []
        if files:
            for file in files:
                retriever_data = DocumentLoader([[file["data_path"], file["name"]]]).all_data
                documents.extend(text_splitter.split_documents(retriever_data))
            return documents

        while True:
            file = input("Select a file to load or press Enter to exit:")
            if file == "":
                break
            data_name = input("Enter a short description of the data:")
            # NOTE(review): the separator is still asked for but is not passed to the loader
            # or the splitter; the prompt is kept so the interactive flow is unchanged.
            separator = input(
                "Enter a separator to force separating text into chunks, if no separator is given, the defaut separator is '\\n\\n', press ENTER directly to skip:"
            )
            separator = separator if separator != "" else "\n\n"
            retriever_data = DocumentLoader([[file, data_name.replace(" ", "_")]]).all_data

            # Split
            documents.extend(text_splitter.split_documents(retriever_data))
        return documents

    def start_test_session(self):
        """
        Simple multilingual session for testing purpose, with naive language selection mechanism
        """
        while True:
            user_input = input("User: ")
            if "END" == user_input:
                print("Agent: Happy to chat with you :)")
                break
            lang = detect_lang_naive(user_input)
            agent_response = self.run(user_input, which_language=lang)
            print(f"Agent: {agent_response}")

    def run(self, user_input: str, which_language: str = None):
        """
        Generate the response given the user input and a str indicates the language requirement of the output string
        Args:
            user_input: the user's message
            which_language: "zh" or "en"; when omitted the language is detected from
                user_input with detect_lang_naive, as start_test_session does
        Returns:
            the first line of the agent's response
        """
        # The default used to be the type `str` itself, which could never pass the check below.
        if which_language is None:
            which_language = detect_lang_naive(user_input)
        assert which_language in ["zh", "en"], f"Unsupported language: {which_language}"
        if which_language == "zh":
            agent_response, self.memory = self.chinese_retrieval_conversation.run(user_input, self.memory)
        else:
            agent_response, self.memory = self.english_retrieval_conversation.run(user_input, self.memory)
        return agent_response.split("\n")[0]
class ChineseRetrievalConversation:
    """
    Wrapper class for Chinese retrieval conversation system
    """

    def __init__(self, retriever: CustomRetriever, model_path: str, model_name: str) -> None:
        """
        Setup retrieval qa chain for Chinese retrieval based QA

        Args:
            retriever: retriever that supplies the supporting documents
            model_path: path of the model, passed to ``ColossalAPI.get_api``
            model_name: name of the model, passed to ``ColossalAPI.get_api``
        """
        # Local coati api
        logger.info(f"model_name: {model_name}; model_path: {model_path}", verbose=True)
        self.llm = ColossalLLM(n=1, api=ColossalAPI.get_api(model_name, model_path))

        # Define the retriever
        self.retriever = retriever

        # Chain that preprocesses the input by resolving ambiguous references,
        # e.g. "What is the capital of that country?" -> "What is the capital of France?"
        disambiguation_kwargs = {"max_new_tokens": 30, "temperature": 0.6, "do_sample": True}
        self.llm_chain_disambiguate = LLMChain(
            llm=self.llm,
            prompt=PROMPT_DISAMBIGUATE_ZH,
            llm_kwargs=disambiguation_kwargs,
        )
        # The retriever rephrases every query through the disambiguation chain
        self.retriever.set_rephrase_handler(self.disambiguity)

        # Memory with summarization ability
        summary_kwargs = {"max_new_tokens": 50, "temperature": 0.6, "do_sample": True}
        self.memory = ConversationBufferWithSummary(
            llm=self.llm,
            prompt=SUMMARY_PROMPT_ZH,
            human_prefix="用户",
            ai_prefix="Assistant",
            max_tokens=2000,
            llm_kwargs=summary_kwargs,
        )
        self.memory.initiate_document_retrieval_chain(
            self.llm,
            PROMPT_RETRIEVAL_QA_ZH,
            self.retriever,
            chain_type_kwargs={"chat_history": ""},
        )

        # Retrieval QA chain that produces the final answer
        answer_kwargs = {"max_new_tokens": 150, "temperature": 0.9, "do_sample": True}
        self.retrieval_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            verbose=False,
            chain_type="stuff",
            retriever=self.retriever,
            chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_QA_ZH, "memory": self.memory},
            llm_kwargs=answer_kwargs,
        )

    def disambiguity(self, input: str):
        """Rewrite ``input`` with the disambiguation chain and return the first line of its output."""
        rewritten = self.llm_chain_disambiguate.run(input=input, chat_history=self.memory.buffer, stop=["\n"])
        return rewritten.partition("\n")[0]

    @classmethod
    def from_retriever(
        cls, retriever: CustomRetriever, model_path: str, model_name: str
    ) -> "ChineseRetrievalConversation":
        """Alternate constructor taking the same arguments as ``__init__``."""
        return cls(retriever, model_path, model_name)

    def run(self, user_input: str, memory: ConversationBufferWithSummary) -> Tuple[str, ConversationBufferWithSummary]:
        """Answer ``user_input`` and return ``(first line of the answer, memory of this conversation)``.

        When ``memory`` is given, its buffered and to-be-summarized messages replace the ones
        held by this conversation before the chain runs.
        """
        if memory:
            # TODO add translation chain here
            own_memory = self.memory
            own_memory.buffered_history.messages = memory.buffered_history.messages
            own_memory.summarized_history_temp.messages = memory.summarized_history_temp.messages
        answer = self.retrieval_chain.run(
            query=user_input,
            stop=["</答案>"],
            doc_prefix="支持文档",
            rejection_trigger_keywrods=["无法回答该问题"],
            rejection_answer="抱歉,根据提供的信息无法回答该问题。",
        )
        return answer.split("\n")[0], self.memory
b/applications/ColossalQA/colossalqa/retriever.py new file mode 100644 index 000000000..9ea6d5b08 --- /dev/null +++ b/applications/ColossalQA/colossalqa/retriever.py @@ -0,0 +1,166 @@ +""" +Code for custom retriver with incremental update +""" +import copy +import hashlib +import os +from collections import defaultdict +from typing import Any, Callable, Dict, List + +from colossalqa.mylogging import get_logger +from langchain.callbacks.manager import CallbackManagerForRetrieverRun +from langchain.embeddings.base import Embeddings +from langchain.indexes import SQLRecordManager, index +from langchain.schema.retriever import BaseRetriever, Document +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.chroma import Chroma + +logger = get_logger() + + +class CustomRetriever(BaseRetriever): + """ + Custom retriever class with support for incremental update of indexes + """ + + vector_stores: Dict[str, VectorStore] = {} + sql_index_database: Dict[str, str] = {} + record_managers: Dict[str, SQLRecordManager] = {} + sql_db_chains = [] + k = 3 + rephrase_handler: Callable = None + buffer: Dict = [] + buffer_size: int = 5 + verbose: bool = False + sql_file_path: str = None + + @classmethod + def from_documents( + cls, + documents: List[Document], + embeddings: Embeddings, + **kwargs: Any, + ) -> BaseRetriever: + k = kwargs.pop("k", 3) + cleanup = kwargs.pop("cleanup", "incremental") + mode = kwargs.pop("mode", "by_source") + ret = cls(k=k) + ret.add_documents(documents, embedding=embeddings, cleanup=cleanup, mode=mode) + return ret + + def add_documents( + self, + docs: Dict[str, Document] = [], + cleanup: str = "incremental", + mode: str = "by_source", + embedding: Embeddings = None, + ) -> None: + """ + Add documents to retriever + Args: + docs: the documents to add + cleanup: choose from "incremental" (update embeddings, skip existing embeddings) and "full" (destory and rebuild retriever) + mode: choose from "by source" (documents are grouped 
by source) and "merge" (documents are merged into one vector store) + """ + if cleanup == "full": + # Cleanup + for source in self.vector_stores: + os.remove(self.sql_index_database[source]) + # Add documents + data_by_source = defaultdict(list) + if mode == "by_source": + for doc in docs: + data_by_source[doc.metadata["source"]].append(doc) + elif mode == "merge": + data_by_source["merged"] = docs + for source in data_by_source: + if source not in self.vector_stores: + hash_encoding = hashlib.sha3_224(source.encode()).hexdigest() + if os.path.exists(f"{self.sql_file_path}/{hash_encoding}.db"): + # Remove the stale file + os.remove(f"{self.sql_file_path}/{hash_encoding}.db") + # Create a new sql database to store indexes, sql files are stored in the same directory as the source file + sql_path = f"sqlite:///{self.sql_file_path}/{hash_encoding}.db" + self.vector_stores[source] = Chroma(embedding_function=embedding, collection_name=hash_encoding) + self.sql_index_database[source] = f"{self.sql_file_path}/{hash_encoding}.db" + self.record_managers[source] = SQLRecordManager(source, db_url=sql_path) + self.record_managers[source].create_schema() + index( + data_by_source[source], + self.record_managers[source], + self.vector_stores[source], + cleanup=cleanup, + source_id_key="source", + ) + + def __del__(self): + for source in self.sql_index_database: + if os.path.exists(self.sql_index_database[source]): + os.remove(self.sql_index_database[source]) + + def set_sql_database_chain(self, db_chains) -> None: + """ + set sql agent chain to retrieve information from sql database + Not used in this version + """ + self.sql_db_chains = db_chains + + def set_rephrase_handler(self, handler: Callable = None) -> None: + """ + Set a handler to preprocess the input str before feed into the retriever + """ + self.rephrase_handler = handler + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun = None, + score_threshold: float = None, 
+ return_scores: bool = False, + ) -> List[Document]: + """ + This function is called by the retriever to get the relevant documents. + Recently visited queries are stored in a buffer; if the query is in the buffer, the buffered documents are returned directly + + Args: + query: the query to be searched + run_manager: the callback manager for retriever run + Returns: + documents: the relevant documents + """ + for buffered_doc in self.buffer: + if buffered_doc[0] == query: + return buffered_doc[1] + query_ = str(query) + # Use your existing retriever to get the documents + if self.rephrase_handler: + query = self.rephrase_handler(query) + documents = [] + for k in self.vector_stores: + # Retrieve documents from each retriever + vectorstore = self.vector_stores[k] + documents.extend(vectorstore.similarity_search_with_score(query, self.k, score_threshold=score_threshold)) + # print(documents) + # Return the top k documents among all retrievers + documents = sorted(documents, key=lambda x: x[1], reverse=False)[: self.k] + if return_scores: + # Return score + documents = copy.deepcopy(documents) + for doc in documents: + doc[0].metadata["score"] = doc[1] + documents = [doc[0] for doc in documents] + # Retrieve documents from sql database (not applicable for the local chains) + for sql_chain in self.sql_db_chains: + documents.append( + Document( + page_content=f"Query: {query} Answer: {sql_chain.run(query)}", metadata={"source": "sql_query"} + ) + ) + if len(self.buffer) < self.buffer_size: + self.buffer.append([query_, documents]) + else: + self.buffer.pop(0) + self.buffer.append([query_, documents]) + logger.info(f"retrieved documents:\n{str(documents)}", verbose=self.verbose) + return documents diff --git a/applications/ColossalQA/colossalqa/text_splitter/__init__.py b/applications/ColossalQA/colossalqa/text_splitter/__init__.py new file mode 100644 index 000000000..b56fdfe8d --- /dev/null +++ b/applications/ColossalQA/colossalqa/text_splitter/__init__.py @@ -0,0 +1 @@ +from 
.chinese_text_splitter import ChineseTextSplitter diff --git a/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py b/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py new file mode 100644 index 000000000..3815f5ed2 --- /dev/null +++ b/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py @@ -0,0 +1,56 @@ +""" +Code for Chinese text splitter +""" +from typing import Any, List, Optional + +from colossalqa.text_splitter.utils import get_cleaned_paragraph +from langchain.text_splitter import RecursiveCharacterTextSplitter + + +class ChineseTextSplitter(RecursiveCharacterTextSplitter): + def __init__(self, separators: Optional[List[str]] = None, is_separator_regrx: bool = False, **kwargs: Any): + self._separators = separators or ["\n\n", "\n", ",", "。", "!", "?", "?"] + if "chunk_size" not in kwargs: + kwargs["chunk_size"] = 50 + if "chunk_overlap" not in kwargs: + kwargs["chunk_overlap"] = 10 + super().__init__(separators=separators, keep_separator=True, **kwargs) + self._is_separator_regex = is_separator_regrx + + def split_text(self, text: str) -> List[str]: + """Return the list of separated text chunks""" + cleaned_paragraph = get_cleaned_paragraph(text) + splitted = [] + for paragraph in cleaned_paragraph: + segs = super().split_text(paragraph) + for i in range(len(segs) - 1): + if segs[i][-1] not in self._separators: + pos = text.find(segs[i]) + pos_end = pos + len(segs[i]) + if i > 0: + last_sentence_start = max([text.rfind(m, 0, pos) for m in ["。", "!", "?"]]) + pos = last_sentence_start + 1 + segs[i] = str(text[pos:pos_end]) + if i != len(segs) - 1: + next_sentence_end = max([text.find(m, pos_end) for m in ["。", "!", "?"]]) + segs[i] = str(text[pos : next_sentence_end + 1]) + splitted.append(segs[i]) + if len(splitted) <= 1: + return splitted + splitted_text = [] + i = 1 + if splitted[0] not in splitted[1]: + splitted_text.append([splitted[0], 0]) + if splitted[-1] not in splitted[-2]: + 
splitted_text.append([splitted[-1], len(splitted) - 1]) + while i < len(splitted) - 1: + if splitted[i] not in splitted[i + 1] and splitted[i] not in splitted[i - 1]: + splitted_text.append([splitted[i], i]) + i += 1 + splitted_text = sorted(splitted_text, key=lambda x: x[1]) + splitted_text = [splitted_text[i][0] for i in range(len(splitted_text))] + ret = [] + for s in splitted_text: + if s not in ret: + ret.append(s) + return ret diff --git a/applications/ColossalQA/colossalqa/text_splitter/utils.py b/applications/ColossalQA/colossalqa/text_splitter/utils.py new file mode 100644 index 000000000..250b46d97 --- /dev/null +++ b/applications/ColossalQA/colossalqa/text_splitter/utils.py @@ -0,0 +1,19 @@ +import re + + +def remove_format(text: str) -> str: + # if the count of \t, \r, \v, \f is no more than 3, replace \t, \r, \v, \f with space + if len(re.findall(r"\s", text.replace(" ", ""))) > 3: + # in case this is a line of a table + return text + return re.sub(r"\s", " ", text) + + +# remove newlines +def get_cleaned_paragraph(s: str) -> str: + text = str(s) + text = re.sub(r"\n{3,}", r"\n", text) # replace \n\n\n... 
with \n + text = re.sub("\n\n", "", text) + lines = text.split("\n") + lines_remove_format = [remove_format(line) for line in lines] + return lines_remove_format diff --git a/applications/ColossalQA/colossalqa/utils.py b/applications/ColossalQA/colossalqa/utils.py new file mode 100644 index 000000000..cd8c3e5ac --- /dev/null +++ b/applications/ColossalQA/colossalqa/utils.py @@ -0,0 +1,61 @@ +import re +from typing import Union + +from colossalqa.mylogging import get_logger +from sqlalchemy import Engine, MetaData, create_engine +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.declarative import declarative_base + +logger = get_logger() + + +def drop_table(engine: Engine) -> None: + """ + Drop all existing tables + """ + Base = declarative_base() + metadata = MetaData() + metadata.reflect(bind=engine) + for key in metadata.tables: + table = metadata.tables[key] + if table is not None: + Base.metadata.drop_all(engine, [table], checkfirst=True) + + +def create_empty_sql_database(database_uri): + try: + # Create an SQLAlchemy engine to connect to the database + engine = create_engine(database_uri) + + # Create the database + engine.connect() + + logger.info(f"Database created at {database_uri}") + except SQLAlchemyError as e: + logger.error(f"Error creating database: {str(e)}") + return engine, database_uri + + +def destroy_sql_database(sql_engine: Union[Engine, str]) -> None: + """ + Destroy an sql database + """ + if isinstance(sql_engine, str): + sql_engine = create_engine(sql_engine) + drop_table(sql_engine) + sql_engine.dispose() + sql_engine = None + + +def detect_lang_naive(s): + """ + Naive function for language detection, should be replaced by an independent layer + """ + remove_nota = "[’·°–!\"#$%&'()*+,-./:;<=>?@,。?★、…【】()《》?“”‘’![\\]^_`{|}~]+" + s = re.sub(remove_nota, "", s) + s = re.sub("[0-9]", "", s).strip() + res = re.sub("[a-zA-Z]", "", s).strip() + if len(res) <= 0: + return "en" + else: + return "zh" diff --git 
a/applications/ColossalQA/data/data_sample/companies.txt b/applications/ColossalQA/data/data_sample/companies.txt new file mode 100644 index 000000000..05c6148f1 --- /dev/null +++ b/applications/ColossalQA/data/data_sample/companies.txt @@ -0,0 +1,6 @@ +Overview The Straits Times is the English flagship daily of SPH Media, one of the leading media companies in Asia. Launched on July 15, 1845, its comprehensive coverage of news from home and around the world makes The Straits Times the most-read newspaper in Singapore. Quality news, in-depth analyses, impactful commentaries and breaking stories are packaged to give readers riveting accounts of events in Singapore, the region, and beyond. The most read newspaper in Singapore, both in terms of print and digital, it reaches 1.33 million people every day. The Straits Times' key strength is in its world class coverage of news outside Singapore. With 20 bureaus in major cities around the world, The Straits Times correspondents bring world news to readers on a Singapore platter, helping readers to appreciate world events from a Singaporean perspective. Website http://www.straitstimes.com Phone 63196319Phone number is 63196319 Industry Newspaper Publishing Company size 1,001-5,000 employees 183 on LinkedIn Includes members with current employer listed as The Straits Times, including part-time roles. Headquarters Singapore, Singapore Founded 1845 Specialties News and Digital media +About With over 500 properties worldwide, Marriott Hotels has reimagined hospitality to exceed the expectations of business, group, and leisure travelers. +Marriott Hotels, Marriott’s flagship brand of quality-tier, full-service hotels and resorts, provides consistent, dependable and genuinely caring experiences to guests on their terms. Marriott is a brilliant host to guests who effortlessly blend life and work, and who are inspired by how modern travel enhances them both. 
Our hotels offer warm, professional service; sophisticated yet functional guest room design; lobby spaces that facilitate working, dining and socializing; restaurants and bars serving international cuisine prepared simply and from the freshest ingredients; meeting and event spaces and services that are gold standard; and expansive, 24-hour fitness facilities. +Overview AERCO International, Inc. is a recognized leader in delivering cost-effective, condensing commercial boilers, high-efficiency water heaters across a variety of markets including education, lodging, government, office buildings, healthcare, industrial and multifamily housing. AERCO's system design approach provides customer-specific solutions that deliver superior building performance at a lower operating cost while assuring uptime reliability. When AERCO was founded in 1949, it introduced a revolutionary design for an indirect-fired water heater that heated water on demand, and without storage, at a controlled temperature. This innovation became today's standard for water heaters, maximizing the recovery of latent heat energy and significantly increasing operating efficiency. AERCO continued to innovate and in 1988, introduced the first condensing and fully modulating boiler and water heater to the commercial market. The modulating capability of these products, still unsurpassed more than 25 years later, matches the equipment's output to real-time heating demand, ensuring the units draw no more fuel to operate than is absolutely necessary. This not only saves precious energy, but also ensures money doesn't needlessly disappear "up the stack." AERCO differentiates itself through a solution-based model, leveraging decades of engineering experience and industry application expertise to understand each customer’s unique needs. 
By partnering directly with customers and end-users to understand their project-specific requirements, AERCO provides tailored application solutions that are comprised of original product technologies including high efficiency condensing products, compact footprints, high turndown ratios, unique fuel delivery, leading control systems and proprietary design elements that combine to deliver up to 99% efficiency. Website http://www.aerco.com Phone 845-580-8000Phone number is 845-580-8000 Industry Industrial Machinery Manufacturing Company size 51-200 employees 119 on LinkedIn Includes members with current employer listed as AERCO International, Inc., including part-time roles. Headquarters Blauvelt, NY Founded 1949 Specialties Leading manufacturer of condensing boilers, water heating and energy recovery products and The originator of semi-instantaneous water heating +Prince PLC: Overview We are a global leader of quality water solutions for residential, industrial, municipal, and commercial settings. Our family of brands offers one of the most varied product lines in the world, with world-class, water-related solutions focused on: • Plumbing & Flow Control • Water Quality & Conditioning • Water Reuse & Drainage • HVAC • Municipal Waterworks Strategic Goals Watts Water is traded on the New York Stock Exchange under the symbol “WTS.” As a public company, growing shareholder value is critical. To that end, we focus on a five-part Global Strategy: Growth, Commercial Excellence, Operational Excellence, “One Watts Water,” and a Talent & Performance Culture. Follow us on all social media platforms @WattsWater Website http://www.watts.com/ Industry Wholesale Building Materials Company size 5,001-10,000 employees 2,248 on LinkedIn Includes members with current employer listed as Watts Water Technologies, including part-time roles. 
Headquarters North Andover, MA Specialties Plumbing, HVAC, Water Quality, Gas, Conditioning, Waterworks, and Drainage +About Courtyard Hotels is Marriott International’s largest hotel brand, with more than 1,100 hotels in over 50 countries worldwide. So, no matter where passion takes you, you’ll find us there to help you follow it. Proud members of Marriott Bonvoy. \ No newline at end of file diff --git a/applications/ColossalQA/data/data_sample/companies_zh.txt b/applications/ColossalQA/data/data_sample/companies_zh.txt new file mode 100644 index 000000000..a67a93590 --- /dev/null +++ b/applications/ColossalQA/data/data_sample/companies_zh.txt @@ -0,0 +1,6 @@ +《海峡时报》是SPH传媒旗下的英文旗舰日报,SPH传媒是亚洲领先的传媒公司之一。《海峡时报》创刊于1845年7月15日,全面报道国内外新闻,是新加坡发行量最大的报纸。高质量的新闻、深入的分析、有影响力的评论和突发事件,为读者提供新加坡、该地区乃至其他地区的引人入胜的事件报道。无论是纸媒还是电子版,它都是新加坡阅读量最大的报纸,每天有133万人阅读。《海峡时报》的主要优势在于它对新加坡以外新闻的世界级报道。《海峡时报》记者在全球主要城市设有20个分社,用新加坡的盘子把世界新闻带给读者,帮助读者从新加坡的角度了解世界大事。网站http://www.straitstimes.com电话63196319电话63196319工业报纸出版公司规模1,001-5,000员工LinkedIn 183包括目前雇主为海峡时报的成员,包括兼职工作。总部位于新加坡,新加坡成立于1845年,专业从事新闻和数字媒体 +万豪酒店在全球拥有500多家酒店,以超越商务、团体和休闲旅客的期望,重塑酒店服务。 +万豪酒店(Marriott Hotels)是万豪旗下优质、全方位服务酒店和度假村的旗舰品牌,为客人提供始终如一、可靠和真诚关怀的体验。万豪是一个出色的主人,客人可以轻松地将生活和工作融合在一起,并受到现代旅行如何增强两者的启发。我们的酒店提供热情、专业的服务;精致而实用的客房设计;大堂空间,方便工作、餐饮和社交;餐厅和酒吧提供简单的国际美食和最新鲜的食材;会议及活动场地及服务均属黄金标准;还有宽敞的24小时健身设施。 +AERCO International, Inc.是公认的领导者,为教育、住宿、政府、办公楼、医疗保健、工业和多户住宅等各种市场提供具有成本效益的冷凝商用锅炉和高效热水器。AERCO的系统设计方法为客户提供特定的解决方案,以较低的运营成本提供卓越的建筑性能,同时确保正常运行时间的可靠性。AERCO成立于1949年,它推出了一种革命性的设计,用于间接燃烧热水器,在控制温度下按需加热水,而无需储存。这一创新成为当今热水器的标准,最大限度地回收潜热能量,显著提高运行效率。AERCO不断创新,并于1988年向商业市场推出了第一台冷凝和全调制锅炉和热水器。这些产品的调制能力,在超过25年后仍然无与伦比,使设备的输出与实时加热需求相匹配,确保机组不会消耗更多的燃料来运行,除非绝对必要。这不仅节省了宝贵的能源,还确保了钱不会不必要地消失在“堆栈”上。AERCO通过基于解决方案的模式脱颖而出,利用数十年的工程经验和行业应用专业知识来了解每个客户的独特需求。通过与客户和最终用户直接合作,了解他们的项目具体要求,AERCO提供量身定制的应用解决方案,这些解决方案由原创产品技术组成,包括高效冷凝产品,紧凑的足迹,高降压比,独特的燃料输送,领先的控制系统和专有设计元素,结合起来可提供高达99%的效率。网址http://www.aerco.com电话845-580- 8000电话号码845-580-8000工业工业机械制造公司规模51-200名员工LinkedIn上包括当前雇主AERCO International, 
Inc的成员,包括兼职职位。总部成立于1949年,纽约州布劳维尔特,专长:冷凝锅炉,水加热和能源回收产品的领先制造商,半瞬时水加热的鼻祖 +Prince PLC:概述Prince PLC是为住宅、工业、市政和商业环境提供优质水解决方案的全球领导者。我们的品牌家族提供世界上最多样化的产品线之一,拥有世界级的水相关解决方案,专注于:•管道和流量控制•水质和调理•水再利用和排水•hvac•市政水务战略目标瓦茨水务在纽约证券交易所上市,代码为“WTS”。作为一家上市公司,股东价值的增长至关重要。为此,我们将重点放在五部分全球战略上:增长、卓越商业、卓越运营、“一瓦茨水”以及人才与绩效文化。在所有社交媒体平台关注我们@WattsWater网站http://www.watts.com/行业批发建材公司规模5,001-10,000名员工领英2,248名包括目前雇主为WattsWater Technologies的成员,包括兼职职位。总部北安多弗,MA专业管道,暖通空调,水质,气体,空调,自来水厂和排水 +万怡酒店是万豪国际最大的酒店品牌,在全球50多个国家拥有1100多家酒店。所以,无论你的激情带你去哪里,你都会发现我们会帮助你追随它。万豪酒店的骄傲会员。 \ No newline at end of file diff --git a/applications/ColossalQA/data/data_sample/csv_organization_100.csv b/applications/ColossalQA/data/data_sample/csv_organization_100.csv new file mode 100644 index 000000000..dbe97d5fd --- /dev/null +++ b/applications/ColossalQA/data/data_sample/csv_organization_100.csv @@ -0,0 +1,101 @@ +Index,Organization Id,Company Name,Website,Country,Description,Founded,Industry,Number of employees +1,FAB0d41d5b5d22c,Ferrell LLC,https://price.net/,Papua New Guinea,Horizontal empowering knowledgebase,1990,Plastics,3498 +2,6A7EdDEA9FaDC52,"Mckinney, Riley and Day",http://www.hall-buchanan.info/,Finland,User-centric system-worthy leverage,2015,Glass / Ceramics / Concrete,4952 +3,0bFED1ADAE4bcC1,Hester Ltd,http://sullivan-reed.com/,China,Switchable scalable moratorium,1971,Public Safety,5287 +4,2bFC1Be8a4ce42f,Holder-Sellers,https://becker.com/,Turkmenistan,De-engineered systemic artificial intelligence,2004,Automotive,921 +5,9eE8A6a4Eb96C24,Mayer Group,http://www.brewer.com/,Mauritius,Synchronized needs-based challenge,1991,Transportation,7870 +6,cC757116fe1C085,Henry-Thompson,http://morse.net/,Bahamas,Face-to-face well-modulated customer loyalty,1992,Primary / Secondary Education,4914 +7,219233e8aFF1BC3,Hansen-Everett,https://www.kidd.org/,Pakistan,Seamless disintermediate collaboration,2018,Publishing Industry,7832 +8,ccc93DCF81a31CD,Mcintosh-Mora,https://www.brooks.com/,Heard Island and McDonald Islands,Centralized 
attitude-oriented capability,1970,Import / Export,4389 +9,0B4F93aA06ED03e,Carr Inc,http://ross.com/,Kuwait,Distributed impactful customer loyalty,1996,Plastics,8167 +10,738b5aDe6B1C6A5,Gaines Inc,http://sandoval-hooper.com/,Uzbekistan,Multi-lateral scalable protocol,1997,Outsourcing / Offshoring,9698 +11,AE61b8Ffebbc476,Kidd Group,http://www.lyons.com/,Bouvet Island (Bouvetoya),Proactive foreground paradigm,2001,Primary / Secondary Education,7473 +12,eb3B7D06cCdD609,Crane-Clarke,https://www.sandoval.com/,Denmark,Front-line clear-thinking encryption,2014,Food / Beverages,9011 +13,8D0c29189C9798B,"Keller, Campos and Black",https://www.garner.info/,Liberia,Ameliorated directional emulation,2020,Museums / Institutions,2862 +14,D2c91cc03CA394c,Glover-Pope,http://www.silva.biz/,United Arab Emirates,Persevering contextually-based approach,2013,Medical Practice,9079 +15,C8AC1eaf9C036F4,Pacheco-Spears,https://aguilar.com/,Sweden,Secured logistical synergy,1984,Maritime,769 +16,b5D10A14f7a8AfE,Hodge-Ayers,http://www.archer-elliott.com/,Honduras,Future-proofed radical implementation,1990,Facilities Services,8508 +17,68139b5C4De03B4,"Bowers, Guerra and Krause",http://www.carrillo-nicholson.com/,Uganda,De-engineered transitional strategy,1972,Primary / Secondary Education,6986 +18,5c2EffEfdba2BdF,Mckenzie-Melton,http://montoya-thompson.com/,Hong Kong,Reverse-engineered heuristic alliance,1998,Investment Management / Hedge Fund / Private Equity,4589 +19,ba179F19F7925f5,Branch-Mann,http://www.lozano.com/,Botswana,Adaptive intangible frame,1999,Architecture / Planning,7961 +20,c1Ce9B350BAc66b,Weiss and Sons,https://barrett.com/,Korea,Sharable optimal functionalities,2011,Plastics,5984 +21,8de40AC4e6EaCa4,"Velez, Payne and Coffey",http://burton.com/,Luxembourg,Mandatory coherent synergy,1986,Wholesale,5010 +22,Aad86a4F0385F2d,Harrell LLC,http://www.frey-rosario.com/,Guadeloupe,Reverse-engineered mission-critical moratorium,2018,Construction,2185 +23,22aC3FFd64fD703,"Eaton, Reynolds 
and Vargas",http://www.freeman.biz/,Monaco,Self-enabling multi-tasking process improvement,2014,Luxury Goods / Jewelry,8987 +24,5Ec4C272bCf085c,Robbins-Cummings,http://donaldson-wilkins.com/,Belgium,Organic non-volatile hierarchy,1991,Pharmaceuticals,5038 +25,5fDBeA8BB91a000,Jenkins Inc,http://www.kirk.biz/,South Africa,Front-line systematic help-desk,2002,Insurance,1215 +26,dFfD6a6F9AC2d9C,"Greene, Benjamin and Novak",http://www.kent.net/,Romania,Centralized leadingedge moratorium,2012,Museums / Institutions,4941 +27,4B217cC5a0674C5,"Dickson, Richmond and Clay",http://everett.com/,Czech Republic,Team-oriented tangible complexity,1980,Real Estate / Mortgage,3122 +28,88b1f1cDcf59a37,Prince-David,http://thompson.com/,Christmas Island,Virtual holistic methodology,1970,Banking / Mortgage,1046 +29,f9F7bBCAEeC360F,Ayala LLC,http://www.zhang.com/,Philippines,Open-source zero administration hierarchy,2021,Legal Services,7664 +30,7Cb3AeFcE4Ba31e,Rivas Group,https://hebert.org/,Australia,Open-architected well-modulated capacity,1998,Logistics / Procurement,4155 +31,ccBcC32adcbc530,"Sloan, Mays and Whitehead",http://lawson.com/,Chad,Face-to-face high-level conglomeration,1997,Civil Engineering,365 +32,f5afd686b3d05F5,"Durham, Allen and Barnes",http://chan-stafford.org/,Zimbabwe,Synergistic web-enabled framework,1993,Mechanical or Industrial Engineering,6135 +33,38C6cfC5074Fa5e,Fritz-Franklin,http://www.lambert.com/,Nepal,Automated 4thgeneration website,1972,Hospitality,4516 +34,5Cd7efccCcba38f,Burch-Ewing,http://cline.net/,Taiwan,User-centric 4thgeneration system engine,1981,Venture Capital / VC,7443 +35,9E6Acb51e3F9d6F,"Glass, Barrera and Turner",https://dunlap.com/,Kyrgyz Republic,Multi-channeled 3rdgeneration open system,2020,Utilities,2610 +36,4D4d7E18321eaeC,Pineda-Cox,http://aguilar.org/,Bolivia,Fundamental asynchronous capability,2010,Human Resources / HR,1312 +37,485f5d06B938F2b,"Baker, Mccann and Macdonald",http://www.anderson-barker.com/,Kenya,Cross-group 
user-facing focus group,2013,Legislative Office,1638 +38,19E3a5Bf6dBDc4F,Cuevas-Moss,https://dodson-castaneda.net/,Guatemala,Extended human-resource intranet,1994,Music,9995 +39,6883A965c7b68F7,Hahn PLC,http://newman.com/,Belarus,Organic logistical leverage,2012,Electrical / Electronic Manufacturing,3715 +40,AC5B7AA74Aa4A2E,"Valentine, Ferguson and Kramer",http://stuart.net/,Jersey,Centralized secondary time-frame,1997,Non - Profit / Volunteering,3585 +41,decab0D5027CA6a,Arroyo Inc,https://www.turner.com/,Grenada,Managed demand-driven website,2006,Writing / Editing,9067 +42,dF084FbBb613eea,Walls LLC,http://www.reese-vasquez.biz/,Cape Verde,Self-enabling fresh-thinking installation,1989,Investment Management / Hedge Fund / Private Equity,1678 +43,A2D89Ab9bCcAd4e,"Mitchell, Warren and Schneider",https://fox.biz/,Trinidad and Tobago,Enhanced intangible time-frame,2021,Capital Markets / Hedge Fund / Private Equity,3816 +44,77aDc905434a49f,Prince PLC,https://www.watts.com/,Sweden,Profit-focused coherent installation,2016,Individual / Family Services,7645 +45,235fdEFE2cfDa5F,Brock-Blackwell,http://www.small.com/,Benin,Secured foreground emulation,1986,Online Publishing,7034 +46,1eD64cFe986BBbE,Walton-Barnett,https://ashley-schaefer.com/,Western Sahara,Right-sized clear-thinking flexibility,2001,Luxury Goods / Jewelry,1746 +47,CbBbFcdd0eaE2cF,Bartlett-Arroyo,https://cruz.com/,Northern Mariana Islands,Realigned didactic function,1976,Civic / Social Organization,3987 +48,49aECbDaE6aBD53,"Wallace, Madden and Morris",http://www.blevins-fernandez.biz/,Germany,Persistent real-time customer loyalty,2016,Pharmaceuticals,9443 +49,7b3fe6e7E72bFa4,Berg-Sparks,https://cisneros-love.com/,Canada,Stand-alone static implementation,1974,Arts / Crafts,2073 +50,c6DedA82A8aef7E,Gonzales Ltd,http://bird.com/,Tonga,Managed human-resource policy,1988,Consumer Goods,9069 +51,7D9FBF85cdC3871,Lawson and Sons,https://www.wong.com/,French Southern Territories,Compatible analyzing intranet,2021,Arts 
/ Crafts,3527 +52,7dd18Fb7cB07b65,"Mcguire, Mcconnell and Olsen",https://melton-briggs.com/,Korea,Profound client-server frame,1988,Printing,8445 +53,EF5B55FadccB8Fe,Charles-Phillips,https://bowman.com/,Cote d'Ivoire,Monitored client-server implementation,2012,Mental Health Care,3450 +54,f8D4B99e11fAF5D,Odom Ltd,https://www.humphrey-hess.com/,Cote d'Ivoire,Advanced static process improvement,2012,Management Consulting,1825 +55,e24D21BFd3bF1E5,Richard PLC,https://holden-coleman.net/,Mayotte,Object-based optimizing model,1971,Broadcast Media,4942 +56,B9BdfEB6D3Ca44E,Sampson Ltd,https://blevins.com/,Cayman Islands,Intuitive local adapter,2005,Farming,1418 +57,2a74D6f3D3B268e,"Cherry, Le and Callahan",https://waller-delacruz.biz/,Nigeria,Universal human-resource collaboration,2017,Entertainment / Movie Production,7202 +58,Bf3F3f62c8aBC33,Cherry PLC,https://www.avila.info/,Marshall Islands,Persistent tertiary website,1980,Plastics,8245 +59,aeBe26B80a7a23c,Melton-Nichols,https://kennedy.com/,Palau,User-friendly clear-thinking productivity,2021,Legislative Office,8741 +60,aAeb29ad43886C6,Potter-Walsh,http://thomas-french.org/,Turkey,Optional non-volatile open system,2008,Human Resources / HR,6923 +61,bD1bc6bB6d1FeD3,Freeman-Chen,https://mathis.com/,Timor-Leste,Phased next generation adapter,1973,International Trade / Development,346 +62,EB9f456e8b7022a,Soto Group,https://norris.info/,Vietnam,Enterprise-wide executive installation,1988,Business Supplies / Equipment,9097 +63,Dfef38C51D8DAe3,"Poole, Cruz and Whitney",https://reed.info/,Reunion,Balanced analyzing groupware,1978,Marketing / Advertising / Sales,2992 +64,055ffEfB2Dd95B0,Riley Ltd,http://wiley.com/,Brazil,Optional exuding superstructure,1986,Textiles,9315 +65,cBfe4dbAE1699da,"Erickson, Andrews and Bailey",https://www.hobbs-grant.com/,Eritrea,Vision-oriented secondary project,2014,Consumer Electronics,7829 +66,fdFbecbadcdCdf1,"Wilkinson, Charles and Arroyo",http://hunter-mcfarland.com/,United States Virgin 
Islands,Assimilated 24/7 archive,1996,Building Materials,602 +67,5DCb8A5a5ca03c0,Floyd Ltd,http://www.whitney.com/,Falkland Islands (Malvinas),Function-based fault-tolerant concept,2017,Public Relations / PR,2911 +68,ce57DCbcFD6d618,Newman-Galloway,https://www.scott.com/,Luxembourg,Enhanced foreground collaboration,1987,Information Technology / IT,3934 +69,5aaD187dc929371,Frazier-Butler,https://www.daugherty-farley.info/,Northern Mariana Islands,Persistent interactive circuit,1972,Outsourcing / Offshoring,5130 +70,902D7Ac8b6d476b,Newton Inc,https://www.richmond-manning.info/,Netherlands Antilles,Fundamental stable info-mediaries,1976,Military Industry,563 +71,32BB9Ff4d939788,Duffy-Levy,https://www.potter.com/,Guernsey,Diverse exuding installation,1982,Wireless,6146 +72,adcB0afbE58bAe3,Wagner LLC,https://decker-esparza.com/,Uruguay,Reactive attitude-oriented toolset,1987,International Affairs,6874 +73,dfcA1c84AdB61Ac,Mccall-Holmes,http://www.dean.com/,Benin,Object-based value-added database,2009,Legal Services,696 +74,208044AC2fe52F3,Massey LLC,https://frazier.biz/,Suriname,Configurable zero administration Graphical User Interface,1986,Accounting,5004 +75,f3C365f0c1A0623,Hicks LLC,http://alvarez.biz/,Pakistan,Quality-focused client-server Graphical User Interface,1970,Computer Software / Engineering,8480 +76,ec5Bdd3CBAfaB93,"Cole, Russell and Avery",http://www.blankenship.com/,Mongolia,De-engineered fault-tolerant challenge,2000,Law Enforcement,7012 +77,DDB19Be7eeB56B4,Cummings-Rojas,https://simon-pearson.com/,Svalbard & Jan Mayen Islands,User-centric modular customer loyalty,2012,Financial Services,7529 +78,dd6CA3d0bc3cAfc,"Beasley, Greene and Mahoney",http://www.petersen-lawrence.com/,Togo,Extended content-based methodology,1976,Religious Institutions,869 +79,A0B9d56e61070e3,"Beasley, Sims and Allison",http://burke.info/,Latvia,Secured zero tolerance hub,1972,Facilities Services,6182 +80,cBa7EFe5D05Adaf,Crawford-Rivera,https://black-ramirez.org/,Cuba,Persevering 
exuding budgetary management,1999,Online Publishing,7805 +81,Ea3f6D52Ec73563,Montes-Hensley,https://krueger.org/,Liechtenstein,Multi-tiered secondary productivity,2009,Printing,8433 +82,bC0CEd48A8000E0,Velazquez-Odom,https://stokes.com/,Djibouti,Streamlined 6thgeneration function,2002,Alternative Dispute Resolution,4044 +83,c89b9b59BC4baa1,Eaton-Morales,https://www.reeves-graham.com/,Micronesia,Customer-focused explicit frame,1990,Capital Markets / Hedge Fund / Private Equity,7013 +84,FEC51bce8421a7b,"Roberson, Pennington and Palmer",http://www.keith-fisher.com/,Cameroon,Adaptive bi-directional hierarchy,1993,Telecommunications,5571 +85,e0E8e27eAc9CAd5,"George, Russo and Guerra",https://drake.com/,Sweden,Centralized non-volatile capability,1989,Military Industry,2880 +86,B97a6CF9bf5983C,Davila Inc,https://mcconnell.info/,Cocos (Keeling) Islands,Profit-focused dedicated frame,2017,Consumer Electronics,2215 +87,a0a6f9b3DbcBEb5,Mays-Preston,http://www.browning-key.com/,Mali,User-centric heuristic focus group,2006,Military Industry,5786 +88,8cC1bDa330a5871,Pineda-Morton,https://www.carr.com/,United States Virgin Islands,Grass-roots methodical info-mediaries,1991,Printing,6168 +89,ED889CB2FE9cbd3,Huang and Sons,https://www.bolton.com/,Eritrea,Re-contextualized dynamic hierarchy,1981,Semiconductors,7484 +90,F4Dc1417BC6cb8f,Gilbert-Simon,https://www.bradford.biz/,Burundi,Grass-roots radical parallelism,1973,Newspapers / Journalism,1927 +91,7ABc3c7ecA03B34,Sampson-Griffith,http://hendricks.org/,Benin,Multi-layered composite paradigm,1972,Textiles,3881 +92,4e0719FBE38e0aB,Miles-Dominguez,http://www.turner.com/,Gibraltar,Organized empowering forecast,1996,Civic / Social Organization,897 +93,dEbDAAeDfaed00A,Rowe and Sons,https://www.simpson.org/,El Salvador,Balanced multimedia knowledgebase,1978,Facilities Services,8172 +94,61BDeCfeFD0cEF5,"Valenzuela, Holmes and Rowland",https://www.dorsey.net/,Taiwan,Persistent tertiary focus group,1999,Transportation,1483 
+95,4e91eD25f486110,"Best, Wade and Shepard",https://zimmerman.com/,Zimbabwe,Innovative background definition,1991,Gambling / Casinos,4873 +96,0a0bfFbBbB8eC7c,Holmes Group,https://mcdowell.org/,Ethiopia,Right-sized zero tolerance focus group,1975,Photography,2988 +97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292 +98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236 +99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339 +100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785 \ No newline at end of file diff --git a/applications/ColossalQA/data/data_sample/custom_service.json b/applications/ColossalQA/data/data_sample/custom_service.json new file mode 100644 index 000000000..33dbdf705 --- /dev/null +++ b/applications/ColossalQA/data/data_sample/custom_service.json @@ -0,0 +1 @@ +{"features":[{"feature_idx":0,"name":"query","type":{"dtype":"string","_type":"Value"}},{"feature_idx":1,"name":"response","type":{"dtype":"string","_type":"Value"}}],"rows":[{"row_idx":0,"row":{"query":"My order hasn't arrived yet.","response":"We apologize for the inconvenience. Can you please provide your order number so we can investigate?"},"truncated_cells":[]},{"row_idx":1,"row":{"query":"I received a damaged product.","response":"We apologize for the inconvenience. Can you please provide a photo of the damaged product so we can assist you further?"},"truncated_cells":[]},{"row_idx":2,"row":{"query":"I need to return an item.","response":"Certainly. Please provide your order number and reason for return, and we will provide you with instructions on how to proceed."},"truncated_cells":[]},{"row_idx":3,"row":{"query":"I want to change my shipping address.","response":"No problem. 
Can you please provide your order number and the new shipping address you'd like to use?"},"truncated_cells":[]},{"row_idx":4,"row":{"query":"I have a question about my bill.","response":"We'd be happy to help. Can you please provide your account number and a brief description of your question?"},"truncated_cells":[]},{"row_idx":5,"row":{"query":"How do I cancel my subscription?","response":"We're sorry to hear that. Can you please provide your account email and the name of the subscription you'd like to cancel?"},"truncated_cells":[]},{"row_idx":6,"row":{"query":"Can I get a refund for my purchase?","response":"We understand your concern. Please provide your order number and the reason for your refund request so we can assist you further."},"truncated_cells":[]},{"row_idx":7,"row":{"query":"I'd like to track my order.","response":"Of course. Can you please provide your order number so we can check the current status for you?"},"truncated_cells":[]},{"row_idx":8,"row":{"query":"My account has been locked.","response":"We apologize for the inconvenience. Can you please provide your account email so we can help you regain access?"},"truncated_cells":[]},{"row_idx":9,"row":{"query":"I can't find the item I'm looking for.","response":"We're here to help. Can you please provide a description or product name of the item you're looking for so we can assist you?"},"truncated_cells":[]},{"row_idx":10,"row":{"query":"I'm having trouble applying a promo code.","response":"We apologize for the inconvenience. Can you please provide the promo code you're trying to use, and we'll help you resolve the issue?"},"truncated_cells":[]},{"row_idx":11,"row":{"query":"How do I update my payment information?","response":"We can help with that. 
Can you please provide your account email so we can guide you through updating your payment information?"},"truncated_cells":[]},{"row_idx":12,"row":{"query":"I haven't received a response to my email inquiry.","response":"We apologize for the delay. Can you please provide your ticket number or account email so we can follow up on your inquiry?"},"truncated_cells":[]},{"row_idx":13,"row":{"query":"Can I change the size of an item in my order?","response":"Certainly. Can you please provide your order number and the details of the item you'd like to change?"},"truncated_cells":[]},{"row_idx":14,"row":{"query":"What is the status of my warranty claim?","response":"We'd be happy to check for you. Can you please provide your claim number or the product's serial number?"},"truncated_cells":[]},{"row_idx":15,"row":{"query":"Is my item in stock?","response":"To check availability, can you please provide the product name or SKU so we can assist you?"},"truncated_cells":[]},{"row_idx":16,"row":{"query":"My promo code has expired. Can I still use it?","response":"Unfortunately, expired promo codes cannot be used. However, please provide the code, and we'll see if there's a similar offer currently available."},"truncated_cells":[]},{"row_idx":17,"row":{"query":"How do I reset my password?","response":"We can help with that. Please provide your account email, and we'll send you instructions on how to reset your password."},"truncated_cells":[]},{"row_idx":18,"row":{"query":"Can I place a bulk order?","response":"Certainly. Can you please provide the product name or SKU, and the quantity you'd like to order so we can check availability and pricing?"},"truncated_cells":[]},{"row_idx":19,"row":{"query":"How do I apply for a job at your company?","response":"Thank you for your interest. 
Can you please provide your email address so we can send you information on how to apply?"},"truncated_cells":[]},{"row_idx":20,"row":{"query":"Can I add an item to an existing order?","response":"We'll do our best to help. Can you please provide your order number and the details of the item you'd like to add?"},"truncated_cells":[]},{"row_idx":21,"row":{"query":"How long does shipping take?","response":"We'd be happy to provide an estimate. Can you please provide your shipping destination and the product name or SKU?"},"truncated_cells":[]},{"row_idx":22,"row":{"query":"I was charged twice for my order.","response":"We apologize for the inconvenience. Can you please provide your order number so we can investigate and resolve the issue?"},"truncated_cells":[]},{"row_idx":23,"row":{"query":"Where can I find your sizing chart?","response":"We'd be happy to help. Can you please provide the product name or SKU so we can direct you to the appropriate sizing chart?"},"truncated_cells":[]},{"row_idx":24,"row":{"query":"I need technical support for a product.","response":"We're here to help. Can you please provide the product name, SKU, or serial number, and a description of the issue you're experiencing?"},"truncated_cells":[]},{"row_idx":25,"row":{"query":"Can I exchange an item for a different color?","response":"Certainly. Can you please provide your order number and the details of the item you'd like to exchange?"},"truncated_cells":[]},{"row_idx":26,"row":{"query":"How do I apply for a store credit card?","response":"Thank you for your interest. Can you please provide your email address so we can send you information on how to apply?"},"truncated_cells":[]},{"row_idx":27,"row":{"query":"Where is your store located?","response":"We'd be happy to help you find a store near you. Can you please provide your city and state or zip code?"},"truncated_cells":[]},{"row_idx":28,"row":{"query":"I can't log into my account.","response":"We apologize for the inconvenience. 
Can you please provide your account email so we can help you troubleshoot the issue?"},"truncated_cells":[]},{"row_idx":29,"row":{"query":"Do you offer gift wrapping?","response":"We do offer gift wrapping for select items. Can you please provide the product name or SKU so we can confirm if gift wrapping is available?"},"truncated_cells":[]},{"row_idx":30,"row":{"query":"Can I schedule a delivery?","response":"Certainly. Can you please provide your order number and the preferred date and time for delivery?"},"truncated_cells":[]},{"row_idx":31,"row":{"query":"How do I unsubscribe from your newsletter?","response":"We're sorry to see you go. Can you please provide your email address so we can remove you from our mailing list?"},"truncated_cells":[]},{"row_idx":32,"row":{"query":"What is your return policy?","response":"We'd be happy to provide information on our return policy. Can you please provide the product name or SKU so we can give you the most accurate information?"},"truncated_cells":[]},{"row_idx":33,"row":{"query":"Do you offer price matching?","response":"We do offer price matching in certain cases. Can you please provide the product name or SKU and the competitor's pricing information?"},"truncated_cells":[]},{"row_idx":34,"row":{"query":"Are there any current promotions or sales?","response":"We'd be happy to inform you of any current promotions. Can you please provide the product name or SKU you're interested in, or the type of promotion you're looking for?"},"truncated_cells":[]},{"row_idx":35,"row":{"query":"My coupon isn't working.","response":"We apologize for the inconvenience. Can you please provide the coupon code and the product name or SKU you're trying to apply it to?"},"truncated_cells":[]},{"row_idx":36,"row":{"query":"Can you help me with a product recommendation?","response":"Of course! 
Can you please provide some details about what you're looking for, such as product type, features, or price range?"},"truncated_cells":[]},{"row_idx":37,"row":{"query":"Do you offer international shipping?","response":"We'd be happy to help. Can you please provide your shipping destination and the product name or SKU you're interested in?"},"truncated_cells":[]},{"row_idx":38,"row":{"query":"Can I place an order over the phone?","response":"Certainly. Can you please provide your phone number and the best time to reach you, and one of our representatives will contact you to place the order?"},"truncated_cells":[]},{"row_idx":39,"row":{"query":"How do I know if my order went through?","response":"We can help you with that. Can you please provide your account email or order number so we can check the status for you?"},"truncated_cells":[]},{"row_idx":40,"row":{"query":"Can I use multiple promo codes on one order?","response":"In most cases, only one promo code can be applied per order. Can you please provide the promo codes you're trying to use so we can check their compatibility?"},"truncated_cells":[]},{"row_idx":41,"row":{"query":"What are your customer service hours?","response":"Our customer service team is available 24/7 to assist you. Please feel free to reach out with any questions or concerns."},"truncated_cells":[]},{"row_idx":42,"row":{"query":"Where can I find the user manual for my product?","response":"We'd be happy to help. Can you please provide the product name, SKU, or serial number so we can direct you to the appropriate user manual?"},"truncated_cells":[]},{"row_idx":43,"row":{"query":"Do you offer a warranty on your products?","response":"We do offer warranties on select products. Can you please provide the product name or SKU so we can provide you with warranty information?"},"truncated_cells":[]},{"row_idx":44,"row":{"query":"Can I place an order for in-store pickup?","response":"Certainly. 
Can you please provide the product name or SKU and the store location where you'd like to pick up your order?"},"truncated_cells":[]},{"row_idx":45,"row":{"query":"How do I sign up for your rewards program?","response":"Thank you for your interest in our rewards program. Can you please provide your email address so we can send you information on how to sign up?"},"truncated_cells":[]},{"row_idx":46,"row":{"query":"Can I pay with a gift card online?","response":"Yes, you can use gift cards for online purchases. Can you please provide the gift card number and the product name or SKU you're interested in?"},"truncated_cells":[]},{"row_idx":47,"row":{"query":"I can't find my order confirmation email.","response":"We apologize for the inconvenience. Can you please provide your account email or order number so we can resend the confirmation email?"},"truncated_cells":[]},{"row_idx":48,"row":{"query":"Do you offer a military discount?","response":"Yes, we do offer a military discount. Can you please provide your military ID or email address so we can apply the discount to your account?"},"truncated_cells":[]},{"row_idx":49,"row":{"query":"What is the processing time for my order?","response":"We'd be happy to provide an estimate. Can you please provide your order number or the product name or SKU?"},"truncated_cells":[]},{"row_idx":50,"row":{"query":"How do I update my shipping preferences?","response":"We can help with that. Can you please provide your account email so we can guide you through updating your shipping preferences?"},"truncated_cells":[]},{"row_idx":51,"row":{"query":"Is my payment information secure?","response":"Yes, we take security very seriously. Can you please provide your account email so we can verify the security measures in place for your payment information?"},"truncated_cells":[]},{"row_idx":52,"row":{"query":"Can I pre-order an item?","response":"Certainly. 
Can you please provide the product name or SKU and your email address so we can notify you when pre-orders are available?"},"truncated_cells":[]},{"row_idx":53,"row":{"query":"How do I use a gift card in-store?","response":"To use a gift card in-store, simply present the gift card at the time of purchase. Can you please provide the gift card number so we can check the balance for you?"},"truncated_cells":[]},{"row_idx":54,"row":{"query":"Do you have a loyalty program?","response":"Yes, we do have a loyalty program. Can you please provide your email address so we can send you information on how to join and enjoy the benefits?"},"truncated_cells":[]},{"row_idx":55,"row":{"query":"Is there a mobile app for your store?","response":"Yes, we do have a mobile app. Can you please provide your email address so we can send you a link to download the app and instructions on how to use it?"},"truncated_cells":[]},{"row_idx":56,"row":{"query":"I need help assembling my product.","response":"We're here to help. Can you please provide the product name, SKU, or serial number, and a description of the issue you're experiencing during assembly?"},"truncated_cells":[]},{"row_idx":57,"row":{"query":"Do you offer financing options?","response":"We do offer financing options for select purchases. Can you please provide the product name or SKU and your email address so we can send you more information?"},"truncated_cells":[]},{"row_idx":58,"row":{"query":"Can I reserve an item in-store?","response":"Certainly. Can you please provide the product name or SKU and the store location where you'd like to reserve the item?"},"truncated_cells":[]},{"row_idx":59,"row":{"query":"How do I get a price adjustment for a recent purchase?","response":"We'd be happy to help. 
Can you please provide your order number and the product name or SKU for the item you'd like a price adjustment on?"},"truncated_cells":[]},{"row_idx":60,"row":{"query":"How do I change my email preferences?","response":"We can help with that. Can you please provide your account email so we can guide you through updating your email preferences?"},"truncated_cells":[]},{"row_idx":61,"row":{"query":"Can I use my store credit online?","response":"Yes, you can use store credit for online purchases. Can you please provide the store credit number and the product name or SKU you're interested in?"},"truncated_cells":[]},{"row_idx":62,"row":{"query":"What are the washing instructions for this item?","response":"We'd be happy to help. Can you please provide the product name or SKU so we can provide you with the proper washing instructions?"},"truncated_cells":[]},{"row_idx":63,"row":{"query":"Can I get a replacement part for my product?","response":"Certainly. Can you please provide the product name, SKU, or serial number, and a description of the part you need?"},"truncated_cells":[]},{"row_idx":64,"row":{"query":"Do you offer free shipping?","response":"We do offer free shipping on select orders. Can you please provide the product name or SKU and your shipping destination so we can check if your order qualifies?"},"truncated_cells":[]},{"row_idx":65,"row":{"query":"Can I place a custom order?","response":"We'd be happy to assist you. Can you please provide the product name or SKU and a description of the customizations you'd like?"},"truncated_cells":[]},{"row_idx":66,"row":{"query":"How do I report a problem with your website?","response":"We appreciate your feedback. Can you please provide a description of the issue you're experiencing and your email address so we can follow up with you?"},"truncated_cells":[]},{"row_idx":67,"row":{"query":"What is your policy on price adjustments?","response":"We'd be happy to provide information on our price adjustment policy. 
Can you please provide the product name or SKU so we can give you the most accurate information?"},"truncated_cells":[]},{"row_idx":68,"row":{"query":"Do you have any upcoming sales or events?","response":"We'd be happy to inform you of any upcoming sales or events. Can you please provide your email address so we can keep you updated?"},"truncated_cells":[]},{"row_idx":69,"row":{"query":"How do I schedule a consultation or appointment?","response":"We'd be happy to help. Can you please provide your name, phone number, and the service you're interested in so we can schedule your appointment?"},"truncated_cells":[]},{"row_idx":70,"row":{"query":"Can I get a copy of my receipt?","response":"Certainly. Can you please provide your order number or account email so we can locate your receipt and send you a copy?"},"truncated_cells":[]},{"row_idx":71,"row":{"query":"Can I use a competitor's coupon at your store?","response":"In some cases, we may accept competitor coupons. Can you please provide the competitor's coupon code and the product name or SKU you'd like to apply it to?"},"truncated_cells":[]},{"row_idx":72,"row":{"query":"Do you have a recycling program?","response":"Yes, we do have a recycling program. Can you please provide your email address so we can send you information on how to participate?"},"truncated_cells":[]},{"row_idx":73,"row":{"query":"How do I report a lost or stolen gift card?","response":"We're sorry to hear that. 
Can you please provide the gift card number, if available, and your email address so we can assist you further?"},"truncated_cells":[]}],"num_rows_total":74,"num_rows_per_page":100} diff --git a/applications/ColossalQA/data/data_sample/custom_service_classification.json b/applications/ColossalQA/data/data_sample/custom_service_classification.json new file mode 100644 index 000000000..2d7e4b05d --- /dev/null +++ b/applications/ColossalQA/data/data_sample/custom_service_classification.json @@ -0,0 +1,64 @@ +{ + "data": [ + { + "key": "客户反映手机无法接收短信,但可以正常拨打电话,已确认手机号码正常,需要处理。", + "value": "故障原因分类: 短信接收问题" + }, + { + "key": "客户申请开通国际漫游服务,但在目的地无法使用手机信号,已核实客户所在地国家为不支持漫游的区域,已通知客户。", + "value": "故障原因分类: 国际漫游服务" + }, + { + "key": "客户称手机信号时强时弱,经过测试发现在不同区域信号确实存在波动,属于正常现象。", + "value": "故障原因分类: 信号强弱波动" + }, + { + "key": "客户反映在家中无法连接Wi-Fi,建议检查路由器或尝试更换位置。", + "value": "故障原因分类: 家庭网络问题" + }, + { + "key": "客户申请更换新的SIM卡,因旧卡损坏,已为客户办理新卡。", + "value": "故障原因分类: SIM卡更换" + }, + { + "key": "客户反映通话时听不清对方声音,经检查发现是手机内置扬声器故障,建议维修。", + "value": "故障原因分类: 扬声器故障" + }, + { + "key": "客户手机丢失,请求挂失并办理新卡,已为客户挂失旧卡并补办新卡。", + "value": "故障原因分类: 挂失与补办" + }, + { + "key": "客户反映在市区内无法使用手机信号,经排查发现信号塔维护,属于暂时性故障。", + "value": "故障原因分类: 信号塔维护" + }, + { + "key": "客户反映手机充电时出现过热情况,建议更换充电器。", + "value": "故障原因分类: 充电器故障" + }, + { + "key": "客户要求关闭数据漫游功能,已为客户关闭。", + "value": "故障原因分类: 关闭数据漫游" + }, + { + "key": "客户申请办理家庭套餐业务,已为客户办理。", + "value": "故障原因分类: 家庭套餐办理" + }, + { + "key": "客户反映在商场内无法使用手机信号,建议检查手机信号设置。", + "value": "故障原因分类: 手机信号设置" + }, + { + "key": "客户申请开通国际长途业务,已为客户办理。", + "value": "故障原因分类: 国际长途业务办理" + }, + { + "key": "客户反映手机屏幕出现蓝屏,建议客户前往维修。", + "value": "故障原因分类: 手机屏幕故障" + }, + { + "key": "客户申请办理免流量业务,已为客户办理。", + "value": "故障原因分类: 免流量业务办理" + } + ] +} diff --git a/applications/ColossalQA/data/data_sample/custom_service_preprocessed.json b/applications/ColossalQA/data/data_sample/custom_service_preprocessed.json new file mode 100644 index 000000000..f4e095f16 --- /dev/null +++ 
b/applications/ColossalQA/data/data_sample/custom_service_preprocessed.json @@ -0,0 +1 @@ +{"data": [{"key": "My order hasn't arrived yet.", "value": "We apologize for the inconvenience. Can you please provide your order number so we can investigate?"}, {"key": "I received a damaged product.", "value": "We apologize for the inconvenience. Can you please provide a photo of the damaged product so we can assist you further?"}, {"key": "I need to return an item.", "value": "Certainly. Please provide your order number and reason for return, and we will provide you with instructions on how to proceed."}, {"key": "I want to change my shipping address.", "value": "No problem. Can you please provide your order number and the new shipping address you'd like to use?"}, {"key": "I have a question about my bill.", "value": "We'd be happy to help. Can you please provide your account number and a brief description of your question?"}, {"key": "How do I cancel my subscription?", "value": "We're sorry to hear that. Can you please provide your account email and the name of the subscription you'd like to cancel?"}, {"key": "Can I get a refund for my purchase?", "value": "We understand your concern. Please provide your order number and the reason for your refund request so we can assist you further."}, {"key": "I'd like to track my order.", "value": "Of course. Can you please provide your order number so we can check the current status for you?"}, {"key": "My account has been locked.", "value": "We apologize for the inconvenience. Can you please provide your account email so we can help you regain access?"}, {"key": "I can't find the item I'm looking for.", "value": "We're here to help. Can you please provide a description or product name of the item you're looking for so we can assist you?"}, {"key": "I'm having trouble applying a promo code.", "value": "We apologize for the inconvenience. 
Can you please provide the promo code you're trying to use, and we'll help you resolve the issue?"}, {"key": "How do I update my payment information?", "value": "We can help with that. Can you please provide your account email so we can guide you through updating your payment information?"}, {"key": "I haven't received a response to my email inquiry.", "value": "We apologize for the delay. Can you please provide your ticket number or account email so we can follow up on your inquiry?"}, {"key": "Can I change the size of an item in my order?", "value": "Certainly. Can you please provide your order number and the details of the item you'd like to change?"}, {"key": "What is the status of my warranty claim?", "value": "We'd be happy to check for you. Can you please provide your claim number or the product's serial number?"}, {"key": "Is my item in stock?", "value": "To check availability, can you please provide the product name or SKU so we can assist you?"}, {"key": "My promo code has expired. Can I still use it?", "value": "Unfortunately, expired promo codes cannot be used. However, please provide the code, and we'll see if there's a similar offer currently available."}, {"key": "How do I reset my password?", "value": "We can help with that. Please provide your account email, and we'll send you instructions on how to reset your password."}, {"key": "Can I place a bulk order?", "value": "Certainly. Can you please provide the product name or SKU, and the quantity you'd like to order so we can check availability and pricing?"}, {"key": "How do I apply for a job at your company?", "value": "Thank you for your interest. Can you please provide your email address so we can send you information on how to apply?"}, {"key": "Can I add an item to an existing order?", "value": "We'll do our best to help. 
Can you please provide your order number and the details of the item you'd like to add?"}, {"key": "How long does shipping take?", "value": "We'd be happy to provide an estimate. Can you please provide your shipping destination and the product name or SKU?"}, {"key": "I was charged twice for my order.", "value": "We apologize for the inconvenience. Can you please provide your order number so we can investigate and resolve the issue?"}, {"key": "Where can I find your sizing chart?", "value": "We'd be happy to help. Can you please provide the product name or SKU so we can direct you to the appropriate sizing chart?"}, {"key": "I need technical support for a product.", "value": "We're here to help. Can you please provide the product name, SKU, or serial number, and a description of the issue you're experiencing?"}, {"key": "Can I exchange an item for a different color?", "value": "Certainly. Can you please provide your order number and the details of the item you'd like to exchange?"}, {"key": "How do I apply for a store credit card?", "value": "Thank you for your interest. Can you please provide your email address so we can send you information on how to apply?"}, {"key": "Where is your store located?", "value": "We'd be happy to help you find a store near you. Can you please provide your city and state or zip code?"}, {"key": "I can't log into my account.", "value": "We apologize for the inconvenience. Can you please provide your account email so we can help you troubleshoot the issue?"}, {"key": "Do you offer gift wrapping?", "value": "We do offer gift wrapping for select items. Can you please provide the product name or SKU so we can confirm if gift wrapping is available?"}, {"key": "Can I schedule a delivery?", "value": "Certainly. Can you please provide your order number and the preferred date and time for delivery?"}, {"key": "How do I unsubscribe from your newsletter?", "value": "We're sorry to see you go. 
Can you please provide your email address so we can remove you from our mailing list?"}, {"key": "What is your return policy?", "value": "We'd be happy to provide information on our return policy. Can you please provide the product name or SKU so we can give you the most accurate information?"}, {"key": "Do you offer price matching?", "value": "We do offer price matching in certain cases. Can you please provide the product name or SKU and the competitor's pricing information?"}, {"key": "Are there any current promotions or sales?", "value": "We'd be happy to inform you of any current promotions. Can you please provide the product name or SKU you're interested in, or the type of promotion you're looking for?"}, {"key": "My coupon isn't working.", "value": "We apologize for the inconvenience. Can you please provide the coupon code and the product name or SKU you're trying to apply it to?"}, {"key": "Can you help me with a product recommendation?", "value": "Of course! Can you please provide some details about what you're looking for, such as product type, features, or price range?"}, {"key": "Do you offer international shipping?", "value": "We'd be happy to help. Can you please provide your shipping destination and the product name or SKU you're interested in?"}, {"key": "Can I place an order over the phone?", "value": "Certainly. Can you please provide your phone number and the best time to reach you, and one of our representatives will contact you to place the order?"}, {"key": "How do I know if my order went through?", "value": "We can help you with that. Can you please provide your account email or order number so we can check the status for you?"}, {"key": "Can I use multiple promo codes on one order?", "value": "In most cases, only one promo code can be applied per order. 
Can you please provide the promo codes you're trying to use so we can check their compatibility?"}, {"key": "What are your customer service hours?", "value": "Our customer service team is available 24/7 to assist you. Please feel free to reach out with any questions or concerns."}, {"key": "Where can I find the user manual for my product?", "value": "We'd be happy to help. Can you please provide the product name, SKU, or serial number so we can direct you to the appropriate user manual?"}, {"key": "Do you offer a warranty on your products?", "value": "We do offer warranties on select products. Can you please provide the product name or SKU so we can provide you with warranty information?"}, {"key": "Can I place an order for in-store pickup?", "value": "Certainly. Can you please provide the product name or SKU and the store location where you'd like to pick up your order?"}, {"key": "How do I sign up for your rewards program?", "value": "Thank you for your interest in our rewards program. Can you please provide your email address so we can send you information on how to sign up?"}, {"key": "Can I pay with a gift card online?", "value": "Yes, you can use gift cards for online purchases. Can you please provide the gift card number and the product name or SKU you're interested in?"}, {"key": "I can't find my order confirmation email.", "value": "We apologize for the inconvenience. Can you please provide your account email or order number so we can resend the confirmation email?"}, {"key": "Do you offer a military discount?", "value": "Yes, we do offer a military discount. Can you please provide your military ID or email address so we can apply the discount to your account?"}, {"key": "What is the processing time for my order?", "value": "We'd be happy to provide an estimate. Can you please provide your order number or the product name or SKU?"}, {"key": "How do I update my shipping preferences?", "value": "We can help with that. 
Can you please provide your account email so we can guide you through updating your shipping preferences?"}, {"key": "Is my payment information secure?", "value": "Yes, we take security very seriously. Can you please provide your account email so we can verify the security measures in place for your payment information?"}, {"key": "Can I pre-order an item?", "value": "Certainly. Can you please provide the product name or SKU and your email address so we can notify you when pre-orders are available?"}, {"key": "How do I use a gift card in-store?", "value": "To use a gift card in-store, simply present the gift card at the time of purchase. Can you please provide the gift card number so we can check the balance for you?"}, {"key": "Do you have a loyalty program?", "value": "Yes, we do have a loyalty program. Can you please provide your email address so we can send you information on how to join and enjoy the benefits?"}, {"key": "Is there a mobile app for your store?", "value": "Yes, we do have a mobile app. Can you please provide your email address so we can send you a link to download the app and instructions on how to use it?"}, {"key": "I need help assembling my product.", "value": "We're here to help. Can you please provide the product name, SKU, or serial number, and a description of the issue you're experiencing during assembly?"}, {"key": "Do you offer financing options?", "value": "We do offer financing options for select purchases. Can you please provide the product name or SKU and your email address so we can send you more information?"}, {"key": "Can I reserve an item in-store?", "value": "Certainly. Can you please provide the product name or SKU and the store location where you'd like to reserve the item?"}, {"key": "How do I get a price adjustment for a recent purchase?", "value": "We'd be happy to help. 
Can you please provide your order number and the product name or SKU for the item you'd like a price adjustment on?"}, {"key": "How do I change my email preferences?", "value": "We can help with that. Can you please provide your account email so we can guide you through updating your email preferences?"}, {"key": "Can I use my store credit online?", "value": "Yes, you can use store credit for online purchases. Can you please provide the store credit number and the product name or SKU you're interested in?"}, {"key": "What are the washing instructions for this item?", "value": "We'd be happy to help. Can you please provide the product name or SKU so we can provide you with the proper washing instructions?"}, {"key": "Can I get a replacement part for my product?", "value": "Certainly. Can you please provide the product name, SKU, or serial number, and a description of the part you need?"}, {"key": "Do you offer free shipping?", "value": "We do offer free shipping on select orders. Can you please provide the product name or SKU and your shipping destination so we can check if your order qualifies?"}, {"key": "Can I place a custom order?", "value": "We'd be happy to assist you. Can you please provide the product name or SKU and a description of the customizations you'd like?"}, {"key": "How do I report a problem with your website?", "value": "We appreciate your feedback. Can you please provide a description of the issue you're experiencing and your email address so we can follow up with you?"}, {"key": "What is your policy on price adjustments?", "value": "We'd be happy to provide information on our price adjustment policy. Can you please provide the product name or SKU so we can give you the most accurate information?"}, {"key": "Do you have any upcoming sales or events?", "value": "We'd be happy to inform you of any upcoming sales or events. 
Can you please provide your email address so we can keep you updated?"}, {"key": "How do I schedule a consultation or appointment?", "value": "We'd be happy to help. Can you please provide your name, phone number, and the service you're interested in so we can schedule your appointment?"}, {"key": "Can I get a copy of my receipt?", "value": "Certainly. Can you please provide your order number or account email so we can locate your receipt and send you a copy?"}, {"key": "Can I use a competitor's coupon at your store?", "value": "In some cases, we may accept competitor coupons. Can you please provide the competitor's coupon code and the product name or SKU you'd like to apply it to?"}, {"key": "Do you have a recycling program?", "value": "Yes, we do have a recycling program. Can you please provide your email address so we can send you information on how to participate?"}, {"key": "How do I report a lost or stolen gift card?", "value": "We're sorry to hear that. Can you please provide the gift card number, if available, and your email address so we can assist you further?"}]} diff --git a/applications/ColossalQA/data/data_sample/luchen_zh.txt b/applications/ColossalQA/data/data_sample/luchen_zh.txt new file mode 100644 index 000000000..afd7fc306 --- /dev/null +++ b/applications/ColossalQA/data/data_sample/luchen_zh.txt @@ -0,0 +1 @@ +潞晨科技是一家致力于“解放AI生产力”的全球性公司,技术团队核心成员来自美国加州伯克利、斯坦福、新加坡国立、南洋理工、清华、北大等国内外知名高校。在高性能计算、人工智能、分布式系统等方面已有十余年的技术积累,并在国际顶级学术刊物或会议发表论文近百篇。公司核心产品面向大模型时代的通用深度学习系统 Colossal-AI,可实现高效快速部署AI大模型训练和推理,降低AI大模型应用成本。公司在种子轮、天使轮融资已获得“清科中国早期投资机构30强”前三甲创新工场、真格基金、蓝驰创投的600万美元投资。 diff --git a/applications/ColossalQA/data/tests/64KB.json b/applications/ColossalQA/data/tests/64KB.json new file mode 100644 index 000000000..99278dc5c --- /dev/null +++ b/applications/ColossalQA/data/tests/64KB.json @@ -0,0 +1,7 @@ +{ + "data":[ + {"content":"Donec lobortis eleifend condimentum. Cras dictum dolor lacinia lectus vehicula rutrum. Maecenas quis nisi nunc. 
Nam tristique feugiat est vitae mollis. Maecenas quis nisi nunc."}, + {"content":"Aliquam sollicitudin ante ligula, eget malesuada nibh efficitur et. Pellentesque massa sem, scelerisque sit amet odio id, cursus tempor urna. Etiam congue dignissim volutpat. Vestibulum pharetra libero et velit gravida euismod."} + ], + "name":"player" +} \ No newline at end of file diff --git a/applications/ColossalQA/data/tests/companies.csv b/applications/ColossalQA/data/tests/companies.csv new file mode 100644 index 000000000..93dcac9f3 --- /dev/null +++ b/applications/ColossalQA/data/tests/companies.csv @@ -0,0 +1,101 @@ +Index,Organization Id,Name,Website,Country,Description,Founded,Industry,Number of employees +1,FAB0d41d5b5d22c,Ferrell LLC,https://price.net/,Papua New Guinea,Horizontal empowering knowledgebase,1990,Plastics,3498 +2,6A7EdDEA9FaDC52,"Mckinney, Riley and Day",http://www.hall-buchanan.info/,Finland,User-centric system-worthy leverage,2015,Glass / Ceramics / Concrete,4952 +3,0bFED1ADAE4bcC1,Hester Ltd,http://sullivan-reed.com/,China,Switchable scalable moratorium,1971,Public Safety,5287 +4,2bFC1Be8a4ce42f,Holder-Sellers,https://becker.com/,Turkmenistan,De-engineered systemic artificial intelligence,2004,Automotive,921 +5,9eE8A6a4Eb96C24,Mayer Group,http://www.brewer.com/,Mauritius,Synchronized needs-based challenge,1991,Transportation,7870 +6,cC757116fe1C085,Henry-Thompson,http://morse.net/,Bahamas,Face-to-face well-modulated customer loyalty,1992,Primary / Secondary Education,4914 +7,219233e8aFF1BC3,Hansen-Everett,https://www.kidd.org/,Pakistan,Seamless disintermediate collaboration,2018,Publishing Industry,7832 +8,ccc93DCF81a31CD,Mcintosh-Mora,https://www.brooks.com/,Heard Island and McDonald Islands,Centralized attitude-oriented capability,1970,Import / Export,4389 +9,0B4F93aA06ED03e,Carr Inc,http://ross.com/,Kuwait,Distributed impactful customer loyalty,1996,Plastics,8167 +10,738b5aDe6B1C6A5,Gaines Inc,http://sandoval-hooper.com/,Uzbekistan,Multi-lateral 
scalable protocol,1997,Outsourcing / Offshoring,9698 +11,AE61b8Ffebbc476,Kidd Group,http://www.lyons.com/,Bouvet Island (Bouvetoya),Proactive foreground paradigm,2001,Primary / Secondary Education,7473 +12,eb3B7D06cCdD609,Crane-Clarke,https://www.sandoval.com/,Denmark,Front-line clear-thinking encryption,2014,Food / Beverages,9011 +13,8D0c29189C9798B,"Keller, Campos and Black",https://www.garner.info/,Liberia,Ameliorated directional emulation,2020,Museums / Institutions,2862 +14,D2c91cc03CA394c,Glover-Pope,http://www.silva.biz/,United Arab Emirates,Persevering contextually-based approach,2013,Medical Practice,9079 +15,C8AC1eaf9C036F4,Pacheco-Spears,https://aguilar.com/,Sweden,Secured logistical synergy,1984,Maritime,769 +16,b5D10A14f7a8AfE,Hodge-Ayers,http://www.archer-elliott.com/,Honduras,Future-proofed radical implementation,1990,Facilities Services,8508 +17,68139b5C4De03B4,"Bowers, Guerra and Krause",http://www.carrillo-nicholson.com/,Uganda,De-engineered transitional strategy,1972,Primary / Secondary Education,6986 +18,5c2EffEfdba2BdF,Mckenzie-Melton,http://montoya-thompson.com/,Hong Kong,Reverse-engineered heuristic alliance,1998,Investment Management / Hedge Fund / Private Equity,4589 +19,ba179F19F7925f5,Branch-Mann,http://www.lozano.com/,Botswana,Adaptive intangible frame,1999,Architecture / Planning,7961 +20,c1Ce9B350BAc66b,Weiss and Sons,https://barrett.com/,Korea,Sharable optimal functionalities,2011,Plastics,5984 +21,8de40AC4e6EaCa4,"Velez, Payne and Coffey",http://burton.com/,Luxembourg,Mandatory coherent synergy,1986,Wholesale,5010 +22,Aad86a4F0385F2d,Harrell LLC,http://www.frey-rosario.com/,Guadeloupe,Reverse-engineered mission-critical moratorium,2018,Construction,2185 +23,22aC3FFd64fD703,"Eaton, Reynolds and Vargas",http://www.freeman.biz/,Monaco,Self-enabling multi-tasking process improvement,2014,Luxury Goods / Jewelry,8987 +24,5Ec4C272bCf085c,Robbins-Cummings,http://donaldson-wilkins.com/,Belgium,Organic non-volatile 
hierarchy,1991,Pharmaceuticals,5038 +25,5fDBeA8BB91a000,Jenkins Inc,http://www.kirk.biz/,South Africa,Front-line systematic help-desk,2002,Insurance,1215 +26,dFfD6a6F9AC2d9C,"Greene, Benjamin and Novak",http://www.kent.net/,Romania,Centralized leadingedge moratorium,2012,Museums / Institutions,4941 +27,4B217cC5a0674C5,"Dickson, Richmond and Clay",http://everett.com/,Czech Republic,Team-oriented tangible complexity,1980,Real Estate / Mortgage,3122 +28,88b1f1cDcf59a37,Prince-David,http://thompson.com/,Christmas Island,Virtual holistic methodology,1970,Banking / Mortgage,1046 +29,f9F7bBCAEeC360F,Ayala LLC,http://www.zhang.com/,Philippines,Open-source zero administration hierarchy,2021,Legal Services,7664 +30,7Cb3AeFcE4Ba31e,Rivas Group,https://hebert.org/,Australia,Open-architected well-modulated capacity,1998,Logistics / Procurement,4155 +31,ccBcC32adcbc530,"Sloan, Mays and Whitehead",http://lawson.com/,Chad,Face-to-face high-level conglomeration,1997,Civil Engineering,365 +32,f5afd686b3d05F5,"Durham, Allen and Barnes",http://chan-stafford.org/,Zimbabwe,Synergistic web-enabled framework,1993,Mechanical or Industrial Engineering,6135 +33,38C6cfC5074Fa5e,Fritz-Franklin,http://www.lambert.com/,Nepal,Automated 4thgeneration website,1972,Hospitality,4516 +34,5Cd7efccCcba38f,Burch-Ewing,http://cline.net/,Taiwan,User-centric 4thgeneration system engine,1981,Venture Capital / VC,7443 +35,9E6Acb51e3F9d6F,"Glass, Barrera and Turner",https://dunlap.com/,Kyrgyz Republic,Multi-channeled 3rdgeneration open system,2020,Utilities,2610 +36,4D4d7E18321eaeC,Pineda-Cox,http://aguilar.org/,Bolivia,Fundamental asynchronous capability,2010,Human Resources / HR,1312 +37,485f5d06B938F2b,"Baker, Mccann and Macdonald",http://www.anderson-barker.com/,Kenya,Cross-group user-facing focus group,2013,Legislative Office,1638 +38,19E3a5Bf6dBDc4F,Cuevas-Moss,https://dodson-castaneda.net/,Guatemala,Extended human-resource intranet,1994,Music,9995 +39,6883A965c7b68F7,Hahn 
PLC,http://newman.com/,Belarus,Organic logistical leverage,2012,Electrical / Electronic Manufacturing,3715 +40,AC5B7AA74Aa4A2E,"Valentine, Ferguson and Kramer",http://stuart.net/,Jersey,Centralized secondary time-frame,1997,Non - Profit / Volunteering,3585 +41,decab0D5027CA6a,Arroyo Inc,https://www.turner.com/,Grenada,Managed demand-driven website,2006,Writing / Editing,9067 +42,dF084FbBb613eea,Walls LLC,http://www.reese-vasquez.biz/,Cape Verde,Self-enabling fresh-thinking installation,1989,Investment Management / Hedge Fund / Private Equity,1678 +43,A2D89Ab9bCcAd4e,"Mitchell, Warren and Schneider",https://fox.biz/,Trinidad and Tobago,Enhanced intangible time-frame,2021,Capital Markets / Hedge Fund / Private Equity,3816 +44,77aDc905434a49f,Prince PLC,https://www.watts.com/,Sweden,Profit-focused coherent installation,2016,Individual / Family Services,7645 +45,235fdEFE2cfDa5F,Brock-Blackwell,http://www.small.com/,Benin,Secured foreground emulation,1986,Online Publishing,7034 +46,1eD64cFe986BBbE,Walton-Barnett,https://ashley-schaefer.com/,Western Sahara,Right-sized clear-thinking flexibility,2001,Luxury Goods / Jewelry,1746 +47,CbBbFcdd0eaE2cF,Bartlett-Arroyo,https://cruz.com/,Northern Mariana Islands,Realigned didactic function,1976,Civic / Social Organization,3987 +48,49aECbDaE6aBD53,"Wallace, Madden and Morris",http://www.blevins-fernandez.biz/,Germany,Persistent real-time customer loyalty,2016,Pharmaceuticals,9443 +49,7b3fe6e7E72bFa4,Berg-Sparks,https://cisneros-love.com/,Canada,Stand-alone static implementation,1974,Arts / Crafts,2073 +50,c6DedA82A8aef7E,Gonzales Ltd,http://bird.com/,Tonga,Managed human-resource policy,1988,Consumer Goods,9069 +51,7D9FBF85cdC3871,Lawson and Sons,https://www.wong.com/,French Southern Territories,Compatible analyzing intranet,2021,Arts / Crafts,3527 +52,7dd18Fb7cB07b65,"Mcguire, Mcconnell and Olsen",https://melton-briggs.com/,Korea,Profound client-server frame,1988,Printing,8445 
+53,EF5B55FadccB8Fe,Charles-Phillips,https://bowman.com/,Cote d'Ivoire,Monitored client-server implementation,2012,Mental Health Care,3450 +54,f8D4B99e11fAF5D,Odom Ltd,https://www.humphrey-hess.com/,Cote d'Ivoire,Advanced static process improvement,2012,Management Consulting,1825 +55,e24D21BFd3bF1E5,Richard PLC,https://holden-coleman.net/,Mayotte,Object-based optimizing model,1971,Broadcast Media,4942 +56,B9BdfEB6D3Ca44E,Sampson Ltd,https://blevins.com/,Cayman Islands,Intuitive local adapter,2005,Farming,1418 +57,2a74D6f3D3B268e,"Cherry, Le and Callahan",https://waller-delacruz.biz/,Nigeria,Universal human-resource collaboration,2017,Entertainment / Movie Production,7202 +58,Bf3F3f62c8aBC33,Cherry PLC,https://www.avila.info/,Marshall Islands,Persistent tertiary website,1980,Plastics,8245 +59,aeBe26B80a7a23c,Melton-Nichols,https://kennedy.com/,Palau,User-friendly clear-thinking productivity,2021,Legislative Office,8741 +60,aAeb29ad43886C6,Potter-Walsh,http://thomas-french.org/,Turkey,Optional non-volatile open system,2008,Human Resources / HR,6923 +61,bD1bc6bB6d1FeD3,Freeman-Chen,https://mathis.com/,Timor-Leste,Phased next generation adapter,1973,International Trade / Development,346 +62,EB9f456e8b7022a,Soto Group,https://norris.info/,Vietnam,Enterprise-wide executive installation,1988,Business Supplies / Equipment,9097 +63,Dfef38C51D8DAe3,"Poole, Cruz and Whitney",https://reed.info/,Reunion,Balanced analyzing groupware,1978,Marketing / Advertising / Sales,2992 +64,055ffEfB2Dd95B0,Riley Ltd,http://wiley.com/,Brazil,Optional exuding superstructure,1986,Textiles,9315 +65,cBfe4dbAE1699da,"Erickson, Andrews and Bailey",https://www.hobbs-grant.com/,Eritrea,Vision-oriented secondary project,2014,Consumer Electronics,7829 +66,fdFbecbadcdCdf1,"Wilkinson, Charles and Arroyo",http://hunter-mcfarland.com/,United States Virgin Islands,Assimilated 24/7 archive,1996,Building Materials,602 +67,5DCb8A5a5ca03c0,Floyd Ltd,http://www.whitney.com/,Falkland Islands 
(Malvinas),Function-based fault-tolerant concept,2017,Public Relations / PR,2911 +68,ce57DCbcFD6d618,Newman-Galloway,https://www.scott.com/,Luxembourg,Enhanced foreground collaboration,1987,Information Technology / IT,3934 +69,5aaD187dc929371,Frazier-Butler,https://www.daugherty-farley.info/,Northern Mariana Islands,Persistent interactive circuit,1972,Outsourcing / Offshoring,5130 +70,902D7Ac8b6d476b,Newton Inc,https://www.richmond-manning.info/,Netherlands Antilles,Fundamental stable info-mediaries,1976,Military Industry,563 +71,32BB9Ff4d939788,Duffy-Levy,https://www.potter.com/,Guernsey,Diverse exuding installation,1982,Wireless,6146 +72,adcB0afbE58bAe3,Wagner LLC,https://decker-esparza.com/,Uruguay,Reactive attitude-oriented toolset,1987,International Affairs,6874 +73,dfcA1c84AdB61Ac,Mccall-Holmes,http://www.dean.com/,Benin,Object-based value-added database,2009,Legal Services,696 +74,208044AC2fe52F3,Massey LLC,https://frazier.biz/,Suriname,Configurable zero administration Graphical User Interface,1986,Accounting,5004 +75,f3C365f0c1A0623,Hicks LLC,http://alvarez.biz/,Pakistan,Quality-focused client-server Graphical User Interface,1970,Computer Software / Engineering,8480 +76,ec5Bdd3CBAfaB93,"Cole, Russell and Avery",http://www.blankenship.com/,Mongolia,De-engineered fault-tolerant challenge,2000,Law Enforcement,7012 +77,DDB19Be7eeB56B4,Cummings-Rojas,https://simon-pearson.com/,Svalbard & Jan Mayen Islands,User-centric modular customer loyalty,2012,Financial Services,7529 +78,dd6CA3d0bc3cAfc,"Beasley, Greene and Mahoney",http://www.petersen-lawrence.com/,Togo,Extended content-based methodology,1976,Religious Institutions,869 +79,A0B9d56e61070e3,"Beasley, Sims and Allison",http://burke.info/,Latvia,Secured zero tolerance hub,1972,Facilities Services,6182 +80,cBa7EFe5D05Adaf,Crawford-Rivera,https://black-ramirez.org/,Cuba,Persevering exuding budgetary management,1999,Online Publishing,7805 
+81,Ea3f6D52Ec73563,Montes-Hensley,https://krueger.org/,Liechtenstein,Multi-tiered secondary productivity,2009,Printing,8433 +82,bC0CEd48A8000E0,Velazquez-Odom,https://stokes.com/,Djibouti,Streamlined 6thgeneration function,2002,Alternative Dispute Resolution,4044 +83,c89b9b59BC4baa1,Eaton-Morales,https://www.reeves-graham.com/,Micronesia,Customer-focused explicit frame,1990,Capital Markets / Hedge Fund / Private Equity,7013 +84,FEC51bce8421a7b,"Roberson, Pennington and Palmer",http://www.keith-fisher.com/,Cameroon,Adaptive bi-directional hierarchy,1993,Telecommunications,5571 +85,e0E8e27eAc9CAd5,"George, Russo and Guerra",https://drake.com/,Sweden,Centralized non-volatile capability,1989,Military Industry,2880 +86,B97a6CF9bf5983C,Davila Inc,https://mcconnell.info/,Cocos (Keeling) Islands,Profit-focused dedicated frame,2017,Consumer Electronics,2215 +87,a0a6f9b3DbcBEb5,Mays-Preston,http://www.browning-key.com/,Mali,User-centric heuristic focus group,2006,Military Industry,5786 +88,8cC1bDa330a5871,Pineda-Morton,https://www.carr.com/,United States Virgin Islands,Grass-roots methodical info-mediaries,1991,Printing,6168 +89,ED889CB2FE9cbd3,Huang and Sons,https://www.bolton.com/,Eritrea,Re-contextualized dynamic hierarchy,1981,Semiconductors,7484 +90,F4Dc1417BC6cb8f,Gilbert-Simon,https://www.bradford.biz/,Burundi,Grass-roots radical parallelism,1973,Newspapers / Journalism,1927 +91,7ABc3c7ecA03B34,Sampson-Griffith,http://hendricks.org/,Benin,Multi-layered composite paradigm,1972,Textiles,3881 +92,4e0719FBE38e0aB,Miles-Dominguez,http://www.turner.com/,Gibraltar,Organized empowering forecast,1996,Civic / Social Organization,897 +93,dEbDAAeDfaed00A,Rowe and Sons,https://www.simpson.org/,El Salvador,Balanced multimedia knowledgebase,1978,Facilities Services,8172 +94,61BDeCfeFD0cEF5,"Valenzuela, Holmes and Rowland",https://www.dorsey.net/,Taiwan,Persistent tertiary focus group,1999,Transportation,1483 +95,4e91eD25f486110,"Best, Wade and 
Shepard",https://zimmerman.com/,Zimbabwe,Innovative background definition,1991,Gambling / Casinos,4873 +96,0a0bfFbBbB8eC7c,Holmes Group,https://mcdowell.org/,Ethiopia,Right-sized zero tolerance focus group,1975,Photography,2988 +97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292 +98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236 +99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339 +100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785 diff --git a/applications/ColossalQA/data/tests/sample-pdf-file.pdf b/applications/ColossalQA/data/tests/sample-pdf-file.pdf new file mode 100644 index 000000000..4b6eea24d Binary files /dev/null and b/applications/ColossalQA/data/tests/sample-pdf-file.pdf differ diff --git a/applications/ColossalQA/data/tests/test.html b/applications/ColossalQA/data/tests/test.html new file mode 100644 index 000000000..5ad21421d --- /dev/null +++ b/applications/ColossalQA/data/tests/test.html @@ -0,0 +1,1970 @@ +<!DOCTYPE html> +<!-- saved from url=(0046)https://docs.python.org/3/library/logging.html --> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> + + <meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/"> +<meta property="og:title" content="logging — Logging facility for Python"> +<meta property="og:type" content="website"> +<meta property="og:url" content="https://docs.python.org/3/library/logging.html"> +<meta property="og:site_name" content="Python documentation"> +<meta property="og:description" content="Source code: Lib/logging/__init__.py Important: This page contains the API reference information. 
For tutorial information and discussion of more advanced topics, see Basic Tutorial, Advanced Tutor..."> +<meta property="og:image" content="https://docs.python.org/3/_static/og-image.png"> +<meta property="og:image:alt" content="Python documentation"> +<meta name="description" content="Source code: Lib/logging/__init__.py Important: This page contains the API reference information. For tutorial information and discussion of more advanced topics, see Basic Tutorial, Advanced Tutor..."> +<meta property="og:image:width" content="200"> +<meta property="og:image:height" content="200"> +<meta name="theme-color" content="#3776ab"> + + <title>logging — Logging facility for Python — Python 3.11.5 documentation</title><meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <link rel="stylesheet" type="text/css" href="./test_files/pygments.css"> + <link rel="stylesheet" type="text/css" href="./test_files/pydoctheme.css"> + <link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="./test_files/pygments_dark.css"> + + <script data-url_root="../" id="documentation_options" src="./test_files/documentation_options.js.download"></script> + <script src="./test_files/jquery.js.download"></script> + <script src="./test_files/underscore.js.download"></script> + <script src="./test_files/doctools.js.download"></script> + + <script src="./test_files/sidebar.js.download"></script> + + <link rel="search" type="application/opensearchdescription+xml" title="Search within Python 3.11.5 documentation" href="https://docs.python.org/3/_static/opensearch.xml"> + <link rel="author" title="About these documents" href="https://docs.python.org/3/about.html"> + <link rel="index" title="Index" href="https://docs.python.org/3/genindex.html"> + <link rel="search" title="Search" href="https://docs.python.org/3/search.html"> + <link rel="copyright" title="Copyright" href="https://docs.python.org/3/copyright.html"> + <link rel="next" 
title="logging.config — Logging configuration" href="https://docs.python.org/3/library/logging.config.html"> + <link rel="prev" title="getopt — C-style parser for command line options" href="https://docs.python.org/3/library/getopt.html"> + <link rel="canonical" href="https://docs.python.org/3/library/logging.html"> + + + + + + <style> + @media only screen { + table.full-width-table { + width: 100%; + } + } + </style> +<link rel="stylesheet" href="./test_files/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css"> + <link rel="shortcut icon" type="image/png" href="./test_files/py.svg"> + <script type="text/javascript" src="./test_files/copybutton.js.download"></script> + <script type="text/javascript" src="./test_files/menu.js.download"></script> + <script type="text/javascript" src="./test_files/themetoggle.js.download"></script> + + </head> +<body data-new-gr-c-s-check-loaded="14.1038.0" data-gr-ext-installed=""> +<div class="mobile-nav"> + <input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation" aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu"> + <nav class="nav-content" role="navigation"> + <label for="menuToggler" class="toggler__label"> + <span></span> + </label> + <span class="nav-items-wrapper"> + <a href="https://www.python.org/" class="nav-logo"> + <img src="./test_files/py.svg" alt="Logo"> + </a> + <span class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></span> + <form role="search" class="search" action="https://docs.python.org/3/search.html" method="get"> + <svg 
xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon"> + <path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path> + </svg> + <input placeholder="Quick search" aria-label="Quick search" type="search" name="q"> + <input type="submit" value="Go"> + </form> + </span> + </nav> + <div class="menu-wrapper"> + <nav class="menu" role="navigation" aria-label="main navigation" tabindex="-1"> + <div class="language_switcher_placeholder"><select id="language_select"><option value="en" selected="selected">English</option><option value="es">Spanish</option><option value="fr">French</option><option value="ja">Japanese</option><option value="ko">Korean</option><option value="pt-br">Brazilian Portuguese</option><option value="tr">Turkish</option><option value="zh-cn">Simplified Chinese</option><option value="zh-tw">Traditional Chinese</option></select></div> + +<label class="theme-selector-label"> + Theme + <select class="theme-selector" oninput="activateTheme(this.value)"> + <option value="auto" selected="">Auto</option> + <option value="light">Light</option> + <option value="dark">Dark</option> + </select> +</label> + <div> + <h3><a href="https://docs.python.org/3/contents.html">Table of Contents</a></h3> + <ul> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a><ul> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logger-objects">Logger Objects</a></li> +<li><a class="reference internal" 
href="https://docs.python.org/3/library/logging.html#logging-levels">Logging Levels</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#handler-objects">Handler Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#formatter-objects">Formatter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#filter-objects">Filter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-objects">LogRecord Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-attributes">LogRecord attributes</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#loggeradapter-objects">LoggerAdapter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#thread-safety">Thread Safety</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-level-functions">Module-Level Functions</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-level-attributes">Module-Level Attributes</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#integration-with-the-warnings-module">Integration with the warnings module</a></li> +</ul> +</li> +</ul> + + </div> + <div> + <h4>Previous topic</h4> + <p class="topless"><a href="https://docs.python.org/3/library/getopt.html" title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">getopt</span></code> — C-style parser for command line options</a></p> + </div> + <div> + <h4>Next topic</h4> + <p class="topless"><a href="https://docs.python.org/3/library/logging.config.html" title="next chapter"><code class="xref py py-mod docutils literal 
notranslate"><span class="pre">logging.config</span></code> — Logging configuration</a></p> + </div> + <div role="note" aria-label="source link"> + <h3>This Page</h3> + <ul class="this-page-menu"> + <li><a href="https://docs.python.org/3/bugs.html">Report a Bug</a></li> + <li> + <a href="https://github.com/python/cpython/blob/3.11/Doc/library/logging.rst" rel="nofollow">Show Source + </a> + </li> + </ul> + </div> + </nav> + </div> +</div> + + + <div class="related" role="navigation" aria-label="related navigation"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="https://docs.python.org/3/genindex.html" title="General Index" accesskey="I">index</a></li> + <li class="right"> + <a href="https://docs.python.org/3/py-modindex.html" title="Python Module Index">modules</a> |</li> + <li class="right"> + <a href="https://docs.python.org/3/library/logging.config.html" title="logging.config — Logging configuration" accesskey="N">next</a> |</li> + <li class="right"> + <a href="https://docs.python.org/3/library/getopt.html" title="getopt — C-style parser for command line options" accesskey="P">previous</a> |</li> + + <li><img src="./test_files/py.svg" alt="python logo" style="vertical-align: middle; margin-top: -1px"></li> + <li><a href="https://www.python.org/">Python</a> »</li> + <li class="switchers"> + <div class="language_switcher_placeholder"><select id="language_select"><option value="en" selected="selected">English</option><option value="es">Spanish</option><option value="fr">French</option><option value="ja">Japanese</option><option value="ko">Korean</option><option value="pt-br">Brazilian Portuguese</option><option value="tr">Turkish</option><option value="zh-cn">Simplified Chinese</option><option value="zh-tw">Traditional Chinese</option></select></div> + <div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" 
selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div> + </li> + <li> + + </li> + <li id="cpython-language-and-version"> + <a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> » + </li> + + <li class="nav-item nav-item-1"><a href="https://docs.python.org/3/library/index.html">The Python Standard Library</a> »</li> + <li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html" accesskey="U">Generic Operating System Services</a> »</li> + <li class="nav-item nav-item-this"><a href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li> + <li class="right"> + + + <div class="inline-search" role="search"> + <form class="inline-search" action="https://docs.python.org/3/search.html" method="get"> + <input placeholder="Quick search" aria-label="Quick search" type="search" name="q"> + <input type="submit" value="Go"> + </form> + </div> + | + </li> + <li class="right"> +<label class="theme-selector-label"> + Theme + <select class="theme-selector" oninput="activateTheme(this.value)"> + <option value="auto" selected="">Auto</option> + <option value="light">Light</option> + <option value="dark">Dark</option> + </select> +</label> |</li> + + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" role="main"> + + <section id="module-logging"> +<span id="logging-logging-facility-for-python"></span><h1><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span 
class="pre">logging</span></code></a> — Logging facility for Python<a class="headerlink" href="https://docs.python.org/3/library/logging.html#module-logging" title="Permalink to this headline">¶</a></h1> +<p><strong>Source code:</strong> <a class="reference external" href="https://github.com/python/cpython/tree/3.11/Lib/logging/__init__.py">Lib/logging/__init__.py</a></p> +<aside class="sidebar" id="index-0"> +<p class="sidebar-title">Important</p> +<p>This page contains the API reference information. For tutorial +information and discussion of more advanced topics, see</p> +<ul class="simple"> +<li><p><a class="reference internal" href="https://docs.python.org/3/howto/logging.html#logging-basic-tutorial"><span class="std std-ref">Basic Tutorial</span></a></p></li> +<li><p><a class="reference internal" href="https://docs.python.org/3/howto/logging.html#logging-advanced-tutorial"><span class="std std-ref">Advanced Tutorial</span></a></p></li> +<li><p><a class="reference internal" href="https://docs.python.org/3/howto/logging-cookbook.html#logging-cookbook"><span class="std std-ref">Logging Cookbook</span></a></p></li> +</ul> +</aside> +<hr class="docutils"> +<p>This module defines functions and classes which implement a flexible event +logging system for applications and libraries.</p> +<p>The key benefit of having the logging API provided by a standard library module +is that all Python modules can participate in logging, so your application log +can include your own messages integrated with messages from third-party +modules.</p> +<p>The simplest example:</p> +<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>>>> import logging +>>> logging.warning('Watch out!') +WARNING:root:Watch out! +</pre></div> +</div> +<p>The module provides a lot of functionality and flexibility. 
If you are +unfamiliar with logging, the best way to get to grips with it is to view the +tutorials (<strong>see the links above and on the right</strong>).</p> +<p>The basic classes defined by the module, together with their functions, are +listed below.</p> +<ul class="simple"> +<li><p>Loggers expose the interface that application code directly uses.</p></li> +<li><p>Handlers send the log records (created by loggers) to the appropriate +destination.</p></li> +<li><p>Filters provide a finer grained facility for determining which log records +to output.</p></li> +<li><p>Formatters specify the layout of log records in the final output.</p></li> +</ul> +<section id="logger-objects"> +<span id="logger"></span><h2>Logger Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#logger-objects" title="Permalink to this headline">¶</a></h2> +<p>Loggers have the following attributes and methods. Note that Loggers should +<em>NEVER</em> be instantiated directly, but always through the module-level function +<code class="docutils literal notranslate"><span class="pre">logging.getLogger(name)</span></code>. Multiple calls to <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.getLogger" title="logging.getLogger"><code class="xref py py-func docutils literal notranslate"><span class="pre">getLogger()</span></code></a> with the same +name will always return a reference to the same Logger object.</p> +<p>The <code class="docutils literal notranslate"><span class="pre">name</span></code> is potentially a period-separated hierarchical value, like +<code class="docutils literal notranslate"><span class="pre">foo.bar.baz</span></code> (though it could also be just plain <code class="docutils literal notranslate"><span class="pre">foo</span></code>, for example). +Loggers that are further down in the hierarchical list are children of loggers +higher up in the list. 
For example, given a logger with a name of <code class="docutils literal notranslate"><span class="pre">foo</span></code>, +loggers with names of <code class="docutils literal notranslate"><span class="pre">foo.bar</span></code>, <code class="docutils literal notranslate"><span class="pre">foo.bar.baz</span></code>, and <code class="docutils literal notranslate"><span class="pre">foo.bam</span></code> are all +descendants of <code class="docutils literal notranslate"><span class="pre">foo</span></code>. The logger name hierarchy is analogous to the Python +package hierarchy, and identical to it if you organise your loggers on a +per-module basis using the recommended construction +<code class="docutils literal notranslate"><span class="pre">logging.getLogger(__name__)</span></code>. That’s because in a module, <code class="docutils literal notranslate"><span class="pre">__name__</span></code> +is the module’s name in the Python package namespace.</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.Logger"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">Logger</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="Permalink to this definition">¶</a></dt> +<dd><dl class="py attribute"> +<dt class="sig sig-object py" id="logging.Logger.propagate"> +<span class="sig-name descname"><span class="pre">propagate</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.propagate" title="Permalink to this definition">¶</a></dt> +<dd><p>If this attribute evaluates to true, events logged to this logger will be +passed to the handlers of higher level (ancestor) loggers, in addition to +any handlers attached to this logger. 
Messages are passed directly to the +ancestor loggers’ handlers - neither the level nor filters of the ancestor +loggers in question are considered.</p> +<p>If this evaluates to false, logging messages are not passed to the handlers +of ancestor loggers.</p> +<p>Spelling it out with an example: If the propagate attribute of the logger named +<code class="docutils literal notranslate"><span class="pre">A.B.C</span></code> evaluates to true, any event logged to <code class="docutils literal notranslate"><span class="pre">A.B.C</span></code> via a method call such as +<code class="docutils literal notranslate"><span class="pre">logging.getLogger('A.B.C').error(...)</span></code> will [subject to passing that logger’s +level and filter settings] be passed in turn to any handlers attached to loggers +named <code class="docutils literal notranslate"><span class="pre">A.B</span></code>, <code class="docutils literal notranslate"><span class="pre">A</span></code> and the root logger, after first being passed to any handlers +attached to <code class="docutils literal notranslate"><span class="pre">A.B.C</span></code>. If any logger in the chain <code class="docutils literal notranslate"><span class="pre">A.B.C</span></code>, <code class="docutils literal notranslate"><span class="pre">A.B</span></code>, <code class="docutils literal notranslate"><span class="pre">A</span></code> has its +<code class="docutils literal notranslate"><span class="pre">propagate</span></code> attribute set to false, then that is the last logger whose handlers +are offered the event to handle, and propagation stops at that point.</p> +<p>The constructor sets this attribute to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>If you attach a handler to a logger <em>and</em> one or more of its +ancestors, it may emit the same record multiple times. 
In general, you +should not need to attach a handler to more than one logger - if you just +attach it to the appropriate logger which is highest in the logger +hierarchy, then it will see all events logged by all descendant loggers, +provided that their propagate setting is left set to <code class="docutils literal notranslate"><span class="pre">True</span></code>. A common +scenario is to attach handlers only to the root logger, and to let +propagation take care of the rest.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.setLevel"> +<span class="sig-name descname"><span class="pre">setLevel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.setLevel" title="Permalink to this definition">¶</a></dt> +<dd><p>Sets the threshold for this logger to <em>level</em>. Logging messages which are less +severe than <em>level</em> will be ignored; logging messages which have severity <em>level</em> +or higher will be emitted by whichever handler or handlers service this logger, +unless a handler’s level has been set to a higher severity level than <em>level</em>.</p> +<p>When a logger is created, the level is set to <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.NOTSET" title="logging.NOTSET"><code class="xref py py-const docutils literal notranslate"><span class="pre">NOTSET</span></code></a> (which causes +all messages to be processed when the logger is the root logger, or delegation +to the parent when the logger is a non-root logger). 
Note that the root logger +is created with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a>.</p> +<p>The term ‘delegation to the parent’ means that if a logger has a level of +NOTSET, its chain of ancestor loggers is traversed until either an ancestor with +a level other than NOTSET is found, or the root is reached.</p> +<p>If an ancestor is found with a level other than NOTSET, then that ancestor’s +level is treated as the effective level of the logger where the ancestor search +began, and is used to determine how a logging event is handled.</p> +<p>If the root is reached, and it has a level of NOTSET, then all messages will be +processed. Otherwise, the root’s level will be used as the effective level.</p> +<p>See <a class="reference internal" href="https://docs.python.org/3/library/logging.html#levels"><span class="std std-ref">Logging Levels</span></a> for a list of levels.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>level</em> parameter now accepts a string representation of the +level such as ‘INFO’ as an alternative to the integer constants +such as <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a>. Note, however, that levels are internally stored +as integers, and methods such as 
<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.getEffectiveLevel" title="logging.Logger.getEffectiveLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getEffectiveLevel()</span></code></a> and +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.isEnabledFor" title="logging.Logger.isEnabledFor"><code class="xref py py-meth docutils literal notranslate"><span class="pre">isEnabledFor()</span></code></a> will return/expect to be passed integers.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.isEnabledFor"> +<span class="sig-name descname"><span class="pre">isEnabledFor</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.isEnabledFor" title="Permalink to this definition">¶</a></dt> +<dd><p>Indicates if a message of severity <em>level</em> would be processed by this logger. 
+This method checks first the module-level level set by +<code class="docutils literal notranslate"><span class="pre">logging.disable(level)</span></code> and then the logger’s effective level as determined +by <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.getEffectiveLevel" title="logging.Logger.getEffectiveLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getEffectiveLevel()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.getEffectiveLevel"> +<span class="sig-name descname"><span class="pre">getEffectiveLevel</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.getEffectiveLevel" title="Permalink to this definition">¶</a></dt> +<dd><p>Indicates the effective level for this logger. If a value other than +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.NOTSET" title="logging.NOTSET"><code class="xref py py-const docutils literal notranslate"><span class="pre">NOTSET</span></code></a> has been set using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.setLevel" title="logging.Logger.setLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">setLevel()</span></code></a>, it is returned. Otherwise, +the hierarchy is traversed towards the root until a value other than +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.NOTSET" title="logging.NOTSET"><code class="xref py py-const docutils literal notranslate"><span class="pre">NOTSET</span></code></a> is found, and that value is returned. 
The value returned is +an integer, typically one of <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="logging.DEBUG"><code class="xref py py-const docutils literal notranslate"><span class="pre">logging.DEBUG</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">logging.INFO</span></code></a> +etc.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.getChild"> +<span class="sig-name descname"><span class="pre">getChild</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">suffix</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.getChild" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns a logger which is a descendant of this logger, as determined by the suffix. +Thus, <code class="docutils literal notranslate"><span class="pre">logging.getLogger('abc').getChild('def.ghi')</span></code> would return the same +logger as would be returned by <code class="docutils literal notranslate"><span class="pre">logging.getLogger('abc.def.ghi')</span></code>. This is a +convenience method, useful when the parent logger is named using e.g. 
<code class="docutils literal notranslate"><span class="pre">__name__</span></code> +rather than a literal string.</p> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.2.</span></p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.debug"> +<span class="sig-name descname"><span class="pre">debug</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.debug" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="logging.DEBUG"><code class="xref py py-const docutils literal notranslate"><span class="pre">DEBUG</span></code></a> on this logger. The <em>msg</em> is the +message format string, and the <em>args</em> are the arguments which are merged into +<em>msg</em> using the string formatting operator. (Note that this means that you can +use keywords in the format string, together with a single dictionary argument.) +No % formatting operation is performed on <em>msg</em> when no <em>args</em> are supplied.</p> +<p>There are four keyword arguments in <em>kwargs</em> which are inspected: +<em>exc_info</em>, <em>stack_info</em>, <em>stacklevel</em> and <em>extra</em>.</p> +<p>If <em>exc_info</em> does not evaluate as false, it causes exception information to be +added to the logging message. 
If an exception tuple (in the format returned by +<a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.exc_info()</span></code></a>) or an exception instance is provided, it is used; +otherwise, <a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.exc_info()</span></code></a> is called to get the exception information.</p> +<p>The second optional keyword argument is <em>stack_info</em>, which defaults to +<code class="docutils literal notranslate"><span class="pre">False</span></code>. If true, stack information is added to the logging +message, including the actual logging call. Note that this is not the same +stack information as that displayed through specifying <em>exc_info</em>: The +former is stack frames from the bottom of the stack up to the logging call +in the current thread, whereas the latter is information about stack frames +which have been unwound, following an exception, while searching for +exception handlers.</p> +<p>You can specify <em>stack_info</em> independently of <em>exc_info</em>, e.g. to just show +how you got to a certain point in your code, even when no exceptions were +raised. 
The stack frames are printed following a header line which says:</p> +<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Stack (most recent call last): +</pre></div> +</div> +<p>This mimics the <code class="docutils literal notranslate"><span class="pre">Traceback</span> <span class="pre">(most</span> <span class="pre">recent</span> <span class="pre">call</span> <span class="pre">last):</span></code> which is used when +displaying exception frames.</p> +<p>The third optional keyword argument is <em>stacklevel</em>, which defaults to <code class="docutils literal notranslate"><span class="pre">1</span></code>. +If greater than 1, the corresponding number of stack frames are skipped +when computing the line number and function name set in the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> +created for the logging event. This can be used in logging helpers so that +the function name, filename and line number recorded are not the information +for the helper function/method, but rather its caller. 
The name of this +parameter mirrors the equivalent one in the <a class="reference internal" href="https://docs.python.org/3/library/warnings.html#module-warnings" title="warnings: Issue warning messages and control their disposition."><code class="xref py py-mod docutils literal notranslate"><span class="pre">warnings</span></code></a> module.</p> +<p>The fourth keyword argument is <em>extra</em> which can be used to pass a +dictionary which is used to populate the __dict__ of the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> +created for the logging event with user-defined attributes. These custom +attributes can then be used as you like. For instance, they could be +incorporated into logged messages. For example:</p> +<div class="highlight-python3 notranslate"><div class="highlight" style="position: relative;"><pre><span></span><span class="n">FORMAT</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%(asctime)s</span><span class="s1"> </span><span class="si">%(clientip)-15s</span><span class="s1"> </span><span class="si">%(user)-8s</span><span class="s1"> </span><span class="si">%(message)s</span><span class="s1">'</span> +<span class="n">logging</span><span class="o">.</span><span class="n">basicConfig</span><span class="p">(</span><span class="nb">format</span><span class="o">=</span><span class="n">FORMAT</span><span class="p">)</span> +<span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'clientip'</span><span class="p">:</span> <span class="s1">'192.168.0.1'</span><span class="p">,</span> <span class="s1">'user'</span><span class="p">:</span> <span class="s1">'fbloggs'</span><span class="p">}</span> +<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span 
class="n">getLogger</span><span class="p">(</span><span class="s1">'tcpserver'</span><span class="p">)</span> +<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Protocol problem: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="s1">'connection reset'</span><span class="p">,</span> <span class="n">extra</span><span class="o">=</span><span class="n">d</span><span class="p">)</span> +</pre></div> +</div> +<p>would print something like</p> +<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>2006-02-08 22:20:02,165 192.168.0.1 fbloggs Protocol problem: connection reset +</pre></div> +</div> +<p>The keys in the dictionary passed in <em>extra</em> should not clash with the keys used +by the logging system. (See the section on <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-attributes"><span class="std std-ref">LogRecord attributes</span></a> for more +information on which keys are used by the logging system.)</p> +<p>If you choose to use these attributes in logged messages, you need to exercise +some care. In the above example, for instance, the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> has been +set up with a format string which expects ‘clientip’ and ‘user’ in the attribute +dictionary of the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a>. If these are missing, the message will +not be logged because a string formatting exception will occur. 
So in this case, +you always need to pass the <em>extra</em> dictionary with these keys.</p> +<p>While this might be annoying, this feature is intended for use in specialized +circumstances, such as multi-threaded servers where the same code executes in +many contexts, and interesting conditions which arise are dependent on this +context (such as remote client IP address and authenticated user name, in the +above example). In such circumstances, it is likely that specialized +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a>s would be used with particular <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler" title="logging.Handler"><code class="xref py py-class docutils literal notranslate"><span class="pre">Handler</span></code></a>s.</p> +<p>If no handler is attached to this logger (or any of its ancestors, +taking into account the relevant <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.propagate" title="logging.Logger.propagate"><code class="xref py py-attr docutils literal notranslate"><span class="pre">Logger.propagate</span></code></a> attributes), +the message will be sent to the handler set on <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.lastResort" title="logging.lastResort"><code class="xref py py-attr docutils literal notranslate"><span class="pre">lastResort</span></code></a>.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>stack_info</em> parameter was added.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.5: </span>The <em>exc_info</em> parameter can now accept exception instances.</p> +</div> +<div 
class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.8: </span>The <em>stacklevel</em> parameter was added.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.info"> +<span class="sig-name descname"><span class="pre">info</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.info" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a> on this logger. 
The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.warning"> +<span class="sig-name descname"><span class="pre">warning</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.warning" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a> on this logger. The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>There is an obsolete method <code class="docutils literal notranslate"><span class="pre">warn</span></code> which is functionally +identical to <code class="docutils literal notranslate"><span class="pre">warning</span></code>. 
As <code class="docutils literal notranslate"><span class="pre">warn</span></code> is deprecated, please do not use +it - use <code class="docutils literal notranslate"><span class="pre">warning</span></code> instead.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.error"> +<span class="sig-name descname"><span class="pre">error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.error" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a> on this logger. 
The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.debug" title="logging.Logger.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.critical"> +<span class="sig-name descname"><span class="pre">critical</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.critical" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="logging.CRITICAL"><code class="xref py py-const docutils literal notranslate"><span class="pre">CRITICAL</span></code></a> on this logger. 
The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.debug" title="logging.Logger.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.log"> +<span class="sig-name descname"><span class="pre">log</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.log" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with integer level <em>level</em> on this logger. 
The other arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.exception"> +<span class="sig-name descname"><span class="pre">exception</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.exception" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a> on this logger. The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>. Exception info is added to the logging +message. 
This method should only be called from an exception handler.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.addFilter"> +<span class="sig-name descname"><span class="pre">addFilter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filter</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.addFilter" title="Permalink to this definition">¶</a></dt> +<dd><p>Adds the specified filter <em>filter</em> to this logger.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.removeFilter"> +<span class="sig-name descname"><span class="pre">removeFilter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filter</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.removeFilter" title="Permalink to this definition">¶</a></dt> +<dd><p>Removes the specified filter <em>filter</em> from this logger.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.filter"> +<span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.filter" title="Permalink to this definition">¶</a></dt> +<dd><p>Apply this logger’s filters to the record and return <code class="docutils literal notranslate"><span class="pre">True</span></code> if the +record is to be processed. The filters are consulted in turn, until one of +them returns a false value. If none of them return a false value, the record +will be processed (passed to handlers). 
If one returns a false value, no +further processing of the record occurs.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.addHandler"> +<span class="sig-name descname"><span class="pre">addHandler</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">hdlr</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.addHandler" title="Permalink to this definition">¶</a></dt> +<dd><p>Adds the specified handler <em>hdlr</em> to this logger.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.removeHandler"> +<span class="sig-name descname"><span class="pre">removeHandler</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">hdlr</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.removeHandler" title="Permalink to this definition">¶</a></dt> +<dd><p>Removes the specified handler <em>hdlr</em> from this logger.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.findCaller"> +<span class="sig-name descname"><span class="pre">findCaller</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stack_info</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">stacklevel</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.findCaller" title="Permalink to this definition">¶</a></dt> +<dd><p>Finds the caller’s source filename 
and line number. Returns the filename, line +number, function name and stack information as a 4-element tuple. The stack +information is returned as <code class="docutils literal notranslate"><span class="pre">None</span></code> unless <em>stack_info</em> is <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p> +<p>The <em>stacklevel</em> parameter is passed from code calling the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a> +and other APIs. If greater than 1, the excess is used to skip stack frames +before determining the values to be returned. This will generally be useful +when calling logging APIs from helper/wrapper code, so that the information +in the event log refers not to the helper/wrapper code, but to the code that +calls it.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.handle"> +<span class="sig-name descname"><span class="pre">handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.handle" title="Permalink to this definition">¶</a></dt> +<dd><p>Handles a record by passing it to all handlers associated with this logger and +its ancestors (until a false value of <em>propagate</em> is found). This method is used +for unpickled records received from a socket, as well as those created locally. 
+Logger-level filtering is applied using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.filter" title="logging.Logger.filter"><code class="xref py py-meth docutils literal notranslate"><span class="pre">filter()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.makeRecord"> +<span class="sig-name descname"><span class="pre">makeRecord</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">level</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fn</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lno</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">exc_info</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">func</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extra</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sinfo</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.makeRecord" title="Permalink to this definition">¶</a></dt> +<dd><p>This is a factory method which can be overridden in subclasses to create +specialized <a class="reference internal" 
href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> instances.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Logger.hasHandlers"> +<span class="sig-name descname"><span class="pre">hasHandlers</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Logger.hasHandlers" title="Permalink to this definition">¶</a></dt> +<dd><p>Checks to see if this logger has any handlers configured. This is done by +looking for handlers in this logger and its parents in the logger hierarchy. +Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if a handler was found, else <code class="docutils literal notranslate"><span class="pre">False</span></code>. The method stops searching +up the hierarchy whenever a logger with the ‘propagate’ attribute set to +false is found - that will be the last logger which is checked for the +existence of handlers.</p> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.2.</span></p> +</div> +</dd></dl> + +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.7: </span>Loggers can now be pickled and unpickled.</p> +</div> +</dd></dl> + +</section> +<section id="logging-levels"> +<span id="levels"></span><h2>Logging Levels<a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging-levels" title="Permalink to this headline">¶</a></h2> +<p>The numeric values of logging levels are given in the following table. These are +primarily of interest if you want to define your own levels, and need them to +have specific values relative to the predefined levels. 
If you define a level +with the same numeric value, it overwrites the predefined value; the predefined +name is lost.</p> +<div class="responsive-table__container"><table class="docutils align-default"> +<colgroup> +<col style="width: 31%"> +<col style="width: 20%"> +<col style="width: 49%"> +</colgroup> +<thead> +<tr class="row-odd"><th class="head"><p>Level</p></th> +<th class="head"><p>Numeric value</p></th> +<th class="head"><p>What it means / When to use it</p></th> +</tr> +</thead> +<tbody> +<tr class="row-even"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.NOTSET"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">NOTSET</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.NOTSET" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>0</p></td> +<td><p>When set on a logger, indicates that +ancestor loggers are to be consulted +to determine the effective level. +If that still resolves to +<code class="xref py py-const docutils literal notranslate"><span class="pre">NOTSET</span></code>, then all events +are logged. 
When set on a handler, +all events are handled.</p></td> +</tr> +<tr class="row-odd"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.DEBUG"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">DEBUG</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>10</p></td> +<td><p>Detailed information, typically only +of interest to a developer trying to +diagnose a problem.</p></td> +</tr> +<tr class="row-even"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.INFO"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">INFO</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>20</p></td> +<td><p>Confirmation that things are working +as expected.</p></td> +</tr> +<tr class="row-odd"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.WARNING"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">WARNING</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>30</p></td> +<td><p>An indication that something +unexpected happened, or that a +problem might occur in the near +future (e.g. ‘disk space low’). 
The +software is still working as +expected.</p></td> +</tr> +<tr class="row-even"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.ERROR"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">ERROR</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>40</p></td> +<td><p>Due to a more serious problem, the +software has not been able to +perform some function.</p></td> +</tr> +<tr class="row-odd"><td><dl class="py data"> +<dt class="sig sig-object py" id="logging.CRITICAL"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">CRITICAL</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="Permalink to this definition">¶</a></dt> +<dd></dd></dl> + +</td> +<td><p>50</p></td> +<td><p>A serious error, indicating that the +program itself may be unable to +continue running.</p></td> +</tr> +</tbody> +</table></div> +</section> +<section id="handler-objects"> +<span id="handler"></span><h2>Handler Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#handler-objects" title="Permalink to this headline">¶</a></h2> +<p>Handlers have the following attributes and methods. Note that <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler" title="logging.Handler"><code class="xref py py-class docutils literal notranslate"><span class="pre">Handler</span></code></a> +is never instantiated directly; this class acts as a base for more useful +subclasses. 
However, the <code class="xref py py-meth docutils literal notranslate"><span class="pre">__init__()</span></code> method in subclasses needs to call +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.__init__" title="logging.Handler.__init__"><code class="xref py py-meth docutils literal notranslate"><span class="pre">Handler.__init__()</span></code></a>.</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.Handler"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">Handler</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler" title="Permalink to this definition">¶</a></dt> +<dd><dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.__init__"> +<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">NOTSET</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.__init__" title="Permalink to this definition">¶</a></dt> +<dd><p>Initializes the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler" title="logging.Handler"><code class="xref py py-class docutils literal notranslate"><span class="pre">Handler</span></code></a> instance by setting its level, setting the list +of filters to the empty list and creating a lock (using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.createLock" title="logging.Handler.createLock"><code class="xref py py-meth docutils literal notranslate"><span 
class="pre">createLock()</span></code></a>) for +serializing access to an I/O mechanism.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.createLock"> +<span class="sig-name descname"><span class="pre">createLock</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.createLock" title="Permalink to this definition">¶</a></dt> +<dd><p>Initializes a thread lock which can be used to serialize access to underlying +I/O functionality which may not be threadsafe.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.acquire"> +<span class="sig-name descname"><span class="pre">acquire</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.acquire" title="Permalink to this definition">¶</a></dt> +<dd><p>Acquires the thread lock created with <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.createLock" title="logging.Handler.createLock"><code class="xref py py-meth docutils literal notranslate"><span class="pre">createLock()</span></code></a>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.release"> +<span class="sig-name descname"><span class="pre">release</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.release" title="Permalink to this definition">¶</a></dt> +<dd><p>Releases the thread lock acquired with <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.acquire" title="logging.Handler.acquire"><code class="xref py py-meth docutils literal notranslate"><span class="pre">acquire()</span></code></a>.</p> 
+</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.setLevel"> +<span class="sig-name descname"><span class="pre">setLevel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.setLevel" title="Permalink to this definition">¶</a></dt> +<dd><p>Sets the threshold for this handler to <em>level</em>. Logging messages which are +less severe than <em>level</em> will be ignored. When a handler is created, the +level is set to <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.NOTSET" title="logging.NOTSET"><code class="xref py py-const docutils literal notranslate"><span class="pre">NOTSET</span></code></a> (which causes all messages to be +processed).</p> +<p>See <a class="reference internal" href="https://docs.python.org/3/library/logging.html#levels"><span class="std std-ref">Logging Levels</span></a> for a list of levels.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>level</em> parameter now accepts a string representation of the +level such as ‘INFO’ as an alternative to the integer constants +such as <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a>.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.setFormatter"> +<span class="sig-name descname"><span class="pre">setFormatter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">fmt</span></span></em><span class="sig-paren">)</span><a class="headerlink" 
href="https://docs.python.org/3/library/logging.html#logging.Handler.setFormatter" title="Permalink to this definition">¶</a></dt> +<dd><p>Sets the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> for this handler to <em>fmt</em>.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.addFilter"> +<span class="sig-name descname"><span class="pre">addFilter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filter</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.addFilter" title="Permalink to this definition">¶</a></dt> +<dd><p>Adds the specified filter <em>filter</em> to this handler.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.removeFilter"> +<span class="sig-name descname"><span class="pre">removeFilter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filter</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.removeFilter" title="Permalink to this definition">¶</a></dt> +<dd><p>Removes the specified filter <em>filter</em> from this handler.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.filter"> +<span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.filter" title="Permalink to this definition">¶</a></dt> +<dd><p>Apply this 
handler’s filters to the record and return <code class="docutils literal notranslate"><span class="pre">True</span></code> if the +record is to be processed. The filters are consulted in turn, until one of +them returns a false value. If none of them return a false value, the record +will be emitted. If one returns a false value, the handler will not emit the +record.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.flush"> +<span class="sig-name descname"><span class="pre">flush</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.flush" title="Permalink to this definition">¶</a></dt> +<dd><p>Ensure all logging output has been flushed. This version does nothing and is +intended to be implemented by subclasses.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.close"> +<span class="sig-name descname"><span class="pre">close</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.close" title="Permalink to this definition">¶</a></dt> +<dd><p>Tidy up any resources used by the handler. This version does no output but +removes the handler from an internal list of handlers which is closed when +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.shutdown" title="logging.shutdown"><code class="xref py py-func docutils literal notranslate"><span class="pre">shutdown()</span></code></a> is called. 
Subclasses should ensure that this gets called +from overridden <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.close" title="logging.Handler.close"><code class="xref py py-meth docutils literal notranslate"><span class="pre">close()</span></code></a> methods.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.handle"> +<span class="sig-name descname"><span class="pre">handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.handle" title="Permalink to this definition">¶</a></dt> +<dd><p>Conditionally emits the specified logging record, depending on filters which may +have been added to the handler. Wraps the actual emission of the record with +acquisition/release of the I/O thread lock.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.handleError"> +<span class="sig-name descname"><span class="pre">handleError</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.handleError" title="Permalink to this definition">¶</a></dt> +<dd><p>This method should be called from handlers when an exception is encountered +during an <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler.emit" title="logging.Handler.emit"><code class="xref py py-meth docutils literal notranslate"><span class="pre">emit()</span></code></a> call. 
If the module-level attribute +<code class="docutils literal notranslate"><span class="pre">raiseExceptions</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, exceptions get silently ignored. This is +what is mostly wanted for a logging system - most users will not care about +errors in the logging system, they are more interested in application +errors. You could, however, replace this with a custom handler if you wish. +The specified record is the one which was being processed when the exception +occurred. (The default value of <code class="docutils literal notranslate"><span class="pre">raiseExceptions</span></code> is <code class="docutils literal notranslate"><span class="pre">True</span></code>, as that is +more useful during development).</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.format"> +<span class="sig-name descname"><span class="pre">format</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.format" title="Permalink to this definition">¶</a></dt> +<dd><p>Do formatting for a record - if a formatter is set, use it. Otherwise, use the +default formatter for the module.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Handler.emit"> +<span class="sig-name descname"><span class="pre">emit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Handler.emit" title="Permalink to this definition">¶</a></dt> +<dd><p>Do whatever it takes to actually log the specified logging record. 
This version +is intended to be implemented by subclasses and so raises a +<a class="reference internal" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="NotImplementedError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">NotImplementedError</span></code></a>.</p> +<div class="admonition warning"> +<p class="admonition-title">Warning</p> +<p>This method is called after a handler-level lock is acquired, which +is released after this method returns. When you override this method, note +that you should be careful when calling anything that invokes other parts of +the logging API which might do locking, because that might result in a +deadlock. Specifically:</p> +<ul class="simple"> +<li><p>Logging configuration APIs acquire the module-level lock, and then +individual handler-level locks as those handlers are configured.</p></li> +<li><p>Many logging APIs lock the module-level lock. If such an API is called +from this method, it could cause a deadlock if a configuration call is +made on another thread, because that thread will try to acquire the +module-level lock <em>before</em> the handler-level lock, whereas this thread +tries to acquire the module-level lock <em>after</em> the handler-level lock +(because in this method, the handler-level lock has already been acquired).</p></li> +</ul> +</div> +</dd></dl> + +</dd></dl> + +<p>For a list of handlers included as standard, see <a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#module-logging.handlers" title="logging.handlers: Handlers for the logging module."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging.handlers</span></code></a>.</p> +</section> +<section id="formatter-objects"> +<span id="id1"></span><h2>Formatter Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#formatter-objects" title="Permalink to this headline">¶</a></h2> +<p><a 
class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> objects have the following attributes and methods. They are +responsible for converting a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> to (usually) a string which can +be interpreted by either a human or an external system. The base +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> allows a formatting string to be specified. If none is +supplied, the default value of <code class="docutils literal notranslate"><span class="pre">'%(message)s'</span></code> is used, which just includes +the message in the logging call. To have additional items of information in the +formatted output (such as a timestamp), keep reading.</p> +<p>A Formatter can be initialized with a format string which makes use of knowledge +of the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> attributes - such as the default value mentioned above +making use of the fact that the user’s message and arguments are pre-formatted +into a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a>’s <em>message</em> attribute. 
This format string contains +standard Python %-style mapping keys. See section <a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#old-string-formatting"><span class="std std-ref">printf-style String Formatting</span></a> +for more information on string formatting.</p> +<p>The useful mapping keys in a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> are given in the section on +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-attributes"><span class="std std-ref">LogRecord attributes</span></a>.</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.Formatter"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">Formatter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">fmt</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">datefmt</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">style</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'%'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">validate</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span 
class="n"><span class="pre">defaults</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns a new instance of the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> class. The instance is +initialized with a format string for the message as a whole, as well as a +format string for the date/time portion of a message. If no <em>fmt</em> is +specified, <code class="docutils literal notranslate"><span class="pre">'%(message)s'</span></code> is used. If no <em>datefmt</em> is specified, a format +is used which is described in the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatTime" title="logging.Formatter.formatTime"><code class="xref py py-meth docutils literal notranslate"><span class="pre">formatTime()</span></code></a> documentation.</p> +<p>The <em>style</em> parameter can be one of ‘%’, ‘{’ or ‘$’ and determines how +the format string will be merged with its data: using one of %-formatting, +<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str.format" title="str.format"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.format()</span></code></a> or <a class="reference internal" href="https://docs.python.org/3/library/string.html#string.Template" title="string.Template"><code class="xref py py-class docutils literal notranslate"><span class="pre">string.Template</span></code></a>. This only applies to the +format string <em>fmt</em> (e.g. 
<code class="docutils literal notranslate"><span class="pre">'%(message)s'</span></code> or <code class="docutils literal notranslate"><span class="pre">'{message}'</span></code>), not to the +actual log messages passed to <code class="docutils literal notranslate"><span class="pre">Logger.debug</span></code> etc.; see +<a class="reference internal" href="https://docs.python.org/3/howto/logging-cookbook.html#formatting-styles"><span class="std std-ref">Using particular formatting styles throughout your application</span></a> for more information on using {- and $-formatting +for log messages.</p> +<p>The <em>defaults</em> parameter can be a dictionary with default values to use in +custom fields. For example: +<code class="docutils literal notranslate"><span class="pre">logging.Formatter('%(ip)s</span> <span class="pre">%(message)s',</span> <span class="pre">defaults={"ip":</span> <span class="pre">None})</span></code></p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>style</em> parameter was added.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.8: </span>The <em>validate</em> parameter was added. Incorrect or mismatched style and fmt +will raise a <code class="docutils literal notranslate"><span class="pre">ValueError</span></code>. 
+For example: <code class="docutils literal notranslate"><span class="pre">logging.Formatter('%(asctime)s</span> <span class="pre">-</span> <span class="pre">%(message)s',</span> <span class="pre">style='{')</span></code>.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.10: </span>The <em>defaults</em> parameter was added.</p> +</div> +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Formatter.format"> +<span class="sig-name descname"><span class="pre">format</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Formatter.format" title="Permalink to this definition">¶</a></dt> +<dd><p>The record’s attribute dictionary is used as the operand to a string +formatting operation. Returns the resulting string. Before formatting the +dictionary, a couple of preparatory steps are carried out. The <em>message</em> +attribute of the record is computed using <em>msg</em> % <em>args</em>. If the +formatting string contains <code class="docutils literal notranslate"><span class="pre">'(asctime)'</span></code>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatTime" title="logging.Formatter.formatTime"><code class="xref py py-meth docutils literal notranslate"><span class="pre">formatTime()</span></code></a> is called +to format the event time. If there is exception information, it is +formatted using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatException" title="logging.Formatter.formatException"><code class="xref py py-meth docutils literal notranslate"><span class="pre">formatException()</span></code></a> and appended to the message. 
Note +that the formatted exception information is cached in attribute +<em>exc_text</em>. This is useful because the exception information can be +pickled and sent across the wire, but you should be careful if you have +more than one <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> subclass which customizes the formatting +of exception information. In this case, you will have to clear the cached +value (by setting the <em>exc_text</em> attribute to <code class="docutils literal notranslate"><span class="pre">None</span></code>) after a formatter +has done its formatting, so that the next formatter to handle the event +doesn’t use the cached value, but recalculates it afresh.</p> +<p>If stack information is available, it’s appended after the exception +information, using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatStack" title="logging.Formatter.formatStack"><code class="xref py py-meth docutils literal notranslate"><span class="pre">formatStack()</span></code></a> to transform it if necessary.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Formatter.formatTime"> +<span class="sig-name descname"><span class="pre">formatTime</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">datefmt</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatTime" title="Permalink to this definition">¶</a></dt> +<dd><p>This method should be called from <a class="reference 
internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.format" title="logging.Formatter.format"><code class="xref py py-meth docutils literal notranslate"><span class="pre">format()</span></code></a> by a formatter which +wants to make use of a formatted time. This method can be overridden in +formatters to provide for any specific requirement, but the basic behavior +is as follows: if <em>datefmt</em> (a string) is specified, it is used with +<a class="reference internal" href="https://docs.python.org/3/library/time.html#time.strftime" title="time.strftime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.strftime()</span></code></a> to format the creation time of the +record. Otherwise, the format ‘%Y-%m-%d %H:%M:%S,uuu’ is used, where the +uuu part is a millisecond value and the other letters are as per the +<a class="reference internal" href="https://docs.python.org/3/library/time.html#time.strftime" title="time.strftime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.strftime()</span></code></a> documentation. An example time in this format is +<code class="docutils literal notranslate"><span class="pre">2003-01-23</span> <span class="pre">00:29:50,411</span></code>. The resulting string is returned.</p> +<p>This function uses a user-configurable function to convert the creation +time to a tuple. 
By default, <a class="reference internal" href="https://docs.python.org/3/library/time.html#time.localtime" title="time.localtime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.localtime()</span></code></a> is used; to change +this for a particular formatter instance, set the <code class="docutils literal notranslate"><span class="pre">converter</span></code> attribute +to a function with the same signature as <a class="reference internal" href="https://docs.python.org/3/library/time.html#time.localtime" title="time.localtime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.localtime()</span></code></a> or +<a class="reference internal" href="https://docs.python.org/3/library/time.html#time.gmtime" title="time.gmtime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.gmtime()</span></code></a>. To change it for all formatters, for example if you +want all logging times to be shown in GMT, set the <code class="docutils literal notranslate"><span class="pre">converter</span></code> +attribute in the <code class="docutils literal notranslate"><span class="pre">Formatter</span></code> class.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.3: </span>Previously, the default format was hard-coded as in this example: +<code class="docutils literal notranslate"><span class="pre">2010-09-06</span> <span class="pre">22:38:15,292</span></code> where the part before the comma is +handled by a strftime format string (<code class="docutils literal notranslate"><span class="pre">'%Y-%m-%d</span> <span class="pre">%H:%M:%S'</span></code>), and the +part after the comma is a millisecond value. 
Because strftime does not +have a format placeholder for milliseconds, the millisecond value is +appended using another format string, <code class="docutils literal notranslate"><span class="pre">'%s,%03d'</span></code> — and both of these +format strings have been hardcoded into this method. With the change, +these strings are defined as class-level attributes which can be +overridden at the instance level when desired. The names of the +attributes are <code class="docutils literal notranslate"><span class="pre">default_time_format</span></code> (for the strftime format string) +and <code class="docutils literal notranslate"><span class="pre">default_msec_format</span></code> (for appending the millisecond value).</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.9: </span>The <code class="docutils literal notranslate"><span class="pre">default_msec_format</span></code> can be <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p> +</div> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Formatter.formatException"> +<span class="sig-name descname"><span class="pre">formatException</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exc_info</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatException" title="Permalink to this definition">¶</a></dt> +<dd><p>Formats the specified exception information (a standard exception tuple as +returned by <a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.exc_info()</span></code></a>) as a string. 
This default implementation +just uses <a class="reference internal" href="https://docs.python.org/3/library/traceback.html#traceback.print_exception" title="traceback.print_exception"><code class="xref py py-func docutils literal notranslate"><span class="pre">traceback.print_exception()</span></code></a>. The resulting string is +returned.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Formatter.formatStack"> +<span class="sig-name descname"><span class="pre">formatStack</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stack_info</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Formatter.formatStack" title="Permalink to this definition">¶</a></dt> +<dd><p>Formats the specified stack information (a string as returned by +<a class="reference internal" href="https://docs.python.org/3/library/traceback.html#traceback.print_stack" title="traceback.print_stack"><code class="xref py py-func docutils literal notranslate"><span class="pre">traceback.print_stack()</span></code></a>, but with the last newline removed) as a +string. 
This default implementation just returns the input value.</p> +</dd></dl> + +</dd></dl> + +<dl class="py class"> +<dt class="sig sig-object py" id="logging.BufferingFormatter"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">BufferingFormatter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">linefmt</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.BufferingFormatter" title="Permalink to this definition">¶</a></dt> +<dd><p>A base formatter class suitable for subclassing when you want to format a +number of records. You can pass a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> instance which you want +to use to format each line (that corresponds to a single record). If not +specified, the default formatter (which just outputs the event message) is +used as the line formatter.</p> +<dl class="py method"> +<dt class="sig sig-object py" id="logging.BufferingFormatter.formatHeader"> +<span class="sig-name descname"><span class="pre">formatHeader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">records</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.BufferingFormatter.formatHeader" title="Permalink to this definition">¶</a></dt> +<dd><p>Return a header for a list of <em>records</em>. The base implementation just +returns the empty string. 
You will need to override this method if you +want specific behaviour, e.g. to show the count of records, a title or a +separator line.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.BufferingFormatter.formatFooter"> +<span class="sig-name descname"><span class="pre">formatFooter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">records</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.BufferingFormatter.formatFooter" title="Permalink to this definition">¶</a></dt> +<dd><p>Return a footer for a list of <em>records</em>. The base implementation just +returns the empty string. You will need to override this method if you +want specific behaviour, e.g. to show the count of records or a separator +line.</p> +</dd></dl> + +<dl class="py method"> +<dt class="sig sig-object py" id="logging.BufferingFormatter.format"> +<span class="sig-name descname"><span class="pre">format</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">records</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.BufferingFormatter.format" title="Permalink to this definition">¶</a></dt> +<dd><p>Return formatted text for a list of <em>records</em>. 
The base implementation +just returns the empty string if there are no records; otherwise, it +returns the concatenation of the header, each record formatted with the +line formatter, and the footer.</p> +</dd></dl> + +</dd></dl> + +</section> +<section id="filter-objects"> +<span id="filter"></span><h2>Filter Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#filter-objects" title="Permalink to this headline">¶</a></h2> +<p><code class="docutils literal notranslate"><span class="pre">Filters</span></code> can be used by <code class="docutils literal notranslate"><span class="pre">Handlers</span></code> and <code class="docutils literal notranslate"><span class="pre">Loggers</span></code> for more sophisticated +filtering than is provided by levels. The base filter class only allows events +which are below a certain point in the logger hierarchy. For example, a filter +initialized with ‘A.B’ will allow events logged by loggers ‘A.B’, ‘A.B.C’, +‘A.B.C.D’, ‘A.B.D’ etc. but not ‘A.BB’, ‘B.A.B’ etc. 
If initialized with the +empty string, all events are passed.</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.Filter"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">Filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Filter" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns an instance of the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Filter" title="logging.Filter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Filter</span></code></a> class. If <em>name</em> is specified, it +names a logger which, together with its children, will have its events allowed +through the filter. If <em>name</em> is the empty string, allows every event.</p> +<dl class="py method"> +<dt class="sig sig-object py" id="logging.Filter.filter"> +<span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">record</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.Filter.filter" title="Permalink to this definition">¶</a></dt> +<dd><p>Is the specified record to be logged? Returns zero for no, nonzero for +yes. 
If deemed appropriate, the record may be modified in-place by this +method.</p> +</dd></dl> + +</dd></dl> + +<p>Note that filters attached to handlers are consulted before an event is +emitted by the handler, whereas filters attached to loggers are consulted +whenever an event is logged (using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.info" title="logging.info"><code class="xref py py-meth docutils literal notranslate"><span class="pre">info()</span></code></a>, +etc.), before sending an event to handlers. This means that events which have +been generated by descendant loggers will not be filtered by a logger’s filter +setting, unless the filter has also been applied to those descendant loggers.</p> +<p>You don’t actually need to subclass <code class="docutils literal notranslate"><span class="pre">Filter</span></code>: you can pass any instance +which has a <code class="docutils literal notranslate"><span class="pre">filter</span></code> method with the same semantics.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>You don’t need to create specialized <code class="docutils literal notranslate"><span class="pre">Filter</span></code> classes, or use other +classes with a <code class="docutils literal notranslate"><span class="pre">filter</span></code> method: you can use a function (or other +callable) as a filter. 
The filtering logic will check to see if the filter +object has a <code class="docutils literal notranslate"><span class="pre">filter</span></code> attribute: if it does, it’s assumed to be a +<code class="docutils literal notranslate"><span class="pre">Filter</span></code> and its <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Filter.filter" title="logging.Filter.filter"><code class="xref py py-meth docutils literal notranslate"><span class="pre">filter()</span></code></a> method is called. Otherwise, it’s +assumed to be a callable and called with the record as the single +parameter. The returned value should conform to that returned by +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Filter.filter" title="logging.Filter.filter"><code class="xref py py-meth docutils literal notranslate"><span class="pre">filter()</span></code></a>.</p> +</div> +<p>Although filters are used primarily to filter records based on more +sophisticated criteria than levels, they get to see every record which is +processed by the handler or logger they’re attached to: this can be useful if +you want to do things like counting how many records were processed by a +particular logger or handler, or adding, changing or removing attributes in +the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> being processed. 
Obviously changing the LogRecord needs +to be done with some care, but it does allow the injection of contextual +information into logs (see <a class="reference internal" href="https://docs.python.org/3/howto/logging-cookbook.html#filters-contextual"><span class="std std-ref">Using Filters to impart contextual information</span></a>).</p> +</section> +<section id="logrecord-objects"> +<span id="log-record"></span><h2>LogRecord Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#logrecord-objects" title="Permalink to this headline">¶</a></h2> +<p><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> instances are created automatically by the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a> +every time something is logged, and can be created manually via +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.makeLogRecord" title="logging.makeLogRecord"><code class="xref py py-func docutils literal notranslate"><span class="pre">makeLogRecord()</span></code></a> (for example, from a pickled event received over the +wire).</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.LogRecord"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">LogRecord</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">level</span></span></em>, <em class="sig-param"><span 
class="n"><span class="pre">pathname</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lineno</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">exc_info</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">func</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sinfo</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="Permalink to this definition">¶</a></dt> +<dd><p>Contains all the information pertinent to the event being logged.</p> +<p>The primary information is passed in <em>msg</em> and <em>args</em>, +which are combined using <code class="docutils literal notranslate"><span class="pre">msg</span> <span class="pre">%</span> <span class="pre">args</span></code> to create +the <code class="xref py py-attr docutils literal notranslate"><span class="pre">message</span></code> attribute of the record.</p> +<dl class="field-list simple"> +<dt class="field-odd">Parameters</dt> +<dd class="field-odd"><ul class="simple"> +<li><p><strong>name</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><em>str</em></a>) – The name of the logger used to log the event +represented by this <code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code>. 
+Note that the logger name in the <code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code> +will always have this value, +even though it may be emitted by a handler +attached to a different (ancestor) logger.</p></li> +<li><p><strong>level</strong> (<a class="reference internal" href="https://docs.python.org/3/library/functions.html#int" title="int"><em>int</em></a>) – The <a class="reference internal" href="https://docs.python.org/3/library/logging.html#levels"><span class="std std-ref">numeric level</span></a> of the logging event +(such as <code class="docutils literal notranslate"><span class="pre">10</span></code> for <code class="docutils literal notranslate"><span class="pre">DEBUG</span></code>, <code class="docutils literal notranslate"><span class="pre">20</span></code> for <code class="docutils literal notranslate"><span class="pre">INFO</span></code>, etc). +Note that this is converted to <em>two</em> attributes of the LogRecord: +<code class="xref py py-attr docutils literal notranslate"><span class="pre">levelno</span></code> for the numeric value +and <code class="xref py py-attr docutils literal notranslate"><span class="pre">levelname</span></code> for the corresponding level name.</p></li> +<li><p><strong>pathname</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><em>str</em></a>) – The full string path of the source file +where the logging call was made.</p></li> +<li><p><strong>lineno</strong> (<a class="reference internal" href="https://docs.python.org/3/library/functions.html#int" title="int"><em>int</em></a>) – The line number in the source file +where the logging call was made.</p></li> +<li><p><strong>msg</strong> (<a class="reference internal" href="https://docs.python.org/3/library/typing.html#typing.Any" title="typing.Any"><em>Any</em></a>) – The event description message, +which can be a %-format string with placeholders for 
variable data, +or an arbitrary object (see <a class="reference internal" href="https://docs.python.org/3/howto/logging.html#arbitrary-object-messages"><span class="std std-ref">Using arbitrary objects as messages</span></a>).</p></li> +<li><p><strong>args</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="tuple"><em>tuple</em></a><em> | </em><a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#dict" title="dict"><em>dict</em></a><em>[</em><a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><em>str</em></a><em>, </em><a class="reference internal" href="https://docs.python.org/3/library/typing.html#typing.Any" title="typing.Any"><em>Any</em></a><em>]</em>) – Variable data to merge into the <em>msg</em> argument +to obtain the event description.</p></li> +<li><p><strong>exc_info</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="tuple"><em>tuple</em></a><em>[</em><a class="reference internal" href="https://docs.python.org/3/library/functions.html#type" title="type"><em>type</em></a><em>[</em><a class="reference internal" href="https://docs.python.org/3/library/exceptions.html#BaseException" title="BaseException"><em>BaseException</em></a><em>]</em><em>, </em><a class="reference internal" href="https://docs.python.org/3/library/exceptions.html#BaseException" title="BaseException"><em>BaseException</em></a><em>, </em><a class="reference internal" href="https://docs.python.org/3/library/types.html#types.TracebackType" title="types.TracebackType"><em>types.TracebackType</em></a><em>] </em><em>| </em><em>None</em>) – An exception tuple with the current exception information, +as returned by <a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span 
class="pre">sys.exc_info()</span></code></a>, +or <code class="docutils literal notranslate"><span class="pre">None</span></code> if no exception information is available.</p></li> +<li><p><strong>func</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><em>str</em></a><em> | </em><em>None</em>) – The name of the function or method +from which the logging call was invoked.</p></li> +<li><p><strong>sinfo</strong> (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><em>str</em></a><em> | </em><em>None</em>) – A text string representing stack information +from the base of the stack in the current thread, +up to the logging call.</p></li> +</ul> +</dd> +</dl> +<dl class="py method"> +<dt class="sig sig-object py" id="logging.LogRecord.getMessage"> +<span class="sig-name descname"><span class="pre">getMessage</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns the message for this <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> instance after merging any +user-supplied arguments with the message. If the user-supplied message +argument to the logging call is not a string, <a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str" title="str"><code class="xref py py-func docutils literal notranslate"><span class="pre">str()</span></code></a> is called on it to +convert it to a string. 
This allows use of user-defined classes as +messages, whose <code class="docutils literal notranslate"><span class="pre">__str__</span></code> method can return the actual format string to +be used.</p> +</dd></dl> + +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The creation of a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> has been made more configurable by +providing a factory which is used to create the record. The factory can be +retrieved using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.getLogRecordFactory" title="logging.getLogRecordFactory"><code class="xref py py-func docutils literal notranslate"><span class="pre">getLogRecordFactory()</span></code></a> and set using <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.setLogRecordFactory" title="logging.setLogRecordFactory"><code class="xref py py-func docutils literal notranslate"><span class="pre">setLogRecordFactory()</span></code></a> +(see the latter for the factory’s signature).</p> +</div> +<p>This functionality can be used to inject your own values into a +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> at creation time. 
You can use the following pattern:</p> +<div class="highlight-python3 notranslate"><div class="highlight" style="position: relative;"><pre><span></span><span class="n">old_factory</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogRecordFactory</span><span class="p">()</span> + +<span class="k">def</span> <span class="nf">record_factory</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="n">record</span> <span class="o">=</span> <span class="n">old_factory</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="n">record</span><span class="o">.</span><span class="n">custom_attribute</span> <span class="o">=</span> <span class="mh">0xdecafbad</span> + <span class="k">return</span> <span class="n">record</span> + +<span class="n">logging</span><span class="o">.</span><span class="n">setLogRecordFactory</span><span class="p">(</span><span class="n">record_factory</span><span class="p">)</span> +</pre></div> +</div> +<p>With this pattern, multiple factories could be chained, and as long +as they don’t overwrite each other’s attributes or unintentionally +overwrite the standard attributes listed above, there should be no +surprises.</p> +</dd></dl> + +</section> +<section id="logrecord-attributes"> +<span id="id2"></span><h2>LogRecord attributes<a class="headerlink" href="https://docs.python.org/3/library/logging.html#logrecord-attributes" title="Permalink to this headline">¶</a></h2> +<p>The LogRecord has a number of attributes, most of which are derived from the +parameters to the constructor. (Note that the names do not always correspond +exactly between the LogRecord constructor parameters and the LogRecord +attributes.) 
These attributes can be used to merge data from the record into +the format string. The following table lists (in alphabetical order) the +attribute names, their meanings and the corresponding placeholder in a %-style +format string.</p> +<p>If you are using {}-formatting (<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str.format" title="str.format"><code class="xref py py-func docutils literal notranslate"><span class="pre">str.format()</span></code></a>), you can use +<code class="docutils literal notranslate"><span class="pre">{attrname}</span></code> as the placeholder in the format string. If you are using +$-formatting (<a class="reference internal" href="https://docs.python.org/3/library/string.html#string.Template" title="string.Template"><code class="xref py py-class docutils literal notranslate"><span class="pre">string.Template</span></code></a>), use the form <code class="docutils literal notranslate"><span class="pre">${attrname}</span></code>. In +both cases, of course, replace <code class="docutils literal notranslate"><span class="pre">attrname</span></code> with the actual attribute name +you want to use.</p> +<p>In the case of {}-formatting, you can specify formatting flags by placing them +after the attribute name, separated from it with a colon. For example: a +placeholder of <code class="docutils literal notranslate"><span class="pre">{msecs:03d}</span></code> would format a millisecond value of <code class="docutils literal notranslate"><span class="pre">4</span></code> as +<code class="docutils literal notranslate"><span class="pre">004</span></code>. 
Refer to the <a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str.format" title="str.format"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.format()</span></code></a> documentation for full details on +the options available to you.</p> +<div class="responsive-table__container"><table class="docutils align-default"> +<colgroup> +<col style="width: 18%"> +<col style="width: 28%"> +<col style="width: 53%"> +</colgroup> +<thead> +<tr class="row-odd"><th class="head"><p>Attribute name</p></th> +<th class="head"><p>Format</p></th> +<th class="head"><p>Description</p></th> +</tr> +</thead> +<tbody> +<tr class="row-even"><td><p>args</p></td> +<td><p>You shouldn’t need to +format this yourself.</p></td> +<td><p>The tuple of arguments merged into <code class="docutils literal notranslate"><span class="pre">msg</span></code> to +produce <code class="docutils literal notranslate"><span class="pre">message</span></code>, or a dict whose values +are used for the merge (when there is only one +argument, and it is a dictionary).</p></td> +</tr> +<tr class="row-odd"><td><p>asctime</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(asctime)s</span></code></p></td> +<td><p>Human-readable time when the +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> was created. 
By default +this is of the form ‘2003-07-08 16:49:45,896’ +(the numbers after the comma are the millisecond +portion of the time).</p></td> +</tr> +<tr class="row-even"><td><p>created</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(created)f</span></code></p></td> +<td><p>Time when the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> was created +(as returned by <a class="reference internal" href="https://docs.python.org/3/library/time.html#time.time" title="time.time"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.time()</span></code></a>).</p></td> +</tr> +<tr class="row-odd"><td><p>exc_info</p></td> +<td><p>You shouldn’t need to +format this yourself.</p></td> +<td><p>Exception tuple (à la <code class="docutils literal notranslate"><span class="pre">sys.exc_info</span></code>) or, +if no exception has occurred, <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p></td> +</tr> +<tr class="row-even"><td><p>filename</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(filename)s</span></code></p></td> +<td><p>Filename portion of <code class="docutils literal notranslate"><span class="pre">pathname</span></code>.</p></td> +</tr> +<tr class="row-odd"><td><p>funcName</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(funcName)s</span></code></p></td> +<td><p>Name of function containing the logging call.</p></td> +</tr> +<tr class="row-even"><td><p>levelname</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(levelname)s</span></code></p></td> +<td><p>Text logging level for the message +(<code class="docutils literal notranslate"><span class="pre">'DEBUG'</span></code>, <code class="docutils literal 
notranslate"><span class="pre">'INFO'</span></code>, <code class="docutils literal notranslate"><span class="pre">'WARNING'</span></code>, +<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code>, <code class="docutils literal notranslate"><span class="pre">'CRITICAL'</span></code>).</p></td> +</tr> +<tr class="row-odd"><td><p>levelno</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(levelno)s</span></code></p></td> +<td><p>Numeric logging level for the message +(<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="logging.DEBUG"><code class="xref py py-const docutils literal notranslate"><span class="pre">DEBUG</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="logging.CRITICAL"><code class="xref py py-const docutils literal notranslate"><span class="pre">CRITICAL</span></code></a>).</p></td> +</tr> +<tr class="row-even"><td><p>lineno</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(lineno)d</span></code></p></td> +<td><p>Source line number where the logging call was +issued (if available).</p></td> +</tr> +<tr class="row-odd"><td><p>message</p></td> +<td><p><code class="docutils literal 
notranslate"><span class="pre">%(message)s</span></code></p></td> +<td><p>The logged message, computed as <code class="docutils literal notranslate"><span class="pre">msg</span> <span class="pre">%</span> +<span class="pre">args</span></code>. This is set when +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter.format" title="logging.Formatter.format"><code class="xref py py-meth docutils literal notranslate"><span class="pre">Formatter.format()</span></code></a> is invoked.</p></td> +</tr> +<tr class="row-even"><td><p>module</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(module)s</span></code></p></td> +<td><p>Module (name portion of <code class="docutils literal notranslate"><span class="pre">filename</span></code>).</p></td> +</tr> +<tr class="row-odd"><td><p>msecs</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(msecs)d</span></code></p></td> +<td><p>Millisecond portion of the time when the +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> was created.</p></td> +</tr> +<tr class="row-even"><td><p>msg</p></td> +<td><p>You shouldn’t need to +format this yourself.</p></td> +<td><p>The format string passed in the original +logging call. 
Merged with <code class="docutils literal notranslate"><span class="pre">args</span></code> to +produce <code class="docutils literal notranslate"><span class="pre">message</span></code>, or an arbitrary object +(see <a class="reference internal" href="https://docs.python.org/3/howto/logging.html#arbitrary-object-messages"><span class="std std-ref">Using arbitrary objects as messages</span></a>).</p></td> +</tr> +<tr class="row-odd"><td><p>name</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(name)s</span></code></p></td> +<td><p>Name of the logger used to log the call.</p></td> +</tr> +<tr class="row-even"><td><p>pathname</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(pathname)s</span></code></p></td> +<td><p>Full pathname of the source file where the +logging call was issued (if available).</p></td> +</tr> +<tr class="row-odd"><td><p>process</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(process)d</span></code></p></td> +<td><p>Process ID (if available).</p></td> +</tr> +<tr class="row-even"><td><p>processName</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(processName)s</span></code></p></td> +<td><p>Process name (if available).</p></td> +</tr> +<tr class="row-odd"><td><p>relativeCreated</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(relativeCreated)d</span></code></p></td> +<td><p>Time in milliseconds when the LogRecord was +created, relative to the time the logging +module was loaded.</p></td> +</tr> +<tr class="row-even"><td><p>stack_info</p></td> +<td><p>You shouldn’t need to +format this yourself.</p></td> +<td><p>Stack frame information (where available) +from the bottom of the stack in the current +thread, up to and including the stack frame +of the logging call which resulted in the +creation of this record.</p></td> +</tr> +<tr class="row-odd"><td><p>thread</p></td> +<td><p><code 
class="docutils literal notranslate"><span class="pre">%(thread)d</span></code></p></td> +<td><p>Thread ID (if available).</p></td> +</tr> +<tr class="row-even"><td><p>threadName</p></td> +<td><p><code class="docutils literal notranslate"><span class="pre">%(threadName)s</span></code></p></td> +<td><p>Thread name (if available).</p></td> +</tr> +</tbody> +</table></div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.1: </span><em>processName</em> was added.</p> +</div> +</section> +<section id="loggeradapter-objects"> +<span id="logger-adapter"></span><h2>LoggerAdapter Objects<a class="headerlink" href="https://docs.python.org/3/library/logging.html#loggeradapter-objects" title="Permalink to this headline">¶</a></h2> +<p><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter" title="logging.LoggerAdapter"><code class="xref py py-class docutils literal notranslate"><span class="pre">LoggerAdapter</span></code></a> instances are used to conveniently pass contextual +information into logging calls. 
For a usage example, see the section on +<a class="reference internal" href="https://docs.python.org/3/howto/logging-cookbook.html#context-info"><span class="std std-ref">adding contextual information to your logging output</span></a>.</p> +<dl class="py class"> +<dt class="sig sig-object py" id="logging.LoggerAdapter"> +<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">LoggerAdapter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">logger</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extra</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns an instance of <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter" title="logging.LoggerAdapter"><code class="xref py py-class docutils literal notranslate"><span class="pre">LoggerAdapter</span></code></a> initialized with an +underlying <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a> instance and a dict-like object.</p> +<dl class="py method"> +<dt class="sig sig-object py" id="logging.LoggerAdapter.process"> +<span class="sig-name descname"><span class="pre">process</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter.process" 
title="Permalink to this definition">¶</a></dt> +<dd><p>Modifies the message and/or keyword arguments passed to a logging call in +order to insert contextual information. This implementation takes the object +passed as <em>extra</em> to the constructor and adds it to <em>kwargs</em> using key +‘extra’. The return value is a (<em>msg</em>, <em>kwargs</em>) tuple which has the +(possibly modified) versions of the arguments passed in.</p> +</dd></dl> + +<dl class="py attribute"> +<dt class="sig sig-object py" id="logging.LoggerAdapter.manager"> +<span class="sig-name descname"><span class="pre">manager</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter.manager" title="Permalink to this definition">¶</a></dt> +<dd><p>Delegates to the underlying <code class="xref py py-attr docutils literal notranslate"><span class="pre">manager</span></code> on <em>logger</em>.</p> +</dd></dl> + +<dl class="py attribute"> +<dt class="sig sig-object py" id="logging.LoggerAdapter._log"> +<span class="sig-name descname"><span class="pre">_log</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter._log" title="Permalink to this definition">¶</a></dt> +<dd><p>Delegates to the underlying <code class="xref py py-meth docutils literal notranslate"><span class="pre">_log()</span></code> method on <em>logger</em>.</p> +</dd></dl> + +<p>In addition to the above, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter" title="logging.LoggerAdapter"><code class="xref py py-class docutils literal notranslate"><span class="pre">LoggerAdapter</span></code></a> supports the following +methods of <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a>: <a class="reference 
internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.debug" title="logging.Logger.debug"><code class="xref py py-meth docutils literal notranslate"><span class="pre">debug()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.info" title="logging.Logger.info"><code class="xref py py-meth docutils literal notranslate"><span class="pre">info()</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.warning" title="logging.Logger.warning"><code class="xref py py-meth docutils literal notranslate"><span class="pre">warning()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.error" title="logging.Logger.error"><code class="xref py py-meth docutils literal notranslate"><span class="pre">error()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.exception" title="logging.Logger.exception"><code class="xref py py-meth docutils literal notranslate"><span class="pre">exception()</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.critical" title="logging.Logger.critical"><code class="xref py py-meth docutils literal notranslate"><span class="pre">critical()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.log" title="logging.Logger.log"><code class="xref py py-meth docutils literal notranslate"><span class="pre">log()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.isEnabledFor" title="logging.Logger.isEnabledFor"><code class="xref py py-meth docutils literal notranslate"><span class="pre">isEnabledFor()</span></code></a>, +<a class="reference internal" 
href="https://docs.python.org/3/library/logging.html#logging.Logger.getEffectiveLevel" title="logging.Logger.getEffectiveLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getEffectiveLevel()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.setLevel" title="logging.Logger.setLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">setLevel()</span></code></a> and +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.hasHandlers" title="logging.Logger.hasHandlers"><code class="xref py py-meth docutils literal notranslate"><span class="pre">hasHandlers()</span></code></a>. These methods have the same signatures as their +counterparts in <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a>, so you can use the two types of instances +interchangeably.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.isEnabledFor" title="logging.Logger.isEnabledFor"><code class="xref py py-meth docutils literal notranslate"><span class="pre">isEnabledFor()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.getEffectiveLevel" title="logging.Logger.getEffectiveLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getEffectiveLevel()</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.setLevel" title="logging.Logger.setLevel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">setLevel()</span></code></a> and <a 
class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger.hasHandlers" title="logging.Logger.hasHandlers"><code class="xref py py-meth docutils literal notranslate"><span class="pre">hasHandlers()</span></code></a> methods were added +to <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LoggerAdapter" title="logging.LoggerAdapter"><code class="xref py py-class docutils literal notranslate"><span class="pre">LoggerAdapter</span></code></a>. These methods delegate to the underlying logger.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.6: </span>Attribute <code class="xref py py-attr docutils literal notranslate"><span class="pre">manager</span></code> and method <code class="xref py py-meth docutils literal notranslate"><span class="pre">_log()</span></code> were added, which +delegate to the underlying logger and allow adapters to be nested.</p> +</div> +</dd></dl> + +</section> +<section id="thread-safety"> +<h2>Thread Safety<a class="headerlink" href="https://docs.python.org/3/library/logging.html#thread-safety" title="Permalink to this headline">¶</a></h2> +<p>The logging module is intended to be thread-safe without any special work +needing to be done by its clients. It achieves this through using threading +locks; there is one lock to serialize access to the module’s shared data, and +each handler also creates a lock to serialize access to its underlying I/O.</p> +<p>If you are implementing asynchronous signal handlers using the <a class="reference internal" href="https://docs.python.org/3/library/signal.html#module-signal" title="signal: Set handlers for asynchronous events."><code class="xref py py-mod docutils literal notranslate"><span class="pre">signal</span></code></a> +module, you may not be able to use logging from within such handlers. 
This is +because lock implementations in the <a class="reference internal" href="https://docs.python.org/3/library/threading.html#module-threading" title="threading: Thread-based parallelism."><code class="xref py py-mod docutils literal notranslate"><span class="pre">threading</span></code></a> module are not always +re-entrant, and so cannot be invoked from such signal handlers.</p> +</section> +<section id="module-level-functions"> +<h2>Module-Level Functions<a class="headerlink" href="https://docs.python.org/3/library/logging.html#module-level-functions" title="Permalink to this headline">¶</a></h2> +<p>In addition to the classes described above, there are a number of module-level +functions.</p> +<dl class="py function"> +<dt class="sig sig-object py" id="logging.getLogger"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">getLogger</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.getLogger" title="Permalink to this definition">¶</a></dt> +<dd><p>Return a logger with the specified name or, if name is <code class="docutils literal notranslate"><span class="pre">None</span></code>, return a +logger which is the root logger of the hierarchy. If specified, the name is +typically a dot-separated hierarchical name like <em>‘a’</em>, <em>‘a.b’</em> or <em>‘a.b.c.d’</em>. +Choice of these names is entirely up to the developer who is using logging.</p> +<p>All calls to this function with a given name return the same logger instance. 
+This means that logger instances never need to be passed between different parts +of an application.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.getLoggerClass"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">getLoggerClass</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.getLoggerClass" title="Permalink to this definition">¶</a></dt> +<dd><p>Return either the standard <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a> class, or the last class passed to +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.setLoggerClass" title="logging.setLoggerClass"><code class="xref py py-func docutils literal notranslate"><span class="pre">setLoggerClass()</span></code></a>. This function may be called from within a new class +definition, to ensure that installing a customized <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Logger" title="logging.Logger"><code class="xref py py-class docutils literal notranslate"><span class="pre">Logger</span></code></a> class will +not undo customizations already applied by other code. For example:</p> +<div class="highlight-python3 notranslate"><div class="highlight" style="position: relative;"><pre><span></span><span class="k">class</span> <span class="nc">MyLogger</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">getLoggerClass</span><span class="p">()):</span> + <span class="c1"># ... 
override behaviour here</span> +</pre></div> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.getLogRecordFactory"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">getLogRecordFactory</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.getLogRecordFactory" title="Permalink to this definition">¶</a></dt> +<dd><p>Return a callable which is used to create a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a>.</p> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.2: </span>This function has been provided, along with <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.setLogRecordFactory" title="logging.setLogRecordFactory"><code class="xref py py-func docutils literal notranslate"><span class="pre">setLogRecordFactory()</span></code></a>, +to allow developers more control over how the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> +representing a logging event is constructed.</p> +</div> +<p>See <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.setLogRecordFactory" title="logging.setLogRecordFactory"><code class="xref py py-func docutils literal notranslate"><span class="pre">setLogRecordFactory()</span></code></a> for more information about how the +factory is called.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.debug"> 
+<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">debug</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.debug" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="logging.DEBUG"><code class="xref py py-const docutils literal notranslate"><span class="pre">DEBUG</span></code></a> on the root logger. The <em>msg</em> is the +message format string, and the <em>args</em> are the arguments which are merged into +<em>msg</em> using the string formatting operator. (Note that this means that you can +use keywords in the format string, together with a single dictionary argument.)</p> +<p>There are three keyword arguments in <em>kwargs</em> which are inspected: <em>exc_info</em> +which, if it does not evaluate as false, causes exception information to be +added to the logging message. 
If an exception tuple (in the format returned by +<a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.exc_info()</span></code></a>) or an exception instance is provided, it is used; +otherwise, <a class="reference internal" href="https://docs.python.org/3/library/sys.html#sys.exc_info" title="sys.exc_info"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.exc_info()</span></code></a> is called to get the exception information.</p> +<p>The second optional keyword argument is <em>stack_info</em>, which defaults to +<code class="docutils literal notranslate"><span class="pre">False</span></code>. If true, stack information is added to the logging +message, including the actual logging call. Note that this is not the same +stack information as that displayed through specifying <em>exc_info</em>: The +former is stack frames from the bottom of the stack up to the logging call +in the current thread, whereas the latter is information about stack frames +which have been unwound, following an exception, while searching for +exception handlers.</p> +<p>You can specify <em>stack_info</em> independently of <em>exc_info</em>, e.g. to just show +how you got to a certain point in your code, even when no exceptions were +raised. 
The stack frames are printed following a header line which says:</p> +<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Stack (most recent call last): +</pre></div> +</div> +<p>This mimics the <code class="docutils literal notranslate"><span class="pre">Traceback</span> <span class="pre">(most</span> <span class="pre">recent</span> <span class="pre">call</span> <span class="pre">last):</span></code> which is used when +displaying exception frames.</p> +<p>The third optional keyword argument is <em>extra</em> which can be used to pass a +dictionary which is used to populate the __dict__ of the LogRecord created for +the logging event with user-defined attributes. These custom attributes can then +be used as you like. For example, they could be incorporated into logged +messages. For example:</p> +<div class="highlight-python3 notranslate"><div class="highlight" style="position: relative;"><pre><span></span><span class="n">FORMAT</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%(asctime)s</span><span class="s1"> </span><span class="si">%(clientip)-15s</span><span class="s1"> </span><span class="si">%(user)-8s</span><span class="s1"> </span><span class="si">%(message)s</span><span class="s1">'</span> +<span class="n">logging</span><span class="o">.</span><span class="n">basicConfig</span><span class="p">(</span><span class="nb">format</span><span class="o">=</span><span class="n">FORMAT</span><span class="p">)</span> +<span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'clientip'</span><span class="p">:</span> <span class="s1">'192.168.0.1'</span><span class="p">,</span> <span class="s1">'user'</span><span class="p">:</span> <span class="s1">'fbloggs'</span><span class="p">}</span> +<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Protocol problem: </span><span class="si">%s</span><span 
class="s1">'</span><span class="p">,</span> <span class="s1">'connection reset'</span><span class="p">,</span> <span class="n">extra</span><span class="o">=</span><span class="n">d</span><span class="p">)</span> +</pre></div> +</div> +<p>would print something like:</p> +<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>2006-02-08 22:20:02,165 192.168.0.1 fbloggs Protocol problem: connection reset +</pre></div> +</div> +<p>The keys in the dictionary passed in <em>extra</em> should not clash with the keys used +by the logging system. (See the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> documentation for more +information on which keys are used by the logging system.)</p> +<p>If you choose to use these attributes in logged messages, you need to exercise +some care. In the above example, for instance, the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> has been +set up with a format string which expects ‘clientip’ and ‘user’ in the attribute +dictionary of the LogRecord. If these are missing, the message will not be +logged because a string formatting exception will occur. So in this case, you +always need to pass the <em>extra</em> dictionary with these keys.</p> +<p>While this might be annoying, this feature is intended for use in specialized +circumstances, such as multi-threaded servers where the same code executes in +many contexts, and interesting conditions which arise are dependent on this +context (such as remote client IP address and authenticated user name, in the +above example). 
In such circumstances, it is likely that specialized +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a>s would be used with particular <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Handler" title="logging.Handler"><code class="xref py py-class docutils literal notranslate"><span class="pre">Handler</span></code></a>s.</p> +<p>This function (as well as <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.info" title="logging.info"><code class="xref py py-func docutils literal notranslate"><span class="pre">info()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.warning" title="logging.warning"><code class="xref py py-func docutils literal notranslate"><span class="pre">warning()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.error" title="logging.error"><code class="xref py py-func docutils literal notranslate"><span class="pre">error()</span></code></a> and +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.critical" title="logging.critical"><code class="xref py py-func docutils literal notranslate"><span class="pre">critical()</span></code></a>) will call <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.basicConfig" title="logging.basicConfig"><code class="xref py py-func docutils literal notranslate"><span class="pre">basicConfig()</span></code></a> if the root logger doesn’t +have any handler attached.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>stack_info</em> parameter was added.</p> +</div> +</dd></dl> + +<dl class="py 
function"> +<dt class="sig sig-object py" id="logging.info"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">info</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.info" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a> on the root logger. 
The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.warning"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">warning</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.warning" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a> on the root logger. The arguments +are interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>There is an obsolete function <code class="docutils literal notranslate"><span class="pre">warn</span></code> which is functionally +identical to <code class="docutils literal notranslate"><span class="pre">warning</span></code>. 
As <code class="docutils literal notranslate"><span class="pre">warn</span></code> is deprecated, please do not use +it - use <code class="docutils literal notranslate"><span class="pre">warning</span></code> instead.</p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.error"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.error" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a> on the root logger. 
The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.critical"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">critical</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.critical" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="logging.CRITICAL"><code class="xref py py-const docutils literal notranslate"><span class="pre">CRITICAL</span></code></a> on the root logger. 
The arguments +are interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.exception"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">exception</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.exception" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a> on the root logger. The arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>. Exception info is added to the logging +message. 
This function should only be called from an exception handler.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.log"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">log</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">msg</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.log" title="Permalink to this definition">¶</a></dt> +<dd><p>Logs a message with level <em>level</em> on the root logger. The other arguments are +interpreted as for <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.disable"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">disable</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">CRITICAL</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.disable" title="Permalink to this definition">¶</a></dt> +<dd><p>Provides an overriding level <em>level</em> for all loggers which takes 
precedence over +the logger’s own level. When the need arises to temporarily throttle logging +output down across the whole application, this function can be useful. Its +effect is to disable all logging calls of severity <em>level</em> and below, so that +if you call it with a value of INFO, then all INFO and DEBUG events would be +discarded, whereas those of severity WARNING and above would be processed +according to the logger’s effective level. If +<code class="docutils literal notranslate"><span class="pre">logging.disable(logging.NOTSET)</span></code> is called, it effectively removes this +overriding level, so that logging output again depends on the effective +levels of individual loggers.</p> +<p>Note that if you have defined any custom logging level higher than +<code class="docutils literal notranslate"><span class="pre">CRITICAL</span></code> (this is not recommended), you won’t be able to rely on the +default value for the <em>level</em> parameter, but will have to explicitly supply a +suitable value.</p> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.7: </span>The <em>level</em> parameter was defaulted to level <code class="docutils literal notranslate"><span class="pre">CRITICAL</span></code>. 
See +<a class="reference external" href="https://bugs.python.org/issue?@action=redirect&bpo=28524">bpo-28524</a> for more information about this change.</p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.addLevelName"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">addLevelName</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">levelName</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.addLevelName" title="Permalink to this definition">¶</a></dt> +<dd><p>Associates level <em>level</em> with text <em>levelName</em> in an internal dictionary, which is +used to map numeric levels to a textual representation, for example when a +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> formats a message. This function can also be used to define +your own levels. 
The only constraints are that all levels used must be +registered using this function, levels should be positive integers and they +should increase in increasing order of severity.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>If you are thinking of defining your own levels, please see the +section on <a class="reference internal" href="https://docs.python.org/3/howto/logging.html#custom-levels"><span class="std std-ref">Custom Levels</span></a>.</p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.getLevelNamesMapping"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">getLevelNamesMapping</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.getLevelNamesMapping" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns a mapping from level names to their corresponding logging levels. For example, the +string “CRITICAL” maps to <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="logging.CRITICAL"><code class="xref py py-const docutils literal notranslate"><span class="pre">CRITICAL</span></code></a>. 
The returned mapping is copied from an internal +mapping on each call to this function.</p> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.11.</span></p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.getLevelName"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">getLevelName</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">level</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.getLevelName" title="Permalink to this definition">¶</a></dt> +<dd><p>Returns the textual or numeric representation of logging level <em>level</em>.</p> +<p>If <em>level</em> is one of the predefined levels <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.CRITICAL" title="logging.CRITICAL"><code class="xref py py-const docutils literal notranslate"><span class="pre">CRITICAL</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.ERROR" title="logging.ERROR"><code class="xref py py-const docutils literal notranslate"><span class="pre">ERROR</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.INFO" title="logging.INFO"><code class="xref py py-const docutils literal notranslate"><span class="pre">INFO</span></code></a> or <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.DEBUG" title="logging.DEBUG"><code class="xref py py-const docutils literal notranslate"><span 
class="pre">DEBUG</span></code></a> then you get the +corresponding string. If you have associated levels with names using +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.addLevelName" title="logging.addLevelName"><code class="xref py py-func docutils literal notranslate"><span class="pre">addLevelName()</span></code></a> then the name you have associated with <em>level</em> is +returned. If a numeric value corresponding to one of the defined levels is +passed in, the corresponding string representation is returned.</p> +<p>The <em>level</em> parameter also accepts a string representation of the level such +as ‘INFO’. In such cases, this function returns the corresponding numeric +value of the level.</p> +<p>If no matching numeric or string value is passed in, the string +‘Level %s’ % level is returned.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>Levels are internally integers (as they need to be compared in the +logging logic). This function is used to convert between an integer level +and the level name displayed in the formatted log output by means of the +<code class="docutils literal notranslate"><span class="pre">%(levelname)s</span></code> format specifier (see <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-attributes"><span class="std std-ref">LogRecord attributes</span></a>), and +vice versa.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.4: </span>In Python versions earlier than 3.4, this function could also be passed a +text level, and would return the corresponding numeric value of the level. 
+This undocumented behaviour was considered a mistake, and was removed in +Python 3.4, but reinstated in 3.4.2 in order to retain backward compatibility.</p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.makeLogRecord"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">makeLogRecord</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">attrdict</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.makeLogRecord" title="Permalink to this definition">¶</a></dt> +<dd><p>Creates and returns a new <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> instance whose attributes are +defined by <em>attrdict</em>. 
This function is useful for taking a pickled +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> attribute dictionary, sent over a socket, and reconstituting +it as a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> instance at the receiving end.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.basicConfig"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">basicConfig</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.basicConfig" title="Permalink to this definition">¶</a></dt> +<dd><p>Does basic configuration for the logging system by creating a +<a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.StreamHandler" title="logging.StreamHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamHandler</span></code></a> with a default <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.Formatter" title="logging.Formatter"><code class="xref py py-class docutils literal notranslate"><span class="pre">Formatter</span></code></a> and adding it to the +root logger. 
The functions <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.debug" title="logging.debug"><code class="xref py py-func docutils literal notranslate"><span class="pre">debug()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.info" title="logging.info"><code class="xref py py-func docutils literal notranslate"><span class="pre">info()</span></code></a>, <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.warning" title="logging.warning"><code class="xref py py-func docutils literal notranslate"><span class="pre">warning()</span></code></a>, +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.error" title="logging.error"><code class="xref py py-func docutils literal notranslate"><span class="pre">error()</span></code></a> and <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.critical" title="logging.critical"><code class="xref py py-func docutils literal notranslate"><span class="pre">critical()</span></code></a> will call <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.basicConfig" title="logging.basicConfig"><code class="xref py py-func docutils literal notranslate"><span class="pre">basicConfig()</span></code></a> automatically +if no handlers are defined for the root logger.</p> +<p>This function does nothing if the root logger already has handlers +configured, unless the keyword argument <em>force</em> is set to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p> +<div class="admonition note"> +<p class="admonition-title">Note</p> +<p>This function should be called from the main thread +before other threads are started. 
In versions of Python prior to +2.7.1 and 3.2, if this function is called from multiple threads, +it is possible (in rare circumstances) that a handler will be added +to the root logger more than once, leading to unexpected results +such as messages being duplicated in the log.</p> +</div> +<p>The following keyword arguments are supported.</p> +<div class="responsive-table__container"><table class="docutils align-default"> +<colgroup> +<col style="width: 24%"> +<col style="width: 76%"> +</colgroup> +<thead> +<tr class="row-odd"><th class="head"><p>Format</p></th> +<th class="head"><p>Description</p></th> +</tr> +</thead> +<tbody> +<tr class="row-even"><td><p><em>filename</em></p></td> +<td><p>Specifies that a <a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.FileHandler" title="logging.FileHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">FileHandler</span></code></a> be +created, using the specified filename, +rather than a <a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.StreamHandler" title="logging.StreamHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamHandler</span></code></a>.</p></td> +</tr> +<tr class="row-odd"><td><p><em>filemode</em></p></td> +<td><p>If <em>filename</em> is specified, open the file +in this <a class="reference internal" href="https://docs.python.org/3/library/functions.html#filemodes"><span class="std std-ref">mode</span></a>. Defaults +to <code class="docutils literal notranslate"><span class="pre">'a'</span></code>.</p></td> +</tr> +<tr class="row-even"><td><p><em>format</em></p></td> +<td><p>Use the specified format string for the +handler. 
Defaults to attributes +<code class="docutils literal notranslate"><span class="pre">levelname</span></code>, <code class="docutils literal notranslate"><span class="pre">name</span></code> and <code class="docutils literal notranslate"><span class="pre">message</span></code> +separated by colons.</p></td> +</tr> +<tr class="row-odd"><td><p><em>datefmt</em></p></td> +<td><p>Use the specified date/time format, as +accepted by <a class="reference internal" href="https://docs.python.org/3/library/time.html#time.strftime" title="time.strftime"><code class="xref py py-func docutils literal notranslate"><span class="pre">time.strftime()</span></code></a>.</p></td> +</tr> +<tr class="row-even"><td><p><em>style</em></p></td> +<td><p>If <em>format</em> is specified, use this style +for the format string. One of <code class="docutils literal notranslate"><span class="pre">'%'</span></code>, +<code class="docutils literal notranslate"><span class="pre">'{'</span></code> or <code class="docutils literal notranslate"><span class="pre">'$'</span></code> for <a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#old-string-formatting"><span class="std std-ref">printf-style</span></a>, +<a class="reference internal" href="https://docs.python.org/3/library/stdtypes.html#str.format" title="str.format"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.format()</span></code></a> or +<a class="reference internal" href="https://docs.python.org/3/library/string.html#string.Template" title="string.Template"><code class="xref py py-class docutils literal notranslate"><span class="pre">string.Template</span></code></a> respectively. 
+Defaults to <code class="docutils literal notranslate"><span class="pre">'%'</span></code>.</p></td> +</tr> +<tr class="row-odd"><td><p><em>level</em></p></td> +<td><p>Set the root logger level to the specified +<a class="reference internal" href="https://docs.python.org/3/library/logging.html#levels"><span class="std std-ref">level</span></a>.</p></td> +</tr> +<tr class="row-even"><td><p><em>stream</em></p></td> +<td><p>Use the specified stream to initialize the +<a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.StreamHandler" title="logging.StreamHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamHandler</span></code></a>. Note that this +argument is incompatible with <em>filename</em> - +if both are present, a <code class="docutils literal notranslate"><span class="pre">ValueError</span></code> is +raised.</p></td> +</tr> +<tr class="row-odd"><td><p><em>handlers</em></p></td> +<td><p>If specified, this should be an iterable of +already created handlers to add to the root +logger. Any handlers which don’t already +have a formatter set will be assigned the +default formatter created in this function. 
+Note that this argument is incompatible +with <em>filename</em> or <em>stream</em> - if both +are present, a <code class="docutils literal notranslate"><span class="pre">ValueError</span></code> is raised.</p></td> +</tr> +<tr class="row-even"><td><p><em>force</em></p></td> +<td><p>If this keyword argument is specified as +true, any existing handlers attached to the +root logger are removed and closed, before +carrying out the configuration as specified +by the other arguments.</p></td> +</tr> +<tr class="row-odd"><td><p><em>encoding</em></p></td> +<td><p>If this keyword argument is specified along +with <em>filename</em>, its value is used when the +<a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.FileHandler" title="logging.FileHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">FileHandler</span></code></a> is created, and thus +used when opening the output file.</p></td> +</tr> +<tr class="row-even"><td><p><em>errors</em></p></td> +<td><p>If this keyword argument is specified along +with <em>filename</em>, its value is used when the +<a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.FileHandler" title="logging.FileHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">FileHandler</span></code></a> is created, and thus +used when opening the output file. If not +specified, the value ‘backslashreplace’ is +used. 
Note that if <code class="docutils literal notranslate"><span class="pre">None</span></code> is specified, +it will be passed as such to <a class="reference internal" href="https://docs.python.org/3/library/functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a>, +which means that it will be treated the +same as passing ‘strict’.</p></td> +</tr> +</tbody> +</table></div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.2: </span>The <em>style</em> argument was added.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.3: </span>The <em>handlers</em> argument was added. Additional checks were added to +catch situations where incompatible arguments are specified (e.g. +<em>handlers</em> together with <em>stream</em> or <em>filename</em>, or <em>stream</em> +together with <em>filename</em>).</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.8: </span>The <em>force</em> argument was added.</p> +</div> +<div class="versionchanged"> +<p><span class="versionmodified changed">Changed in version 3.9: </span>The <em>encoding</em> and <em>errors</em> arguments were added.</p> +</div> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.shutdown"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">shutdown</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.shutdown" title="Permalink to this definition">¶</a></dt> +<dd><p>Informs the logging system to perform an orderly shutdown by flushing and +closing all handlers. 
This should be called at application exit and no +further use of the logging system should be made after this call.</p> +<p>When the logging module is imported, it registers this function as an exit +handler (see <a class="reference internal" href="https://docs.python.org/3/library/atexit.html#module-atexit" title="atexit: Register and execute cleanup functions."><code class="xref py py-mod docutils literal notranslate"><span class="pre">atexit</span></code></a>), so normally there’s no need to do that +manually.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.setLoggerClass"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">setLoggerClass</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">klass</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.setLoggerClass" title="Permalink to this definition">¶</a></dt> +<dd><p>Tells the logging system to use the class <em>klass</em> when instantiating a logger. +The class should define <code class="xref py py-meth docutils literal notranslate"><span class="pre">__init__()</span></code> such that only a name argument is +required, and the <code class="xref py py-meth docutils literal notranslate"><span class="pre">__init__()</span></code> should call <code class="xref py py-meth docutils literal notranslate"><span class="pre">Logger.__init__()</span></code>. This +function is typically called before any loggers are instantiated by applications +which need to use custom logger behavior. 
After this call, as at any other +time, do not instantiate loggers directly using the subclass: continue to use +the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.getLogger" title="logging.getLogger"><code class="xref py py-func docutils literal notranslate"><span class="pre">logging.getLogger()</span></code></a> API to get your loggers.</p> +</dd></dl> + +<dl class="py function"> +<dt class="sig sig-object py" id="logging.setLogRecordFactory"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">setLogRecordFactory</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">factory</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.setLogRecordFactory" title="Permalink to this definition">¶</a></dt> +<dd><p>Set a callable which is used to create a <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a>.</p> +<dl class="field-list simple"> +<dt class="field-odd">Parameters</dt> +<dd class="field-odd"><p><strong>factory</strong> – The factory callable to be used to instantiate a log record.</p> +</dd> +</dl> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.2: </span>This function has been provided, along with <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.getLogRecordFactory" title="logging.getLogRecordFactory"><code class="xref py py-func docutils literal notranslate"><span class="pre">getLogRecordFactory()</span></code></a>, to +allow developers more control over how the <a class="reference internal" 
href="https://docs.python.org/3/library/logging.html#logging.LogRecord" title="logging.LogRecord"><code class="xref py py-class docutils literal notranslate"><span class="pre">LogRecord</span></code></a> representing +a logging event is constructed.</p> +</div> +<p>The factory has the following signature:</p> +<p><code class="docutils literal notranslate"><span class="pre">factory(name,</span> <span class="pre">level,</span> <span class="pre">fn,</span> <span class="pre">lno,</span> <span class="pre">msg,</span> <span class="pre">args,</span> <span class="pre">exc_info,</span> <span class="pre">func=None,</span> <span class="pre">sinfo=None,</span> <span class="pre">**kwargs)</span></code></p> +<blockquote> +<div><dl class="field-list simple"> +<dt class="field-odd">name</dt> +<dd class="field-odd"><p>The logger name.</p> +</dd> +<dt class="field-even">level</dt> +<dd class="field-even"><p>The logging level (numeric).</p> +</dd> +<dt class="field-odd">fn</dt> +<dd class="field-odd"><p>The full pathname of the file where the logging call was made.</p> +</dd> +<dt class="field-even">lno</dt> +<dd class="field-even"><p>The line number in the file where the logging call was made.</p> +</dd> +<dt class="field-odd">msg</dt> +<dd class="field-odd"><p>The logging message.</p> +</dd> +<dt class="field-even">args</dt> +<dd class="field-even"><p>The arguments for the logging message.</p> +</dd> +<dt class="field-odd">exc_info</dt> +<dd class="field-odd"><p>An exception tuple, or <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p> +</dd> +<dt class="field-even">func</dt> +<dd class="field-even"><p>The name of the function or method which invoked the logging +call.</p> +</dd> +<dt class="field-odd">sinfo</dt> +<dd class="field-odd"><p>A stack traceback such as is provided by +<a class="reference internal" href="https://docs.python.org/3/library/traceback.html#traceback.print_stack" title="traceback.print_stack"><code class="xref py py-func 
docutils literal notranslate"><span class="pre">traceback.print_stack()</span></code></a>, showing the call hierarchy.</p> +</dd> +<dt class="field-even">kwargs</dt> +<dd class="field-even"><p>Additional keyword arguments.</p> +</dd> +</dl> +</div></blockquote> +</dd></dl> + +</section> +<section id="module-level-attributes"> +<h2>Module-Level Attributes<a class="headerlink" href="https://docs.python.org/3/library/logging.html#module-level-attributes" title="Permalink to this headline">¶</a></h2> +<dl class="py attribute"> +<dt class="sig sig-object py" id="logging.lastResort"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">lastResort</span></span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.lastResort" title="Permalink to this definition">¶</a></dt> +<dd><p>A “handler of last resort” is available through this attribute. This +is a <a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#logging.StreamHandler" title="logging.StreamHandler"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamHandler</span></code></a> writing to <code class="docutils literal notranslate"><span class="pre">sys.stderr</span></code> with a level of +<code class="docutils literal notranslate"><span class="pre">WARNING</span></code>, and is used to handle logging events in the absence of any +logging configuration. The end result is to just print the message to +<code class="docutils literal notranslate"><span class="pre">sys.stderr</span></code>. This replaces the earlier error message saying that +“no handlers could be found for logger XYZ”. 
If you need the earlier +behaviour for some reason, <code class="docutils literal notranslate"><span class="pre">lastResort</span></code> can be set to <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p> +<div class="versionadded"> +<p><span class="versionmodified added">New in version 3.2.</span></p> +</div> +</dd></dl> + +</section> +<section id="integration-with-the-warnings-module"> +<h2>Integration with the warnings module<a class="headerlink" href="https://docs.python.org/3/library/logging.html#integration-with-the-warnings-module" title="Permalink to this headline">¶</a></h2> +<p>The <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.captureWarnings" title="logging.captureWarnings"><code class="xref py py-func docutils literal notranslate"><span class="pre">captureWarnings()</span></code></a> function can be used to integrate <a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code></a> +with the <a class="reference internal" href="https://docs.python.org/3/library/warnings.html#module-warnings" title="warnings: Issue warning messages and control their disposition."><code class="xref py py-mod docutils literal notranslate"><span class="pre">warnings</span></code></a> module.</p> +<dl class="py function"> +<dt class="sig sig-object py" id="logging.captureWarnings"> +<span class="sig-prename descclassname"><span class="pre">logging.</span></span><span class="sig-name descname"><span class="pre">captureWarnings</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">capture</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="https://docs.python.org/3/library/logging.html#logging.captureWarnings" title="Permalink to this 
definition">¶</a></dt> +<dd><p>This function is used to turn the capture of warnings by logging on and +off.</p> +<p>If <em>capture</em> is <code class="docutils literal notranslate"><span class="pre">True</span></code>, warnings issued by the <a class="reference internal" href="https://docs.python.org/3/library/warnings.html#module-warnings" title="warnings: Issue warning messages and control their disposition."><code class="xref py py-mod docutils literal notranslate"><span class="pre">warnings</span></code></a> module will +be redirected to the logging system. Specifically, a warning will be +formatted using <a class="reference internal" href="https://docs.python.org/3/library/warnings.html#warnings.formatwarning" title="warnings.formatwarning"><code class="xref py py-func docutils literal notranslate"><span class="pre">warnings.formatwarning()</span></code></a> and the resulting string +logged to a logger named <code class="docutils literal notranslate"><span class="pre">'py.warnings'</span></code> with a severity of <a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging.WARNING" title="logging.WARNING"><code class="xref py py-const docutils literal notranslate"><span class="pre">WARNING</span></code></a>.</p> +<p>If <em>capture</em> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, the redirection of warnings to the logging system +will stop, and warnings will be redirected to their original destinations +(i.e. 
those in effect before <code class="docutils literal notranslate"><span class="pre">captureWarnings(True)</span></code> was called).</p> +</dd></dl> + +<div class="admonition seealso"> +<p class="admonition-title">See also</p> +<dl class="simple"> +<dt>Module <a class="reference internal" href="https://docs.python.org/3/library/logging.config.html#module-logging.config" title="logging.config: Configuration of the logging module."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging.config</span></code></a></dt><dd><p>Configuration API for the logging module.</p> +</dd> +<dt>Module <a class="reference internal" href="https://docs.python.org/3/library/logging.handlers.html#module-logging.handlers" title="logging.handlers: Handlers for the logging module."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging.handlers</span></code></a></dt><dd><p>Useful handlers included with the logging module.</p> +</dd> +<dt><span class="target" id="index-1"></span><a class="pep reference external" href="https://peps.python.org/pep-0282/"><strong>PEP 282</strong></a> - A Logging System</dt><dd><p>The proposal which described this feature for inclusion in the Python standard +library.</p> +</dd> +<dt><a class="reference external" href="https://old.red-dove.com/python_logging.html">Original Python logging package</a></dt><dd><p>This is the original source for the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code></a> package. 
The version of the +package available from this site is suitable for use with Python 1.5.2, 2.1.x +and 2.2.x, which do not include the <a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code></a> package in the standard +library.</p> +</dd> +</dl> +</div> +</section> +</section> + + + <div class="clearer"></div> + </div> + </div> + </div> + <div class="sphinxsidebar" role="navigation" aria-label="main navigation"> + <div class="sphinxsidebarwrapper"> + <div> + <h3><a href="https://docs.python.org/3/contents.html">Table of Contents</a></h3> + <ul> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a><ul> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logger-objects">Logger Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logging-levels">Logging Levels</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#handler-objects">Handler Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#formatter-objects">Formatter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#filter-objects">Filter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-objects">LogRecord Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#logrecord-attributes">LogRecord attributes</a></li> +<li><a class="reference internal" 
href="https://docs.python.org/3/library/logging.html#loggeradapter-objects">LoggerAdapter Objects</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#thread-safety">Thread Safety</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-level-functions">Module-Level Functions</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-level-attributes">Module-Level Attributes</a></li> +<li><a class="reference internal" href="https://docs.python.org/3/library/logging.html#integration-with-the-warnings-module">Integration with the warnings module</a></li> +</ul> +</li> +</ul> + + </div> + <div> + <h4>Previous topic</h4> + <p class="topless"><a href="https://docs.python.org/3/library/getopt.html" title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">getopt</span></code> — C-style parser for command line options</a></p> + </div> + <div> + <h4>Next topic</h4> + <p class="topless"><a href="https://docs.python.org/3/library/logging.config.html" title="next chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging.config</span></code> — Logging configuration</a></p> + </div> + <div role="note" aria-label="source link"> + <h3>This Page</h3> + <ul class="this-page-menu"> + <li><a href="https://docs.python.org/3/bugs.html">Report a Bug</a></li> + <li> + <a href="https://github.com/python/cpython/blob/3.11/Doc/library/logging.rst" rel="nofollow">Show Source + </a> + </li> + </ul> + </div> + </div> + <div id="sidebarbutton"><span>«</span></div></div> + <div class="clearer"></div> + </div> + <div class="related" role="navigation" aria-label="related navigation"> + <h3>Navigation</h3> + <ul> + <li class="right" style="margin-right: 10px"> + <a href="https://docs.python.org/3/genindex.html" title="General Index">index</a></li> + <li class="right"> + <a 
href="https://docs.python.org/3/py-modindex.html" title="Python Module Index">modules</a> |</li> + <li class="right"> + <a href="https://docs.python.org/3/library/logging.config.html" title="logging.config — Logging configuration">next</a> |</li> + <li class="right"> + <a href="https://docs.python.org/3/library/getopt.html" title="getopt — C-style parser for command line options">previous</a> |</li> + + <li><img src="./test_files/py.svg" alt="python logo" style="vertical-align: middle; margin-top: -1px"></li> + <li><a href="https://www.python.org/">Python</a> »</li> + <li class="switchers"> + <div class="language_switcher_placeholder"><select id="language_select"><option value="en" selected="selected">English</option><option value="es">Spanish</option><option value="fr">French</option><option value="ja">Japanese</option><option value="ko">Korean</option><option value="pt-br">Brazilian Portuguese</option><option value="tr">Turkish</option><option value="zh-cn">Simplified Chinese</option><option value="zh-tw">Traditional Chinese</option></select></div> + <div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div> + </li> + <li> + + </li> + <li id="cpython-language-and-version"> + <a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> » + </li> + + <li class="nav-item nav-item-1"><a href="https://docs.python.org/3/library/index.html">The Python Standard Library</a> »</li> + <li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html">Generic Operating System Services</a> »</li> + <li class="nav-item nav-item-this"><a 
href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li> + <li class="right"> + + + <div class="inline-search" role="search"> + <form class="inline-search" action="https://docs.python.org/3/search.html" method="get"> + <input placeholder="Quick search" aria-label="Quick search" type="search" name="q"> + <input type="submit" value="Go"> + </form> + </div> + | + </li> + <li class="right"> +<label class="theme-selector-label"> + Theme + <select class="theme-selector" oninput="activateTheme(this.value)"> + <option value="auto" selected="">Auto</option> + <option value="light">Light</option> + <option value="dark">Dark</option> + </select> +</label> |</li> + + </ul> + </div> + <div class="footer"> + © <a href="https://docs.python.org/3/copyright.html">Copyright</a> 2001-2023, Python Software Foundation. + <br> + This page is licensed under the Python Software Foundation License Version 2. + <br> + Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License. + <br> + See <a href="https://docs.python.org/license.html">History and License</a> for more information.<br> + <br> + + The Python Software Foundation is a non-profit corporation. +<a href="https://www.python.org/psf/donations/">Please donate.</a> +<br> + <br> + + Last updated on Sep 14, 2023. + <a href="https://docs.python.org/bugs.html">Found a bug</a>? + <br> + + Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. 
+ </div> + + <script type="text/javascript" src="./test_files/switchers.js.download"></script> + +<div id="hl-aria-live-message-container" aria-live="polite" class="visually-hidden"></div><div id="hl-aria-live-alert-container" role="alert" aria-live="assertive" class="visually-hidden"></div></body><grammarly-desktop-integration data-grammarly-shadow-root="true"><template shadowrootmode="open"><style> + div.grammarly-desktop-integration { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border: 0; + -moz-user-select: none; + -webkit-user-select: none; + -ms-user-select:none; + user-select:none; + } + + div.grammarly-desktop-integration:before { + content: attr(data-content); + } + </style><div aria-label="grammarly-integration" role="group" tabindex="-1" class="grammarly-desktop-integration" data-content="{"mode":"limited","isActive":false,"isUserDisabled":false}"></div></template></grammarly-desktop-integration></html> \ No newline at end of file diff --git a/applications/ColossalQA/data/tests/test.md b/applications/ColossalQA/data/tests/test.md new file mode 100644 index 000000000..20d3c612f --- /dev/null +++ b/applications/ColossalQA/data/tests/test.md @@ -0,0 +1,78 @@ +# README Format File for Testing + + +## Table of Contents + +- [Table of Contents](#table-of-contents) +- [Install](#install) +- [How to Use](#how-to-use) +- Examples + - [Local Chinese Retrieval QA + Chat](examples/retrieval_conversation_zh.py) + - [Local English Retrieval QA + Chat](examples/retrieval_conversation_en.py) + - [Local Bi-lingual Retrieval QA + Chat](examples/retrieval_conversation_universal.py) + - [Experimental AI Agent Based on Chatgpt + Chat](examples/conversation_agent_chatgpt.py) + +**As Colossal-AI is undergoing some major updates, this project will be actively maintained to stay in line with the Colossal-AI project.** + +## Install + +Install colossalqa +```bash +# 
python==3.8.17 +cd ColossalAI/applications/ColossalQA +pip install -e . +``` + +To use the vllm server, please refer to the official guide [here](https://github.com/vllm-project/vllm/tree/main) for installation instructions. Simply run the following command from another terminal. +```bash +cd ./vllm/entrypoints +python api_server.py --host localhost --port $PORT_NUMBER --model $PATH_TO_MODEL --swap-space $SWAP_SPACE_IN_GB +``` + +## How to use + +### Collect your data + +For the ChatGPT-based agent, we support document retrieval and simple SQL search. +If you want to run the demo locally, we provide a document-retrieval-based conversation system built upon langchain. It accepts a wide range of documents. + +Read the comments under ./colossalqa/data_loader for more details. + +### Serving +Serving currently uses vllm; it will be replaced with Colossal inference when ready. Please refer to the class VllmLLM. + +### Run the script + +We provide scripts for a Chinese document-retrieval-based conversation system, an English document-retrieval-based conversation system, a bilingual document-retrieval-based conversation system, and an experimental AI agent with document retrieval and SQL query functionality. + +To run the bilingual script, set the following environment variables before running the script. +```bash +export ZH_MODEL_PATH=XXX +export ZH_MODEL_NAME=chatglm2 +export EN_MODEL_PATH=XXX +export EN_MODEL_NAME=llama +python retrieval_conversation_universal.py +``` + +To run retrieval_conversation_en.py, set the following environment variables. +```bash +export EN_MODEL_PATH=XXX +export EN_MODEL_NAME=llama +python retrieval_conversation_en.py +``` + +To run retrieval_conversation_zh.py, set the following environment variables. +```bash +export ZH_MODEL_PATH=XXX +export ZH_MODEL_NAME=chatglm2 +python retrieval_conversation_zh.py +``` + +It will ask you to provide the path to your data during the execution of the script. You can also pass a glob path to load multiple files at once. 
If csv files are provided, please use ',' as the delimiter and '"' as the quotation mark. There are no other formatting constraints for loading document-type files. For loading table-type files, we use pandas; please refer to [Pandas-Input/Output](https://pandas.pydata.org/pandas-docs/stable/reference/io.html) for file format details. + +## The Plan + +- [x] build document retrieval QA tool +- [x] Add long + short term memory +- [x] Add demo for AI agent with SQL query +- [x] Add custom retriever for fast construction and retrieving (with incremental mode) diff --git a/applications/ColossalQA/data/tests/test.txt b/applications/ColossalQA/data/tests/test.txt new file mode 100644 index 000000000..ff5bf2dc7 --- /dev/null +++ b/applications/ColossalQA/data/tests/test.txt @@ -0,0 +1,38 @@ +Your Name +Lorem ipsum dolor sit amet, consectetuer adipiscing elit + 123 Your Street +Your City, ST 12345 +(123) 456-7890 +no_reply@example.com + EXPERIENCE +Company, Location — Job Title +MONTH 20XX - PRESENT +Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh. +Company, Location — Job Title +MONTH 20XX - MONTH 20XX +Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh. +Company, Location — Job Title +MONTH 20XX - MONTH 20XX +Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh. +EDUCATION +School Name, Location — Degree +MONTH 20XX - MONTH 20XX +Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore. +School Name, Location — Degree +MONTH 20XX - MONTH 20XX +Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam. +PROJECTS +Project Name — Detail +Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + SKILLS +* Lorem ipsum dolor sit amet. +* Consectetuer adipiscing elit. +* Sed diam nonummy nibh euismod tincidunt. +* Laoreet dolore magna aliquam erat volutpat. 
+AWARDS +Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy +Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. +Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy +Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. +LANGUAGES +Lorem ipsum, Dolor sit amet, Consectetuer \ No newline at end of file diff --git a/applications/ColossalQA/examples/conversation_agent_chatgpt.py b/applications/ColossalQA/examples/conversation_agent_chatgpt.py new file mode 100644 index 000000000..cb5c1a31d --- /dev/null +++ b/applications/ColossalQA/examples/conversation_agent_chatgpt.py @@ -0,0 +1,125 @@ +""" +Script for the multilingual conversation based experimental AI agent +We used ChatGPT as the language model +You need openai api key to run this script +""" + +import argparse +import os + +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.data_loader.table_dataloader import TableLoader +from langchain import LLMChain, OpenAI +from langchain.agents import Tool, ZeroShotAgent +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits import create_retriever_tool +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.llms import OpenAI +from langchain.memory import ChatMessageHistory, ConversationBufferMemory +from langchain.memory.chat_memory import ChatMessageHistory +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.utilities import SQLDatabase +from langchain.vectorstores import Chroma +from langchain_experimental.sql import SQLDatabaseChain + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Experimental AI agent powered by ChatGPT") + parser.add_argument("--open_ai_key_path", type=str, default=None, help="path to the plain text open_ai_key file") + + args = parser.parse_args() + + # Setup openai key + # Set env var OPENAI_API_KEY or load from a file + 
openai_key = open(args.open_ai_key_path).read() + os.environ["OPENAI_API_KEY"] = openai_key + + # Load data served on sql + print("Select files for constructing sql database") + tools = [] + + llm = OpenAI(temperature=0.0) + + while True: + file = input("Select a file to load or press Enter to exit:") + if file == "": + break + data_name = input("Enter a short description of the data:") + + table_loader = TableLoader( + [[file, data_name.replace(" ", "_")]], sql_path=f"sqlite:///{data_name.replace(' ', '_')}.db" + ) + sql_path = table_loader.get_sql_path() + + # Create sql database + db = SQLDatabase.from_uri(sql_path) + print(db.get_table_info()) + + db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True) + name = f"Query the SQL database regarding {data_name}" + description = ( + f"useful for when you need to answer questions based on data stored on a SQL database regarding {data_name}" + ) + tools.append( + Tool( + name=name, + func=db_chain.run, + description=description, + ) + ) + print(f"Added sql dataset\n\tname={name}\n\tdescription:{description}") + + # VectorDB + embedding = OpenAIEmbeddings() + + # Load data serve on sql + print("Select files for constructing retriever") + while True: + file = input("Select a file to load or press Enter to exit:") + if file == "": + break + data_name = input("Enter a short description of the data:") + retriever_data = DocumentLoader([[file, data_name.replace(" ", "_")]]).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0) + splits = text_splitter.split_documents(retriever_data) + + # Create vector store + vectordb = Chroma.from_documents(documents=splits, embedding=embedding) + # Create retriever + retriever = vectordb.as_retriever( + search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5, "k": 5} + ) + # Add to tool chain + name = f"Searches and returns documents regarding {data_name}." 
+ tools.append(create_retriever_tool(retriever, data_name, name)) + + prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools. If none of the tools can be used to answer the question. Do not share uncertain answer unless you think answering the question doesn't need any background information. In that case, try to answer the question directly.""" + suffix = """You are provided with the following background knowledge: + Begin!" + + {chat_history} + Question: {input} + {agent_scratchpad}""" + + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + input_variables=["input", "chat_history", "agent_scratchpad"], + ) + + memory = ConversationBufferMemory(memory_key="chat_history", chat_memory=ChatMessageHistory()) + + llm_chain = LLMChain(llm=OpenAI(temperature=0.7), prompt=prompt) + agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True) + agent_chain = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory) + + while True: + user_input = input("User: ") + if " end " in user_input: + print("Agent: Happy to chat with you :)") + break + agent_response = agent_chain.run(user_input) + print(f"Agent: {agent_response}") + table_loader.sql_engine.dispose() diff --git a/applications/ColossalQA/examples/retrieval_conversation_chatgpt.py b/applications/ColossalQA/examples/retrieval_conversation_chatgpt.py new file mode 100644 index 000000000..00b920d27 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_conversation_chatgpt.py @@ -0,0 +1,131 @@ +""" +Multilingual retrieval based conversation system backed by ChatGPT +""" + +import argparse +import os + +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.retriever import CustomRetriever +from langchain import LLMChain +from langchain.chains import RetrievalQA +from 
langchain.embeddings import HuggingFaceEmbeddings +from langchain.llms import OpenAI +from langchain.prompts.prompt import PromptTemplate +from langchain.text_splitter import RecursiveCharacterTextSplitter + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Multilingual retrieval based conversation system backed by ChatGPT") + parser.add_argument("--open_ai_key_path", type=str, default=None, help="path to the model") + parser.add_argument( + "--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing" + ) + + args = parser.parse_args() + + if not os.path.exists(args.sql_file_path): + os.makedirs(args.sql_file_path) + + # Setup openai key + # Set env var OPENAI_API_KEY or load from a file + openai_key = open(args.open_ai_key_path).read() + os.environ["OPENAI_API_KEY"] = openai_key + + llm = OpenAI(temperature=0.6) + + information_retriever = CustomRetriever(k=3, sql_file_path=args.sql_file_path, verbose=True) + # VectorDB + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # Define memory with summarization ability + memory = ConversationBufferWithSummary(llm=llm) + + # Load data to vector store + print("Select files for constructing retriever") + documents = [] + while True: + file = input("Enter a file path or press Enter directory without input to exit:").strip() + if file == "": + break + data_name = input("Enter a short description of the data:") + retriever_data = DocumentLoader([[file, data_name.replace(" ", "_")]]).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0) + splits = text_splitter.split_documents(retriever_data) + documents.extend(splits) + # Create retriever + information_retriever.add_documents(docs=documents, cleanup="incremental", mode="by_source", embedding=embedding) + + prompt_template = """Always answer as 
helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. + If the answer cannot be infered based on the given context, please don't share false information. + Use the context and chat history to respond to the human's input at the end or carry on the conversation. You should generate one response only. No following up is needed. + + context: + {context} + + chat history + {chat_history} + + Human: {question} + Assistant:""" + + prompt_template_disambiguate = """You are a helpful, respectful and honest assistant. You always follow the instruction. + Please replace any ambiguous references in the given sentence with the specific names or entities mentioned in the chat history or just output the original sentence if no chat history is provided or if the sentence doesn't contain ambiguous references. Your output should be the disambiguated sentence itself (in the same line as "disambiguated sentence:") and contain nothing else. + + Here is an example: + Chat history: + Human: I have a friend, Mike. Do you know him? + Assistant: Yes, I know a person named Mike + + sentence: What's his favorite food? + disambiguated sentence: What's Mike's favorite food? 
+ END OF EXAMPLE + + Chat history: + {chat_history} + + sentence: {input} + disambiguated sentence:""" + + PROMPT = PromptTemplate(template=prompt_template, input_variables=["question", "chat_history", "context"]) + + memory.initiate_document_retrieval_chain( + llm, + PROMPT, + information_retriever, + chain_type_kwargs={ + "chat_history": "", + }, + ) + + PROMPT_DISAMBIGUATE = PromptTemplate( + template=prompt_template_disambiguate, input_variables=["chat_history", "input"] + ) + + llm_chain = RetrievalQA.from_chain_type( + llm=llm, + verbose=False, + chain_type="stuff", + retriever=information_retriever, + chain_type_kwargs={"prompt": PROMPT, "memory": memory}, + ) + llm_chain_disambiguate = LLMChain(llm=llm, prompt=PROMPT_DISAMBIGUATE) + + def disambiguity(input): + out = llm_chain_disambiguate.run({"input": input, "chat_history": memory.buffer}) + return out.split("\n")[0] + + information_retriever.set_rephrase_handler(disambiguity) + + while True: + user_input = input("User: ") + if " end " in user_input: + print("Agent: Happy to chat with you :)") + break + agent_response = llm_chain.run(user_input) + agent_response = agent_response.split("\n")[0] + print(f"Agent: {agent_response}") diff --git a/applications/ColossalQA/examples/retrieval_conversation_en.py b/applications/ColossalQA/examples/retrieval_conversation_en.py new file mode 100644 index 000000000..e0fe46ae6 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_conversation_en.py @@ -0,0 +1,119 @@ +""" +Script for English retrieval based conversation system backed by LLaMa2 +""" +import argparse +import os + +from colossalqa.chain.retrieval_qa.base import RetrievalQA +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.prompt.prompt import ( + EN_RETRIEVAL_QA_REJECTION_ANSWER, + EN_RETRIEVAL_QA_TRIGGER_KEYWORDS, + PROMPT_DISAMBIGUATE_EN, + 
PROMPT_RETRIEVAL_QA_EN, +) +from colossalqa.retriever import CustomRetriever +from langchain import LLMChain +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser(description="English retrieval based conversation system backed by LLaMa2") + parser.add_argument("--model_path", type=str, default=None, help="path to the model") + parser.add_argument("--model_name", type=str, default=None, help="name of the model") + parser.add_argument( + "--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing" + ) + + args = parser.parse_args() + if not os.path.exists(args.sql_file_path): + os.makedirs(args.sql_file_path) + + colossal_api = ColossalAPI.get_api(args.model_name, args.model_path) + llm = ColossalLLM(n=1, api=colossal_api) + + # Define the retriever + information_retriever = CustomRetriever(k=3, sql_file_path=args.sql_file_path, verbose=True) + + # Setup embedding model locally + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # Define memory with summarization ability + memory = ConversationBufferWithSummary( + llm=llm, max_tokens=2000, llm_kwargs={"max_new_tokens": 50, "temperature": 0.6, "do_sample": True} + ) + + # Define the chain to preprocess the input + # Disambiguate the input. e.g. "What is the capital of that country?" -> "What is the capital of France?" 
+ llm_chain_disambiguate = LLMChain( + llm=llm, prompt=PROMPT_DISAMBIGUATE_EN, llm_kwargs={"max_new_tokens": 30, "temperature": 0.6, "do_sample": True} + ) + + def disambiguity(input): + out = llm_chain_disambiguate.run(input=input, chat_history=memory.buffer, stop=["\n"]) + return out.split("\n")[0] + + # Load data to vector store + print("Select files for constructing retriever") + documents = [] + while True: + file = input("Enter a file path or press Enter directory without input to exit:").strip() + if file == "": + break + data_name = input("Enter a short description of the data:") + separator = input( + "Enter a separator to force separating text into chunks, if no separator is given, the defaut separator is '\\n\\n'. Note that" + + "we use neural text spliter to split texts into chunks, the seperator only serves as a delimiter to force split long passage into" + + " chunks before passing to the neural network. Press ENTER directly to skip:" + ) + separator = separator if separator != "" else "\n\n" + retriever_data = DocumentLoader([[file, data_name.replace(" ", "_")]]).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20) + splits = text_splitter.split_documents(retriever_data) + documents.extend(splits) + # Create retriever + information_retriever.add_documents(docs=documents, cleanup="incremental", mode="by_source", embedding=embedding) + + # Set document retrieval chain, we need this chain to calculate prompt length + memory.initiate_document_retrieval_chain( + llm, + PROMPT_RETRIEVAL_QA_EN, + information_retriever, + chain_type_kwargs={ + "chat_history": "", + }, + ) + + # Define retrieval chain + retrieval_chain = RetrievalQA.from_chain_type( + llm=llm, + verbose=False, + chain_type="stuff", + retriever=information_retriever, + chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_QA_EN, "memory": memory}, + llm_kwargs={"max_new_tokens": 50, "temperature": 0.75, "do_sample": True}, + ) + # Set disambiguity 
handler + information_retriever.set_rephrase_handler(disambiguity) + + # Start conversation + while True: + user_input = input("User: ") + if "END" == user_input: + print("Agent: Happy to chat with you :)") + break + agent_response = retrieval_chain.run( + query=user_input, + stop=["Human: "], + rejection_trigger_keywrods=EN_RETRIEVAL_QA_TRIGGER_KEYWORDS, + rejection_answer=EN_RETRIEVAL_QA_REJECTION_ANSWER, + ) + agent_response = agent_response.split("\n")[0] + print(f"Agent: {agent_response}") diff --git a/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py b/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py new file mode 100644 index 000000000..d98a75592 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py @@ -0,0 +1,149 @@ +""" +Script for English retrieval based conversation system backed by LLaMa2 +""" +import argparse +import json +import os + +from colossalqa.chain.retrieval_qa.base import RetrievalQA +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.prompt.prompt import ( + EN_RETRIEVAL_QA_REJECTION_ANSWER, + EN_RETRIEVAL_QA_TRIGGER_KEYWORDS, + PROMPT_DISAMBIGUATE_EN, + PROMPT_RETRIEVAL_QA_EN, +) +from colossalqa.retriever import CustomRetriever +from langchain import LLMChain +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser(description="English retrieval based conversation system backed by LLaMa2") + parser.add_argument("--model_path", type=str, default=None, help="path to the model") + parser.add_argument("--model_name", type=str, default=None, help="name of the model") + parser.add_argument( + "--sql_file_path", type=str, default=None, help="path 
to the a empty folder for storing sql files for indexing" + ) + + args = parser.parse_args() + + if not os.path.exists(args.sql_file_path): + os.makedirs(args.sql_file_path) + + colossal_api = ColossalAPI.get_api(args.model_name, args.model_path) + llm = ColossalLLM(n=1, api=colossal_api) + + # Define the retriever + information_retriever = CustomRetriever(k=3, sql_file_path=args.sql_file_path, verbose=True) + + # Setup embedding model locally + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # Define memory with summarization ability + memory = ConversationBufferWithSummary( + llm=llm, max_tokens=2000, llm_kwargs={"max_new_tokens": 50, "temperature": 0.6, "do_sample": True} + ) + + # Define the chain to preprocess the input + # Disambiguate the input. e.g. "What is the capital of that country?" -> "What is the capital of France?" + llm_chain_disambiguate = LLMChain( + llm=llm, prompt=PROMPT_DISAMBIGUATE_EN, llm_kwargs={"max_new_tokens": 30, "temperature": 0.6, "do_sample": True} + ) + + def disambiguity(input): + out = llm_chain_disambiguate.run(input=input, chat_history=memory.buffer, stop=["\n"]) + return out.split("\n")[0] + + # Load data to vector store + print("Select files for constructing retriever") + documents = [] + + # preprocess data + if not os.path.exists("../data/data_sample/custom_service_preprocessed.json"): + if not os.path.exists("../data/data_sample/custom_service.json"): + raise ValueError( + "custom_service.json not found, please download the data from HuggingFace Datasets: qgyd2021/e_commerce_customer_service" + ) + data = json.load(open("../data/data_sample/custom_service.json", "r", encoding="utf8")) + preprocessed = [] + for row in data["rows"]: + preprocessed.append({"key": row["row"]["query"], "value": row["row"]["response"]}) + data = {} + data["data"] = preprocessed + with 
open("../data/data_sample/custom_service_preprocessed.json", "w", encoding="utf8") as f: + json.dump(data, f, ensure_ascii=False) + + # define metadata function which is used to format the prompt with value in metadata instead of key, + # the later is langchain's default behavior + def metadata_func(data_sample, additional_fields): + """ + metadata_func (Callable[Dict, Dict]): A function that takes in the JSON + object extracted by the jq_schema and the default metadata and returns + a dict of the updated metadata. + + To use key-value format, the metadata_func should be defined as follows: + metadata = {'value': 'a string to be used to format the prompt', 'is_key_value_mapping': True} + """ + metadata = {} + metadata["value"] = f"Question: {data_sample['key']}\nAnswer:{data_sample['value']}" + metadata["is_key_value_mapping"] = True + assert "value" not in additional_fields + assert "is_key_value_mapping" not in additional_fields + metadata.update(additional_fields) + return metadata + + retriever_data = DocumentLoader( + [["../data/data_sample/custom_service_preprocessed.json", "CustomerServiceDemo"]], + content_key="key", + metadata_func=metadata_func, + ).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20) + splits = text_splitter.split_documents(retriever_data) + documents.extend(splits) + + # Create retriever + information_retriever.add_documents(docs=documents, cleanup="incremental", mode="by_source", embedding=embedding) + + # Set document retrieval chain, we need this chain to calculate prompt length + memory.initiate_document_retrieval_chain( + llm, + PROMPT_RETRIEVAL_QA_EN, + information_retriever, + chain_type_kwargs={ + "chat_history": "", + }, + ) + + # Define retrieval chain + retrieval_chain = RetrievalQA.from_chain_type( + llm=llm, + verbose=False, + chain_type="stuff", + retriever=information_retriever, + chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_QA_EN, "memory": memory}, + 
llm_kwargs={"max_new_tokens": 50, "temperature": 0.75, "do_sample": True}, + ) + # Set disambiguity handler + information_retriever.set_rephrase_handler(disambiguity) + # Start conversation + while True: + user_input = input("User: ") + if "END" == user_input: + print("Agent: Happy to chat with you :)") + break + agent_response = retrieval_chain.run( + query=user_input, + stop=["Human: "], + rejection_trigger_keywrods=EN_RETRIEVAL_QA_TRIGGER_KEYWORDS, + rejection_answer=EN_RETRIEVAL_QA_REJECTION_ANSWER, + ) + agent_response = agent_response.split("\n")[0] + print(f"Agent: {agent_response}") diff --git a/applications/ColossalQA/examples/retrieval_conversation_universal.py b/applications/ColossalQA/examples/retrieval_conversation_universal.py new file mode 100644 index 000000000..361aa9833 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_conversation_universal.py @@ -0,0 +1,22 @@ +import argparse +from colossalqa.retrieval_conversation_universal import UniversalRetrievalConversation + +if __name__ == '__main__': + # Parse arguments + parser = argparse.ArgumentParser() + parser.add_argument('--en_model_path', type=str, default=None) + parser.add_argument('--zh_model_path', type=str, default=None) + parser.add_argument('--zh_model_name', type=str, default=None) + parser.add_argument('--en_model_name', type=str, default=None) + parser.add_argument('--sql_file_path', type=str, default=None, help='path to the a empty folder for storing sql files for indexing') + args = parser.parse_args() + + # Will ask for document paths at run time + session = UniversalRetrievalConversation(files_en=None, + files_zh=None, + zh_model_path=args.zh_model_path, en_model_path=args.en_model_path, + zh_model_name=args.zh_model_name, en_model_name=args.en_model_name, + sql_file_path=args.sql_file_path + ) + session.start_test_session() + \ No newline at end of file diff --git a/applications/ColossalQA/examples/retrieval_conversation_zh.py
b/applications/ColossalQA/examples/retrieval_conversation_zh.py new file mode 100644 index 000000000..cbbbefad7 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_conversation_zh.py @@ -0,0 +1,113 @@ +""" +Script for Chinese retrieval based conversation system backed by ChatGLM +""" +import argparse +import os + +from colossalqa.chain.retrieval_qa.base import RetrievalQA +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.prompt.prompt import ( + PROMPT_DISAMBIGUATE_ZH, + PROMPT_RETRIEVAL_QA_ZH, + SUMMARY_PROMPT_ZH, + ZH_RETRIEVAL_QA_REJECTION_ANSWER, + ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS, +) +from colossalqa.retriever import CustomRetriever +from colossalqa.text_splitter import ChineseTextSplitter +from langchain import LLMChain +from langchain.embeddings import HuggingFaceEmbeddings + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser(description="Chinese retrieval based conversation system backed by ChatGLM2") + parser.add_argument("--model_path", type=str, default=None, help="path to the model") + parser.add_argument("--model_name", type=str, default=None, help="name of the model") + parser.add_argument( + "--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing" + ) + + args = parser.parse_args() + + if not os.path.exists(args.sql_file_path): + os.makedirs(args.sql_file_path) + + colossal_api = ColossalAPI.get_api(args.model_name, args.model_path) + llm = ColossalLLM(n=1, api=colossal_api) + + # Setup embedding model locally + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + # Define the retriever + information_retriever = CustomRetriever(k=3, sql_file_path=args.sql_file_path, verbose=True) + + # Define memory with 
summarization ability + memory = ConversationBufferWithSummary( + llm=llm, + prompt=SUMMARY_PROMPT_ZH, + human_prefix="用户", + ai_prefix="Assistant", + max_tokens=2000, + llm_kwargs={"max_new_tokens": 50, "temperature": 0.6, "do_sample": True}, + ) + + # Define the chain to preprocess the input + # Disambiguate the input. e.g. "What is the capital of that country?" -> "What is the capital of France?" + llm_chain_disambiguate = LLMChain( + llm=llm, prompt=PROMPT_DISAMBIGUATE_ZH, llm_kwargs={"max_new_tokens": 30, "temperature": 0.6, "do_sample": True} + ) + + def disambiguity(input: str): + out = llm_chain_disambiguate.run(input=input, chat_history=memory.buffer, stop=["\n"]) + return out.split("\n")[0] + + # Load data to vector store + print("Select files for constructing retriever") + documents = [] + while True: + file = input("Enter a file path or press Enter directory without input to exit:").strip() + if file == "": + break + data_name = input("Enter a short description of the data:") + retriever_data = DocumentLoader([[file, data_name.replace(" ", "_")]]).all_data + + # Split + text_splitter = ChineseTextSplitter() + splits = text_splitter.split_documents(retriever_data) + documents.extend(splits) + # Create retriever + information_retriever.add_documents(docs=documents, cleanup="incremental", mode="by_source", embedding=embedding) + + # Set document retrieval chain, we need this chain to calculate prompt length + memory.initiate_document_retrieval_chain(llm, PROMPT_RETRIEVAL_QA_ZH, information_retriever) + + # Define retrieval chain + llm_chain = RetrievalQA.from_chain_type( + llm=llm, + verbose=False, + chain_type="stuff", + retriever=information_retriever, + chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_QA_ZH, "memory": memory}, + llm_kwargs={"max_new_tokens": 150, "temperature": 0.6, "do_sample": True}, + ) + + # Set disambiguity handler + information_retriever.set_rephrase_handler(disambiguity) + + # Start conversation + while True: + user_input = 
input("User: ") + if "END" == user_input: + print("Agent: Happy to chat with you :)") + break + agent_response = llm_chain.run( + query=user_input, + stop=["</答案>"], + doc_prefix="支持文档", + rejection_trigger_keywrods=ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS, + rejection_answer=ZH_RETRIEVAL_QA_REJECTION_ANSWER, + ) + print(f"Agent: {agent_response}") diff --git a/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py b/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py new file mode 100644 index 000000000..adb654494 --- /dev/null +++ b/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py @@ -0,0 +1,97 @@ +""" +Script for Chinese retrieval based intent classification (customer service use case); the model is selected with --model_name and --model_path +""" +import argparse +import os + +from colossalqa.chain.retrieval_qa.base import RetrievalQA +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from colossalqa.prompt.prompt import PROMPT_RETRIEVAL_CLASSIFICATION_USE_CASE_ZH +from colossalqa.retriever import CustomRetriever +from colossalqa.text_splitter import ChineseTextSplitter +from langchain.embeddings import HuggingFaceEmbeddings + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser(description="Chinese retrieval based intent classification system (customer service use case)") + parser.add_argument("--model_path", type=str, default=None, help="path to the model") + parser.add_argument("--model_name", type=str, default=None, help="name of the model") + parser.add_argument( + "--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing" + ) + + args = parser.parse_args() + + if not os.path.exists(args.sql_file_path): + os.makedirs(args.sql_file_path) + + colossal_api = ColossalAPI.get_api(args.model_name, args.model_path) + llm = ColossalLLM(n=1, api=colossal_api) + + # Define the retriever +
information_retriever = CustomRetriever(k=2, sql_file_path=args.sql_file_path, verbose=True) + + # Setup embedding model locally + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # Load data to vector store + print("Select files for constructing retriever") + documents = [] + + # define metadata function which is used to format the prompt with value in metadata instead of key, + # the later is langchain's default behavior + def metadata_func(data_sample, additional_fields): + """ + metadata_func (Callable[Dict, Dict]): A function that takes in the JSON + object extracted by the jq_schema and the default metadata and returns + a dict of the updated metadata. + + To use key-value format, the metadata_func should be defined as follows: + metadata = {'value': 'a string to be used to format the prompt', 'is_key_value_mapping': True} + """ + metadata = {} + metadata["value"] = f"Question: {data_sample['key']}\nAnswer:{data_sample['value']}" + metadata["is_key_value_mapping"] = True + assert "value" not in additional_fields + assert "is_key_value_mapping" not in additional_fields + metadata.update(additional_fields) + return metadata + + retriever_data = DocumentLoader( + [["../data/data_sample/custom_service_classification.json", "CustomerServiceDemo"]], + content_key="key", + metadata_func=metadata_func, + ).all_data + + # Split + text_splitter = ChineseTextSplitter() + splits = text_splitter.split_documents(retriever_data) + documents.extend(splits) + + # Create retriever + information_retriever.add_documents(docs=documents, cleanup="incremental", mode="by_source", embedding=embedding) + + # Define retrieval chain + retrieval_chain = RetrievalQA.from_chain_type( + llm=llm, + verbose=True, + chain_type="stuff", + retriever=information_retriever, + chain_type_kwargs={"prompt": PROMPT_RETRIEVAL_CLASSIFICATION_USE_CASE_ZH}, + llm_kwargs={"max_new_tokens": 50, 
"temperature": 0.75, "do_sample": True}, + ) + # Set disambiguity handler + + # Start conversation + while True: + user_input = input("User: ") + if "END" == user_input: + print("Agent: Happy to chat with you :)") + break + # 要使用和custom_service_classification.json 里的key 类似的句子做输入 + agent_response = retrieval_chain.run(query=user_input, stop=["Human: "]) + agent_response = agent_response.split("\n")[0] + print(f"Agent: {agent_response}") diff --git a/applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py b/applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py new file mode 100644 index 000000000..0ad547c00 --- /dev/null +++ b/applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py @@ -0,0 +1,184 @@ +from typing import Dict, Tuple + +from colossalqa.chain.retrieval_qa.base import RetrievalQA +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.mylogging import get_logger +from colossalqa.prompt.prompt import ( + PROMPT_DISAMBIGUATE_ZH, + PROMPT_RETRIEVAL_QA_ZH, + SUMMARY_PROMPT_ZH, + ZH_RETRIEVAL_QA_REJECTION_ANSWER, + ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS, +) +from colossalqa.retriever import CustomRetriever +from colossalqa.text_splitter import ChineseTextSplitter +from langchain import LLMChain +from langchain.embeddings import HuggingFaceEmbeddings + +logger = get_logger() + +DEFAULT_RAG_CFG = { + "retri_top_k": 3, + "retri_kb_file_path": "./", + "verbose": True, + "mem_summary_prompt": SUMMARY_PROMPT_ZH, + "mem_human_prefix": "用户", + "mem_ai_prefix": "Assistant", + "mem_max_tokens": 2000, + "mem_llm_kwargs": {"max_new_tokens": 50, "temperature": 1, "do_sample": True}, + "disambig_prompt": PROMPT_DISAMBIGUATE_ZH, + "disambig_llm_kwargs": {"max_new_tokens": 30, "temperature": 1, "do_sample": True}, + "embed_model_name_or_path": "moka-ai/m3e-base", + "embed_model_device": {"device": "cpu"}, + "gen_llm_kwargs": {"max_new_tokens": 100, "temperature": 1, "do_sample": 
True}, + "gen_qa_prompt": PROMPT_RETRIEVAL_QA_ZH, +} + + +class RAG_ChatBot: + def __init__( + self, + llm, + rag_config, + ) -> None: + self.llm = llm + self.rag_config = rag_config + self.set_embed_model(**self.rag_config) + self.set_text_splitter(**self.rag_config) + self.set_memory(**self.rag_config) + self.set_info_retriever(**self.rag_config) + self.set_rag_chain(**self.rag_config) + if self.rag_config.get("disambig_prompt", None): + self.set_disambig_retriv(**self.rag_config) + + def set_embed_model(self, **kwargs): + self.embed_model = HuggingFaceEmbeddings( + model_name=kwargs["embed_model_name_or_path"], + model_kwargs=kwargs["embed_model_device"], + encode_kwargs={"normalize_embeddings": False}, + ) + + def set_text_splitter(self, **kwargs): + # Initialize text_splitter + self.text_splitter = ChineseTextSplitter() + + def set_memory(self, **kwargs): + params = {"llm_kwargs": kwargs["mem_llm_kwargs"]} if kwargs.get("mem_llm_kwargs", None) else {} + # Initialize memory with summarization ability + self.memory = ConversationBufferWithSummary( + llm=self.llm, + prompt=kwargs["mem_summary_prompt"], + human_prefix=kwargs["mem_human_prefix"], + ai_prefix=kwargs["mem_ai_prefix"], + max_tokens=kwargs["mem_max_tokens"], + **params, + ) + + def set_info_retriever(self, **kwargs): + self.info_retriever = CustomRetriever( + k=kwargs["retri_top_k"], sql_file_path=kwargs["retri_kb_file_path"], verbose=kwargs["verbose"] + ) + + def set_rag_chain(self, **kwargs): + params = {"llm_kwargs": kwargs["gen_llm_kwargs"]} if kwargs.get("gen_llm_kwargs", None) else {} + self.rag_chain = RetrievalQA.from_chain_type( + llm=self.llm, + verbose=kwargs["verbose"], + chain_type="stuff", + retriever=self.info_retriever, + chain_type_kwargs={"prompt": kwargs["gen_qa_prompt"], "memory": self.memory}, + **params, + ) + + def split_docs(self, documents): + doc_splits = self.text_splitter.split_documents(documents) + return doc_splits + + def set_disambig_retriv(self, **kwargs): + params = 
{"llm_kwargs": kwargs["disambig_llm_kwargs"]} if kwargs.get("disambig_llm_kwargs", None) else {} + self.llm_chain_disambiguate = LLMChain(llm=self.llm, prompt=kwargs["disambig_prompt"], **params) + + def disambiguity(input: str): + out = self.llm_chain_disambiguate.run(input=input, chat_history=self.memory.buffer, stop=["\n"]) + return out.split("\n")[0] + + self.info_retriever.set_rephrase_handler(disambiguity) + + def load_doc_from_console(self, json_parse_args: Dict = {}): + documents = [] + print("Select files for constructing Chinese retriever") + while True: + file = input("Enter a file path or press Enter directly without input to exit:").strip() + if file == "": + break + data_name = input("Enter a short description of the data:") + docs = DocumentLoader([[file, data_name.replace(" ", "_")]], **json_parse_args).all_data + documents.extend(docs) + self.documents = documents + self.split_docs_and_add_to_mem(**self.rag_config) + + def load_doc_from_files(self, files, data_name="default_kb", json_parse_args: Dict = {}): + documents = [] + for file in files: + docs = DocumentLoader([[file, data_name.replace(" ", "_")]], **json_parse_args).all_data + documents.extend(docs) + self.documents = documents + self.split_docs_and_add_to_mem(**self.rag_config) + + def split_docs_and_add_to_mem(self, **kwargs): + self.doc_splits = self.split_docs(self.documents) + self.info_retriever.add_documents( + docs=self.doc_splits, cleanup="incremental", mode="by_source", embedding=self.embed_model + ) + self.memory.initiate_document_retrieval_chain(self.llm, kwargs["gen_qa_prompt"], self.info_retriever) + + def reset_config(self, rag_config): + self.rag_config = rag_config + self.set_embed_model(**self.rag_config) + self.set_text_splitter(**self.rag_config) + self.set_memory(**self.rag_config) + self.set_info_retriever(**self.rag_config) + self.set_rag_chain(**self.rag_config) + if self.rag_config.get("disambig_prompt", None): + self.set_disambig_retriv(**self.rag_config) + + def 
run(self, user_input: str, memory: ConversationBufferWithSummary) -> Tuple[str, ConversationBufferWithSummary]: + """Answer user_input with the RAG chain; return (first line of the reply, conversation memory).""" + if memory: + # Copy the caller's history into self.memory, the object handed to the chain in set_rag_chain + self.memory.buffered_history.messages = memory.buffered_history.messages + self.memory.summarized_history_temp.messages = memory.summarized_history_temp.messages + result = self.rag_chain.run( + query=user_input, + stop=[(memory or self.memory).human_prefix + ": "], + rejection_trigger_keywrods=ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS, + rejection_answer=ZH_RETRIEVAL_QA_REJECTION_ANSWER, + ) + return result.split("\n")[0], memory or self.memory + + def start_test_session(self): + """Run a simple console session for testing purposes; type END to quit.""" + while True: + user_input = input("User: ") + if "END" == user_input: + print("Agent: Happy to chat with you :)") + break + agent_response, self.memory = self.run(user_input, self.memory) + print(f"Agent: {agent_response}") + + +if __name__ == "__main__": + # Initialize a LangChain LLM (here we use ChatGPT as an example) + from langchain.llms import OpenAI + + llm = OpenAI(openai_api_key="YOUR_OPENAI_API_KEY") + + # chatgpt cannot control temperature, do_sample, etc. + DEFAULT_RAG_CFG["mem_llm_kwargs"] = None + DEFAULT_RAG_CFG["disambig_llm_kwargs"] = None + DEFAULT_RAG_CFG["gen_llm_kwargs"] = None + + rag = RAG_ChatBot(llm, DEFAULT_RAG_CFG) + rag.load_doc_from_console() + rag.start_test_session() diff --git a/applications/ColossalQA/examples/webui_demo/README.md b/applications/ColossalQA/examples/webui_demo/README.md new file mode 100644 index 000000000..15ce6b5b7 --- /dev/null +++ b/applications/ColossalQA/examples/webui_demo/README.md @@ -0,0 +1,37 @@ +# ColossalQA WebUI Demo + +This demo provides a simple WebUI for ColossalQA, enabling you to upload your files as a knowledge base and interact with them through a chat interface in your browser. + +The `server.py` initializes the backend RAG chain that can be backed by various language models (e.g., ChatGPT, Huawei Pangu, ChatGLM2). Meanwhile, `webui.py` launches a Gradio-supported chatbot interface.
+ +# Usage + +## Installation + +First, install the necessary dependencies for ColossalQA: + +```sh +git clone https://github.com/hpcaitech/ColossalAI.git +cd ColossalAI/applications/ColossalQA/ +pip install -e . +``` + +## Configure the RAG Chain + +Customize the RAG Chain settings, such as the embedding model (default: moka-ai/m3e-base) and the language model, in the `start_colossal_qa.sh` script. + +For API-based language models (like ChatGPT or Huawei Pangu), provide your API key for authentication. For locally-run models, indicate the path to the model's checkpoint file. + +If you want to customize prompts in the RAG Chain, you can have a look at the `RAG_ChatBot.py` file to modify them. + +## Run WebUI Demo + +Execute the following command to start the demo: + +```sh +bash start_colossal_qa.sh +``` + +After launching the script, you can upload files and engage with the chatbot through your web browser. + + \ No newline at end of file diff --git a/applications/ColossalQA/examples/webui_demo/img/avatar_ai.png b/applications/ColossalQA/examples/webui_demo/img/avatar_ai.png new file mode 100644 index 000000000..031daa405 Binary files /dev/null and b/applications/ColossalQA/examples/webui_demo/img/avatar_ai.png differ diff --git a/applications/ColossalQA/examples/webui_demo/img/avatar_user.png b/applications/ColossalQA/examples/webui_demo/img/avatar_user.png new file mode 100644 index 000000000..b5a8014b8 Binary files /dev/null and b/applications/ColossalQA/examples/webui_demo/img/avatar_user.png differ diff --git a/applications/ColossalQA/examples/webui_demo/server.py b/applications/ColossalQA/examples/webui_demo/server.py new file mode 100644 index 000000000..c3147594f --- /dev/null +++ b/applications/ColossalQA/examples/webui_demo/server.py @@ -0,0 +1,117 @@ +import argparse +import copy +import json +import os +import random +import string +from http.server import BaseHTTPRequestHandler, HTTPServer +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from
colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.retrieval_conversation_zh import ChineseRetrievalConversation +from colossalqa.retriever import CustomRetriever +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from RAG_ChatBot import RAG_ChatBot, DEFAULT_RAG_CFG + +# Define the mapping between embed_model_name(passed from Front End) and the actual path on the back end server +EMBED_MODEL_DICT = { + "m3e": os.environ.get("EMB_MODEL_PATH", DEFAULT_RAG_CFG["embed_model_name_or_path"]) +} +# Define the mapping between LLM_name(passed from Front End) and the actual path on the back end server +LLM_DICT = { + "chatglm2": os.environ.get("CHAT_LLM_PATH", "THUDM/chatglm-6b"), + "pangu": "Pangu_API", + "chatgpt": "OpenAI_API" +} + +def randomword(length): + letters = string.ascii_lowercase + return "".join(random.choice(letters) for i in range(length)) + +class ColossalQAServerRequestHandler(BaseHTTPRequestHandler): + chatbot = None + def _set_response(self): + """ + set http header for response + """ + self.send_response(200) + self.send_header("Content-type", "application/json") + self.end_headers() + + def do_POST(self): + content_length = int(self.headers["Content-Length"]) + post_data = self.rfile.read(content_length) + received_json = json.loads(post_data.decode("utf-8")) + print(received_json) + # conversation_ready is False(user's first request): Need to upload files and initialize the RAG chain + if received_json["conversation_ready"] is False: + self.rag_config = DEFAULT_RAG_CFG.copy() + try: + assert received_json["embed_model_name"] in EMBED_MODEL_DICT + assert received_json["llm_name"] in LLM_DICT + self.docs_files = received_json["docs"] + embed_model_name, llm_name = received_json["embed_model_name"], received_json["llm_name"] + + # Find the embed_model/llm ckpt path on the back end server. 
+ embed_model_path, llm_path = EMBED_MODEL_DICT[embed_model_name], LLM_DICT[llm_name] + self.rag_config["embed_model_name_or_path"] = embed_model_path + + # Create the storage path for knowledge base files + self.rag_config["retri_kb_file_path"] = os.path.join(os.environ["TMP"], "colossalqa_kb/"+randomword(20)) + if not os.path.exists(self.rag_config["retri_kb_file_path"]): + os.makedirs(self.rag_config["retri_kb_file_path"]) + + if (embed_model_path is not None) and (llm_path is not None): + # ---- Intialize LLM, QA_chatbot here ---- + print("Initializing LLM...") + if llm_path == "Pangu_API": + from colossalqa.local.pangu_llm import Pangu + self.llm = Pangu(id=1) + self.llm.set_auth_config() # verify user's auth info here + self.rag_config["mem_llm_kwargs"] = None + self.rag_config["disambig_llm_kwargs"] = None + self.rag_config["gen_llm_kwargs"] = None + elif llm_path == "OpenAI_API": + from langchain.llms import OpenAI + self.llm = OpenAI() + self.rag_config["mem_llm_kwargs"] = None + self.rag_config["disambig_llm_kwargs"] = None + self.rag_config["gen_llm_kwargs"] = None + else: + # ** (For Testing Only) ** + # In practice, all LLMs will run on the cloud platform and accessed by API, instead of running locally. 
+ # initialize model from model_path by using ColossalLLM + self.rag_config["mem_llm_kwargs"] = {"max_new_tokens": 50, "temperature": 1, "do_sample": True} + self.rag_config["disambig_llm_kwargs"] = {"max_new_tokens": 30, "temperature": 1, "do_sample": True} + self.rag_config["gen_llm_kwargs"] = {"max_new_tokens": 100, "temperature": 1, "do_sample": True} + self.colossal_api = ColossalAPI(llm_name, llm_path) + self.llm = ColossalLLM(n=1, api=self.colossal_api) + + print(f"Initializing RAG Chain...") + print("RAG_CONFIG: ", self.rag_config) + self.__class__.chatbot = RAG_ChatBot(self.llm, self.rag_config) + print("Loading Files....\n", self.docs_files) + self.__class__.chatbot.load_doc_from_files(self.docs_files) + # ----------------------------------------------------------------------------------- + res = {"response": f"文件上传完成,模型初始化完成,让我们开始对话吧!(后端模型:{llm_name})", "error": "", "conversation_ready": True} + except Exception as e: + res = {"response": "文件上传或模型初始化有误,无法开始对话。", + "error": f"Error in File Uploading and/or RAG initialization. Error details: {e}", + "conversation_ready": False} + # conversation_ready is True: Chatbot and docs are all set. Ready to chat. 
+ else: + user_input = received_json["user_input"] + chatbot_response, self.__class__.chatbot.memory = self.__class__.chatbot.run(user_input, self.__class__.chatbot.memory) + res = {"response": chatbot_response, "error": "", "conversation_ready": True} + self._set_response() + self.wfile.write(json.dumps(res).encode("utf-8")) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Chinese retrieval based conversation system") + parser.add_argument("--port", type=int, default=13666, help="port on localhost to start the server") + args = parser.parse_args() + server_address = ("localhost", args.port) + httpd = HTTPServer(server_address, ColossalQAServerRequestHandler) + print(f"Starting server on port {args.port}...") + httpd.serve_forever() + diff --git a/applications/ColossalQA/examples/webui_demo/start_colossal_qa.sh b/applications/ColossalQA/examples/webui_demo/start_colossal_qa.sh new file mode 100755 index 000000000..c9c7b71c3 --- /dev/null +++ b/applications/ColossalQA/examples/webui_demo/start_colossal_qa.sh @@ -0,0 +1,43 @@ +#!/bin/bash +cleanup() { + echo "Caught Signal ... cleaning up." 
+ pkill -P $$ # kill all subprocess of this script + exit 1 # exit script +} +# 'cleanup' is trigered when receive SIGINT(Ctrl+C) OR SIGTERM(kill) signal +trap cleanup INT TERM + +# Disable your proxy +# unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy + +# Path to store knowledge base(Home Directory by default) +export TMP=$HOME + +# Use m3e as embedding model +export EMB_MODEL="m3e" # moka-ai/m3e-base model will be download automatically +# export EMB_MODEL_PATH="PATH_TO_LOCAL_CHECKPOINT/m3e-base" # you can also specify the local path to embedding model + +# Choose a backend LLM +# - ChatGLM2 +# export CHAT_LLM="chatglm2" +# export CHAT_LLM_PATH="PATH_TO_LOCAL_CHECKPOINT/chatglm2-6b" + +# - ChatGPT +export CHAT_LLM="chatgpt" +# Auth info for OpenAI API +export OPENAI_API_KEY="YOUR_OPENAI_API_KEY" + +# - Pangu +# export CHAT_LLM="pangu" +# # Auth info for Pangu API +# export URL="" +# export USERNAME="" +# export PASSWORD="" +# export DOMAIN_NAME="" + +# Run server.py and colossalqa_webui.py in the background +python server.py & +python webui.py & + +# Wait for all processes to finish +wait diff --git a/applications/ColossalQA/examples/webui_demo/webui.py b/applications/ColossalQA/examples/webui_demo/webui.py new file mode 100644 index 000000000..2d2910b5a --- /dev/null +++ b/applications/ColossalQA/examples/webui_demo/webui.py @@ -0,0 +1,102 @@ +import json +import os +import gradio as gr +import requests + +RAG_STATE = {"conversation_ready": False, # Conversation is not ready until files are uploaded and RAG chain is initialized + "embed_model_name": os.environ.get("EMB_MODEL", "m3e"), + "llm_name": os.environ.get("CHAT_LLM", "chatgpt")} +URL = "http://localhost:13666" + +def get_response(client_data, URL): + headers = {"Content-type": "application/json"} + print(f"Sending request to server url: {URL}") + response = requests.post(URL, data=json.dumps(client_data), headers=headers) + response = json.loads(response.content) + return response + +def 
add_text(history, text): + history = history + [(text, None)] + return history, gr.update(value=None, interactive=True) + +def add_file(history, files): + global RAG_STATE + RAG_STATE["conversation_ready"] = False # after adding new files, reset the ChatBot + RAG_STATE["upload_files"]=[file.name for file in files] + files_string = "\n".join([os.path.basename(path) for path in RAG_STATE["upload_files"]]) + print(files_string) + history = history + [(files_string, None)] + return history + +def bot(history): + print(history) + global RAG_STATE + if not RAG_STATE["conversation_ready"]: + # Upload files and initialize models + client_data = { + "docs": RAG_STATE["upload_files"], + "embed_model_name": RAG_STATE["embed_model_name"], # Select embedding model name here + "llm_name": RAG_STATE["llm_name"], # Select LLM model name here. ["pangu", "chatglm2"] + "conversation_ready": RAG_STATE["conversation_ready"] + } + else: + client_data = {} + client_data["conversation_ready"] = RAG_STATE["conversation_ready"] + client_data["user_input"] = history[-1][0].strip() + + response = get_response(client_data, URL) # TODO: async request, to avoid users waiting the model initialization too long + print(response) + if response["error"] != "": + raise gr.Error(response["error"]) + + RAG_STATE["conversation_ready"] = response["conversation_ready"] + history[-1][1] = response["response"] + yield history + + +CSS = """ +.contain { display: flex; flex-direction: column; height: 100vh } +#component-0 { height: 100%; } +#chatbot { flex-grow: 1; } +""" + +header_html = """ +<div style="background: linear-gradient(to right, #2a0cf4, #7100ed, #9800e6, #b600df, #ce00d9, #dc0cd1, #e81bca, #f229c3, #f738ba, #f946b2, #fb53ab, #fb5fa5); padding: 20px; text-align: left;"> + <h1 style="color: white;">ColossalQA</h1> + <h4 style="color: white;">ColossalQA</h4> +</div> +""" + +with gr.Blocks(css=CSS) as demo: + html = gr.HTML(header_html) + chatbot = gr.Chatbot( + [], + elem_id="chatbot", + 
bubble_full_width=False, + avatar_images=( + (os.path.join(os.path.dirname(__file__), "img/avatar_user.png")), + (os.path.join(os.path.dirname(__file__), "img/avatar_ai.png")), + ), + ) + + with gr.Row(): + txt = gr.Textbox( + scale=4, + show_label=False, + placeholder="Enter text and press enter, or upload an image", + container=True, + autofocus=True, + ) + btn = gr.UploadButton("📁", file_types=["file"], file_count="multiple") + + txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(bot, chatbot, chatbot) + # Clear the original textbox + txt_msg.then(lambda: gr.update(value=None, interactive=True), None, [txt], queue=False) + # Click Upload Button: 1. upload files 2. send config to backend, initalize model 3. get response "conversation_ready" = True/False + file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(bot, chatbot, chatbot) + + + +if __name__ == "__main__": + demo.queue() + demo.launch(share=True) # share=True will release a public link of the demo diff --git a/applications/ColossalQA/pytest.ini b/applications/ColossalQA/pytest.ini new file mode 100644 index 000000000..9e84349f2 --- /dev/null +++ b/applications/ColossalQA/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + dist: tests which are run in a multi-GPU or multi-machine environment (at least 4 GPUs) + largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs) \ No newline at end of file diff --git a/applications/ColossalQA/requirements.txt b/applications/ColossalQA/requirements.txt new file mode 100644 index 000000000..2b86cfd0e --- /dev/null +++ b/applications/ColossalQA/requirements.txt @@ -0,0 +1,22 @@ +transformers>=4.20.1 +tqdm==4.66.1 +datasets==2.13.0 +torch<2.0.0, >=1.12.1 +langchain==0.0.330 +langchain-experimental==0.0.37 +tokenizers==0.13.3 +modelscope==1.9.0 +sentencepiece==0.1.99 +gpustat==1.1.1 +sqlalchemy==2.0.20 +pytest==7.4.2 +# coati install from ../Chat +sentence-transformers==2.2.2 
+chromadb==0.4.9 +openai==0.28.0 #used for chatgpt please install directly from openai repo +tiktoken==0.5.1 +unstructured==0.10.14 +pypdf==3.16.0 +jq==1.6.0 +gradio==3.44.4 +Requests==2.31.0 diff --git a/applications/ColossalQA/setup.py b/applications/ColossalQA/setup.py new file mode 100644 index 000000000..b1085942d --- /dev/null +++ b/applications/ColossalQA/setup.py @@ -0,0 +1,38 @@ +from setuptools import find_packages, setup + + +def fetch_requirements(path): + with open(path, "r") as fd: + return [r.strip() for r in fd.readlines()] + + +def fetch_readme(): + with open("README.md", encoding="utf-8") as f: + return f.read() + + +def fetch_version(): + with open("version.txt", "r") as f: + return f.read().strip() + + +print(find_packages(exclude=("tests", "*.egg-info", "data", "examples"))) +setup( + name="colossalqa", + version=fetch_version(), + packages=find_packages(exclude=("tests", "*.egg-info", "data", "examples")), + description="Colossal-AI powered retrieval QA", + long_description=fetch_readme(), + long_description_content_type="text/markdown", + license="Apache Software License 2.0", + url="https://github.com/hpcaitech/Coati", + install_requires=fetch_requirements("requirements.txt"), + python_requires=">=3.6", + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Environment :: GPU :: NVIDIA CUDA", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: System :: Distributed Computing", + ], +) diff --git a/applications/ColossalQA/tests/__init__.py b/applications/ColossalQA/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/applications/ColossalQA/tests/test_document_loader.py b/applications/ColossalQA/tests/test_document_loader.py new file mode 100644 index 000000000..163b0d2cc --- /dev/null +++ b/applications/ColossalQA/tests/test_document_loader.py @@ -0,0 +1,21 @@ +import os +from colossalqa.data_loader.document_loader import 
DocumentLoader + + +def test_add_document(): + PATH = os.environ.get('TEST_DOCUMENT_LOADER_DATA_PATH') + files = [[PATH, 'all data']] + document_loader = DocumentLoader(files) + documents = document_loader.all_data + all_files = [] + for doc in documents: + assert isinstance(doc.page_content, str)==True + if doc.metadata['source'] not in all_files: + all_files.append(doc.metadata['source']) + print(all_files) + assert len(all_files) == 6 + + +if __name__=='__main__': + test_add_document() + diff --git a/applications/ColossalQA/tests/test_memory.py b/applications/ColossalQA/tests/test_memory.py new file mode 100644 index 000000000..a364eda5e --- /dev/null +++ b/applications/ColossalQA/tests/test_memory.py @@ -0,0 +1,117 @@ +import os + +from colossalqa.data_loader.document_loader import DocumentLoader +from colossalqa.local.llm import ColossalAPI, ColossalLLM +from colossalqa.memory import ConversationBufferWithSummary +from colossalqa.prompt.prompt import PROMPT_RETRIEVAL_QA_ZH +from colossalqa.retriever import CustomRetriever +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter + + +def test_memory_long(): + model_path = os.environ.get("EN_MODEL_PATH") + data_path = os.environ.get("TEST_DATA_PATH_EN") + model_name = os.environ.get("EN_MODEL_NAME") + sql_file_path = os.environ.get("SQL_FILE_PATH") + + if not os.path.exists(sql_file_path): + os.makedirs(sql_file_path) + + colossal_api = ColossalAPI.get_api(model_name, model_path) + llm = ColossalLLM(n=4, api=colossal_api) + memory = ConversationBufferWithSummary( + llm=llm, max_tokens=600, llm_kwargs={"max_new_tokens": 50, "temperature": 0.6, "do_sample": True} + ) + retriever_data = DocumentLoader([[data_path, "company information"]]).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20) + splits = text_splitter.split_documents(retriever_data) + + embedding = HuggingFaceEmbeddings( + 
model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # Create retriever + information_retriever = CustomRetriever(k=3, sql_file_path=sql_file_path) + information_retriever.add_documents(docs=splits, cleanup="incremental", mode="by_source", embedding=embedding) + + memory.initiate_document_retrieval_chain( + llm, + PROMPT_RETRIEVAL_QA_ZH, + information_retriever, + chain_type_kwargs={ + "chat_history": "", + }, + ) + + # This keep the prompt length excluding dialogues the same + docs = information_retriever.get_relevant_documents("this is a test input.") + prompt_length = memory.chain.prompt_length(docs, **{"question": "this is a test input.", "chat_history": ""}) + remain = 600 - prompt_length + have_summarization_flag = False + for i in range(40): + chat_history = memory.load_memory_variables({"question": "this is a test input.", "input_documents": docs})[ + "chat_history" + ] + + assert memory.get_conversation_length() <= remain + memory.save_context({"question": "this is a test input."}, {"output": "this is a test output."}) + if "A summarization of historical conversation:" in chat_history: + have_summarization_flag = True + assert have_summarization_flag == True + + +def test_memory_short(): + model_path = os.environ.get("EN_MODEL_PATH") + data_path = os.environ.get("TEST_DATA_PATH_EN") + model_name = os.environ.get("EN_MODEL_NAME") + sql_file_path = os.environ.get("SQL_FILE_PATH") + + if not os.path.exists(sql_file_path): + os.makedirs(sql_file_path) + + colossal_api = ColossalAPI.get_api(model_name, model_path) + llm = ColossalLLM(n=4, api=colossal_api) + memory = ConversationBufferWithSummary( + llm=llm, llm_kwargs={"max_new_tokens": 50, "temperature": 0.6, "do_sample": True} + ) + retriever_data = DocumentLoader([[data_path, "company information"]]).all_data + + # Split + text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20) + splits = 
text_splitter.split_documents(retriever_data) + + embedding = HuggingFaceEmbeddings( + model_name="moka-ai/m3e-base", model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": False} + ) + + # create retriever + information_retriever = CustomRetriever(k=3, sql_file_path=sql_file_path) + information_retriever.add_documents(docs=splits, cleanup="incremental", mode="by_source", embedding=embedding) + + memory.initiate_document_retrieval_chain( + llm, + PROMPT_RETRIEVAL_QA_ZH, + information_retriever, + chain_type_kwargs={ + "chat_history": "", + }, + ) + + # This keep the prompt length excluding dialogues the same + docs = information_retriever.get_relevant_documents("this is a test input.", return_scores=True) + + for i in range(4): + chat_history = memory.load_memory_variables({"question": "this is a test input.", "input_documents": docs})[ + "chat_history" + ] + assert chat_history.count("Assistant: this is a test output.") == i + assert chat_history.count("Human: this is a test input.") == i + memory.save_context({"question": "this is a test input."}, {"output": "this is a test output."}) + + +if __name__ == "__main__": + test_memory_short() + test_memory_long() diff --git a/applications/ColossalQA/tests/test_retrieval_qa.py b/applications/ColossalQA/tests/test_retrieval_qa.py new file mode 100644 index 000000000..76867b562 --- /dev/null +++ b/applications/ColossalQA/tests/test_retrieval_qa.py @@ -0,0 +1,62 @@ +import os + +from colossalqa.retrieval_conversation_universal import UniversalRetrievalConversation + + +def test_en_retrievalQA(): + data_path_en = os.environ.get('TEST_DATA_PATH_EN') + data_path_zh = os.environ.get('TEST_DATA_PATH_ZH') + en_model_path = os.environ.get('EN_MODEL_PATH') + zh_model_path = os.environ.get('ZH_MODEL_PATH') + zh_model_name = os.environ.get('ZH_MODEL_NAME') + en_model_name = os.environ.get('EN_MODEL_NAME') + sql_file_path = os.environ.get('SQL_FILE_PATH') + qa_session = UniversalRetrievalConversation(files_en=[{ + 
'data_path': data_path_en, + 'name': 'company information', + 'separator': '\n' + }], + files_zh=[{ + 'data_path': data_path_zh, + 'name': 'company information', + 'separator': '\n' + }], + zh_model_path=zh_model_path, + en_model_path=en_model_path, + zh_model_name=zh_model_name, + en_model_name=en_model_name, + sql_file_path=sql_file_path) + ans = qa_session.run("which company runs business in hotel industry?", which_language='en') + print(ans) + + +def test_zh_retrievalQA(): + data_path_en = os.environ.get('TEST_DATA_PATH_EN') + data_path_zh = os.environ.get('TEST_DATA_PATH_ZH') + en_model_path = os.environ.get('EN_MODEL_PATH') + zh_model_path = os.environ.get('ZH_MODEL_PATH') + zh_model_name = os.environ.get('ZH_MODEL_NAME') + en_model_name = os.environ.get('EN_MODEL_NAME') + sql_file_path = os.environ.get('SQL_FILE_PATH') + qa_session = UniversalRetrievalConversation(files_en=[{ + 'data_path': data_path_en, + 'name': 'company information', + 'separator': '\n' + }], + files_zh=[{ + 'data_path': data_path_zh, + 'name': 'company information', + 'separator': '\n' + }], + zh_model_path=zh_model_path, + en_model_path=en_model_path, + zh_model_name=zh_model_name, + en_model_name=en_model_name, + sql_file_path=sql_file_path) + ans = qa_session.run("哪家公司在经营酒店业务?", which_language='zh') + print(ans) + + +if __name__ == "__main__": + test_en_retrievalQA() + test_zh_retrievalQA() diff --git a/applications/ColossalQA/tests/test_text_splitter.py b/applications/ColossalQA/tests/test_text_splitter.py new file mode 100644 index 000000000..90755a9bc --- /dev/null +++ b/applications/ColossalQA/tests/test_text_splitter.py @@ -0,0 +1,11 @@ +from colossalqa.text_splitter.chinese_text_splitter import ChineseTextSplitter + + +def test_text_splitter(): + # unit test + spliter = ChineseTextSplitter(chunk_size=30, chunk_overlap=0) + out = spliter.split_text( + 
"移动端语音唤醒模型,检测关键词为“小云小云”。模型主体为4层FSMN结构,使用CTC训练准则,参数量750K,适用于移动端设备运行。模型输入为Fbank特征,输出为基于char建模的中文全集token预测,测试工具根据每一帧的预测数据进行后处理得到输入音频的实时检测结果。模型训练采用“basetrain + finetune”的模式,basetrain过程使用大量内部移动端数据,在此基础上,使用1万条设备端录制安静场景“小云小云”数据进行微调,得到最终面向业务的模型。后续用户可在basetrain模型基础上,使用其他关键词数据进行微调,得到新的语音唤醒模型,但暂时未开放模型finetune功能。" + ) + print(len(out)) + assert len(out) == 4 # ChineseTextSplitter will not break sentence. Hence the actual chunk size is not 30 diff --git a/applications/ColossalQA/version.txt b/applications/ColossalQA/version.txt new file mode 100644 index 000000000..8a9ecc2ea --- /dev/null +++ b/applications/ColossalQA/version.txt @@ -0,0 +1 @@ +0.0.1 \ No newline at end of file diff --git a/applications/README.md b/applications/README.md index f5078e06a..92096e559 100644 --- a/applications/README.md +++ b/applications/README.md @@ -8,6 +8,7 @@ The list of applications include: - [X] [ColossalEval](./ColossalEval): Evaluation Pipeline for LLMs. - [X] [ColossalChat](./Chat/README.md): Replication of ChatGPT with RLHF. - [X] [FastFold](https://github.com/hpcaitech/FastFold): Optimizing AlphaFold (Biomedicine) Training and Inference on GPU Clusters. +- [X] [ColossalQA](./ColossalQA/README.md): Document Retrieval Conversation System > Please note that the `Chatbot` application is migrated from the original `ChatGPT` folder.