diff --git a/docs/modules/chat.rst b/docs/modules/chat.rst new file mode 100644 index 00000000000..6d0ac317a1e --- /dev/null +++ b/docs/modules/chat.rst @@ -0,0 +1,27 @@ +Chat +========================== + +Chat models are a variation on language models. +While chat models use language models under the hood, the interface they expose is a bit different. +Rather than expose a "text in, text out" API, they expose an interface where "chat messages" are the input, +and the output is also a chat message. + +Chat APIs are fairly new, so we are still figuring out the correct abstractions. + +The following sections of documentation are provided: + +- `Getting Started <./chat/getting_started.html>`_: An overview of the basics of chat models. + +- `Key Concepts <./chat/key_concepts.html>`_: A conceptual guide going over the various concepts related to chat models. + +- `How-To Guides <./chat/how_to_guides.html>`_: A collection of how-to guides. These highlight how to accomplish various objectives with our chat model class, as well as how to integrate with various chat model providers. + + +.. toctree:: + :maxdepth: 1 + :name: LLMs + :hidden: + + ./chat/getting_started.ipynb + ./chat/key_concepts.md + ./chat/how_to_guides.rst diff --git a/docs/modules/chat/examples/agent.ipynb b/docs/modules/chat/examples/agent.ipynb new file mode 100644 index 00000000000..9a72abb926b --- /dev/null +++ b/docs/modules/chat/examples/agent.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e58f4d5a", + "metadata": {}, + "source": [ + "# Agent\n", + "This notebook covers how to create a custom agent for a chat model. It will utilize chat specific prompts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5268c7fa", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n", + "from langchain.chains import LLMChain\n", + "from langchain.utilities import SerpAPIWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fbaa4dbe", + "metadata": {}, + "outputs": [], + "source": [ + "search = SerpAPIWrapper()\n", + "tools = [\n", + " Tool(\n", + " name = \"Search\",\n", + " func=search.run,\n", + " description=\"useful for when you need to answer questions about current events\"\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f3ba6f08", + "metadata": {}, + "outputs": [], + "source": [ + "prefix = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\"\"\"\n", + "suffix = \"\"\"Begin! Remember to speak as a pirate when giving your final answer. 
Use lots of \"Args\"\"\"\n", + "\n", + "prompt = ZeroShotAgent.create_prompt(\n", + " tools, \n", + " prefix=prefix, \n", + " suffix=suffix, \n", + " input_variables=[]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3547a37d", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a78f886f", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " SystemMessagePromptTemplate(prompt=prompt),\n", + " HumanMessagePromptTemplate.from_template(\"{input}\\n\\nThis was your previous work \"\n", + " f\"(but I haven't seen any of it! I only see what \"\n", + " \"you return as final answer):\\n{agent_scratchpad}\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dadadd70", + "metadata": {}, + "outputs": [], + "source": [ + "prompt = ChatPromptTemplate.from_messages(messages)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b7180182", + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(llm=ChatOpenAI(temperature=0), prompt=prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ddddb07b", + "metadata": {}, + "outputs": [], + "source": [ + "tool_names = [tool.name for tool in tools]\n", + "agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "36aef054", + "metadata": {}, + "outputs": [], + "source": [ + "agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 13, + "id": "33a4d6cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mArrr, ye be in luck, matey! I'll find ye the answer to yer question.\n", + "\n", + "Thought: I need to search for the current population of Canada.\n", + "Action: Search\n", + "Action Input: \"current population of Canada 2023\"\n", + "\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,623,091 as of Saturday, March 4, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mAhoy, me hearties! I've found the answer to yer question.\n", + "\n", + "Final Answer: As of March 4, 2023, the population of Canada be 38,623,091. Arrr!\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'As of March 4, 2023, the population of Canada be 38,623,091. 
Arrr!'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"How many people live in canada as of 2023?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aefe978", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/chat/examples/chat_vector_db.ipynb b/docs/modules/chat/examples/chat_vector_db.ipynb new file mode 100644 index 00000000000..6f3301db693 --- /dev/null +++ b/docs/modules/chat/examples/chat_vector_db.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "134a0785", + "metadata": {}, + "source": [ + "# Chat Vector DB\n", + "\n", + "This notebook goes over how to set up a chat model to chat with a vector database.\n", + "\n", + "This notebook is very similar to the example of using an LLM in the ChatVectorDBChain. The only differences here are (1) using a ChatModel, and (2) passing in a ChatPromptTemplate (optimized for chat models)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "70c4e529", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.chains import ChatVectorDBChain" + ] + }, + { + "cell_type": "markdown", + "id": "cdff94be", + "metadata": {}, + "source": [ + "Load in documents. 
You can replace this with a loader for whatever type of data you want" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "01c46e92", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.document_loaders import TextLoader\n", + "loader = TextLoader('../../state_of_the_union.txt')\n", + "documents = loader.load()" + ] + }, + { + "cell_type": "markdown", + "id": "e9be4779", + "metadata": {}, + "source": [ + "If you had multiple loaders that you wanted to combine, you do something like:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "433363a5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# loaders = [....]\n", + "# docs = []\n", + "# for loader in loaders:\n", + "# docs.extend(loader.load())" + ] + }, + { + "cell_type": "markdown", + "id": "239475d2", + "metadata": {}, + "source": [ + "We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a8930cf7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Chroma using direct local API.\n", + "Using DuckDB in-memory for database. Data will be transient.\n" + ] + } + ], + "source": [ + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "documents = text_splitter.split_documents(documents)\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "vectorstore = Chroma.from_documents(documents, embeddings)" + ] + }, + { + "cell_type": "markdown", + "id": "18415aca", + "metadata": {}, + "source": [ + "We are now going to construct a prompt specifically designed for chat models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c8805230", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cc86c30e", + "metadata": {}, + "outputs": [], + "source": [ + "system_template=\"\"\"Use the following pieces of context to answer the users question. \n", + "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", + "----------------\n", + "{context}\"\"\"\n", + "messages = [\n", + " SystemMessagePromptTemplate.from_template(system_template),\n", + " HumanMessagePromptTemplate.from_template(\"{question}\")\n", + "]\n", + "prompt = ChatPromptTemplate.from_messages(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "3c96b118", + "metadata": {}, + "source": [ + "We now initialize the ChatVectorDBChain" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7b4110f3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "qa = ChatVectorDBChain.from_llm(ChatOpenAI(temperature=0), vectorstore,qa_prompt=prompt)" + ] + }, + { + "cell_type": "markdown", + "id": "3872432d", + "metadata": {}, + "source": [ + "Here's an example of asking a question with no chat history" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7fe3e730", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "chat_history = []\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "result = qa({\"question\": query, \"chat_history\": chat_history})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bfff9cc8", + 
"metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\"The President nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and a consensus builder. He also mentioned that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\"" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result[\"answer\"]" + ] + }, + { + "cell_type": "markdown", + "id": "9e46edf7", + "metadata": {}, + "source": [ + "Here's an example of asking a question with some chat history" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "00b4cf00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "chat_history = [(query, result[\"answer\"])]\n", + "query = \"Did he mention who came before her\"\n", + "result = qa({\"question\": query, \"chat_history\": chat_history})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f01828d1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'The context does not provide information about the predecessor of Ketanji Brown Jackson.'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['answer']" + ] + }, + { + "cell_type": "markdown", + "id": "2324cdc6-98bf-4708-b8cd-02a98b1e5b67", + "metadata": {}, + "source": [ + "## Chat Vector DB with streaming to `stdout`\n", + "\n", + "Output from the chain will be streamed to `stdout` token by token in this example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2efacec3-2690-4b05-8de3-a32fd2ac3911", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chains.llm import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.callbacks.base import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT\n", + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "# Construct a ChatVectorDBChain with a streaming llm for combine docs\n", + "# and a separate, non-streaming llm for question generation\n", + "llm = OpenAI(temperature=0)\n", + "streaming_llm = ChatOpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", + "\n", + "question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)\n", + "doc_chain = load_qa_chain(streaming_llm, chain_type=\"stuff\", prompt=prompt)\n", + "\n", + "qa = ChatVectorDBChain(vectorstore=vectorstore, combine_docs_chain=doc_chain, question_generator=question_generator)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "fd6d43f4-7428-44a4-81bc-26fe88a98762", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The President nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and a consensus builder. He also mentioned that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." 
+ ] + } + ], + "source": [ + "chat_history = []\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "result = qa({\"question\": query, \"chat_history\": chat_history})" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5ab38978-f3e8-4fa7-808c-c79dec48379a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The context does not provide information on who Ketanji Brown Jackson succeeded on the United States Supreme Court." + ] + } + ], + "source": [ + "chat_history = [(query, result[\"answer\"])]\n", + "query = \"Did he mention who she suceeded\"\n", + "result = qa({\"question\": query, \"chat_history\": chat_history})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e8d0055", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/chat/examples/streaming.ipynb b/docs/modules/chat/examples/streaming.ipynb new file mode 100644 index 00000000000..fd696c92fd0 --- /dev/null +++ b/docs/modules/chat/examples/streaming.ipynb @@ -0,0 +1,119 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe4e96b5", + "metadata": {}, + "source": [ + "# Streaming\n", + "\n", + "This notebook goes over how to use streaming with a chat model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e0244f2a", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.schema import (\n", + " HumanMessage,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ad342bfa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Verse 1:\n", + "Bubbles rising to the top\n", + "A refreshing drink that never stops\n", + "Clear and crisp, it's pure delight\n", + "A taste that's sure to excite\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Verse 2:\n", + "No sugar, no calories, just pure bliss\n", + "A drink that's hard to resist\n", + "It's the perfect way to quench my thirst\n", + "A drink that always comes first\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Bridge:\n", + "From the mountains to the sea\n", + "Sparkling water, you're the key\n", + "To a healthy life, a happy soul\n", + "A drink that makes me feel whole\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Outro:\n", + "Sparkling water, you're the one\n", + "A drink that's always so much fun\n", + "I'll never let you go, my friend\n", + "Sparkling" + ] + } + ], + "source": [ + "from langchain.callbacks.base import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "chat = ChatOpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", + "resp = 
chat([HumanMessage(content=\"Write me a song about sparkling water.\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67c44deb", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/chat/examples/vector_db_qa.ipynb b/docs/modules/chat/examples/vector_db_qa.ipynb new file mode 100644 index 00000000000..7ee0bc4d6ea --- /dev/null +++ b/docs/modules/chat/examples/vector_db_qa.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "07c1e3b9", + "metadata": {}, + "source": [ + "# Vector DB Question/Answering\n", + "\n", + "This example showcases using a chat model to do question answering over a vector database.\n", + "\n", + "This notebook is very similar to the example of using an LLM in the VectorDBQA chain. The only differences here are (1) using a ChatModel, and (2) passing in a ChatPromptTemplate (optimized for chat models)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "82525493", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.chains import VectorDBQA" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5c7049db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Chroma using direct local API.\n", + "Using DuckDB in-memory for database. 
Data will be transient.\n" + ] + } + ], + "source": [ + "from langchain.document_loaders import TextLoader\n", + "loader = TextLoader('../../state_of_the_union.txt')\n", + "documents = loader.load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_documents(documents)\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "docsearch = Chroma.from_documents(texts, embeddings)" + ] + }, + { + "cell_type": "markdown", + "id": "35f99145", + "metadata": {}, + "source": [ + "We can now set up the chat model and chat model specific prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "32a49412", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f231fb9b", + "metadata": {}, + "outputs": [], + "source": [ + "system_template=\"\"\"Use the following pieces of context to answer the users question. 
\n", + "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", + "----------------\n", + "{context}\"\"\"\n", + "messages = [\n", + " SystemMessagePromptTemplate.from_template(system_template),\n", + " HumanMessagePromptTemplate.from_template(\"{question}\")\n", + "]\n", + "prompt = ChatPromptTemplate.from_messages(messages)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3018f865", + "metadata": {}, + "outputs": [], + "source": [ + "chain_type_kwargs = {\"prompt\": prompt}\n", + "qa = VectorDBQA.from_chain_type(llm=ChatOpenAI(), chain_type=\"stuff\", vectorstore=docsearch, chain_type_kwargs=chain_type_kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "032a47f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"The President nominated Ketanji Brown Jackson as a Judge for the United States Supreme Court. He described her as one of the nation's top legal minds and a former top litigator in private practice, a former federal public defender, and a consensus builder.\"" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "qa.run(query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b403637", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "vscode": { + "interpreter": { + "hash": "b1677b440931f40d89ef8be7bf03acb108ce003de0ac9b18e8d43753ea2e7103" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/docs/modules/chat/examples/vector_db_qa_with_sources.ipynb b/docs/modules/chat/examples/vector_db_qa_with_sources.ipynb new file mode 100644 index 00000000000..b922ed3913a --- /dev/null +++ b/docs/modules/chat/examples/vector_db_qa_with_sources.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "efc5be67", + "metadata": {}, + "source": [ + "# VectorDB Question Answering with Sources\n", + "\n", + "This notebook goes over how to do question-answering with sources with a chat model over a vector database. It does this by using the `VectorDBQAWithSourcesChain`, which does the lookup of the documents from a vector database. \n", + "\n", + "This notebook is very similar to the example of using an LLM in the VectorDBQAWithSourcesChain. The only differences here are (1) using a ChatModel, and (2) passing in a ChatPromptTemplate (optimized for chat models)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1c613960", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.embeddings.cohere import CohereEmbeddings\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n", + "from langchain.vectorstores import Chroma" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17d1306e", + "metadata": {}, + "outputs": [], + "source": [ + "with open('../../state_of_the_union.txt') as f:\n", + " state_of_the_union = f.read()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_text(state_of_the_union)\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0e745d99", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Chroma using direct local API.\n", + "Using DuckDB in-memory for 
database. Data will be transient.\n" + ] + } + ], + "source": [ + "docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{\"source\": f\"{i}-pl\"} for i in range(len(texts))])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8aa571ae", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import VectorDBQAWithSourcesChain" + ] + }, + { + "cell_type": "markdown", + "id": "1f73b14a", + "metadata": {}, + "source": [ + "We can now set up the chat model and chat model specific prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9643c775", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ed00e906", + "metadata": {}, + "outputs": [], + "source": [ + "system_template=\"\"\"Use the following pieces of context to answer the users question. 
\n", + "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", + "ALWAYS return a \"SOURCES\" part in your answer.\n", + "The \"SOURCES\" part should be a reference to the source of the document from which you got your answer.\n", + "\n", + "Example of your response should be:\n", + "\n", + "```\n", + "The answer is foo\n", + "SOURCES: xyz\n", + "```\n", + "\n", + "Begin!\n", + "----------------\n", + "{summaries}\"\"\"\n", + "messages = [\n", + " SystemMessagePromptTemplate.from_template(system_template),\n", + " HumanMessagePromptTemplate.from_template(\"{question}\")\n", + "]\n", + "prompt = ChatPromptTemplate.from_messages(messages)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "aa859d4c", + "metadata": {}, + "outputs": [], + "source": [ + "chain_type_kwargs = {\"prompt\": prompt}\n", + "chain = VectorDBQAWithSourcesChain.from_chain_type(\n", + " ChatOpenAI(temperature=0), \n", + " chain_type=\"stuff\", \n", + " vectorstore=docsearch,\n", + " chain_type_kwargs=chain_type_kwargs\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8ba36fa7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'answer': 'The President honored Justice Stephen Breyer, an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, for his dedicated service to the country. 
\\n',\n", + " 'sources': '30-pl'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain({\"question\": \"What did the president say about Justice Breyer\"}, return_only_outputs=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c91fdc8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'answer': ' The president honored Justice Stephen Breyer for his service.\\n',\n", + " 'sources': '30-pl'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa({\"question\": \"What did the president say about Justice Breyer\"}, return_only_outputs=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "vscode": { + "interpreter": { + "hash": "b1677b440931f40d89ef8be7bf03acb108ce003de0ac9b18e8d43753ea2e7103" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/chat/getting_started.ipynb b/docs/modules/chat/getting_started.ipynb new file mode 100644 index 00000000000..bc237c38737 --- /dev/null +++ b/docs/modules/chat/getting_started.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Getting Started\n", + "\n", + "This notebook covers how to get started with chat models. The interface is based around messages rather than raw text." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "522686de", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain import PromptTemplate, LLMChain\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "62e0dbc3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "chat = ChatOpenAI(temperature=0)" + ] + }, + { + "cell_type": "markdown", + "id": "bbaec18e-3684-4eef-955f-c1cec8bf765d", + "metadata": {}, + "source": [ + "You can get chat completions by passing one or more messages to the chat model. The response will be a message. The types of messages currently supported in LangChain are `AIMessage`, `HumanMessage`, `SystemMessage`, and `ChatMessage` -- `ChatMessage` takes in an arbitrary role parameter. Most of the time, you'll just be dealing with `HumanMessage`, `AIMessage`, and `SystemMessage`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "76a6e7b0-e927-4bfb-a414-1332a4149106", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\"J'aime programmer.\")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat([HumanMessage(content=\"Translate this sentence from English to French. I love programming.\")])" + ] + }, + { + "cell_type": "markdown", + "id": "a62153d4-1211-411b-a493-3febfe446ae0", + "metadata": {}, + "source": [ + "OpenAI's chat model supports multiple messages as input. See [here](https://platform.openai.com/docs/guides/chat/chat-vs-completions) for more information. 
Here is an example of sending a system and user message to the chat model:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ce16ad78-8e6f-48cd-954e-98be75eb5836", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\"J'aime programmer.\")" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages = [\n", + " SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n", + " HumanMessage(content=\"Translate this sentence from English to French. I love programming.\")\n", + "]\n", + "chat(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "36dc8d7e-bd25-47ac-8c1b-60e3422603d3", + "metadata": {}, + "source": [ + "You can go one step further and generate completions for multiple sets of messages using `generate`. This returns an `LLMResult` with an additional `message` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2b21fc52-74b6-4950-ab78-45d12c68fb4d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LLMResult(generations=[[ChatGeneration(text=\"J'aime programmer.\", generation_info=None, message=AIMessage(content=\"J'aime programmer.\"))], [ChatGeneration(text=\"J'aime l'intelligence artificielle.\", generation_info=None, message=AIMessage(content=\"J'aime l'intelligence artificielle.\"))]], llm_output=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch_messages = [\n", + " [\n", + " SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n", + " HumanMessage(content=\"Translate this sentence from English to French. 
I love programming.\")\n", + " ],\n", + " [\n", + " SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n", + " HumanMessage(content=\"Translate this sentence from English to French. I love artificial intelligence.\")\n", + " ],\n", + "]\n", + "chat.generate(batch_messages)" + ] + }, + { + "cell_type": "markdown", + "id": "b10b00ef-f373-4bc3-8302-2dfc28033734", + "metadata": {}, + "source": [ + "## PromptTemplates" + ] + }, + { + "cell_type": "markdown", + "id": "778f912a-66ea-4a5d-b3de-6c7db4baba26", + "metadata": {}, + "source": [ + "You can make use of templating by using a `MessagePromptTemplate`. You can build a `ChatPromptTemplate` from one or more `MessagePromptTemplates`. You can use `ChatPromptTemplate`'s `format_prompt` -- this returns a `PromptValue`, which you can convert to a string or Message object, depending on whether you want to use the formatted value as input to an llm or chat model.\n", + "\n", + "For convenience, there is a `from_template` method exposed on the template. If you were to use this template, this is what it would look like:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "180c5cc8", + "metadata": {}, + "outputs": [], + "source": [ + "template=\"You are a helpful assistant that translates {input_language} to {output_language}.\"\n", + "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n", + "human_template=\"Translate this sentence from {input_language} to {output_language}. 
{text}\"\n", + "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fbb043e6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\"J'aime programmer.\")" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])\n", + "\n", + "# get a chat completion from the formatted messages\n", + "chat(chat_prompt.format_prompt(input_language=\"English\", output_language=\"French\", text=\"I love programming.\").to_messages())" + ] + }, + { + "cell_type": "markdown", + "id": "e28b98da", + "metadata": {}, + "source": [ + "If you wanted to construct the MessagePromptTemplate more directly, you could create a PromptTemplate outside and then pass it in, eg:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d5b1ab1c", + "metadata": {}, + "outputs": [], + "source": [ + "prompt=PromptTemplate(\n", + " template=\"You are a helpful assistant that translates {input_language} to {output_language}.\",\n", + " input_variables=[\"input_language\", \"output_language\"],\n", + ")\n", + "system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)" + ] + }, + { + "cell_type": "markdown", + "id": "eb779f3f", + "metadata": {}, + "source": [ + "## Streaming\n", + "\n", + "Streaming is supported for `ChatOpenAI` through callback handling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "509181be", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Verse 1:\n", + "Bubbles rising to the top\n", + "A refreshing drink that never stops\n", + "Clear and crisp, it's pure delight\n", + "A taste that's sure to excite\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Verse 2:\n", + "No sugar, no calories, just pure bliss\n", + "A drink that's hard to resist\n", + "It's the perfect way to quench my thirst\n", + "A drink that always comes first\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Bridge:\n", + "From the mountains to the sea\n", + "Sparkling water, you're the key\n", + "To a healthy life, a happy soul\n", + "A drink that makes me feel whole\n", + "\n", + "Chorus:\n", + "Sparkling water, oh so fine\n", + "A drink that's always on my mind\n", + "With every sip, I feel alive\n", + "Sparkling water, you're my vibe\n", + "\n", + "Outro:\n", + "Sparkling water, you're the one\n", + "A drink that's always so much fun\n", + "I'll never let you go, my friend\n", + "Sparkling" + ] + } + ], + "source": [ + "from langchain.callbacks.base import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "chat = ChatOpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", + "resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c095285d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/chat/how_to_guides.rst b/docs/modules/chat/how_to_guides.rst new file mode 100644 index 00000000000..b9788073d0b --- /dev/null +++ b/docs/modules/chat/how_to_guides.rst @@ -0,0 +1,10 @@ +How-To Guides +============= + +The examples here all address certain "how-to" guides for working with chat models. + +.. toctree:: + :maxdepth: 1 + :glob: + + ./examples/* diff --git a/docs/modules/chat/key_concepts.md b/docs/modules/chat/key_concepts.md new file mode 100644 index 00000000000..2f9110d48fc --- /dev/null +++ b/docs/modules/chat/key_concepts.md @@ -0,0 +1,29 @@ +# Key Concepts + +## ChatMessage +A chat message is what we refer to as the modular unit of information. +At the moment, this consists of "content", which refers to the content of the chat message. +At the moment, most chat models are trained to predict sequences of Human <> AI messages. +This is because so far the primary interaction mode has been between a human user and a singular AI system. + +At the moment, there are four different classes of Chat Messages + +### HumanMessage +A HumanMessage is a ChatMessage that is sent as if from a Human's point of view. + +### AIMessage +An AIMessage is a ChatMessage that is sent from the point of view of the AI system to which the Human is corresponding. + +### SystemMessage +A SystemMessage is still a bit ambiguous, and so far seems to be a concept unique to OpenAI + +### ChatMessage +A chat message is a generic chat message, with not only a "content" field but also a "role" field. 
+With this field, arbitrary roles may be assigned to a message. + +## ChatGeneration +The output of a single prediction of a chat message. +Currently this is just a chat message itself (eg content and a role) + +## Chat Model +A model which takes in a list of chat messages, and predicts a chat message in response. \ No newline at end of file diff --git a/docs/modules/llms/integrations/openaichat.ipynb b/docs/modules/llms/integrations/openaichat.ipynb index 0fecb37b98a..d8156c7c017 100644 --- a/docs/modules/llms/integrations/openaichat.ipynb +++ b/docs/modules/llms/integrations/openaichat.ipynb @@ -307,7 +307,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/langchain/chains/qa_with_sources/base.py b/langchain/chains/qa_with_sources/base.py index d566e812659..d87db00b58e 100644 --- a/langchain/chains/qa_with_sources/base.py +++ b/langchain/chains/qa_with_sources/base.py @@ -3,7 +3,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from pydantic import BaseModel, Extra, root_validator @@ -62,10 +62,17 @@ class BaseQAWithSourcesChain(Chain, BaseModel, ABC): @classmethod def from_chain_type( - cls, llm: BaseLLM, chain_type: str = "stuff", **kwargs: Any + cls, + llm: BaseLLM, + chain_type: str = "stuff", + chain_type_kwargs: Optional[dict] = None, + **kwargs: Any, ) -> BaseQAWithSourcesChain: """Load chain from chain type.""" - combine_document_chain = load_qa_with_sources_chain(llm, chain_type=chain_type) + _chain_kwargs = chain_type_kwargs or {} + combine_document_chain = load_qa_with_sources_chain( + llm, chain_type=chain_type, **_chain_kwargs + ) return cls(combine_documents_chain=combine_document_chain, **kwargs) class Config: diff --git a/langchain/chat_models/openai.py b/langchain/chat_models/openai.py index 7cc1278e8b0..a7bde19ee21 100644 --- 
a/langchain/chat_models/openai.py +++ b/langchain/chat_models/openai.py @@ -29,24 +29,24 @@ logger = logging.getLogger(__file__) def _convert_dict_to_message(_dict: dict) -> BaseMessage: role = _dict["role"] if role == "user": - return HumanMessage(text=_dict["content"]) + return HumanMessage(content=_dict["content"]) elif role == "assistant": - return AIMessage(text=_dict["content"]) + return AIMessage(content=_dict["content"]) elif role == "systemt": - return SystemMessage(text=_dict["content"]) + return SystemMessage(content=_dict["content"]) else: - return ChatMessage(text=_dict["content"], role=role) + return ChatMessage(content=_dict["content"], role=role) def _convert_message_to_dict(message: BaseMessage) -> dict: if isinstance(message, ChatMessage): - return {"role": message.role, "content": message.text} + return {"role": message.role, "content": message.content} elif isinstance(message, HumanMessage): - return {"role": "user", "content": message.text} + return {"role": "user", "content": message.content} elif isinstance(message, AIMessage): - return {"role": "assistant", "content": message.text} + return {"role": "assistant", "content": message.content} elif isinstance(message, SystemMessage): - return {"role": "system", "content": message.text} + return {"role": "system", "content": message.content} else: raise ValueError(f"Got unknown type {message}") diff --git a/langchain/document_loaders/azlyrics.py b/langchain/document_loaders/azlyrics.py index 0947946c116..b88a31bfa82 100644 --- a/langchain/document_loaders/azlyrics.py +++ b/langchain/document_loaders/azlyrics.py @@ -11,8 +11,8 @@ class AZLyricsLoader(WebBaseLoader): def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() - title = soup.title.text - lyrics = soup.find_all("div", {"class": ""})[2].text + title = soup.title.content + lyrics = soup.find_all("div", {"class": ""})[2].content text = title + lyrics metadata = {"source": self.web_path} return [Document(page_content=text, 
metadata=metadata)] diff --git a/langchain/document_loaders/college_confidential.py b/langchain/document_loaders/college_confidential.py index 1eaa64bcb27..f8104c94c29 100644 --- a/langchain/document_loaders/college_confidential.py +++ b/langchain/document_loaders/college_confidential.py @@ -11,6 +11,6 @@ class CollegeConfidentialLoader(WebBaseLoader): def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() - text = soup.select_one("main[class='skin-handler']").text + text = soup.select_one("main[class='skin-handler']").content metadata = {"source": self.web_path} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/evernote.py b/langchain/document_loaders/evernote.py index a7529f379fa..f86925f3379 100644 --- a/langchain/document_loaders/evernote.py +++ b/langchain/document_loaders/evernote.py @@ -23,10 +23,10 @@ def _parse_resource(resource: list) -> dict: for elem in resource: if elem.tag == "data": # Some times elem.text is None - rsc_dict[elem.tag] = b64decode(elem.text) if elem.text else b"" + rsc_dict[elem.tag] = b64decode(elem.content) if elem.content else b"" rsc_dict["hash"] = hashlib.md5(rsc_dict[elem.tag]).hexdigest() else: - rsc_dict[elem.tag] = elem.text + rsc_dict[elem.tag] = elem.content return rsc_dict @@ -36,15 +36,15 @@ def _parse_note(note: List) -> dict: resources = [] for elem in note: if elem.tag == "content": - note_dict[elem.tag] = _parse_content(elem.text) + note_dict[elem.tag] = _parse_content(elem.content) # A copy of original content - note_dict["content-raw"] = elem.text + note_dict["content-raw"] = elem.content elif elem.tag == "resource": resources.append(_parse_resource(elem)) elif elem.tag == "created" or elem.tag == "updated": - note_dict[elem.tag] = strptime(elem.text, "%Y%m%dT%H%M%SZ") + note_dict[elem.tag] = strptime(elem.content, "%Y%m%dT%H%M%SZ") else: - note_dict[elem.tag] = elem.text + note_dict[elem.tag] = elem.content note_dict["resource"] = resources diff --git 
a/langchain/document_loaders/gitbook.py b/langchain/document_loaders/gitbook.py index 978e0fa843d..28d99eb7ac1 100644 --- a/langchain/document_loaders/gitbook.py +++ b/langchain/document_loaders/gitbook.py @@ -38,7 +38,7 @@ class GitbookLoader(WebBaseLoader): page_content_raw = soup.find("main") content = page_content_raw.get_text(separator="\n").strip() title_if_exists = page_content_raw.find("h1") - title = title_if_exists.text if title_if_exists else "" + title = title_if_exists.content if title_if_exists else "" metadata = { "source": custom_url if custom_url else self.web_path, "title": title, diff --git a/langchain/document_loaders/hn.py b/langchain/document_loaders/hn.py index 91ff8d9d5e0..f97887b1d1b 100644 --- a/langchain/document_loaders/hn.py +++ b/langchain/document_loaders/hn.py @@ -32,7 +32,7 @@ class HNLoader(WebBaseLoader): title = soup_info.select_one("tr[id='pagespace']").get("title") return [ Document( - page_content=comment.text.strip(), + page_content=comment.content.strip(), metadata={"source": self.web_path, "title": title}, ) for comment in comments @@ -43,9 +43,9 @@ class HNLoader(WebBaseLoader): items = soup.select("tr[class='athing']") documents = [] for lineItem in items: - ranking = lineItem.select_one("span[class='rank']").text + ranking = lineItem.select_one("span[class='rank']").content link = lineItem.find("span", {"class": "titleline"}).find("a").get("href") - title = lineItem.find("span", {"class": "titleline"}).text.strip() + title = lineItem.find("span", {"class": "titleline"}).content.strip() metadata = { "source": self.web_path, "title": title, diff --git a/langchain/document_loaders/ifixit.py b/langchain/document_loaders/ifixit.py index 18b6c79729d..955dc6d873d 100644 --- a/langchain/document_loaders/ifixit.py +++ b/langchain/document_loaders/ifixit.py @@ -94,12 +94,12 @@ class IFixitLoader(BaseLoader): output = [] - title = soup.find("h1", "post-title").text + title = soup.find("h1", "post-title").content output.append("# " 
+ title) - output.append(soup.select_one(".post-content .post-text").text.strip()) + output.append(soup.select_one(".post-content .post-text").content.strip()) - output.append("\n## " + soup.find("div", "post-answers-header").text.strip()) + output.append("\n## " + soup.find("div", "post-answers-header").content.strip()) for answer in soup.select(".js-answers-list .post.post-answer"): if answer.has_attr("itemprop") and "acceptedAnswer" in answer["itemprop"]: output.append("\n### Accepted Answer") @@ -109,7 +109,7 @@ class IFixitLoader(BaseLoader): output.append("\n### Other Answer") output += [ - a.text.strip() for a in answer.select(".post-content .post-text") + a.content.strip() for a in answer.select(".post-content .post-text") ] output.append("\n") diff --git a/langchain/document_loaders/imsdb.py b/langchain/document_loaders/imsdb.py index 4589553d333..ee8d807fb72 100644 --- a/langchain/document_loaders/imsdb.py +++ b/langchain/document_loaders/imsdb.py @@ -11,6 +11,6 @@ class IMSDbLoader(WebBaseLoader): def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() - text = soup.select_one("td[class='scrtext']").text + text = soup.select_one("td[class='scrtext']").content metadata = {"source": self.web_path} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/srt.py b/langchain/document_loaders/srt.py index ce38f1c2f89..7cfda751b1a 100644 --- a/langchain/document_loaders/srt.py +++ b/langchain/document_loaders/srt.py @@ -23,6 +23,6 @@ class SRTLoader(BaseLoader): import pysrt parsed_info = pysrt.open(self.file_path) - text = " ".join([t.text for t in parsed_info]) + text = " ".join([t.content for t in parsed_info]) metadata = {"source": self.file_path} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/llms/cohere.py b/langchain/llms/cohere.py index 66bff40eb89..10cc94ff330 100644 --- a/langchain/llms/cohere.py +++ b/langchain/llms/cohere.py @@ -124,7 +124,7 @@ class 
Cohere(LLM, BaseModel): params["stop_sequences"] = stop response = self.client.generate(model=self.model, prompt=prompt, **params) - text = response.generations[0].text + text = response.generations[0].content # If stop tokens are provided, Cohere's endpoint returns them. # In order to make this consistent with other endpoints, we strip them. if stop is not None or self.stop is not None: diff --git a/langchain/llms/gooseai.py b/langchain/llms/gooseai.py index 89f17f18d32..866d50c0f78 100644 --- a/langchain/llms/gooseai.py +++ b/langchain/llms/gooseai.py @@ -139,5 +139,5 @@ class GooseAI(LLM, BaseModel): params["stop"] = stop response = self.client.create(engine=self.model_name, prompt=prompt, **params) - text = response.choices[0].text + text = response.choices[0].content return text diff --git a/langchain/prompts/base.py b/langchain/prompts/base.py index a5a3f5a4667..7007025927e 100644 --- a/langchain/prompts/base.py +++ b/langchain/prompts/base.py @@ -135,7 +135,7 @@ class StringPromptValue(PromptValue): def to_messages(self) -> List[BaseMessage]: """Return prompt as messages.""" - return [HumanMessage(text=self.text)] + return [HumanMessage(content=self.text)] class BasePromptTemplate(BaseModel, ABC): diff --git a/langchain/prompts/chat.py b/langchain/prompts/chat.py index 75dea9631b8..e5f2cb93fa0 100644 --- a/langchain/prompts/chat.py +++ b/langchain/prompts/chat.py @@ -21,6 +21,11 @@ from langchain.schema import ( class BaseMessagePromptTemplate(BaseModel, ABC): prompt: BasePromptTemplate + @classmethod + def from_template(cls, template: str, **kwargs: Any) -> BaseMessagePromptTemplate: + prompt = PromptTemplate.from_template(template) + return cls(prompt=prompt, **kwargs) + @abstractmethod def format(self, **kwargs: Any) -> BaseMessage: """To a BaseMessage.""" @@ -31,25 +36,25 @@ class ChatMessagePromptTemplate(BaseMessagePromptTemplate): def format(self, **kwargs: Any) -> BaseMessage: text = self.prompt.format(**kwargs) - return ChatMessage(text=text, 
role=self.role) + return ChatMessage(content=text, role=self.role) class HumanMessagePromptTemplate(BaseMessagePromptTemplate): def format(self, **kwargs: Any) -> BaseMessage: text = self.prompt.format(**kwargs) - return HumanMessage(text=text) + return HumanMessage(content=text) class AIMessagePromptTemplate(BaseMessagePromptTemplate): def format(self, **kwargs: Any) -> BaseMessage: text = self.prompt.format(**kwargs) - return AIMessage(text=text) + return AIMessage(content=text) class SystemMessagePromptTemplate(BaseMessagePromptTemplate): def format(self, **kwargs: Any) -> BaseMessage: text = self.prompt.format(**kwargs) - return SystemMessage(text=text) + return SystemMessage(content=text) class ChatPromptValue(PromptValue): @@ -74,7 +79,7 @@ class ChatPromptTemplate(BasePromptTemplate, ABC): ) -> ChatPromptTemplate: messages = [ ChatMessagePromptTemplate( - text=PromptTemplate.from_template(template), role=role + content=PromptTemplate.from_template(template), role=role ) for role, template in string_messages ] @@ -85,7 +90,7 @@ class ChatPromptTemplate(BasePromptTemplate, ABC): cls, string_messages: List[Tuple[Type[BaseMessagePromptTemplate], str]] ) -> ChatPromptTemplate: messages = [ - role(text=PromptTemplate.from_template(template)) + role(content=PromptTemplate.from_template(template)) for role, template in string_messages ] return cls.from_messages(messages) diff --git a/langchain/schema.py b/langchain/schema.py index ccf1c6369dc..ce495abb219 100644 --- a/langchain/schema.py +++ b/langchain/schema.py @@ -34,7 +34,7 @@ class Generation(BaseModel): class BaseMessage(BaseModel): """Message object.""" - text: str + content: str class HumanMessage(BaseMessage): @@ -63,7 +63,7 @@ class ChatGeneration(Generation): @root_validator def set_text(cls, values: Dict[str, Any]) -> Dict[str, Any]: - values["text"] = values["message"].text + values["text"] = values["message"].content return values diff --git a/langchain/utilities/wolfram_alpha.py 
b/langchain/utilities/wolfram_alpha.py index a27aec051f4..5c860d492de 100644 --- a/langchain/utilities/wolfram_alpha.py +++ b/langchain/utilities/wolfram_alpha.py @@ -52,8 +52,8 @@ class WolframAlphaAPIWrapper(BaseModel): res = self.wolfram_client.query(query) try: - assumption = next(res.pods).text - answer = next(res.results).text + assumption = next(res.pods).content + answer = next(res.results).content except StopIteration: return "Wolfram Alpha wasn't able to answer it" diff --git a/tests/integration_tests/chat_models/test_openai.py b/tests/integration_tests/chat_models/test_openai.py index 886839e7993..3e8bd88a343 100644 --- a/tests/integration_tests/chat_models/test_openai.py +++ b/tests/integration_tests/chat_models/test_openai.py @@ -21,7 +21,7 @@ def test_chat_openai() -> None: message = HumanMessage(text="Hello") response = chat([message]) assert isinstance(response, BaseMessage) - assert isinstance(response.text, str) + assert isinstance(response.content, str) def test_chat_openai_system_message() -> None: @@ -31,7 +31,7 @@ def test_chat_openai_system_message() -> None: human_message = HumanMessage(text="Hello") response = chat([system_message, human_message]) assert isinstance(response, BaseMessage) - assert isinstance(response.text, str) + assert isinstance(response.content, str) def test_chat_openai_generate() -> None: @@ -46,7 +46,7 @@ def test_chat_openai_generate() -> None: for generation in generations: assert isinstance(generation, ChatGeneration) assert isinstance(generation.text, str) - assert generation.text == generation.message.text + assert generation.text == generation.message.content def test_chat_openai_multiple_completions() -> None: @@ -58,7 +58,7 @@ def test_chat_openai_multiple_completions() -> None: assert len(response.generations) == 5 for generation in response.generations: assert isinstance(generation.message, BaseMessage) - assert isinstance(generation.message.text, str) + assert isinstance(generation.message.content, str) def 
test_chat_openai_streaming() -> None: diff --git a/tests/unit_tests/prompts/test_chat.py b/tests/unit_tests/prompts/test_chat.py index b8f90357d97..c0f26687b7b 100644 --- a/tests/unit_tests/prompts/test_chat.py +++ b/tests/unit_tests/prompts/test_chat.py @@ -62,10 +62,10 @@ def test_chat_prompt_template() -> None: assert isinstance(prompt, ChatPromptValue) messages = prompt.to_messages() assert len(messages) == 4 - assert messages[0].text == "Here's some context: context" - assert messages[1].text == "Hello foo, I'm bar. Thanks for the context" - assert messages[2].text == "I'm an AI. I'm foo. I'm bar." - assert messages[3].text == "I'm a generic message. I'm foo. I'm bar." + assert messages[0].content == "Here's some context: context" + assert messages[1].content == "Hello foo, I'm bar. Thanks for the context" + assert messages[2].content == "I'm an AI. I'm foo. I'm bar." + assert messages[3].content == "I'm a generic message. I'm foo. I'm bar." string = prompt.to_string() expected = (