From 54d5b74b00daba6d5e32af663384ed22b7ab6a9e Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Tue, 24 Sep 2024 14:09:56 -0400
Subject: [PATCH] docs: update trim messages notebook (#26793)

Update trim messages notebook to include common use cases and explain
what the desired behavior is
---
 docs/docs/how_to/trim_messages.ipynb | 261 ++++++++++++++++++++-------
 1 file changed, 200 insertions(+), 61 deletions(-)

diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb
index eb8a44a7f4d..57152e143cc 100644
--- a/docs/docs/how_to/trim_messages.ipynb
+++ b/docs/docs/how_to/trim_messages.ipynb
@@ -2,7 +2,7 @@
  "cells": [
  {
  "cell_type": "markdown",
- "id": "b5ee5b75-6876-4d62-9ade-5a7a808ae5a2",
+ "id": "eaad9a82-0592-4315-9931-0621054bdd0e",
  "metadata": {},
  "source": [
  "# How to trim messages\n",
@@ -22,33 +22,77 @@
  "\n",
  "All models have finite context windows, meaning there's a limit to how many tokens they can take as input. If you have very long messages or a chain/agent that accumulates a long message history, you'll need to manage the length of the messages you're passing in to the model.\n",
  "\n",
- "The `trim_messages` util provides some basic strategies for trimming a list of messages to be of a certain token length.\n",
+ "[trim_messages](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.utils.trim_messages.html) can be used to reduce the size of a chat history to a specified token count or specified message count.\n",
  "\n",
- "## Getting the last `max_tokens` tokens\n",
- "\n",
- "To get the last `max_tokens` in the list of Messages we can set `strategy=\"last\"`. Notice that for our `token_counter` we can pass in a function (more on that below) or a language model (since language models have a message token counting method). It makes sense to pass in a model when you're trimming your messages to fit into the context window of that specific model:"
+ "\n",
+ "If passing the trimmed chat history back into a chat model directly, the trimmed chat history should satisfy the following properties:\n",
+ "\n",
+ "1. The resulting chat history should be **valid**. Most chat models expect that chat\n",
+ "   history starts with either (1) a `HumanMessage` or (2) a [SystemMessage](/docs/concepts/#systemmessage) followed\n",
+ "   by a `HumanMessage`. In addition, generally a `ToolMessage` can only appear after an `AIMessage`\n",
+ "   that involved a tool call. This can be achieved by setting `start_on=\"human\"`.\n",
+ "2. It includes recent messages and drops old messages in the chat history.\n",
+ "   This can be achieved by setting `strategy=\"last\"`.\n",
+ "3. Usually, the new chat history should include the `SystemMessage` if it\n",
+ "   was present in the original chat history since the `SystemMessage` includes\n",
+ "   special instructions to the chat model. The `SystemMessage` is almost always\n",
+ "   the first message in the history if present. This can be achieved by setting\n",
+ "   `include_system=True`.\n",
+ "\n",
+ "The short sketch in the next two cells illustrates why property (1) matters."
  ]
  },
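+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is that minimal sketch. It counts each message as one token (`token_counter=len`) purely to keep the example small; the toy history and the `invalid`/`valid` names are illustrative only:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.messages import AIMessage, HumanMessage, trim_messages\n",
+ "\n",
+ "history = [\n",
+ "    HumanMessage(\"why is it called langchain\"),\n",
+ "    AIMessage(\"because chains!\"),\n",
+ "    HumanMessage(\"what do you call a speechless parrot\"),\n",
+ "]\n",
+ "\n",
+ "# Keeping only the last two messages leaves an AIMessage first,\n",
+ "# which is an invalid opening for most chat models.\n",
+ "invalid = trim_messages(history, strategy=\"last\", token_counter=len, max_tokens=2)\n",
+ "\n",
+ "# start_on=\"human\" also drops the leading AIMessage, leaving a valid history.\n",
+ "valid = trim_messages(\n",
+ "    history, strategy=\"last\", token_counter=len, max_tokens=2, start_on=\"human\"\n",
+ ")\n",
+ "\n",
+ "print(invalid)\n",
+ "print(valid)"
+ ]
+ },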
+ {
+ "cell_type": "markdown",
+ "id": "e4bffc37-78c0-46c3-ad0c-b44de0ed3e90",
+ "metadata": {},
+ "source": [
+ "## Trimming based on token count\n",
+ "\n",
+ "Here, we'll trim the chat history based on token count. The trimmed chat history will produce a **valid** chat history that includes the `SystemMessage`.\n",
+ "\n",
+ "To keep the most recent messages, we set `strategy=\"last\"`. We'll also set `include_system=True` to include the `SystemMessage`, and `start_on=\"human\"` to make sure the resulting chat history is valid.\n",
+ "\n",
+ "This is a good default configuration when using `trim_messages` based on token count. Remember to adjust `token_counter` and `max_tokens` for your use case.\n",
+ "\n",
+ "Notice that for our `token_counter` we can pass in a function (more on that below) or a language model (since language models have a message token counting method). It makes sense to pass in a model when you're trimming your messages to fit into the context window of that specific model:"
+ ]
+ },
 {
 "cell_type": "code",
 "execution_count": 1,
- "id": "c974633b-3bd0-4844-8a8f-85e3e25f13fe",
+ "id": "c91edeb2-9978-4665-9fdb-fc96cdb51caa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "pip install -qU langchain-openai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "40ea972c-d424-4bc4-9f2e-82f01c3d7598",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n",
- " HumanMessage(content='what do you call a speechless parrot')]"
+ "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 1,
+ "execution_count": 2,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
- "# pip install -U langchain-openai\n",
 "from langchain_core.messages import (\n",
 "    AIMessage,\n",
 "    HumanMessage,\n",
 "    HumanMessage(\"what do you call a speechless parrot\"),\n",
 "]\n",
 "\n",
+ "\n",
 "trim_messages(\n",
 "    messages,\n",
- "    max_tokens=45,\n",
+ "    # Keep the last <= max_tokens tokens of the messages.\n",
 "    strategy=\"last\",\n",
+ "    # highlight-start\n",
+ "    # Remember to adjust based on your model\n",
+ "    # or else pass a custom token_counter\n",
 "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
+ "    # highlight-end\n",
+ "    # highlight-start\n",
+ "    # Remember to adjust based on the desired conversation\n",
+ "    # length\n",
+ "    max_tokens=45,\n",
+ "    # highlight-end\n",
+ "    # Most chat models expect that chat history starts with either:\n",
+ "    # (1) a HumanMessage or\n",
+ "    # (2) a SystemMessage followed by a HumanMessage\n",
+ "    # start_on=\"human\" makes sure we produce a valid chat history\n",
+ "    start_on=\"human\",\n",
+ "    # Usually, we want to keep the SystemMessage\n",
+ "    # if it's present in the original history.\n",
+ "    # The SystemMessage has special instructions for the model.\n",
+ "    include_system=True,\n",
+ "    allow_partial=False,\n",
 ")"
 ]
 },
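+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To double-check the budget, you can apply the same model-based counter to the trimmed result. This is a quick sketch that reuses the `messages` list from above; `get_num_tokens_from_messages` is the chat model method used for counting when a model is passed as the `token_counter`, and `selected` is just an illustrative variable name:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+ "\n",
+ "# 'selected' is a local name for the trimmed history.\n",
+ "selected = trim_messages(\n",
+ "    messages,\n",
+ "    strategy=\"last\",\n",
+ "    token_counter=llm,\n",
+ "    max_tokens=45,\n",
+ "    start_on=\"human\",\n",
+ "    include_system=True,\n",
+ ")\n",
+ "\n",
+ "# The trimmed history should fit within the 45-token budget.\n",
+ "print(llm.get_num_tokens_from_messages(selected))"
+ ]
+ },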
+ {
+ "cell_type": "markdown",
+ "id": "28fcfc94-0d4a-415c-9506-8ae7634253a2",
+ "metadata": {},
+ "source": [
+ "## Trimming based on message count\n",
+ "\n",
+ "Alternatively, we can trim the chat history based on **message count** by setting `token_counter=len`. In this case, each message counts as a single token, and `max_tokens` controls the maximum number of messages.\n",
+ "\n",
+ "This is a good default configuration when using `trim_messages` based on message count. Remember to adjust `max_tokens` for your use case."
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": 2,
- "id": "589b0223-3a73-44ec-8315-2dba3ee6117d",
+ "execution_count": 3,
+ "id": "c8fdedae-0e6b-4901-a222-81fc95e265c2",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n",
- " HumanMessage(content='what do you call a speechless parrot')]"
+ "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='and who is harrison chasing anyways', additional_kwargs={}, response_metadata={}),\n",
+ " AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 2,
+ "execution_count": 3,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
 "trim_messages(\n",
 "    messages,\n",
- "    max_tokens=45,\n",
+ "    # Keep the last <= max_tokens messages.\n",
 "    strategy=\"last\",\n",
- "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
+ "    # highlight-next-line\n",
+ "    token_counter=len,\n",
+ "    # When token_counter=len, each message\n",
+ "    # will be counted as a single token.\n",
+ "    # highlight-start\n",
+ "    # Remember to adjust for your use case\n",
+ "    max_tokens=5,\n",
+ "    # highlight-end\n",
+ "    # Most chat models expect that chat history starts with either:\n",
+ "    # (1) a HumanMessage or\n",
+ "    # (2) a SystemMessage followed by a HumanMessage\n",
+ "    # start_on=\"human\" makes sure we produce a valid chat history\n",
+ "    start_on=\"human\",\n",
+ "    # Usually, we want to keep the SystemMessage\n",
+ "    # if it's present in the original history.\n",
+ "    # The SystemMessage has special instructions for the model.\n",
 "    include_system=True,\n",
 ")"
 ]
 },
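+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Message-count trimming makes it easy to express policies such as \"keep the last couple of exchanges\". A small sketch, assuming one human/AI exchange is two messages (the helper name `last_k_exchanges` is illustrative):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List\n",
+ "\n",
+ "from langchain_core.messages import BaseMessage, trim_messages\n",
+ "\n",
+ "\n",
+ "def last_k_exchanges(\n",
+ "    messages: List[BaseMessage], k: int = 2\n",
+ ") -> List[BaseMessage]:\n",
+ "    \"\"\"Keep the SystemMessage (if any) plus roughly the last k exchanges.\"\"\"\n",
+ "    return trim_messages(\n",
+ "        messages,\n",
+ "        strategy=\"last\",\n",
+ "        token_counter=len,\n",
+ "        # One exchange is two messages; +1 leaves room for the SystemMessage.\n",
+ "        max_tokens=2 * k + 1,\n",
+ "        start_on=\"human\",\n",
+ "        include_system=True,\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "last_k_exchanges(messages, k=1)"
+ ]
+ },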
 {
+ "attachments": {},
 "cell_type": "markdown",
- "id": "8a8b542c-04d1-4515-8d82-b999ea4fac4f",
+ "id": "9367857f-7f9a-4d17-9f9c-6ffc5aae909c",
 "metadata": {},
 "source": [
+ "## Advanced usage\n",
+ "\n",
+ "You can use `trim_messages` as a building block to create more complex processing logic.\n",
+ "\n",
 "If we want to allow splitting up the contents of a message, we can specify `allow_partial=True`:"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 3,
- "id": "8c46a209-dddd-4d01-81f6-f6ae55d3225c",
+ "execution_count": 4,
+ "id": "8bcca1fe-674c-4713-bacc-8e8e6d6f56c3",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n",
- " AIMessage(content=\"\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n",
- " HumanMessage(content='what do you call a speechless parrot')]"
+ "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+ " AIMessage(content=\"\\nWhy, he's probably chasing after the last cup of coffee in the office!\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 3,
+ "execution_count": 4,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 {
 "cell_type": "markdown",
- "id": "306adf9c-41cd-495c-b4dc-e4f43dd7f8f8",
+ "id": "245bee9b-e515-4e89-8f2a-84bda9a25de8",
 "metadata": {},
 "source": [
- "If we need to make sure that our first message (excluding the system message) is always of a specific type, we can specify `start_on`:"
+ "By default, the `SystemMessage` will not be kept. You can drop it either by setting `include_system=False` explicitly or by omitting the `include_system` argument entirely."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 4,
- "id": "878a730b-fe44-4e9d-ab65-7b8f7b069de8",
+ "execution_count": 5,
+ "id": "94351736-28a1-44a3-aac7-82356c81d171",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n",
- " HumanMessage(content='what do you call a speechless parrot')]"
+ "[AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 4,
+ "execution_count": 5,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
 "trim_messages(\n",
 "    messages,\n",
- "    max_tokens=60,\n",
+ "    max_tokens=45,\n",
 "    strategy=\"last\",\n",
 "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
- "    include_system=True,\n",
- "    start_on=\"human\",\n",
 ")"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "7f5d391d-235b-4091-b2de-c22866b478f3",
 "metadata": {},
 "source": [
- "## Getting the first `max_tokens` tokens\n",
- "\n",
 "We can perform the flipped operation of getting the *first* `max_tokens` by specifying `strategy=\"first\"`:"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
 "id": "5f56ae54-1a39-4019-9351-3b494c003d5b",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n",
- " HumanMessage(content=\"i wonder why it's called langchain\")]"
+ "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content=\"i wonder why it's called langchain\", additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 5,
+ "execution_count": 6,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
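+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Any callable that takes a list of messages and returns an `int` can serve as the `token_counter`. Before the exact `tiktoken`-based counter below, here is a rough sketch assuming roughly four characters per token for English text (the name `approx_token_counter` is illustrative, and the estimate is deliberately crude):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List\n",
+ "\n",
+ "from langchain_core.messages import BaseMessage\n",
+ "\n",
+ "\n",
+ "def approx_token_counter(messages: List[BaseMessage]) -> int:\n",
+ "    \"\"\"Very rough heuristic: about four characters per token.\"\"\"\n",
+ "    # Only string contents are counted; multimodal content blocks are ignored.\n",
+ "    return sum(\n",
+ "        len(m.content) // 4 for m in messages if isinstance(m.content, str)\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "approx_token_counter(messages)"
+ ]
+ },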
 {
 "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 60,
+ "id": "d930c089-e8e6-4980-9d39-11d41e794772",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "pip install -qU tiktoken"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
 "id": "1c1c3b1e-2ece-49e7-a3b6-e69877c1633b",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
- "[AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n",
- " HumanMessage(content='what do you call a speechless parrot')]"
+ "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
 ]
 },
- "execution_count": 6,
+ "execution_count": 7,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
 "from typing import List\n",
 "\n",
- "# pip install tiktoken\n",
 "import tiktoken\n",
 "from langchain_core.messages import BaseMessage, ToolMessage\n",
 "\n",
 "\n",
 "trim_messages(\n",
 "    messages,\n",
- "    max_tokens=45,\n",
- "    strategy=\"last\",\n",
+ "    # highlight-next-line\n",
 "    token_counter=tiktoken_counter,\n",
+ "    # Keep the last <= max_tokens tokens of the messages.\n",
+ "    strategy=\"last\",\n",
+ "    # highlight-start\n",
+ "    # Remember to adjust for your use case\n",
+ "    max_tokens=45,\n",
+ "    # highlight-end\n",
+ "    # Most chat models expect that chat history starts with either:\n",
+ "    # (1) a HumanMessage or\n",
+ "    # (2) a SystemMessage followed by a HumanMessage\n",
+ "    # start_on=\"human\" makes sure we produce a valid chat history\n",
+ "    start_on=\"human\",\n",
+ "    # Usually, we want to keep the SystemMessage\n",
+ "    # if it's present in the original history.\n",
+ "    # The SystemMessage has special instructions for the model.\n",
+ "    include_system=True,\n",
 ")"
 ]
 },
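+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (a sketch; the exact number depends on the tokenizer and message formatting), the counter can also be called directly on the history before handing it to `trim_messages`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tiktoken_counter(messages)"
+ ]
+ },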
"[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", - " HumanMessage(content='what do you call a speechless parrot')]" + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n", + " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]" ] }, - "execution_count": 8, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -391,17 +522,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "a9517858-fc2f-4dc3-898d-bf98a0e905a0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content='A \"polly-no-wanna-cracker\"!', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 32, 'total_tokens': 42}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_5bf7397cd3', 'finish_reason': 'stop', 'logprobs': None}, id='run-054dd309-3497-4e7b-b22a-c1859f11d32e-0', usage_metadata={'input_tokens': 32, 'output_tokens': 10, 'total_tokens': 42})" + "AIMessage(content='A polygon! (Because it\\'s a \"poly-gone\" quiet!)', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 32, 'total_tokens': 46, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_e375328146', 'finish_reason': 'stop', 'logprobs': None}, id='run-8569a119-ca02-4232-bee1-20caea61cd6d-0', usage_metadata={'input_tokens': 32, 'output_tokens': 14, 'total_tokens': 46})" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +556,15 @@ " max_tokens=45,\n", " strategy=\"last\",\n", " token_counter=llm,\n", + " # Usually, we want to keep the SystemMessage\n", + " # if it's present in the original history.\n", + " # The SystemMessage has special instructions for the model.\n", " include_system=True,\n", + " # Most chat models expect that chat history starts with either:\n", + " # (1) a HumanMessage or\n", + " # (2) a SystemMessage followed by a HumanMessage\n", + " # start_on=\"human\" makes sure we produce a valid chat history\n", + " start_on=\"human\",\n", ")\n", "\n", "chain = trimmer | llm\n", @@ -471,7 +610,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.11.4" } }, "nbformat": 4,