From 97dec30eeaa069928f077d214d0b26d08cfbb8c5 Mon Sep 17 00:00:00 2001 From: Vadym Barda Date: Mon, 24 Mar 2025 14:50:48 -0400 Subject: [PATCH] docs[patch]: update trim_messages doc (#30462) --- docs/docs/how_to/trim_messages.ipynb | 97 +++++++++++++--------- libs/core/langchain_core/messages/utils.py | 6 ++ 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb index 9c6553c7e6d..db510d099bc 100644 --- a/docs/docs/how_to/trim_messages.ipynb +++ b/docs/docs/how_to/trim_messages.ipynb @@ -60,18 +60,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "c91edeb2-9978-4665-9fdb-fc96cdb51caa", + "execution_count": null, + "id": "c9bed5ea-8aee-4d43-a717-77a431a02d2e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "pip install -qU langchain-openai" ] @@ -102,7 +94,7 @@ " ToolMessage,\n", " trim_messages,\n", ")\n", - "from langchain_openai import ChatOpenAI\n", + "from langchain_core.messages.utils import count_tokens_approximately\n", "\n", "messages = [\n", " SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n", @@ -124,8 +116,8 @@ " strategy=\"last\",\n", " # highlight-start\n", " # Remember to adjust based on your model\n", - " # or else pass a custom token_encoder\n", - " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " # or else pass a custom token_counter\n", + " token_counter=count_tokens_approximately,\n", " # highlight-end\n", " # Most chat models expect that chat history starts with either:\n", " # (1) a HumanMessage or\n", @@ -220,7 +212,7 @@ "source": [ "## Advanced Usage\n", "\n", - "You can use `trim_message` as a building-block to create more complex processing logic.\n", + "You can use `trim_messages` as a building-block to create more complex processing logic.\n", "\n", "If we want to allow splitting up the contents of a message we can specify `allow_partial=True`:" ] @@ -228,7 +220,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "8bcca1fe-674c-4713-bacc-8e8e6d6f56c3", + "id": "0265eba7-c8f3-4495-bcbb-17cd7ede3ece", "metadata": {}, "outputs": [ { @@ -249,7 +241,7 @@ " messages,\n", " max_tokens=56,\n", " strategy=\"last\",\n", - " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " token_counter=count_tokens_approximately,\n", " include_system=True,\n", " allow_partial=True,\n", ")" @@ -286,7 +278,7 @@ " messages,\n", " max_tokens=45,\n", " strategy=\"last\",\n", - " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " token_counter=count_tokens_approximately,\n", ")" ] }, @@ -317,6 +309,45 @@ } ], "source": [ + "trim_messages(\n", + " messages,\n", + " max_tokens=45,\n", + " strategy=\"first\",\n", + " token_counter=count_tokens_approximately,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0625c094-380f-4485-b2d2-e5dfa83fe299", + "metadata": {}, + "source": [ + "## Using `ChatModel` as a token counter\n", + "\n", + "You can pass a ChatModel as a token-counter. This will use `ChatModel.get_num_tokens_from_messages`. Let's demonstrate how to use it with OpenAI:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9ef35359-1b7a-4918-ab41-30bec69fb3dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n", + " HumanMessage(content=\"i wonder why it's called langchain\", additional_kwargs={}, response_metadata={})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "\n", "trim_messages(\n", " messages,\n", " max_tokens=45,\n", @@ -337,25 +368,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "d930c089-e8e6-4980-9d39-11d41e794772", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "pip install -qU tiktoken" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "1c1c3b1e-2ece-49e7-a3b6-e69877c1633b", "metadata": {}, "outputs": [ @@ -366,7 +389,7 @@ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -451,17 +474,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "96aa29b2-01e0-437c-a1ab-02fb0141cb57", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content='A polygon! Because it\\'s a \"poly-gone\" quiet!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 32, 'total_tokens': 45, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_057232b607', 'finish_reason': 'stop', 'logprobs': None}, id='run-4fa026e7-9137-4fef-b596-54243615e3b3-0', usage_metadata={'input_tokens': 32, 'output_tokens': 13, 'total_tokens': 45})" + "AIMessage(content='A \"polly-no-wanna-cracker\"!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 32, 'total_tokens': 43, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90d33c15d4', 'finish_reason': 'stop', 'logprobs': None}, id='run-b1f8b63b-6bc2-4df4-b3b9-dfc4e3e675fe-0', usage_metadata={'input_tokens': 32, 'output_tokens': 11, 'total_tokens': 43, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -509,7 +532,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "1ff02d0a-353d-4fac-a77c-7c2c5262abd9", "metadata": {}, "outputs": [ @@ -520,7 +543,7 @@ " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -569,8 +592,6 @@ " return chat_history\n", "\n", "\n", - "llm = ChatOpenAI(model=\"gpt-4o\")\n", - "\n", "trimmer = trim_messages(\n", " max_tokens=45,\n", " strategy=\"last\",\n", @@ -629,7 +650,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index f34c289cd13..5331b1040f5 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -681,6 +681,12 @@ def trim_messages( BaseMessage. If a BaseLanguageModel is passed in then BaseLanguageModel.get_num_tokens_from_messages() will be used. Set to `len` to count the number of **messages** in the chat history. + + Note: + Use `count_tokens_approximately` to get fast, approximate token counts. + This is recommended for using `trim_messages` on the hot path, where + exact token counting is not necessary. + strategy: Strategy for trimming. - "first": Keep the first <= n_count tokens of the messages. - "last": Keep the last <= n_count tokens of the messages.