From 97dec30eeaa069928f077d214d0b26d08cfbb8c5 Mon Sep 17 00:00:00 2001
From: Vadym Barda <vadym@langchain.dev>
Date: Mon, 24 Mar 2025 14:50:48 -0400
Subject: [PATCH] docs[patch]: update trim_messages doc (#30462)

---
 docs/docs/how_to/trim_messages.ipynb       | 97 +++++++++++++---------
 libs/core/langchain_core/messages/utils.py |  6 ++
 2 files changed, 65 insertions(+), 38 deletions(-)

diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb
index 9c6553c7e6d..db510d099bc 100644
--- a/docs/docs/how_to/trim_messages.ipynb
+++ b/docs/docs/how_to/trim_messages.ipynb
@@ -60,18 +60,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "c91edeb2-9978-4665-9fdb-fc96cdb51caa",
+   "execution_count": null,
+   "id": "c9bed5ea-8aee-4d43-a717-77a431a02d2e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "pip install -qU langchain-openai"
    ]
@@ -102,7 +94,7 @@
     "    ToolMessage,\n",
     "    trim_messages,\n",
     ")\n",
-    "from langchain_openai import ChatOpenAI\n",
+    "from langchain_core.messages.utils import count_tokens_approximately\n",
     "\n",
     "messages = [\n",
     "    SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n",
@@ -124,8 +116,8 @@
     "    strategy=\"last\",\n",
     "    # highlight-start\n",
     "    # Remember to adjust based on your model\n",
-    "    # or else pass a custom token_encoder\n",
-    "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
+    "    # or else pass a custom token_counter\n",
+    "    token_counter=count_tokens_approximately,\n",
     "    # highlight-end\n",
     "    # Most chat models expect that chat history starts with either:\n",
     "    # (1) a HumanMessage or\n",
@@ -220,7 +212,7 @@
    "source": [
     "## Advanced Usage\n",
     "\n",
-    "You can use `trim_message` as a building-block to create more complex processing logic.\n",
+    "You can use `trim_messages` as a building block to create more complex processing logic.\n",
     "\n",
     "If we want to allow splitting up the contents of a message we can specify `allow_partial=True`:"
    ]
@@ -228,7 +220,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "8bcca1fe-674c-4713-bacc-8e8e6d6f56c3",
+   "id": "0265eba7-c8f3-4495-bcbb-17cd7ede3ece",
    "metadata": {},
    "outputs": [
     {
@@ -249,7 +241,7 @@
     "    messages,\n",
     "    max_tokens=56,\n",
     "    strategy=\"last\",\n",
-    "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
+    "    token_counter=count_tokens_approximately,\n",
     "    include_system=True,\n",
     "    allow_partial=True,\n",
     ")"
@@ -286,7 +278,7 @@
     "    messages,\n",
     "    max_tokens=45,\n",
     "    strategy=\"last\",\n",
-    "    token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
+    "    token_counter=count_tokens_approximately,\n",
     ")"
    ]
   },
@@ -317,6 +309,45 @@
     }
    ],
    "source": [
+    "trim_messages(\n",
+    "    messages,\n",
+    "    max_tokens=45,\n",
+    "    strategy=\"first\",\n",
+    "    token_counter=count_tokens_approximately,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0625c094-380f-4485-b2d2-e5dfa83fe299",
+   "metadata": {},
+   "source": [
+    "## Using `ChatModel` as a token counter\n",
+    "\n",
+    "You can pass a chat model as the `token_counter`. In that case, `trim_messages` counts tokens with the model's `get_num_tokens_from_messages` method. Let's demonstrate how to use it with OpenAI:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9ef35359-1b7a-4918-ab41-30bec69fb3dc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
+       " HumanMessage(content=\"i wonder why it's called langchain\", additional_kwargs={}, response_metadata={})]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
     "trim_messages(\n",
     "    messages,\n",
     "    max_tokens=45,\n",
@@ -337,25 +368,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "d930c089-e8e6-4980-9d39-11d41e794772",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "pip install -qU tiktoken"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "1c1c3b1e-2ece-49e7-a3b6-e69877c1633b",
    "metadata": {},
    "outputs": [
@@ -366,7 +389,7 @@
        " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -451,17 +474,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "96aa29b2-01e0-437c-a1ab-02fb0141cb57",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "AIMessage(content='A polygon! Because it\\'s a \"poly-gone\" quiet!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 32, 'total_tokens': 45, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_057232b607', 'finish_reason': 'stop', 'logprobs': None}, id='run-4fa026e7-9137-4fef-b596-54243615e3b3-0', usage_metadata={'input_tokens': 32, 'output_tokens': 13, 'total_tokens': 45})"
+       "AIMessage(content='A \"polly-no-wanna-cracker\"!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 32, 'total_tokens': 43, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90d33c15d4', 'finish_reason': 'stop', 'logprobs': None}, id='run-b1f8b63b-6bc2-4df4-b3b9-dfc4e3e675fe-0', usage_metadata={'input_tokens': 32, 'output_tokens': 11, 'total_tokens': 43, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -509,7 +532,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "1ff02d0a-353d-4fac-a77c-7c2c5262abd9",
    "metadata": {},
    "outputs": [
@@ -520,7 +543,7 @@
        " HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -569,8 +592,6 @@
     "    return chat_history\n",
     "\n",
     "\n",
-    "llm = ChatOpenAI(model=\"gpt-4o\")\n",
-    "\n",
     "trimmer = trim_messages(\n",
     "    max_tokens=45,\n",
     "    strategy=\"last\",\n",
@@ -629,7 +650,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index f34c289cd13..5331b1040f5 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -681,6 +681,12 @@ def trim_messages(
             BaseMessage. If a BaseLanguageModel is passed in then
             BaseLanguageModel.get_num_tokens_from_messages() will be used.
             Set to `len` to count the number of **messages** in the chat history.
+
+            Note:
+                Use `count_tokens_approximately` to get fast, approximate token counts.
+                This is recommended when `trim_messages` runs on the hot path and
+                exact token counting is not necessary.
+
         strategy: Strategy for trimming.
             - "first": Keep the first <= n_count tokens of the messages.
             - "last": Keep the last <= n_count tokens of the messages.