docs[patch]: update trim_messages doc (#30462)

This commit is contained in:
Vadym Barda 2025-03-24 14:50:48 -04:00 committed by GitHub
parent c2dd8d84ff
commit 97dec30eea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 65 additions and 38 deletions

View File

@ -60,18 +60,10 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c91edeb2-9978-4665-9fdb-fc96cdb51caa",
"execution_count": null,
"id": "c9bed5ea-8aee-4d43-a717-77a431a02d2e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"pip install -qU langchain-openai"
]
@ -102,7 +94,7 @@
" ToolMessage,\n",
" trim_messages,\n",
")\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_core.messages.utils import count_tokens_approximately\n",
"\n",
"messages = [\n",
" SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n",
@ -124,8 +116,8 @@
" strategy=\"last\",\n",
" # highlight-start\n",
" # Remember to adjust based on your model\n",
" # or else pass a custom token_encoder\n",
" token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
" # or else pass a custom token_counter\n",
" token_counter=count_tokens_approximately,\n",
" # highlight-end\n",
" # Most chat models expect that chat history starts with either:\n",
" # (1) a HumanMessage or\n",
@ -220,7 +212,7 @@
"source": [
"## Advanced Usage\n",
"\n",
"You can use `trim_message` as a building-block to create more complex processing logic.\n",
"You can use `trim_messages` as a building-block to create more complex processing logic.\n",
"\n",
"If we want to allow splitting up the contents of a message we can specify `allow_partial=True`:"
]
@ -228,7 +220,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "8bcca1fe-674c-4713-bacc-8e8e6d6f56c3",
"id": "0265eba7-c8f3-4495-bcbb-17cd7ede3ece",
"metadata": {},
"outputs": [
{
@ -249,7 +241,7 @@
" messages,\n",
" max_tokens=56,\n",
" strategy=\"last\",\n",
" token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
" token_counter=count_tokens_approximately,\n",
" include_system=True,\n",
" allow_partial=True,\n",
")"
@ -286,7 +278,7 @@
" messages,\n",
" max_tokens=45,\n",
" strategy=\"last\",\n",
" token_counter=ChatOpenAI(model=\"gpt-4o\"),\n",
" token_counter=count_tokens_approximately,\n",
")"
]
},
@ -317,6 +309,45 @@
}
],
"source": [
"trim_messages(\n",
" messages,\n",
" max_tokens=45,\n",
" strategy=\"first\",\n",
" token_counter=count_tokens_approximately,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0625c094-380f-4485-b2d2-e5dfa83fe299",
"metadata": {},
"source": [
"## Using `ChatModel` as a token counter\n",
"\n",
"You can pass a ChatModel as a token counter. This will use `ChatModel.get_num_tokens_from_messages`. Let's demonstrate how to use it with OpenAI:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9ef35359-1b7a-4918-ab41-30bec69fb3dc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
" HumanMessage(content=\"i wonder why it's called langchain\", additional_kwargs={}, response_metadata={})]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"trim_messages(\n",
" messages,\n",
" max_tokens=45,\n",
@ -337,25 +368,17 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "d930c089-e8e6-4980-9d39-11d41e794772",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"pip install -qU tiktoken"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "1c1c3b1e-2ece-49e7-a3b6-e69877c1633b",
"metadata": {},
"outputs": [
@ -366,7 +389,7 @@
" HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -451,17 +474,17 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "96aa29b2-01e0-437c-a1ab-02fb0141cb57",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='A polygon! Because it\\'s a \"poly-gone\" quiet!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 32, 'total_tokens': 45, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_057232b607', 'finish_reason': 'stop', 'logprobs': None}, id='run-4fa026e7-9137-4fef-b596-54243615e3b3-0', usage_metadata={'input_tokens': 32, 'output_tokens': 13, 'total_tokens': 45})"
"AIMessage(content='A \"polly-no-wanna-cracker\"!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 32, 'total_tokens': 43, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_90d33c15d4', 'finish_reason': 'stop', 'logprobs': None}, id='run-b1f8b63b-6bc2-4df4-b3b9-dfc4e3e675fe-0', usage_metadata={'input_tokens': 32, 'output_tokens': 11, 'total_tokens': 43, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@ -509,7 +532,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "1ff02d0a-353d-4fac-a77c-7c2c5262abd9",
"metadata": {},
"outputs": [
@ -520,7 +543,7 @@
" HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]"
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -569,8 +592,6 @@
" return chat_history\n",
"\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
"\n",
"trimmer = trim_messages(\n",
" max_tokens=45,\n",
" strategy=\"last\",\n",
@ -629,7 +650,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.12.3"
}
},
"nbformat": 4,

View File

@ -681,6 +681,12 @@ def trim_messages(
BaseMessage. If a BaseLanguageModel is passed in then
BaseLanguageModel.get_num_tokens_from_messages() will be used.
Set to `len` to count the number of **messages** in the chat history.
Note:
Use `count_tokens_approximately` to get fast, approximate token counts.
This is recommended when using `trim_messages` on the hot path, where
exact token counting is not necessary.
strategy: Strategy for trimming.
- "first": Keep the first <= n_count tokens of the messages.
- "last": Keep the last <= n_count tokens of the messages.