openai[patch]: ignore file blocks when counting tokens (#30601)

OpenAI does not appear to document how it transforms PDF pages to images, which determines how tokens are counted (see https://platform.openai.com/docs/guides/pdf-files?api-mode=chat#usage-considerations). Currently, these block types raise a ValueError inside `get_num_tokens_from_messages`. Here we update the method to emit a warning and continue, ignoring file blocks when counting tokens.
2026-06-09 18:50:33 +00:00 · 2025-04-01 15:29:33 -04:00
parent 558191198f
commit 8a69de5c24
2 changed files with 25 additions and 0 deletions
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -752,6 +752,25 @@ def test_get_num_tokens_from_messages() -> None:
    actual = llm.get_num_tokens_from_messages(messages)
    assert expected == actual

+    # Test file inputs
+    messages = [
+        HumanMessage(
+            [
+                "Summarize this document.",
+                {
+                    "type": "file",
+                    "file": {
+                        "filename": "my file",
+                        "file_data": "data:application/pdf;base64,<data>",
+                    },
+                },
+            ]
+        )
+    ]
+    with pytest.warns(match="file inputs are not supported"):
+        actual = llm.get_num_tokens_from_messages(messages)
+        assert actual == 13
+

 class Foo(BaseModel):
    bar: int