diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index ce9c25cb6cc..83c8fda6a67 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1298,6 +1298,12 @@ class BaseChatOpenAI(BaseChatModel): encoding.encode(val["function"]["arguments"]) ) num_tokens += len(encoding.encode(val["function"]["name"])) + elif val["type"] == "file": + warnings.warn( + "Token counts for file inputs are not supported. " + "Ignoring file inputs." + ) + pass else: raise ValueError( f"Unrecognized content block type\n\n{val}" diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 7dacf9d9547..fc4666c10c6 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -752,6 +752,25 @@ def test_get_num_tokens_from_messages() -> None: actual = llm.get_num_tokens_from_messages(messages) assert expected == actual + # Test file inputs + messages = [ + HumanMessage( + [ + "Summarize this document.", + { + "type": "file", + "file": { + "filename": "my file", + "file_data": "data:application/pdf;base64,", + }, + }, + ] + ) + ] + with pytest.warns(match="file inputs are not supported"): + actual = llm.get_num_tokens_from_messages(messages) + assert actual == 13 + class Foo(BaseModel): bar: int