mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 18:50:33 +00:00
openai[patch]: ignore file blocks when counting tokens (#30601)
OpenAI does not appear to document how it transforms PDF pages into images, which determines how tokens are counted (see https://platform.openai.com/docs/guides/pdf-files?api-mode=chat#usage-considerations). Currently, file content blocks raise a ValueError inside `get_num_tokens_from_messages`. This change makes `get_num_tokens_from_messages` emit a warning for file blocks and continue counting, ignoring them, instead of raising.
This commit is contained in:
@@ -752,6 +752,25 @@ def test_get_num_tokens_from_messages() -> None:
|
||||
actual = llm.get_num_tokens_from_messages(messages)
|
||||
assert expected == actual
|
||||
|
||||
# Test file inputs
|
||||
messages = [
|
||||
HumanMessage(
|
||||
[
|
||||
"Summarize this document.",
|
||||
{
|
||||
"type": "file",
|
||||
"file": {
|
||||
"filename": "my file",
|
||||
"file_data": "data:application/pdf;base64,<data>",
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
]
|
||||
with pytest.warns(match="file inputs are not supported"):
|
||||
actual = llm.get_num_tokens_from_messages(messages)
|
||||
assert actual == 13
|
||||
|
||||
|
||||
class Foo(BaseModel):
|
||||
bar: int
|
||||
|
||||
Reference in New Issue
Block a user