mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-11 13:55:03 +00:00
openai[patch]: ignore file blocks when counting tokens (#30601)
OpenAI does not appear to document how it transforms PDF pages to images, which determines how tokens are counted (see https://platform.openai.com/docs/guides/pdf-files?api-mode=chat#usage-considerations). Currently, `file` content blocks raise a ValueError inside `get_num_tokens_from_messages`. With this change, a warning is emitted instead and file blocks are ignored when counting tokens.
This commit is contained in:
parent
558191198f
commit
8a69de5c24
@ -1298,6 +1298,12 @@ class BaseChatOpenAI(BaseChatModel):
|
|||||||
encoding.encode(val["function"]["arguments"])
|
encoding.encode(val["function"]["arguments"])
|
||||||
)
|
)
|
||||||
num_tokens += len(encoding.encode(val["function"]["name"]))
|
num_tokens += len(encoding.encode(val["function"]["name"]))
|
||||||
|
elif val["type"] == "file":
|
||||||
|
warnings.warn(
|
||||||
|
"Token counts for file inputs are not supported. "
|
||||||
|
"Ignoring file inputs."
|
||||||
|
)
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unrecognized content block type\n\n{val}"
|
f"Unrecognized content block type\n\n{val}"
|
||||||
|
@ -752,6 +752,25 @@ def test_get_num_tokens_from_messages() -> None:
|
|||||||
actual = llm.get_num_tokens_from_messages(messages)
|
actual = llm.get_num_tokens_from_messages(messages)
|
||||||
assert expected == actual
|
assert expected == actual
|
||||||
|
|
||||||
|
# Test file inputs
|
||||||
|
messages = [
|
||||||
|
HumanMessage(
|
||||||
|
[
|
||||||
|
"Summarize this document.",
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"filename": "my file",
|
||||||
|
"file_data": "data:application/pdf;base64,<data>",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
with pytest.warns(match="file inputs are not supported"):
|
||||||
|
actual = llm.get_num_tokens_from_messages(messages)
|
||||||
|
assert actual == 13
|
||||||
|
|
||||||
|
|
||||||
class Foo(BaseModel):
|
class Foo(BaseModel):
|
||||||
bar: int
|
bar: int
|
||||||
|
Loading…
Reference in New Issue
Block a user