From 0189c50570913d73afe8c854f95dcfd5e7d6dd71 Mon Sep 17 00:00:00 2001
From: Andrew Jaeger
Date: Fri, 27 Jun 2025 11:35:14 -0400
Subject: [PATCH] openai[fix]: Correctly set usage metadata for OpenAI
 Responses API (#31756)

---
 .../langchain_openai/chat_models/base.py      | 16 +++++---
 .../chat_models/test_responses_standard.py    | 38 +++++++++++++++++++
 .../tests/unit_tests/chat_models/test_base.py | 20 ++++++++++
 3 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 206fa2f28db..b2488031931 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -3271,19 +3271,23 @@ def _create_usage_metadata_responses(oai_token_usage: dict) -> UsageMetadata:
     input_tokens = oai_token_usage.get("input_tokens", 0)
     output_tokens = oai_token_usage.get("output_tokens", 0)
     total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens)
-
     output_token_details: dict = {
-        "audio": (oai_token_usage.get("completion_tokens_details") or {}).get(
-            "audio_tokens"
-        ),
-        "reasoning": (oai_token_usage.get("output_token_details") or {}).get(
+        "reasoning": (oai_token_usage.get("output_tokens_details") or {}).get(
             "reasoning_tokens"
-        ),
+        )
+    }
+    input_token_details: dict = {
+        "cache_read": (oai_token_usage.get("input_tokens_details") or {}).get(
+            "cached_tokens"
+        )
     }
     return UsageMetadata(
         input_tokens=input_tokens,
         output_tokens=output_tokens,
         total_tokens=total_tokens,
+        input_token_details=InputTokenDetails(
+            **{k: v for k, v in input_token_details.items() if v is not None}
+        ),
         output_token_details=OutputTokenDetails(
             **{k: v for k, v in output_token_details.items() if v is not None}
         ),
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
index 4bd6c8128e9..22e18155bd7 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
@@ -1,11 +1,17 @@
 """Standard LangChain interface tests for Responses API"""
 
+from pathlib import Path
+from typing import cast
+
 import pytest
 from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import AIMessage
 
 from langchain_openai import ChatOpenAI
 from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
 
+REPO_ROOT_DIR = Path(__file__).parents[6]
+
 
 class TestOpenAIResponses(TestOpenAIStandard):
     @property
@@ -19,3 +25,35 @@ class TestOpenAIResponses(TestOpenAIStandard):
     @pytest.mark.xfail(reason="Unsupported.")
     def test_stop_sequence(self, model: BaseChatModel) -> None:
         super().test_stop_sequence(model)
+
+    def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
+        with open(REPO_ROOT_DIR / "README.md") as f:
+            readme = f.read()
+
+        input_ = f"""What's langchain? Here's the langchain README:
+
+        {readme}
+        """
+        llm = ChatOpenAI(model="gpt-4.1-mini", output_version="responses/v1")
+        _invoke(llm, input_, stream)
+        # invoke twice so first invocation is cached
+        return _invoke(llm, input_, stream)
+
+    def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
+        llm = ChatOpenAI(
+            model="o4-mini",
+            reasoning={"effort": "medium", "summary": "auto"},
+            output_version="responses/v1",
+        )
+        input_ = "What was the 3rd highest building in 2000?"
+        return _invoke(llm, input_, stream)
+
+
+def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
+    if stream:
+        full = None
+        for chunk in llm.stream(input_):
+            full = full + chunk if full else chunk  # type: ignore[operator]
+        return cast(AIMessage, full)
+    else:
+        return cast(AIMessage, llm.invoke(input_))
diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
index d20d9479bb6..4e8f57ce7e0 100644
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -60,6 +60,7 @@ from langchain_openai.chat_models.base import (
     _convert_message_to_dict,
     _convert_to_openai_response_format,
     _create_usage_metadata,
+    _create_usage_metadata_responses,
     _format_message_content,
     _get_last_messages,
     _oai_structured_outputs_parser,
@@ -948,6 +949,25 @@ def test__create_usage_metadata() -> None:
     )
 
 
+def test__create_usage_metadata_responses() -> None:
+    response_usage_metadata = {
+        "input_tokens": 100,
+        "input_tokens_details": {"cached_tokens": 50},
+        "output_tokens": 50,
+        "output_tokens_details": {"reasoning_tokens": 10},
+        "total_tokens": 150,
+    }
+    result = _create_usage_metadata_responses(response_usage_metadata)
+
+    assert result == UsageMetadata(
+        output_tokens=50,
+        input_tokens=100,
+        total_tokens=150,
+        input_token_details={"cache_read": 50},
+        output_token_details={"reasoning": 10},
+    )
+
+
 def test__convert_to_openai_response_format() -> None:
     # Test response formats that aren't tool-like.
     response_format: dict = {
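
What the fix changes: the old code read usage details through Chat Completions
field names ("completion_tokens_details" and the misspelled
"output_token_details"), which do not exist on Responses API usage payloads, so
reasoning and cached token counts always came back empty. The Responses payload
uses "output_tokens_details" and "input_tokens_details", as the new unit test
pins down. A minimal sketch of the mapping after this patch, reusing the
private helper and the illustrative counts from that test:

    from langchain_openai.chat_models.base import _create_usage_metadata_responses

    # Usage block as returned by the OpenAI Responses API (illustrative values).
    usage = {
        "input_tokens": 100,
        "input_tokens_details": {"cached_tokens": 50},
        "output_tokens": 50,
        "output_tokens_details": {"reasoning_tokens": 10},
        "total_tokens": 150,
    }

    metadata = _create_usage_metadata_responses(usage)
    # With the fix applied, the detail fields are populated rather than dropped:
    assert metadata["input_token_details"] == {"cache_read": 50}
    assert metadata["output_token_details"] == {"reasoning": 10}

End to end, the same details should land on AIMessage.usage_metadata via the
Responses API path the integration tests exercise. A sketch, assuming
OPENAI_API_KEY is set and a prompt large enough to trigger prompt caching on a
repeat call:

    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-4.1-mini", output_version="responses/v1")
    msg = llm.invoke("What's langchain?")
    # usage_metadata now carries input_token_details / output_token_details.
    print(msg.usage_metadata)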