diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 686a627830b..f3dec20c72f 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -167,7 +167,7 @@ def test_configurable() -> None: "store": None, "extra_body": None, "include_response_headers": False, - "stream_usage": False, + "stream_usage": True, "use_previous_response_id": False, "use_responses_api": None, }, diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index d7b03ac1eab..a89dc6adb95 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -168,7 +168,7 @@ def test_configurable() -> None: "store": None, "extra_body": None, "include_response_headers": False, - "stream_usage": False, + "stream_usage": True, "use_previous_response_id": False, "use_responses_api": None, }, diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py index 5af8583fc91..8fab88398e0 100644 --- a/libs/partners/openai/langchain_openai/chat_models/azure.py +++ b/libs/partners/openai/langchain_openai/chat_models/azure.py @@ -615,6 +615,25 @@ class AzureChatOpenAI(BaseChatOpenAI): or os.getenv("OPENAI_ORG_ID") or os.getenv("OPENAI_ORGANIZATION") ) + + # Enable stream_usage by default if using default base URL and client + if all( + getattr(self, key, None) is None + for key in ( + "stream_usage", + "openai_proxy", + "openai_api_base", + "base_url", + "client", + "root_client", + "async_client", + "root_async_client", + "http_client", + "http_async_client", + ) + ): + self.stream_usage = True + # For backwards compatibility. 
Before openai v1, no distinction was made # between azure_endpoint and base_url (openai_api_base). openai_api_base = self.openai_api_base diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 08b6c91b8bb..0400c1f98bb 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -99,7 +99,13 @@ from langchain_core.utils.pydantic import ( is_basemodel_subclass, ) from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env -from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + SecretStr, + model_validator, +) from pydantic.v1 import BaseModel as BaseModelV1 from typing_extensions import Self @@ -478,11 +484,18 @@ class BaseChatOpenAI(BaseChatModel): ) """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or None.""" - stream_usage: bool = False - """Whether to include usage metadata in streaming output. If True, an additional + stream_usage: Optional[bool] = None + """Whether to include usage metadata in streaming output. If enabled, an additional message chunk will be generated during the stream including usage metadata. + This parameter is enabled unless ``openai_api_base`` is set or the model is + initialized with a custom client, as many chat completions APIs do not support + streaming token usage. + !!! version-added "Added in version 0.3.9" + + !!! warning "Behavior changed in 0.3.35" + Enabled for default base URL and client. 
""" max_retries: Optional[int] = None """Maximum number of retries to make when generating.""" @@ -761,6 +774,25 @@ class BaseChatOpenAI(BaseChatModel): or os.getenv("OPENAI_ORGANIZATION") ) self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE") + + # Enable stream_usage by default if using default base URL and client + if all( + getattr(self, key, None) is None + for key in ( + "stream_usage", + "openai_proxy", + "openai_api_base", + "base_url", + "client", + "root_client", + "async_client", + "root_async_client", + "http_client", + "http_async_client", + ) + ): + self.stream_usage = True + client_params: dict = { "api_key": ( self.openai_api_key.get_secret_value() if self.openai_api_key else None @@ -1073,7 +1105,7 @@ class BaseChatOpenAI(BaseChatModel): for source in stream_usage_sources: if isinstance(source, bool): return source - return self.stream_usage + return self.stream_usage or False def _stream( self, diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py index 96cd02bc6cd..b173ce87e47 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py @@ -23,7 +23,6 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests): "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], "openai_api_version": OPENAI_API_VERSION, "azure_endpoint": OPENAI_API_BASE, - "stream_usage": True, } @property @@ -83,7 +82,6 @@ class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests): "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"], "openai_api_version": OPENAI_API_VERSION, "azure_endpoint": OPENAI_API_BASE, - "stream_usage": True, } @property diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py 
b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 373f2645db0..1595599d713 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -200,7 +200,7 @@ def test_openai_invoke() -> None: def test_stream() -> None: """Test streaming tokens from OpenAI.""" - llm = ChatOpenAI() + llm = ChatOpenAI(model="gpt-4.1-mini") full: Optional[BaseMessageChunk] = None for chunk in llm.stream("I'm Pickle Rick"): @@ -214,7 +214,7 @@ def test_stream() -> None: aggregate: Optional[BaseMessageChunk] = None chunks_with_token_counts = 0 chunks_with_response_metadata = 0 - for chunk in llm.stream("Hello", stream_usage=True): + for chunk in llm.stream("Hello"): assert isinstance(chunk.content, str) aggregate = chunk if aggregate is None else aggregate + chunk assert isinstance(chunk, AIMessageChunk) @@ -281,13 +281,14 @@ async def test_astream() -> None: assert chunks_with_token_counts == 0 assert full.usage_metadata is None - llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] - await _test_stream(llm.astream("Hello"), expect_usage=False) + llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg] + await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False) await _test_stream( llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True ) await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True) llm = ChatOpenAI( + model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] model_kwargs={"stream_options": {"include_usage": True}}, @@ -297,7 +298,12 @@ async def test_astream() -> None: llm.astream("Hello", stream_options={"include_usage": False}), expect_usage=False, ) - llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True) # type: ignore[call-arg] + llm = 
ChatOpenAI( + model="gpt-4.1-mini", + temperature=0, + max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg] + stream_usage=True, + ) await _test_stream(llm.astream("Hello"), expect_usage=True) await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py index 8a94a4ca01d..302b8ab0327 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py @@ -22,7 +22,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests): @property def chat_model_params(self) -> dict: - return {"model": "gpt-4o-mini", "stream_usage": True} + return {"model": "gpt-4o-mini"} @property def supports_image_inputs(self) -> bool: diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr index 2060512958a..3faf95d352f 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr @@ -27,6 +27,7 @@ 'request_timeout': 60.0, 'stop': list([ ]), + 'stream_usage': True, 'temperature': 0.0, 'validate_base_url': True, }), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr index e7307c6158f..223d5418075 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr @@ -21,6 +21,7 @@ 'request_timeout': 60.0, 'stop': list([ ]), + 'stream_usage': True, 'temperature': 0.0, }), 
'lc': 1, diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr index 88a49a27502..88ea956d7fb 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr @@ -21,6 +21,7 @@ 'request_timeout': 60.0, 'stop': list([ ]), + 'stream_usage': True, 'temperature': 0.0, 'use_responses_api': True, }), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_azure.py b/libs/partners/openai/tests/unit_tests/chat_models/test_azure.py index 05e60f85c7b..b81b5d82ff0 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_azure.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_azure.py @@ -38,6 +38,7 @@ def test_initialize_more() -> None: assert llm.deployment_name == "35-turbo-dev" assert llm.openai_api_version == "2023-05-15" assert llm.temperature == 0 + assert llm.stream_usage ls_params = llm._get_ls_params() assert ls_params.get("ls_provider") == "azure" diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 65fceeff76e..5af243707f6 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -504,7 +504,8 @@ def mock_openai_completion() -> list[dict]: async def test_openai_astream(mock_openai_completion: list) -> None: llm_name = "gpt-4o" - llm = ChatOpenAI(model=llm_name, stream_usage=True) + llm = ChatOpenAI(model=llm_name) + assert llm.stream_usage mock_client = AsyncMock() async def mock_create(*args: Any, **kwargs: Any) -> MockAsyncContextManager: @@ -528,10 +529,14 @@ async def test_openai_astream(mock_openai_completion: list) -> None: def 
test_openai_stream(mock_openai_completion: list) -> None: llm_name = "gpt-4o" - llm = ChatOpenAI(model=llm_name, stream_usage=True) + llm = ChatOpenAI(model=llm_name) + assert llm.stream_usage mock_client = MagicMock() + call_kwargs = [] + def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager: + call_kwargs.append(kwargs) return MockSyncContextManager(mock_openai_completion) mock_client.create = mock_create @@ -543,12 +548,31 @@ def test_openai_stream(mock_openai_completion: list) -> None: if chunk.usage_metadata is not None: usage_metadata = chunk.usage_metadata + assert call_kwargs[-1]["stream_options"] == {"include_usage": True} assert usage_metadata is not None - assert usage_metadata["input_tokens"] == usage_chunk["usage"]["prompt_tokens"] assert usage_metadata["output_tokens"] == usage_chunk["usage"]["completion_tokens"] assert usage_metadata["total_tokens"] == usage_chunk["usage"]["total_tokens"] + # Verify stream usage is disabled for non-default base URL, proxy, or clients + for param, value in { + "stream_usage": False, + "openai_proxy": "http://localhost:7890", + "openai_api_base": "https://example.com/v1", + "base_url": "https://example.com/v1", + "client": mock_client, + "root_client": mock_client, + "async_client": mock_client, + "root_async_client": mock_client, + "http_client": httpx.Client(), + "http_async_client": httpx.AsyncClient(), + }.items(): + llm = ChatOpenAI(model=llm_name, **{param: value}) # type: ignore[arg-type] + assert not llm.stream_usage + with patch.object(llm, "client", mock_client): + _ = list(llm.stream("...")) + assert "stream_options" not in call_kwargs[-1] + @pytest.fixture def mock_completion() -> dict: