feat(openai): enable stream_usage when using default base URL and client (#33205)

2026-06-09 10:17:00 +00:00 · 2025-10-06 08:56:38 -04:00
parent 90e4d944ac
commit 4e50ec4b98
12 changed files with 100 additions and 17 deletions
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
@@ -23,7 +23,6 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
            "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
            "openai_api_version": OPENAI_API_VERSION,
            "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
        }

    @property
@@ -83,7 +82,6 @@ class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests):
            "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"],
            "openai_api_version": OPENAI_API_VERSION,
            "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
        }

    @property
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -200,7 +200,7 @@ def test_openai_invoke() -> None:

 def test_stream() -> None:
    """Test streaming tokens from OpenAI."""
-    llm = ChatOpenAI()
+    llm = ChatOpenAI(model="gpt-4.1-mini")

    full: Optional[BaseMessageChunk] = None
    for chunk in llm.stream("I'm Pickle Rick"):
@@ -214,7 +214,7 @@ def test_stream() -> None:
    aggregate: Optional[BaseMessageChunk] = None
    chunks_with_token_counts = 0
    chunks_with_response_metadata = 0
-    for chunk in llm.stream("Hello", stream_usage=True):
+    for chunk in llm.stream("Hello"):
        assert isinstance(chunk.content, str)
        aggregate = chunk if aggregate is None else aggregate + chunk
        assert isinstance(chunk, AIMessageChunk)
@@ -281,13 +281,14 @@ async def test_astream() -> None:
            assert chunks_with_token_counts == 0
            assert full.usage_metadata is None

-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
-    await _test_stream(llm.astream("Hello"), expect_usage=False)
+    llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
+    await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)
    await _test_stream(
        llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True
    )
    await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True)
    llm = ChatOpenAI(
+        model="gpt-4.1-mini",
        temperature=0,
        max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
        model_kwargs={"stream_options": {"include_usage": True}},
@@ -297,7 +298,12 @@ async def test_astream() -> None:
        llm.astream("Hello", stream_options={"include_usage": False}),
        expect_usage=False,
    )
-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True)  # type: ignore[call-arg]
+    llm = ChatOpenAI(
+        model="gpt-4.1-mini",
+        temperature=0,
+        max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
+        stream_usage=True,
+    )
    await _test_stream(llm.astream("Hello"), expect_usage=True)
    await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)

--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@@ -22,7 +22,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):

    @property
    def chat_model_params(self) -> dict:
-        return {"model": "gpt-4o-mini", "stream_usage": True}
+        return {"model": "gpt-4o-mini"}

    @property
    def supports_image_inputs(self) -> bool: