openai[patch]: guard against None text completions in BaseOpenAI (#31514)

Some chat completions APIs will return null `text` output (even though
this is typed as string).
This commit is contained in:
ccurme 2025-06-06 09:14:37 -04:00 committed by GitHub
parent abc8bf9f1c
commit 4cc2f6b807
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 1 deletion

View File

@ -40,7 +40,7 @@ def _stream_response_to_generation_chunk(
if not stream_response["choices"]:
return GenerationChunk(text="")
return GenerationChunk(
text=stream_response["choices"][0]["text"],
text=stream_response["choices"][0]["text"] or "",
generation_info=dict(
finish_reason=stream_response["choices"][0].get("finish_reason", None),
logprobs=stream_response["choices"][0].get("logprobs", None),

View File

@ -1,8 +1,10 @@
import os
import pytest
from langchain_core.outputs import GenerationChunk
from langchain_openai import OpenAI
from langchain_openai.llms.base import _stream_response_to_generation_chunk
os.environ["OPENAI_API_KEY"] = "foo"
@ -69,3 +71,38 @@ def test_custom_token_counting() -> None:
llm = OpenAI(custom_get_token_ids=token_encoder)
assert llm.get_token_ids("foo") == [1, 2, 3]
def test_stream_response_to_generation_chunk() -> None:
    """Verify stream-completion dicts convert to ``GenerationChunk`` correctly.

    Covers the normal case (``text`` is a string) and the pathological case
    where ``text`` is ``None`` (seen from some non-OpenAI providers), which
    must coerce to an empty string.
    """
    # Fields shared by both fixtures; only the choice's "text" varies.
    base_completion = {
        "id": "cmpl-abc123",
        "created": 1749214401,
        "model": "my-model",
        "object": "text_completion",
        "system_fingerprint": None,
        "usage": None,
    }
    cases = [
        ("foo", "foo"),  # normal string text passes through unchanged
        (None, ""),  # None text (e.g., from other providers) coerces to ""
    ]
    for raw_text, expected_text in cases:
        completion = {
            **base_completion,
            "choices": [
                {
                    "finish_reason": None,
                    "index": 0,
                    "logprobs": None,
                    "text": raw_text,
                }
            ],
        }
        chunk = _stream_response_to_generation_chunk(completion)
        assert chunk == GenerationChunk(
            text=expected_text,
            generation_info={"finish_reason": None, "logprobs": None},
        )