# openai[patch]: support streaming with json_schema response format (#29044)
- Stream JSON string content. The final chunk includes the parsed representation (following the OpenAI [docs](https://platform.openai.com/docs/guides/structured-outputs#streaming)).
- Mildly (?) breaking change: if you were using streaming with `response_format` before, usage metadata will disappear unless you set `stream_usage=True`.

## Response format

Before: *(screenshot)*

After: *(screenshot)*

## with_structured_output

For pydantic output, the behavior of `with_structured_output` is unchanged (except for the warning disappearing), because we pluck the parsed representation straight from OpenAI, and OpenAI doesn't return it until the stream is completed. Open to alternatives (e.g., parsing from content or intermediate dict chunks generated by OpenAI).

Before: *(screenshot)*

After: *(screenshot)*
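A minimal sketch of the new `response_format` streaming behavior described above (the model name and prompt are illustrative, and `stream_usage=True` is only needed if you want to keep usage metadata, per the breaking change noted earlier):

```python
from typing import Optional

from langchain_core.messages import BaseMessageChunk
from langchain_openai import ChatOpenAI
from pydantic import BaseModel


class Foo(BaseModel):
    response: str


llm = ChatOpenAI(model="gpt-4o-mini")

full: Optional[BaseMessageChunk] = None
for chunk in llm.stream("how are ya", response_format=Foo, stream_usage=True):
    # Content arrives as an incrementally streamed JSON string.
    print(chunk.content, end="", flush=True)
    full = chunk if full is None else full + chunk

assert full is not None
# The final chunk carries the parsed representation, so the aggregated
# message exposes it in additional_kwargs.
parsed = full.additional_kwargs["parsed"]
assert isinstance(parsed, Foo)
```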
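Continuing the snippet above, a sketch of the (unchanged) `with_structured_output` path for Pydantic schemas; `method="json_schema"` is assumed here to route through OpenAI's native structured outputs:

```python
structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(
    Foo, method="json_schema"
)

# OpenAI doesn't return the parsed representation until the stream is
# completed, so streaming yields the complete parsed object, not deltas.
for parsed in structured_llm.stream("how are ya"):
    print(parsed)  # Foo(response="...")
```

The integration tests below exercise the sync and async streaming paths for both `AzureChatOpenAI` and `ChatOpenAI`.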
```diff
@@ -13,6 +13,7 @@ from langchain_core.messages import (
     HumanMessage,
 )
 from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
+from pydantic import BaseModel
 
 from langchain_openai import AzureChatOpenAI
 from tests.unit_tests.fake.callbacks import FakeCallbackHandler
```
```diff
@@ -262,3 +263,37 @@ async def test_json_mode_async(llm: AzureChatOpenAI) -> None:
     assert isinstance(full, AIMessageChunk)
     assert isinstance(full.content, str)
     assert json.loads(full.content) == {"a": 1}
+
+
+class Foo(BaseModel):
+    response: str
+
+
+def test_stream_response_format(llm: AzureChatOpenAI) -> None:
+    full: Optional[BaseMessageChunk] = None
+    chunks = []
+    for chunk in llm.stream("how are ya", response_format=Foo):
+        chunks.append(chunk)
+        full = chunk if full is None else full + chunk
+    assert len(chunks) > 1
+    assert isinstance(full, AIMessageChunk)
+    parsed = full.additional_kwargs["parsed"]
+    assert isinstance(parsed, Foo)
+    assert isinstance(full.content, str)
+    parsed_content = json.loads(full.content)
+    assert parsed.response == parsed_content["response"]
+
+
+async def test_astream_response_format(llm: AzureChatOpenAI) -> None:
+    full: Optional[BaseMessageChunk] = None
+    chunks = []
+    async for chunk in llm.astream("how are ya", response_format=Foo):
+        chunks.append(chunk)
+        full = chunk if full is None else full + chunk
+    assert len(chunks) > 1
+    assert isinstance(full, AIMessageChunk)
+    parsed = full.additional_kwargs["parsed"]
+    assert isinstance(parsed, Foo)
+    assert isinstance(full.content, str)
+    parsed_content = json.loads(full.content)
+    assert parsed.response == parsed_content["response"]
```
```diff
@@ -1092,14 +1092,37 @@ class Foo(BaseModel):
 
 
 def test_stream_response_format() -> None:
-    list(ChatOpenAI(model="gpt-4o-mini").stream("how are ya", response_format=Foo))
+    full: Optional[BaseMessageChunk] = None
+    chunks = []
+    for chunk in ChatOpenAI(model="gpt-4o-mini").stream(
+        "how are ya", response_format=Foo
+    ):
+        chunks.append(chunk)
+        full = chunk if full is None else full + chunk
+    assert len(chunks) > 1
+    assert isinstance(full, AIMessageChunk)
+    parsed = full.additional_kwargs["parsed"]
+    assert isinstance(parsed, Foo)
+    assert isinstance(full.content, str)
+    parsed_content = json.loads(full.content)
+    assert parsed.response == parsed_content["response"]
 
 
 async def test_astream_response_format() -> None:
-    async for _ in ChatOpenAI(model="gpt-4o-mini").astream(
+    full: Optional[BaseMessageChunk] = None
+    chunks = []
+    async for chunk in ChatOpenAI(model="gpt-4o-mini").astream(
         "how are ya", response_format=Foo
     ):
-        pass
+        chunks.append(chunk)
+        full = chunk if full is None else full + chunk
+    assert len(chunks) > 1
+    assert isinstance(full, AIMessageChunk)
+    parsed = full.additional_kwargs["parsed"]
+    assert isinstance(parsed, Foo)
+    assert isinstance(full.content, str)
+    parsed_content = json.loads(full.content)
+    assert parsed.response == parsed_content["response"]
 
 
 @pytest.mark.parametrize("use_max_completion_tokens", [True, False])
```