diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index daf6aeb3a73..7040900aeef 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -221,12 +221,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             generation: Optional[ChatGenerationChunk] = None
             try:
                 for chunk in self._stream(messages, stop=stop, **kwargs):
-                    run_manager.on_llm_new_token(
-                        cast(str, chunk.message.content), chunk=chunk
-                    )
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    run_manager.on_llm_new_token(
+                        cast(str, chunk.message.content), chunk=chunk
+                    )
                     yield chunk.message
                     if generation is None:
                         generation = chunk
@@ -293,12 +293,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     stop=stop,
                     **kwargs,
                 ):
-                    await run_manager.on_llm_new_token(
-                        cast(str, chunk.message.content), chunk=chunk
-                    )
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    await run_manager.on_llm_new_token(
+                        cast(str, chunk.message.content), chunk=chunk
+                    )
                     yield chunk.message
                     if generation is None:
                         generation = chunk
@@ -610,13 +610,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
        ):
            chunks: List[ChatGenerationChunk] = []
            for chunk in self._stream(messages, stop=stop, **kwargs):
+                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                if run_manager:
                    if chunk.message.id is None:
                        chunk.message.id = f"run-{run_manager.run_id}"
                    run_manager.on_llm_new_token(
                        cast(str, chunk.message.content), chunk=chunk
                    )
-                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                chunks.append(chunk)
            result = generate_from_stream(iter(chunks))
        else:
@@ -691,13 +691,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
        ):
            chunks: List[ChatGenerationChunk] = []
            async for chunk in self._astream(messages, stop=stop, **kwargs):
+                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                if run_manager:
                    if chunk.message.id is None:
                        chunk.message.id = f"run-{run_manager.run_id}"
                    await run_manager.on_llm_new_token(
                        cast(str, chunk.message.content), chunk=chunk
                    )
-                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                chunks.append(chunk)
            result = generate_from_stream(iter(chunks))
        else:
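
Why the reordering matters: on_llm_new_token handlers receive the chunk object passed to them, so any fields assigned after the callback fires are invisible to it. All four hunks make the same change: chunk.message.id and chunk.message.response_metadata are now populated before the callback runs instead of after. A minimal sketch of a handler that depends on that ordering; the ChunkInspector class is illustrative and not part of this diff:

    from typing import Any, Optional, Union

    from langchain_core.callbacks import BaseCallbackHandler
    from langchain_core.outputs import ChatGenerationChunk, GenerationChunk


    class ChunkInspector(BaseCallbackHandler):
        """Hypothetical handler reading per-chunk metadata as tokens stream in."""

        def on_llm_new_token(
            self,
            token: str,
            *,
            chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
            **kwargs: Any,
        ) -> None:
            if isinstance(chunk, ChatGenerationChunk):
                # After this diff, both fields are already set when the callback
                # fires; before it, `id` could still be None and
                # `response_metadata` could still be empty at this point.
                print(chunk.message.id, chunk.message.response_metadata, token)

Such a handler would be attached in the usual way, e.g. model.stream(messages, config={"callbacks": [ChunkInspector()]}); this is a sketch under the assumption of the standard RunnableConfig callbacks mechanism, not something this diff introduces.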