diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index 591a3d09b7e..6a3474aec1f 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -81,6 +81,28 @@ if TYPE_CHECKING:
     from langchain_core.tools import BaseTool
 
 
+def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
+    if hasattr(error, "response"):
+        response = error.response
+        metadata: dict = {}
+        if hasattr(response, "headers"):
+            try:
+                metadata["headers"] = dict(response.headers)
+            except Exception:
+                metadata["headers"] = None
+        if hasattr(response, "status_code"):
+            metadata["status_code"] = response.status_code
+        if hasattr(error, "request_id"):
+            metadata["request_id"] = error.request_id
+        generations = [
+            ChatGeneration(message=AIMessage(content="", response_metadata=metadata))
+        ]
+    else:
+        generations = []
+
+    return generations
+
+
 def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
     """Generate from a stream.
 
@@ -443,12 +465,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     else:
                         generation += chunk
             except BaseException as e:
-                run_manager.on_llm_error(
-                    e,
-                    response=LLMResult(
-                        generations=[[generation]] if generation else []
-                    ),
-                )
+                generations_with_error_metadata = _generate_response_from_error(e)
+                if generation:
+                    generations = [[generation], generations_with_error_metadata]
+                else:
+                    generations = [generations_with_error_metadata]
+                run_manager.on_llm_error(e, response=LLMResult(generations=generations))  # type: ignore[arg-type]
                 raise
 
             if generation is None:
@@ -532,9 +554,14 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 else:
                     generation += chunk
         except BaseException as e:
+            generations_with_error_metadata = _generate_response_from_error(e)
+            if generation:
+                generations = [[generation], generations_with_error_metadata]
+            else:
+                generations = [generations_with_error_metadata]
             await run_manager.on_llm_error(
                 e,
-                response=LLMResult(generations=[[generation]] if generation else []),
+                response=LLMResult(generations=generations),  # type: ignore[arg-type]
             )
             raise
 
@@ -698,7 +725,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 )
             except BaseException as e:
                 if run_managers:
-                    run_managers[i].on_llm_error(e, response=LLMResult(generations=[]))
+                    generations_with_error_metadata = _generate_response_from_error(e)
+                    run_managers[i].on_llm_error(
+                        e,
+                        response=LLMResult(
+                            generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        ),
+                    )
                 raise
         flattened_outputs = [
             LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item]
@@ -805,8 +838,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         for i, res in enumerate(results):
             if isinstance(res, BaseException):
                 if run_managers:
+                    generations_with_error_metadata = _generate_response_from_error(res)
                     await run_managers[i].on_llm_error(
-                        res, response=LLMResult(generations=[])
+                        res,
+                        response=LLMResult(
+                            generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        ),
                     )
                 exceptions.append(res)
         if exceptions:
diff --git a/libs/core/langchain_core/tracers/base.py b/libs/core/langchain_core/tracers/base.py
index 6f01befa020..d075abf263f 100644
--- a/libs/core/langchain_core/tracers/base.py
+++ b/libs/core/langchain_core/tracers/base.py
@@ -229,8 +229,7 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC):
         # "chat_model" is only used for the experimental new streaming_events format.
         # This change should not affect any existing tracers.
         llm_run = self._errored_llm_run(
-            error=error,
-            run_id=run_id,
+            error=error, run_id=run_id, response=kwargs.pop("response", None)
         )
         self._end_trace(llm_run)
         self._on_llm_error(llm_run)
diff --git a/libs/core/langchain_core/tracers/core.py b/libs/core/langchain_core/tracers/core.py
index dbf62e1282e..9e485709f83 100644
--- a/libs/core/langchain_core/tracers/core.py
+++ b/libs/core/langchain_core/tracers/core.py
@@ -293,9 +293,20 @@ class _TracerCore(ABC):
 
         return llm_run
 
-    def _errored_llm_run(self, error: BaseException, run_id: UUID) -> Run:
+    def _errored_llm_run(
+        self, error: BaseException, run_id: UUID, response: Optional[LLMResult] = None
+    ) -> Run:
         llm_run = self._get_run(run_id, run_type={"llm", "chat_model"})
         llm_run.error = self._get_stacktrace(error)
+        if response:
+            llm_run.outputs = response.model_dump()
+            for i, generations in enumerate(response.generations):
+                for j, generation in enumerate(generations):
+                    output_generation = llm_run.outputs["generations"][i][j]
+                    if "message" in output_generation:
+                        output_generation["message"] = dumpd(
+                            cast("ChatGeneration", generation).message
+                        )
         llm_run.end_time = datetime.now(timezone.utc)
         llm_run.events.append({"name": "error", "time": llm_run.end_time})
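
For reference, a minimal sketch of what the new _generate_response_from_error helper produces, assuming this patch is applied. FakeResponse and FakeHTTPError are hypothetical stand-ins for a provider SDK error that exposes response.headers, response.status_code, and a request_id; real errors from httpx- or requests-based clients carry similar attributes.

    from langchain_core.language_models.chat_models import _generate_response_from_error

    # Hypothetical stand-ins for a provider SDK error; names are illustrative only.
    class FakeResponse:
        headers = {"retry-after": "7"}
        status_code = 429

    class FakeHTTPError(Exception):
        def __init__(self) -> None:
            super().__init__("rate limited")
            self.response = FakeResponse()
            self.request_id = "req_123"

    generations = _generate_response_from_error(FakeHTTPError())
    # A single empty AIMessage whose response_metadata carries the error details,
    # roughly: {"headers": {"retry-after": "7"}, "status_code": 429, "request_id": "req_123"}
    print(generations[0].message.response_metadata)

The on_llm_error callbacks then wrap these generations in an LLMResult, and the tracer changes in tracers/base.py and tracers/core.py persist them on the errored run's outputs, so the error metadata shows up alongside the stack trace.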