diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py index de5b77bb661..61ffdb1a817 100644 --- a/libs/langchain/langchain/smith/evaluation/runner_utils.py +++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py @@ -122,7 +122,7 @@ class TestResult(dict): r.update( { **{f"feedback.{f.key}": f.score for f in feedback}, - "error": result.get("error"), + "error": result.get("Error"), "execution_time": result["execution_time"], } ) @@ -1026,7 +1026,7 @@ def _collect_test_results( "execution_time": all_execution_time.get(str(example.id)), } if isinstance(output, EvalError): - results[str(example.id)]["error"] = output.error + results[str(example.id)]["Error"] = output.error else: results[str(example.id)]["output"] = output if example.outputs: