mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 23:29:21 +00:00
parent
cc76f0e834
commit
e5bd32ff6d
@ -135,6 +135,7 @@ class TestResult(dict):
|
|||||||
**{f"feedback.{f.key}": f.score for f in feedback},
|
**{f"feedback.{f.key}": f.score for f in feedback},
|
||||||
"error": result.get("Error"),
|
"error": result.get("Error"),
|
||||||
"execution_time": result["execution_time"],
|
"execution_time": result["execution_time"],
|
||||||
|
"run_id": result.get("run_id"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
records.append(r)
|
records.append(r)
|
||||||
@ -1018,6 +1019,7 @@ def _collect_test_results(
|
|||||||
wait_for_all_evaluators()
|
wait_for_all_evaluators()
|
||||||
all_eval_results = {}
|
all_eval_results = {}
|
||||||
all_execution_time = {}
|
all_execution_time = {}
|
||||||
|
all_run_ids = {}
|
||||||
for c in configs:
|
for c in configs:
|
||||||
for callback in cast(list, c["callbacks"]):
|
for callback in cast(list, c["callbacks"]):
|
||||||
if isinstance(callback, EvaluatorCallbackHandler):
|
if isinstance(callback, EvaluatorCallbackHandler):
|
||||||
@ -1028,12 +1030,14 @@ def _collect_test_results(
|
|||||||
elif isinstance(callback, LangChainTracer):
|
elif isinstance(callback, LangChainTracer):
|
||||||
run = callback.latest_run
|
run = callback.latest_run
|
||||||
example_id = callback.example_id
|
example_id = callback.example_id
|
||||||
|
run_id = str(run.id) if run else None
|
||||||
execution_time = (
|
execution_time = (
|
||||||
(run.end_time - run.start_time).total_seconds()
|
(run.end_time - run.start_time).total_seconds()
|
||||||
if run and run.end_time
|
if run and run.end_time
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
all_execution_time[str(example_id)] = execution_time
|
all_execution_time[str(example_id)] = execution_time
|
||||||
|
all_run_ids[str(example_id)] = run_id
|
||||||
|
|
||||||
results: dict = {}
|
results: dict = {}
|
||||||
for example, output in zip(examples, batch_results):
|
for example, output in zip(examples, batch_results):
|
||||||
@ -1042,6 +1046,7 @@ def _collect_test_results(
|
|||||||
"input": example.inputs,
|
"input": example.inputs,
|
||||||
"feedback": feedback,
|
"feedback": feedback,
|
||||||
"execution_time": all_execution_time.get(str(example.id)),
|
"execution_time": all_execution_time.get(str(example.id)),
|
||||||
|
"run_id": all_run_ids.get(str(example.id)),
|
||||||
}
|
}
|
||||||
if isinstance(output, EvalError):
|
if isinstance(output, EvalError):
|
||||||
results[str(example.id)]["Error"] = output.Error
|
results[str(example.id)]["Error"] = output.Error
|
||||||
|
@ -345,6 +345,7 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||||||
"feedback": [],
|
"feedback": [],
|
||||||
# No run since we mock the call to the llm above
|
# No run since we mock the call to the llm above
|
||||||
"execution_time": None,
|
"execution_time": None,
|
||||||
|
"run_id": None,
|
||||||
}
|
}
|
||||||
for example in examples
|
for example in examples
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user