diff --git a/docs/docs/langsmith/walkthrough.ipynb b/docs/docs/langsmith/walkthrough.ipynb index 0e9b6c5004f..155a4418b7b 100644 --- a/docs/docs/langsmith/walkthrough.ipynb +++ b/docs/docs/langsmith/walkthrough.ipynb @@ -400,12 +400,12 @@ "id": "1cc51d0a-4982-4ff9-89c1-b294d5cce8f6", "metadata": {}, "source": [ - "#### Aggregate Evaluators\n", + "#### Batch Evaluators\n", "\n", - "Some metrics are only performed in aggregate or full \"test session\" level. These could be simple \n", - "classification metrics like Precision, Recall, or AUC, or other custom metrics.\n", + "Some metrics are aggregated over a full \"test\" without being assigned to individual runs/examples. These could be as simple \n", + "as common classification metrics like Precision, Recall, or AUC, or they could be other custom aggregate metrics.\n", "\n", - "You can define any custom metric on an aggregate level using a callable that accepts a list of Runs (system traces) and list of Examples (dataset records)." + "You can define any batch metric at the full test level by defining a function (or any callable) that accepts a list of Runs (system traces) and a list of Examples (dataset records)." ] }, { @@ -476,7 +476,7 @@ " normalize_by=10,\n", " ),\n", " ],\n", - " aggregate_evaluators=[max_pred_length],\n", + " batch_evaluators=[max_pred_length],\n", ")" ] },