From d3c2ca5656155f2895ff6d7309a166cd37dc01ae Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Wed, 27 Sep 2023 16:04:43 -0700 Subject: [PATCH] Enhanced pairwise error (#11131) --- .../docs/guides/evaluation/comparison/index.mdx | 4 ++++ docs/extras/integrations/llms/titan_takeoff.ipynb | 5 +++-- .../langchain/smith/evaluation/runner_utils.py | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/docs_skeleton/docs/guides/evaluation/comparison/index.mdx b/docs/docs_skeleton/docs/guides/evaluation/comparison/index.mdx index 7f4f033420e..8f956f6068d 100644 --- a/docs/docs_skeleton/docs/guides/evaluation/comparison/index.mdx +++ b/docs/docs_skeleton/docs/guides/evaluation/comparison/index.mdx @@ -16,6 +16,10 @@ Here's a summary of the key methods and properties of a comparison evaluator: - `requires_input`: This property indicates whether this evaluator requires an input string. - `requires_reference`: This property specifies whether this evaluator requires a reference label. +:::note LangSmith Support +The [run_on_dataset](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.smith) evaluation method is designed to evaluate only a single model at a time, and thus, doesn't support these evaluators. +::: + Detailed information about creating custom evaluators and the available built-in comparison evaluators is provided in the following sections. import DocCardList from "@theme/DocCardList"; diff --git a/docs/extras/integrations/llms/titan_takeoff.ipynb b/docs/extras/integrations/llms/titan_takeoff.ipynb index 5ccff187647..9d79166000f 100644 --- a/docs/extras/integrations/llms/titan_takeoff.ipynb +++ b/docs/extras/integrations/llms/titan_takeoff.ipynb @@ -17,7 +17,7 @@ "source": [ "## Installation\n", "\n", - "To get started with Iris Takeoff, all you need is to have docker and python installed on your local system. If you wish to use the server with gpu suport, then you will need to install docker with cuda support.\n", + "To get started with Iris Takeoff, all you need is to have docker and python installed on your local system. If you wish to use the server with gpu support, then you will need to install docker with cuda support.\n", "\n", "For Mac and Windows users, make sure you have the docker daemon running! You can check this by running docker ps in your terminal. To start the daemon, open the docker desktop app.\n", "\n", @@ -157,7 +157,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain\n", "\n", "llm = TitanTakeoff()\n", "\n", diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py index 2939ee2b36d..7f8e2e6592f 100644 --- a/libs/langchain/langchain/smith/evaluation/runner_utils.py +++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py @@ -28,7 +28,11 @@ from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers from langchain.chains.base import Chain from langchain.evaluation.loading import load_evaluator -from langchain.evaluation.schema import EvaluatorType, StringEvaluator +from langchain.evaluation.schema import ( + EvaluatorType, + PairwiseStringEvaluator, + StringEvaluator, +) from langchain.schema import ChatResult, LLMResult from langchain.schema.language_model import BaseLanguageModel from langchain.schema.messages import BaseMessage, messages_from_dict @@ -486,6 +490,15 @@ def _construct_run_evaluator( reference_key=reference_key, tags=[eval_type_tag], ) + elif isinstance(evaluator_, PairwiseStringEvaluator): + raise NotImplementedError( + f"Run evaluator for {eval_type_tag} is not implemented." + " PairwiseStringEvaluators compare the outputs of two different models" + " rather than the output of a single model." + " Did you mean to use a StringEvaluator instead?" + "\nSee: https://python.langchain.com/docs/guides/evaluation/string/" + ) + else: raise NotImplementedError( f"Run evaluator for {eval_type_tag} is not implemented"