mirror of https://github.com/hwchase17/langchain.git
Add similarity
@@ -1,14 +1,18 @@
 """Base classes for comparing the output of two models."""
 from __future__ import annotations

-from typing import Any, Optional
+from typing import Any, Optional, Union

 from pydantic import Field

 from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.manager import Callbacks
 from langchain.chains.llm import LLMChain
-from langchain.evaluation.comparison.prompt import PROMPT, PROMPT_WITH_REFERENCE
+from langchain.evaluation.comparison.prompt import (
+    PROMPT,
+    PROMPT_WITH_REFERENCE,
+    EQUIVALENCE_PROMPT,
+)
 from langchain.prompts.prompt import PromptTemplate
 from langchain.schema import BaseOutputParser

@@ -85,34 +89,45 @@ class PairwiseStringEvalChain(LLMChain):
         cls,
         *,
         llm: BaseLanguageModel,
-        prompt: Optional[PromptTemplate] = None,
-        require_reference: bool = False,
+        prompt: Optional[Union[PromptTemplate, str]] = None,
         **kwargs: Any,
     ) -> PairwiseStringEvalChain:
         """Initialize the PairwiseStringEvalChain from an LLM.

         Args:
             llm (BaseLanguageModel): The LLM to use.
-            prompt (PromptTemplate, optional): The prompt to use.
-            require_reference (bool, optional): Whether to require a reference
-                string. Defaults to False.
+            prompt (Optional[Union[PromptTemplate, str]], optional):
+                The prompt to use. Defaults to None.
+                - If None or "default", the default prompt is used, which
+                  returns whether A is preferred to B without using a
+                  reference label.
+                - If "with_reference", the chain uses a reference label
+                  when deciding whether A is preferred to B.
+                - If "equivalence", the prompt returns whether the outputs
+                  of A and B share the same meaning.
             **kwargs (Any): Additional keyword arguments.

         Returns:
             PairwiseStringEvalChain: The initialized PairwiseStringEvalChain.
         """
         expected_input_vars = {"output_a", "output_b", "input"}
-        if prompt is None:
-            if require_reference:
-                expected_input_vars.add("reference")
-                prompt_ = PROMPT_WITH_REFERENCE
-            else:
-                prompt_ = PROMPT
-        else:
-            if require_reference:
-                expected_input_vars.add("reference")
-            prompt_ = prompt
+        if isinstance(prompt, PromptTemplate):
+            if "reference" in prompt.input_variables:
+                expected_input_vars.add("reference")
+            prompt_ = prompt
+        elif prompt is None or prompt == "default":
+            prompt_ = PROMPT
+        elif prompt == "with_reference":
+            expected_input_vars.add("reference")
+            prompt_ = PROMPT_WITH_REFERENCE
+        elif prompt == "equivalence":
+            prompt_ = EQUIVALENCE_PROMPT
+        else:
+            raise ValueError(
+                f"Invalid prompt: {prompt}. "
+                "Prompt must be one of None, 'default', 'with_reference', "
+                "or 'equivalence'."
+            )
         if expected_input_vars != set(prompt_.input_variables):
             raise ValueError(
                 f"Input variables should be {expected_input_vars}, "
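For orientation, here is a minimal usage sketch of the new string-valued prompt argument. The module path langchain.evaluation.comparison.eval_chain, the ChatOpenAI wrapper, and the example inputs are assumptions for illustration; only the from_llm signature and the "input"/"output_a"/"output_b" variables come from this diff.

from langchain.chat_models import ChatOpenAI  # assumed chat model wrapper
from langchain.evaluation.comparison.eval_chain import PairwiseStringEvalChain  # assumed module path

llm = ChatOpenAI(temperature=0)

# Same as prompt=None or prompt="default": pairwise preference, no reference label.
preference_chain = PairwiseStringEvalChain.from_llm(llm=llm)

# New in this commit: judge whether the two outputs share the same meaning.
equivalence_chain = PairwiseStringEvalChain.from_llm(llm=llm, prompt="equivalence")

# The equivalence prompt expects the "input", "output_a", and "output_b" variables.
result = equivalence_chain(
    {
        "input": "What is 2 + 2?",
        "output_a": "2 + 2 equals 4.",
        "output_b": "The answer is four.",
    }
)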
@@ -31,7 +31,7 @@ PROMPT = PromptTemplate(
     input_variables=["input", "output_a", "output_b"], template=template
 )

-template = """Act as a fair judge and rate the two responses to the question below.\
+ref_template = """Act as a fair judge and rate the two responses to the question below.\
 Choose the response that best followed the instructions and answered the question.\
 Your assessment should weigh helpfulness, relevance, accuracy, depth, creativity, and detail.\
 Start by comparing both responses and give a brief rationale.\
@@ -60,5 +60,27 @@ After giving your rationale, make your final decision using this format:\
 [/RESPONSE B]"""

 PROMPT_WITH_REFERENCE = PromptTemplate(
-    input_variables=["input", "output_a", "output_b", "reference"], template=template
+    input_variables=["input", "output_a", "output_b", "reference"],
+    template=ref_template,
 )
+
+
+sim_template = """You are tasked with evaluating whether the two responses to the question below \
+are equivalent in meaning. Start by comparing both responses and give a brief rationale. \
+If the task or question is provided, use it to help determine equivalence.
+
+[BEGIN DATA]
+***
+[Question]: {input}
+***
+[Response 1]: {output_a}
+***
+[Response 2]: {output_b}
+***
+[END DATA]
+
+Are the meanings of Response 1 and Response 2 the same? The choices are [[A]]: Equivalent, [[B]]: Not Equivalent, and [[C]]: Impossible to tell. First, reason step by step about both responses to be sure that your conclusion is correct; avoid simply stating the answer at the outset. Then print only the judgement [[A]], [[B]], or [[C]] on its own line, corresponding to the correct answer. At the end, repeat just the letter again by itself on a new line."""
+
+EQUIVALENCE_PROMPT = PromptTemplate(
+    input_variables=["input", "output_a", "output_b"], template=sim_template
+)
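Since EQUIVALENCE_PROMPT is an ordinary PromptTemplate over "input", "output_a", and "output_b", it can be rendered directly. A quick sketch with made-up values (the question and responses below are illustrative only):

from langchain.evaluation.comparison.prompt import EQUIVALENCE_PROMPT

# Illustrative values; any question plus two candidate responses will do.
rendered = EQUIVALENCE_PROMPT.format(
    input="What is the capital of France?",
    output_a="Paris is the capital of France.",
    output_b="The capital city of France is Paris.",
)
print(rendered)  # prints the filled-in template, ending with the [[A]]/[[B]]/[[C]] instruction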