Add similarity

vowelparrot
2023-06-27 08:04:20 -07:00
parent e1fdb67440
commit 8094bd9b64
2 changed files with 55 additions and 18 deletions

@@ -1,14 +1,18 @@
 """Base classes for comparing the output of two models."""
 from __future__ import annotations

-from typing import Any, Optional
+from typing import Any, Optional, Union

 from pydantic import Field

 from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.manager import Callbacks
 from langchain.chains.llm import LLMChain
-from langchain.evaluation.comparison.prompt import PROMPT, PROMPT_WITH_REFERENCE
+from langchain.evaluation.comparison.prompt import (
+    PROMPT,
+    PROMPT_WITH_REFERENCE,
+    EQUIVALENCE_PROMPT,
+)
 from langchain.prompts.prompt import PromptTemplate
 from langchain.schema import BaseOutputParser
@@ -85,34 +89,45 @@ class PairwiseStringEvalChain(LLMChain):
         cls,
         *,
         llm: BaseLanguageModel,
-        prompt: Optional[PromptTemplate] = None,
-        require_reference: bool = False,
+        prompt: Optional[Union[PromptTemplate, str]] = None,
         **kwargs: Any,
     ) -> PairwiseStringEvalChain:
         """Initialize the PairwiseStringEvalChain from an LLM.

         Args:
             llm (BaseLanguageModel): The LLM to use.
-            prompt (PromptTemplate, optional): The prompt to use.
-            require_reference (bool, optional): Whether to require a reference
-                string. Defaults to False.
+            prompt (Optional[Union[PromptTemplate, str]], optional):
+                The prompt to use. Defaults to None.
+                - If None or "default", the default prompt is used, which
+                  asks whether A is preferred to B without reference labels.
+                - If "with_reference", the chain uses reference labels to
+                  judge whether A is preferred to B.
+                - If "equivalence", the chain judges whether the outputs of
+                  A and B share the same meaning.
             **kwargs (Any): Additional keyword arguments.

         Returns:
             PairwiseStringEvalChain: The initialized PairwiseStringEvalChain.
         """
expected_input_vars = {"output_a", "output_b", "input"}
if prompt is None:
if require_reference:
expected_input_vars.add("reference")
prompt_ = PROMPT_WITH_REFERENCE
else:
prompt_ = PROMPT
else:
if require_reference:
if isinstance(prompt, PromptTemplate):
if "reference" in prompt.input_variables:
expected_input_vars.add("reference")
prompt_ = prompt
elif prompt is None or prompt == "default":
prompt_ = PROMPT
elif prompt == "with_reference":
expected_input_vars.add("reference")
prompt_ = PROMPT_WITH_REFERENCE
elif prompt == "equivalence":
prompt_ = EQUIVALENCE_PROMPT
else:
raise ValueError(
f"Invalid prompt: {prompt}. "
"Prompt must be one of None, 'default', 'with_reference', "
"or 'equivalence'."
)
if expected_input_vars != set(prompt_.input_variables):
raise ValueError(
f"Input variables should be {expected_input_vars}, "

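Putting the new selection logic together, here is a minimal usage sketch. The ChatOpenAI model, the module path langchain.evaluation.comparison.eval_chain, and the example strings are illustrative assumptions, not part of this commit; the input keys follow expected_input_vars above.

from langchain.chat_models import ChatOpenAI  # illustrative; any BaseLanguageModel should work
from langchain.evaluation.comparison.eval_chain import PairwiseStringEvalChain

llm = ChatOpenAI(temperature=0)

# Default preference prompt; passing "default" or None is equivalent.
preference_chain = PairwiseStringEvalChain.from_llm(llm=llm)

# New in this commit: judge whether the two outputs mean the same thing.
equivalence_chain = PairwiseStringEvalChain.from_llm(llm=llm, prompt="equivalence")

result = equivalence_chain(
    {
        "input": "What is the boiling point of water at sea level?",
        "output_a": "100 degrees Celsius.",
        "output_b": "212 degrees Fahrenheit.",
    }
)

Because the string options map onto module-level templates, callers can still pass a fully custom PromptTemplate, as long as its input variables match the expected set.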
@@ -31,7 +31,7 @@ PROMPT = PromptTemplate(
     input_variables=["input", "output_a", "output_b"], template=template
 )

-template = """Act as a fair judge and rate the two responses to the question below.\
+ref_template = """Act as a fair judge and rate the two responses to the question below.\
 Choose the response that best followed the instructions and answered the question.\
 Your assessment should weigh helpfulness, relevance, accuracy, depth, creativity, and detail.\
 Start by comparing both responses and give a brief rationale.\
@@ -60,5 +60,27 @@ After giving your rationale, make your final decision using this format:\
 [/RESPONSE B]"""

 PROMPT_WITH_REFERENCE = PromptTemplate(
-    input_variables=["input", "output_a", "output_b", "reference"], template=template
+    input_variables=["input", "output_a", "output_b", "reference"],
+    template=ref_template,
 )
sim_template = """You are tasked with evaluating whether the two responses to the question below\
are equivalent in meaning. Start by comparing both responses and give a brief rationale.\
If the task or question are provided, use them to help determine equivalence.\
[BEGIN DATA]
***
[Question]: {input}
***
[Response 1]: {output_a}
***
[Response 2]: {output_b}
***
[END DATA]
Are the meanings of Response A and Response B the same? Choices are [[A]]: Equivalent, [[B]]: Not Equivalent, [[C]]: Impossible to tell. First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the judgement [[A]] or [[B]] on its own line corresponding to the correct answer. At the end, repeat just the letter again by itself on a new line."""
EQUIVALENCE_PROMPT = PromptTemplate(
input_variables=["input", "output_a", "output_b"], template=sim_template
)