Compare commits

...

13 Commits

Author          SHA1         Message                                              Date
Chester Curme   39d1759b66   update docstring                                     2024-08-15 14:08:48 -04:00
Chester Curme   7005c4fe5b   CriteriaEvalChain                                    2024-08-15 14:06:30 -04:00
Chester Curme   1eace6523d   PairwiseStringEvalChain                              2024-08-15 12:43:28 -04:00
Chester Curme   bacf4c58ef   update                                               2024-08-15 12:40:11 -04:00
Chester Curme   b71b5bd3d7   update                                               2024-08-15 12:31:50 -04:00
Chester Curme   31364de10c   propagate include_run_info                           2024-08-15 12:23:25 -04:00
Chester Curme   8a70754dfe   ContextQAEvalChain                                   2024-08-15 12:21:11 -04:00
Chester Curme   9bd4459f9a   lint                                                 2024-08-15 12:06:06 -04:00
Chester Curme   50c1ecc5f1   QAEvalChain                                          2024-08-15 11:55:39 -04:00
Chester Curme   f51a9024ae   Merge branch 'master' into cc/deprecate_evaluators   2024-08-15 11:11:25 -04:00
Chester Curme   15254d1027   QAGenerateChain                                      2024-08-13 14:23:49 -04:00
Chester Curme   d38c9c7026   ScoreStringEvalChain                                 2024-08-13 14:22:09 -04:00
Chester Curme   d249318f94   TrajectoryEvalChain                                  2024-08-13 14:22:04 -04:00
6 changed files with 353 additions and 78 deletions

View File

@@ -27,11 +27,11 @@ from langchain_core.callbacks.manager import (
from langchain_core.exceptions import OutputParserException
from langchain_core.language_models import BaseLanguageModel
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.output_parsers import BaseOutputParser, StrOutputParser
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain.chains.llm import LLMChain
from langchain.evaluation.agents.trajectory_eval_prompt import (
EVAL_CHAT_PROMPT,
TOOL_FREE_EVAL_CHAT_PROMPT,
@@ -147,7 +147,7 @@ class TrajectoryEvalChain(AgentTrajectoryEvaluator, LLMEvalChain):
agent_tools: Optional[List[BaseTool]] = None
"""A list of tools available to the agent."""
eval_chain: LLMChain
eval_chain: Runnable
"""The language model chain used for evaluation."""
output_parser: TrajectoryOutputParser = Field(
default_factory=TrajectoryOutputParser
@@ -253,7 +253,7 @@ The following is the expected answer. Use this to measure correctness:
prompt = EVAL_CHAT_PROMPT
else:
prompt = TOOL_FREE_EVAL_CHAT_PROMPT
eval_chain = LLMChain(llm=llm, prompt=prompt)
eval_chain = prompt | llm | StrOutputParser()
return cls(
agent_tools=agent_tools, # type: ignore[arg-type]
eval_chain=eval_chain,
@@ -303,8 +303,8 @@ The following is the expected answer. Use this to measure correctness:
if self.agent_tools:
chain_input["tool_descriptions"] = self._tools_description
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
raw_output = self.eval_chain.run(
chain_input, callbacks=_run_manager.get_child()
raw_output = self.eval_chain.invoke(
chain_input, {"callbacks": _run_manager.get_child()}
)
return cast(dict, self.output_parser.parse(raw_output))
@@ -327,8 +327,8 @@ The following is the expected answer. Use this to measure correctness:
if self.agent_tools:
chain_input["tool_descriptions"] = self._tools_description
_run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
raw_output = await self.eval_chain.arun(
chain_input, callbacks=_run_manager.get_child()
raw_output = await self.eval_chain.ainvoke(
chain_input, {"callbacks": _run_manager.get_child()}
)
return cast(dict, self.output_parser.parse(raw_output))
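The hunks above replace the `LLMChain`-backed `eval_chain` with an LCEL composition (`prompt | llm | StrOutputParser()`) and switch from `.run`/`.arun` with a `callbacks` keyword to `.invoke`/`.ainvoke` with callbacks passed through the runnable's config argument. A minimal sketch of that pattern in isolation; the prompt text and `langchain_openai` model below are illustrative assumptions, not part of this diff:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Placeholder prompt and model, for illustration only.
prompt = ChatPromptTemplate.from_template("Evaluate this agent trajectory:\n{trajectory}")
llm = ChatOpenAI(model="gpt-4o-mini")

# Before: eval_chain = LLMChain(llm=llm, prompt=prompt); eval_chain.run(inputs, callbacks=...)
# After: compose prompt, model, and parser, and pass callbacks via the config argument.
eval_chain = prompt | llm | StrOutputParser()
raw_output = eval_chain.invoke(
    {"trajectory": "..."},
    {"callbacks": []},  # e.g. run_manager.get_child() when called from Chain._call
)
```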

View File

@@ -6,14 +6,15 @@ import logging
import re
from typing import Any, Dict, List, Optional, Union
from langchain_core.callbacks.manager import Callbacks
from langchain_core.callbacks.manager import CallbackManagerForChainRun, Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import RunnableConfig
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain.chains.llm import LLMChain
from langchain.evaluation.comparison.prompt import (
COMPARISON_TEMPLATE,
COMPARISON_TEMPLATE_WITH_REFERENCE,
@@ -151,7 +152,7 @@ class PairwiseStringResultOutputParser(BaseOutputParser[dict]):
}
class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain):
"""A chain for comparing two outputs, such as the outputs
of two models, prompts, or outputs of a single model on similar inputs.
@@ -186,6 +187,10 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
output_parser: BaseOutputParser = Field(
default_factory=PairwiseStringResultOutputParser
)
llm: BaseLanguageModel
"""The language model to use for scoring."""
prompt: BasePromptTemplate
"""The prompt to use for scoring."""
@classmethod
def is_lc_serializable(cls) -> bool:
@@ -228,6 +233,22 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
" (EvaluatorType.LABELED_PAIRWISE_STRING) instead."
)
@property
def input_keys(self) -> List[str]:
"""Will be whatever keys the prompt expects.
:meta private:
"""
return self.prompt.input_variables
@property
def output_keys(self) -> List[str]:
"""Will always return text key.
:meta private:
"""
return [self.output_key]
@classmethod
def from_llm(
cls,
@@ -305,6 +326,19 @@ Performance may be significantly worse with other models."
parsed[RUN_KEY] = result[RUN_KEY]
return parsed
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
if run_manager:
config = RunnableConfig(callbacks=run_manager.get_child())
else:
config = None
chain = self.prompt | self.llm | self.output_parser
response = chain.invoke(inputs, config=config)
return {self.output_key: response}
def _evaluate_string_pairs(
self,
*,
@@ -338,13 +372,17 @@ Performance may be significantly worse with other models."
"""
input_ = self._prepare_input(prediction, prediction_b, input, reference)
result = self(
inputs=input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
)
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = self.invoke(input_, config=config, include_run_info=include_run_info)
return self._prepare_output(result)
async def _aevaluate_string_pairs(
@@ -380,13 +418,20 @@ Performance may be significantly worse with other models."
"""
input_ = self._prepare_input(prediction, prediction_b, input, reference)
result = await self.acall(
inputs=input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = await self.ainvoke(
input_, config=config, include_run_info=include_run_info
)
return self._prepare_output(result)
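Several evaluators in this diff now assemble a `RunnableConfig` from the caller's optional `callbacks`, `tags`, and `metadata` before calling `invoke`/`ainvoke`. A standalone sketch of that idiom; the tag and metadata values are illustrative:

```python
from langchain_core.runnables import RunnableConfig


def build_config(callbacks=None, tags=None, metadata=None) -> RunnableConfig:
    # RunnableConfig is a TypedDict, so optional keys can be set conditionally.
    config: RunnableConfig = {}
    if callbacks is not None:
        config["callbacks"] = callbacks
    if tags is not None:
        config["tags"] = tags
    if metadata is not None:
        config["metadata"] = metadata
    return config


config = build_config(tags=["pairwise-eval"], metadata={"experiment": "demo"})
# result = evaluator.invoke(input_, config=config, include_run_info=True)
```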

View File

@@ -4,14 +4,14 @@ import re
from enum import Enum
from typing import Any, Dict, List, Mapping, Optional, Union
from langchain_core.callbacks.manager import Callbacks
from langchain_core.callbacks.manager import CallbackManagerForChainRun, Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import RunnableConfig
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain.chains.llm import LLMChain
from langchain.evaluation.criteria.prompt import PROMPT, PROMPT_WITH_REFERENCES
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY
@@ -164,7 +164,7 @@ def resolve_criteria(
return criteria_
class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
class CriteriaEvalChain(StringEvaluator, LLMEvalChain):
"""LLM Chain for evaluating runs against criteria.
Parameters
@@ -184,7 +184,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
reference labels in the prompt. Otherwise, the `PROMPT` template will be
used, which is a reference-free prompt.
**kwargs : Any
Additional keyword arguments to pass to the `LLMChain` constructor.
Additional keyword arguments to pass to the `Chain` constructor.
Returns
-------
@@ -231,6 +231,10 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
criterion_name: str
"""The name of the criterion being evaluated."""
output_key: str = "results" #: :meta private:
llm: BaseLanguageModel
"""The language model to use for scoring."""
prompt: BasePromptTemplate
"""The prompt to use for scoring."""
@classmethod
def is_lc_serializable(cls) -> bool:
@@ -267,6 +271,22 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
"\nTo use references, use the labeled_criteria instead."
)
@property
def input_keys(self) -> List[str]:
"""Will be whatever keys the prompt expects.
:meta private:
"""
return self.prompt.input_variables
@property
def output_keys(self) -> List[str]:
"""Will always return text key.
:meta private:
"""
return [self.output_key]
@classmethod
def _resolve_prompt(
cls, prompt: Optional[BasePromptTemplate] = None
@@ -332,7 +352,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
The prompt template to use for generating prompts. If not provided,
a default prompt template will be used.
**kwargs : Any
Additional keyword arguments to pass to the `LLMChain`
Additional keyword arguments to pass to the `Chain`
constructor.
Returns
@@ -396,6 +416,19 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
parsed[RUN_KEY] = result[RUN_KEY]
return parsed
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
if run_manager:
config = RunnableConfig(callbacks=run_manager.get_child())
else:
config = None
chain = self.prompt | self.llm | self.output_parser
response = chain.invoke(inputs, config=config)
return {self.output_key: response}
def _evaluate_strings(
self,
*,
@@ -420,7 +453,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
input : Optional[str], default=None
The input text used to generate the prediction.
**kwargs : Any
Additional keyword arguments to pass to the `LLMChain` `__call__`
Additional keyword arguments to pass to the `Chain` `invoke`
method.
Returns
@@ -442,13 +475,17 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
)
"""
input_ = self._get_eval_input(prediction, reference, input)
result = self(
input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
)
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = self.invoke(input_, config=config, include_run_info=include_run_info)
return self._prepare_output(result)
async def _aevaluate_strings(
@@ -475,7 +512,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
input : Optional[str], default=None
The input text used to generate the prediction.
**kwargs : Any
Additional keyword arguments to pass to the `LLMChain` `acall`
Additional keyword arguments to pass to the `Chain` `acall`
method.
Returns
@@ -497,12 +534,18 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
)
"""
input_ = self._get_eval_input(prediction, reference, input)
result = await self.acall(
input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = await self.ainvoke(
input_, config=config, include_run_info=include_run_info
)
return self._prepare_output(result)
@@ -556,7 +599,7 @@ class LabeledCriteriaEvalChain(CriteriaEvalChain):
The prompt template to use for generating prompts. If not provided,
a default prompt will be used.
**kwargs : Any
Additional keyword arguments to pass to the `LLMChain`
Additional keyword arguments to pass to the `Chain`
constructor.
Returns
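From a caller's point of view the criteria evaluator keeps its public API; only the internals move from `LLMChain` to a composed runnable. A hedged usage sketch, assuming an OpenAI chat model and the built-in `conciseness` criterion:

```python
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
from langchain_openai import ChatOpenAI

# Model choice is an assumption for illustration.
evaluator = CriteriaEvalChain.from_llm(
    llm=ChatOpenAI(model="gpt-4o-mini"),
    criteria="conciseness",
)
result = evaluator.evaluate_strings(
    prediction="The answer is four.",
    input="What is 2 + 2?",
)
print(result)  # e.g. {"reasoning": "...", "value": "Y", "score": 1}
```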

View File

@@ -4,13 +4,15 @@ from __future__ import annotations
import re
import string
from typing import Any, List, Optional, Sequence, Tuple
from typing import Any, Dict, List, Optional, Sequence, Tuple
from langchain_core.callbacks.manager import Callbacks
from langchain_core.callbacks.manager import CallbackManagerForChainRun, Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY
@@ -67,10 +69,14 @@ def _parse_string_eval_output(text: str) -> dict:
}
class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
class QAEvalChain(StringEvaluator, LLMEvalChain):
"""LLM Chain for evaluating question answering."""
output_key: str = "results" #: :meta private:
llm: BaseLanguageModel
"""The language model to use for scoring."""
prompt: BasePromptTemplate
"""The prompt to use for scoring."""
class Config:
extra = "ignore"
@@ -91,6 +97,35 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
def requires_input(self) -> bool:
return True
@property
def input_keys(self) -> List[str]:
"""Will be whatever keys the prompt expects.
:meta private:
"""
return self.prompt.input_variables
@property
def output_keys(self) -> List[str]:
"""Will always return text key.
:meta private:
"""
return [self.output_key]
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
if run_manager:
config = RunnableConfig(callbacks=run_manager.get_child())
else:
config = None
chain = self.prompt | self.llm | StrOutputParser()
response = chain.invoke(inputs, config=config)
return {self.output_key: response}
@classmethod
def from_llm(
cls,
@@ -141,8 +176,14 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
}
for i, example in enumerate(examples)
]
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
outputs = self.batch(inputs, config=config)
return self.apply(inputs, callbacks=callbacks)
# Subset to output key only
return [{self.output_key: output[self.output_key]} for output in outputs]
def _prepare_output(self, result: dict) -> dict:
parsed_result = _parse_string_eval_output(result[self.output_key])
@@ -174,13 +215,17 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
Returns:
dict: The evaluation results containing the score or value.
"""
result = self(
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
result = self.invoke(
{
"query": input,
"answer": reference,
"result": prediction,
},
callbacks=callbacks,
config=config,
include_run_info=include_run_info,
)
return self._prepare_output(result)
@@ -195,17 +240,31 @@ class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
include_run_info: bool = False,
**kwargs: Any,
) -> dict:
result = await self.acall(
inputs={"query": input, "answer": reference, "result": prediction},
callbacks=callbacks,
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
result = await self.ainvoke(
{
"query": input,
"answer": reference,
"result": prediction,
},
config=config,
include_run_info=include_run_info,
)
return self._prepare_output(result)
class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
class ContextQAEvalChain(StringEvaluator, LLMEvalChain):
"""LLM Chain for evaluating QA w/o GT based on context"""
output_key: str = "text" #: :meta private:
llm: BaseLanguageModel
"""The language model to use for scoring."""
prompt: BasePromptTemplate
"""The prompt to use for scoring."""
@classmethod
def is_lc_serializable(cls) -> bool:
return False
@@ -220,6 +279,22 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
"""Whether the chain requires an input string."""
return True
@property
def input_keys(self) -> List[str]:
"""Will be whatever keys the prompt expects.
:meta private:
"""
return self.prompt.input_variables
@property
def output_keys(self) -> List[str]:
"""Will always return text key.
:meta private:
"""
return [self.output_key]
class Config:
extra = "ignore"
@@ -236,6 +311,19 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
def evaluation_name(self) -> str:
return "Contextual Accuracy"
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
if run_manager:
config = RunnableConfig(callbacks=run_manager.get_child())
else:
config = None
chain = self.prompt | self.llm | StrOutputParser()
response = chain.invoke(inputs, config=config)
return {self.output_key: response}
@classmethod
def from_llm(
cls,
@@ -281,8 +369,13 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
}
for i, example in enumerate(examples)
]
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
outputs = self.batch(inputs, config=config)
return self.apply(inputs, callbacks=callbacks)
return [{self.output_key: output[self.output_key]} for output in outputs]
def _prepare_output(self, result: dict) -> dict:
parsed_result = _parse_string_eval_output(result[self.output_key])
@@ -300,13 +393,17 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
include_run_info: bool = False,
**kwargs: Any,
) -> dict:
result = self(
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
result = self.invoke(
{
"query": input,
"context": reference,
"result": prediction,
},
callbacks=callbacks,
config=config,
include_run_info=include_run_info,
)
return self._prepare_output(result)
@@ -321,9 +418,17 @@ class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
include_run_info: bool = False,
**kwargs: Any,
) -> dict:
result = await self.acall(
inputs={"query": input, "context": reference, "result": prediction},
callbacks=callbacks,
if callbacks:
config = RunnableConfig(callbacks=callbacks)
else:
config = None
result = await self.ainvoke(
{
"query": input,
"context": reference,
"result": prediction,
},
config=config,
include_run_info=include_run_info,
)
return self._prepare_output(result)
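The `evaluate` methods above now fan out over `Runnable.batch` instead of the deprecated `LLMChain.apply`, then subset each output to the chain's output key. A usage sketch with an illustrative one-example dataset:

```python
from langchain.evaluation.qa.eval_chain import QAEvalChain
from langchain_openai import ChatOpenAI

# Toy dataset and model, for illustration only.
examples = [{"query": "What is 2 + 2?", "answer": "4"}]
predictions = [{"result": "2 + 2 is 4."}]

eval_chain = QAEvalChain.from_llm(llm=ChatOpenAI(model="gpt-4o-mini"))
# Internally this now maps to Runnable.batch(inputs, config=...) rather than
# LLMChain.apply(inputs, callbacks=...).
graded = eval_chain.evaluate(examples, predictions)
print(graded)  # e.g. [{"results": "CORRECT"}]
```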

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
from typing import Any
from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseLLMOutputParser
from langchain_core.pydantic_v1 import Field
@@ -17,8 +18,44 @@ _QA_OUTPUT_PARSER = RegexParser(
)
@deprecated(
since="0.2.13",
message=(
"This class is deprecated and will be removed in langchain 1.0. "
"See API reference for replacement: "
"https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.qa.generate_chain.QAGenerateChain.html" # noqa: E501
),
removal="1.0",
)
class QAGenerateChain(LLMChain):
"""LLM Chain for generating examples for question answering."""
"""LLM Chain for generating examples for question answering.
Note: this class is deprecated. See below for a replacement implementation
that leverages LLM tool calling features.
.. code-block:: python
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from typing_extensions import TypedDict
template = \"\"\"You are a teacher coming up with questions to ask on a quiz.
Given the following document, please generate a question and answer based on that document.
These questions should be detailed and be based explicitly on information in the document.
\"\"\"
prompt = ChatPromptTemplate.from_template(template)
class QuestionAndAnswer(TypedDict):
\"\"\"Question and answer based on document.\"\"\"
question: str
answer: str
llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(QuestionAndAnswer)
llm.invoke("...")
""" # noqa: E501
output_parser: BaseLLMOutputParser = Field(default=_QA_OUTPUT_PARSER)
output_key: str = "qa_pairs"
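The new docstring recommends replacing `QAGenerateChain` with a prompt piped into a structured-output model. A short usage sketch of that replacement, assuming the full prompt template (abridged above) exposes a `{doc}` variable:

```python
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from typing_extensions import TypedDict


class QuestionAndAnswer(TypedDict):
    """Question and answer based on document."""

    question: str
    answer: str


# Simplified stand-in for the quiz-generation template quoted in the docstring.
prompt = ChatPromptTemplate.from_template(
    "Generate a quiz question and answer for this document:\n{doc}"
)
llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(QuestionAndAnswer)
chain = prompt | llm

qa_pair = chain.invoke({"doc": "The mitochondria is the powerhouse of the cell."})
# e.g. {"question": "...", "answer": "..."}
```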

View File

@@ -6,14 +6,16 @@ import logging
import re
from typing import Any, Dict, List, Optional, Union
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import RunnableConfig
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain.chains.llm import LLMChain
from langchain.evaluation.criteria.eval_chain import (
CRITERIA_TYPE,
Criteria,
@@ -144,7 +146,7 @@ class ScoreStringResultOutputParser(BaseOutputParser[dict]):
}
class ScoreStringEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
class ScoreStringEvalChain(StringEvaluator, LLMEvalChain):
"""A chain for scoring on a scale of 1-10 the output of a model.
Attributes:
@@ -178,10 +180,43 @@ class ScoreStringEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
"""The value to normalize the score by, if specified."""
criterion_name: str
"""The name of the criterion being evaluated."""
llm: BaseLanguageModel
"""The language model to use for scoring."""
prompt: BasePromptTemplate
"""The prompt to use for scoring."""
class Config:
extra = "ignore"
@property
def input_keys(self) -> List[str]:
"""Will be whatever keys the prompt expects.
:meta private:
"""
return self.prompt.input_variables
@property
def output_keys(self) -> List[str]:
"""Will always return text key.
:meta private:
"""
return [self.output_key]
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
if run_manager:
config = RunnableConfig(callbacks=run_manager.get_child())
else:
config = None
chain = self.prompt | self.llm | self.output_parser
response = chain.invoke(inputs, config=config)
return {self.output_key: response}
@classmethod
def is_lc_serializable(cls) -> bool:
return False
@@ -348,13 +383,17 @@ Performance may be significantly worse with other models."
"""
input_ = self._prepare_input(prediction, input, reference)
result = self(
inputs=input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
)
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = self.invoke(input_, config=config, include_run_info=include_run_info)
return self._prepare_output(result)
async def _aevaluate_string_pairs(
@@ -385,12 +424,18 @@ Performance may be significantly worse with other models."
"""
input_ = self._prepare_input(prediction, input, reference)
result = await self.acall(
inputs=input_,
callbacks=callbacks,
tags=tags,
metadata=metadata,
include_run_info=include_run_info,
# prep config
config: RunnableConfig = {}
if callbacks is not None:
config["callbacks"] = callbacks
if tags is not None:
config["tags"] = tags
if metadata is not None:
config["metadata"] = metadata
result = await self.ainvoke(
input_, config=config, include_run_info=include_run_info
)
return self._prepare_output(result)
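As with the pairwise and criteria evaluators, the scoring chain now forwards caller-supplied `callbacks`, `tags`, and `metadata` into the runnable config. A hedged end-to-end sketch; the model, tags, and metadata values are illustrative assumptions:

```python
from langchain.evaluation.scoring.eval_chain import ScoreStringEvalChain
from langchain_openai import ChatOpenAI

chain = ScoreStringEvalChain.from_llm(llm=ChatOpenAI(model="gpt-4o"))
result = chain.evaluate_strings(
    prediction="Paris is the capital of France.",
    input="What is the capital of France?",
    tags=["scoring-eval"],            # propagated into the RunnableConfig
    metadata={"experiment": "demo"},  # propagated into the RunnableConfig
)
print(result)  # e.g. {"reasoning": "...", "score": 9}
```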