mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 07:09:31 +00:00
Add Agent Trajectory Interface (#7122)
This commit is contained in:
parent
a6b39afe0e
commit
1f4a51cb9c
@ -21,7 +21,11 @@ from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChai
|
|||||||
from langchain.evaluation.comparison import PairwiseStringEvalChain
|
from langchain.evaluation.comparison import PairwiseStringEvalChain
|
||||||
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
|
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
|
||||||
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
|
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
|
||||||
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
|
from langchain.evaluation.schema import (
|
||||||
|
AgentTrajectoryEvaluator,
|
||||||
|
PairwiseStringEvaluator,
|
||||||
|
StringEvaluator,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"PairwiseStringEvalChain",
|
"PairwiseStringEvalChain",
|
||||||
@ -32,4 +36,5 @@ __all__ = [
|
|||||||
"PairwiseStringEvaluator",
|
"PairwiseStringEvaluator",
|
||||||
"TrajectoryEvalChain",
|
"TrajectoryEvalChain",
|
||||||
"CriteriaEvalChain",
|
"CriteriaEvalChain",
|
||||||
|
"AgentTrajectoryEvaluator",
|
||||||
]
|
]
|
||||||
|
@ -22,6 +22,7 @@ from langchain.evaluation.agents.trajectory_eval_prompt import (
|
|||||||
EVAL_CHAT_PROMPT,
|
EVAL_CHAT_PROMPT,
|
||||||
TOOL_FREE_EVAL_CHAT_PROMPT,
|
TOOL_FREE_EVAL_CHAT_PROMPT,
|
||||||
)
|
)
|
||||||
|
from langchain.evaluation.schema import AgentTrajectoryEvaluator
|
||||||
from langchain.schema import AgentAction, BaseOutputParser, OutputParserException
|
from langchain.schema import AgentAction, BaseOutputParser, OutputParserException
|
||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
|
|
||||||
@ -70,7 +71,7 @@ class TrajectoryOutputParser(BaseOutputParser):
|
|||||||
return TrajectoryEval(score=int(score_str), reasoning=reasoning)
|
return TrajectoryEval(score=int(score_str), reasoning=reasoning)
|
||||||
|
|
||||||
|
|
||||||
class TrajectoryEvalChain(Chain):
|
class TrajectoryEvalChain(AgentTrajectoryEvaluator, Chain):
|
||||||
"""A chain for evaluating ReAct style agents.
|
"""A chain for evaluating ReAct style agents.
|
||||||
|
|
||||||
This chain is used to evaluate ReAct style agents by reasoning about
|
This chain is used to evaluate ReAct style agents by reasoning about
|
||||||
@ -142,7 +143,9 @@ Description: {tool.description}"""
|
|||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_agent_trajectory(steps: Union[str, List[Tuple[AgentAction, str]]]) -> str:
|
def get_agent_trajectory(
|
||||||
|
steps: Union[str, Sequence[Tuple[AgentAction, str]]]
|
||||||
|
) -> str:
|
||||||
"""Get the agent trajectory as a formatted string.
|
"""Get the agent trajectory as a formatted string.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -308,12 +311,12 @@ The following is the expected answer. Use this to measure correctness:
|
|||||||
|
|
||||||
return {"score": parsed_output.score}
|
return {"score": parsed_output.score}
|
||||||
|
|
||||||
def evaluate_agent_trajectory(
|
def _evaluate_agent_trajectory(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
prediction: str,
|
prediction: str,
|
||||||
input: str,
|
input: str,
|
||||||
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
|
agent_trajectory: Sequence[Tuple[AgentAction, str]],
|
||||||
reference: Optional[str] = None,
|
reference: Optional[str] = None,
|
||||||
callbacks: Callbacks = None,
|
callbacks: Callbacks = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
@ -321,11 +324,12 @@ The following is the expected answer. Use this to measure correctness:
|
|||||||
"""Evaluate a trajectory.
|
"""Evaluate a trajectory.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
input (str): The input question.
|
prediction (str): The final predicted response.
|
||||||
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
|
input (str): The input to the agent.
|
||||||
|
agent_trajectory (List[Tuple[AgentAction, str]]):
|
||||||
The intermediate steps forming the agent trajectory.
|
The intermediate steps forming the agent trajectory.
|
||||||
prediction (str): The expected prediction.
|
|
||||||
reference (Optional[str]): The reference answer.
|
reference (Optional[str]): The reference answer.
|
||||||
|
callbacks (Callbacks): Callbacks to use for this chain run.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: The evaluation result.
|
dict: The evaluation result.
|
||||||
@ -338,12 +342,12 @@ The following is the expected answer. Use this to measure correctness:
|
|||||||
}
|
}
|
||||||
return self(inputs=inputs, callbacks=callbacks, **kwargs)
|
return self(inputs=inputs, callbacks=callbacks, **kwargs)
|
||||||
|
|
||||||
async def aevaluate_agent_trajectory(
|
async def _aevaluate_agent_trajectory(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
prediction: str,
|
prediction: str,
|
||||||
input: str,
|
input: str,
|
||||||
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
|
agent_trajectory: Sequence[Tuple[AgentAction, str]],
|
||||||
reference: Optional[str] = None,
|
reference: Optional[str] = None,
|
||||||
callbacks: Callbacks = None,
|
callbacks: Callbacks = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
@ -351,11 +355,12 @@ The following is the expected answer. Use this to measure correctness:
|
|||||||
"""Asynchronously evaluate a trajectory.
|
"""Asynchronously evaluate a trajectory.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
input (str): The input question.
|
prediction (str): The final predicted response.
|
||||||
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
|
input (str): The input to the agent.
|
||||||
|
agent_trajectory (List[Tuple[AgentAction, str]]):
|
||||||
The intermediate steps forming the agent trajectory.
|
The intermediate steps forming the agent trajectory.
|
||||||
prediction (str): The expected prediction.
|
|
||||||
reference (Optional[str]): The reference answer.
|
reference (Optional[str]): The reference answer.
|
||||||
|
callbacks (Callbacks): Callbacks to use for this chain run.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: The evaluation result.
|
dict: The evaluation result.
|
||||||
|
@ -3,9 +3,11 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional, Sequence, Tuple
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
|
from langchain.schema.agent import AgentAction
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -275,3 +277,120 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
|
|||||||
input=input,
|
input=input,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
    """Interface for evaluating agent trajectories.

    Subclasses implement the private ``_evaluate_agent_trajectory`` hook and,
    optionally, its async counterpart ``_aevaluate_agent_trajectory``.
    Callers use the public ``evaluate_agent_trajectory`` and
    ``aevaluate_agent_trajectory`` methods, which call
    ``self._check_evaluation_args`` before delegating to the hooks.
    """

    @property
    def requires_input(self) -> bool:
        """Whether this evaluator requires an input; always ``True`` here."""
        # NOTE(review): presumably consulted by ``_check_evaluation_args`` on
        # the mixin (not visible in this view) -- confirm.
        return True

    @abstractmethod
    def _evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional implementation-specific arguments.

        Returns:
            dict: The evaluation result.
        """

    async def _aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory.

        This default implementation always raises; subclasses that support
        async evaluation override it.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional implementation-specific arguments.

        Returns:
            dict: The evaluation result.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the private hook: the public ``aevaluate_agent_trajectory`` is
        # already implemented on this base class, so pointing implementers at
        # it would be misleading and would invite bypassing the arg check.
        raise NotImplementedError(
            f"{self.__class__.__name__} hasn't implemented an async "
            "_aevaluate_agent_trajectory method."
        )

    def evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory.

        Checks the ``reference`` / ``input`` arguments via
        ``_check_evaluation_args`` and then delegates to
        ``_evaluate_agent_trajectory``.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Forwarded unchanged to
                ``_evaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return self._evaluate_agent_trajectory(
            prediction=prediction,
            input=input,
            agent_trajectory=agent_trajectory,
            reference=reference,
            **kwargs,
        )

    async def aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory.

        Checks the ``reference`` / ``input`` arguments via
        ``_check_evaluation_args`` and then awaits
        ``_aevaluate_agent_trajectory``.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Forwarded unchanged to
                ``_aevaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return await self._aevaluate_agent_trajectory(
            prediction=prediction,
            input=input,
            agent_trajectory=agent_trajectory,
            reference=reference,
            **kwargs,
        )
|
||||||
|
Loading…
Reference in New Issue
Block a user