mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 13:36:15 +00:00
Clean up agent trajectory interface (#6799)
- Enable reference - Enable not specifying tools at the start - Add methods with keywords
This commit is contained in:
0
tests/unit_tests/evaluation/agents/__init__.py
Normal file
0
tests/unit_tests/evaluation/agents/__init__.py
Normal file
113
tests/unit_tests/evaluation/agents/test_eval_chain.py
Normal file
113
tests/unit_tests/evaluation/agents/test_eval_chain.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Test agent trajectory evaluation chain."""
|
||||
|
||||
from typing import List, Tuple
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain
|
||||
from langchain.schema import AgentAction
|
||||
from langchain.tools.base import tool
|
||||
from tests.unit_tests.llms.fake_llm import FakeLLM
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def intermediate_steps() -> List[Tuple[AgentAction, str]]:
|
||||
return [
|
||||
(
|
||||
AgentAction(
|
||||
tool="Foo",
|
||||
tool_input="Bar",
|
||||
log="Star date 2021-06-13: Foo received input: Bar",
|
||||
),
|
||||
"Baz",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@tool
|
||||
def foo(bar: str) -> str:
|
||||
"""Foo."""
|
||||
return bar
|
||||
|
||||
|
||||
def test_trajectory_eval_chain(
|
||||
intermediate_steps: List[Tuple[AgentAction, str]]
|
||||
) -> None:
|
||||
llm = FakeLLM(
|
||||
queries={
|
||||
"a": "Trajectory good\nScore: 5",
|
||||
"b": "Trajectory not good\nScore: 1",
|
||||
},
|
||||
sequential_responses=True,
|
||||
)
|
||||
chain = TrajectoryEvalChain.from_llm(llm=llm, agent_tools=[foo]) # type: ignore
|
||||
# Test when ref is not provided
|
||||
res = chain.evaluate_agent_trajectory(
|
||||
input="What is your favorite food?",
|
||||
agent_trajectory=intermediate_steps,
|
||||
output="I like pie.",
|
||||
)
|
||||
assert res["score"] == 5
|
||||
# Test when ref is provided
|
||||
res = chain.evaluate_agent_trajectory(
|
||||
input="What is your favorite food?",
|
||||
agent_trajectory=intermediate_steps,
|
||||
output="I like pie.",
|
||||
reference="Paris",
|
||||
)
|
||||
assert res["score"] == 1
|
||||
|
||||
|
||||
def test_trajectory_eval_chain_no_tools(
|
||||
intermediate_steps: List[Tuple[AgentAction, str]]
|
||||
) -> None:
|
||||
llm = FakeLLM(
|
||||
queries={
|
||||
"a": "Trajectory good\nScore: 5",
|
||||
"b": "Trajectory not good\nScore: 1",
|
||||
},
|
||||
sequential_responses=True,
|
||||
)
|
||||
chain = TrajectoryEvalChain.from_llm(llm=llm) # type: ignore
|
||||
res = chain.evaluate_agent_trajectory(
|
||||
input="What is your favorite food?",
|
||||
agent_trajectory=intermediate_steps,
|
||||
output="I like pie.",
|
||||
)
|
||||
assert res["score"] == 5
|
||||
res = chain.evaluate_agent_trajectory(
|
||||
input="What is your favorite food?",
|
||||
agent_trajectory=intermediate_steps,
|
||||
output="I like pie.",
|
||||
reference="Paris",
|
||||
)
|
||||
assert res["score"] == 1
|
||||
|
||||
|
||||
def test_old_api_works(intermediate_steps: List[Tuple[AgentAction, str]]) -> None:
|
||||
llm = FakeLLM(
|
||||
queries={
|
||||
"a": "Trajectory good\nScore: 5",
|
||||
"b": "Trajectory not good\nScore: 1",
|
||||
},
|
||||
sequential_responses=True,
|
||||
)
|
||||
chain = TrajectoryEvalChain.from_llm(llm=llm) # type: ignore
|
||||
res = chain(
|
||||
{
|
||||
"question": "What is your favorite food?",
|
||||
"agent_trajectory": intermediate_steps,
|
||||
"answer": "I like pie.",
|
||||
}
|
||||
)
|
||||
assert res["score"] == 5
|
||||
|
||||
res = chain(
|
||||
{
|
||||
"question": "What is your favorite food?",
|
||||
"agent_trajectory": intermediate_steps,
|
||||
"answer": "I like pie.",
|
||||
"reference": "Paris",
|
||||
}
|
||||
)
|
||||
assert res["score"] == 1
|
Reference in New Issue
Block a user