Rm deprecated (#15920)
Remove the usage of deprecated methods in the test runner.
This commit is contained in:
parent 438beb6c94
commit 129552e3d6
@@ -674,15 +674,14 @@ async def _arun_llm(
     """
     if input_mapper is not None:
         prompt_or_messages = input_mapper(inputs)
-        if isinstance(prompt_or_messages, str):
-            return await llm.apredict(
-                prompt_or_messages, callbacks=callbacks, tags=tags
-            )
-        elif isinstance(prompt_or_messages, list) and all(
-            isinstance(msg, BaseMessage) for msg in prompt_or_messages
+        if (
+            isinstance(prompt_or_messages, str)
+            or isinstance(prompt_or_messages, list)
+            and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
         ):
-            return await llm.apredict_messages(
-                prompt_or_messages, callbacks=callbacks, tags=tags
+            return await llm.ainvoke(
+                prompt_or_messages,
+                config=RunnableConfig(callbacks=callbacks, tags=tags or []),
             )
         else:
             raise InputFormatError(
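For reference, the pattern applied in this hunk (and in the synchronous `_run_llm` hunk further down) is: stop passing `callbacks`/`tags` as keyword arguments to the deprecated `apredict`/`apredict_messages` helpers and call `ainvoke` with a single `RunnableConfig`. A minimal sketch of that migration, using a fake chat model from `langchain_core` purely as a stand-in (the fake model and its import path are assumptions, not part of this commit):

```python
# Hypothetical sketch of the apredict -> ainvoke migration shown above;
# FakeListChatModel is a stand-in, not part of this commit.
import asyncio

from langchain_core.language_models.fake_chat_models import FakeListChatModel
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig


async def main() -> None:
    llm = FakeListChatModel(responses=["hello"])

    # Deprecated style (what the old code did):
    #   await llm.apredict_messages(messages, callbacks=callbacks, tags=tags)
    # New style: callbacks and tags travel inside one RunnableConfig.
    result = await llm.ainvoke(
        [HumanMessage(content="hi")],
        config=RunnableConfig(callbacks=[], tags=["demo"]),
    )
    print(result.content)


asyncio.run(main())
```

The `tags=tags or []` in the diff simply guards against `tags=None`, since `RunnableConfig` expects a list of tags.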
@@ -694,13 +693,13 @@ async def _arun_llm(
     else:
         try:
             prompt = _get_prompt(inputs)
-            llm_output: Union[str, BaseMessage] = await llm.apredict(
-                prompt, callbacks=callbacks, tags=tags
+            llm_output: Union[str, BaseMessage] = await llm.ainvoke(
+                prompt, config=RunnableConfig(callbacks=callbacks, tags=tags or [])
             )
         except InputFormatError:
             messages = _get_messages(inputs)
-            llm_output = await llm.apredict_messages(
-                messages, callbacks=callbacks, tags=tags
+            llm_output = await llm.ainvoke(
+                messages, config=RunnableConfig(callbacks=callbacks, tags=tags or [])
             )
     return llm_output
 
@@ -722,7 +721,9 @@ async def _arun_chain(
         and chain.input_keys
     ):
         val = next(iter(inputs_.values()))
-        output = await chain.acall(val, callbacks=callbacks, tags=tags)
+        output = await chain.ainvoke(
+            val, config=RunnableConfig(callbacks=callbacks, tags=tags or [])
+        )
     else:
         runnable_config = RunnableConfig(tags=tags or [], callbacks=callbacks)
         output = await chain.ainvoke(inputs_, config=runnable_config)
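The chain-side change is the same idea: the deprecated `Chain.acall` (and bare `chain(...)` calls in the sync path) give way to `ainvoke` with a `RunnableConfig`. A minimal sketch under the assumption of a trivial `LLMChain` built on a fake LLM; the fake LLM, prompt, and inputs are placeholders, not from this commit:

```python
# Hypothetical sketch: Chain.acall(...) -> Chain.ainvoke(..., config=...).
import asyncio

from langchain_core.language_models.fake import FakeListLLM
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableConfig

from langchain.chains.llm import LLMChain


async def main() -> None:
    chain = LLMChain(
        llm=FakeListLLM(responses=["42"]),  # placeholder LLM
        prompt=PromptTemplate.from_template("Answer: {question}"),
    )

    # Deprecated: await chain.acall(inputs, callbacks=callbacks, tags=tags)
    # Replacement: move callbacks/tags into the config argument.
    output = await chain.ainvoke(
        {"question": "why?"},
        config=RunnableConfig(callbacks=[], tags=["demo"]),
    )
    print(output["text"])


asyncio.run(main())
```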
@@ -807,17 +808,17 @@ def _run_llm(
         ValueError: If the LLM type is unsupported.
         InputFormatError: If the input format is invalid.
     """
+    # Most of this is legacy code; we could probably remove a lot of it.
     if input_mapper is not None:
         prompt_or_messages = input_mapper(inputs)
-        if isinstance(prompt_or_messages, str):
-            llm_output: Union[str, BaseMessage] = llm.predict(
-                prompt_or_messages, callbacks=callbacks, tags=tags
-            )
-        elif isinstance(prompt_or_messages, list) and all(
-            isinstance(msg, BaseMessage) for msg in prompt_or_messages
+        if (
+            isinstance(prompt_or_messages, str)
+            or isinstance(prompt_or_messages, list)
+            and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
         ):
-            llm_output = llm.predict_messages(
-                prompt_or_messages, callbacks=callbacks, tags=tags
+            llm_output: Union[str, BaseMessage] = llm.invoke(
+                prompt_or_messages,
+                config=RunnableConfig(callbacks=callbacks, tags=tags or []),
             )
         else:
             raise InputFormatError(
@@ -828,10 +829,14 @@ def _run_llm(
     else:
         try:
             llm_prompts = _get_prompt(inputs)
-            llm_output = llm.predict(llm_prompts, callbacks=callbacks, tags=tags)
+            llm_output = llm.invoke(
+                llm_prompts, config=RunnableConfig(callbacks=callbacks, tags=tags or [])
+            )
         except InputFormatError:
             llm_messages = _get_messages(inputs)
-            llm_output = llm.predict_messages(llm_messages, callbacks=callbacks)
+            llm_output = llm.invoke(
+                llm_messages, config=RunnableConfig(callbacks=callbacks)
+            )
     return llm_output
 
 
@@ -852,7 +857,9 @@ def _run_chain(
         and chain.input_keys
     ):
         val = next(iter(inputs_.values()))
-        output = chain(val, callbacks=callbacks, tags=tags)
+        output = chain.invoke(
+            val, config=RunnableConfig(callbacks=callbacks, tags=tags or [])
+        )
     else:
         runnable_config = RunnableConfig(tags=tags or [], callbacks=callbacks)
         output = chain.invoke(inputs_, config=runnable_config)
@@ -1313,7 +1320,7 @@ Examples
     .. code-block:: python
 
         from langsmith import Client
-        from langchain_community.chat_models import ChatOpenAI
+        from langchain_openai import ChatOpenAI
         from langchain.chains import LLMChain
         from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
 
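The docstring example above now pulls `ChatOpenAI` from the dedicated `langchain_openai` partner package instead of `langchain_community.chat_models`. A minimal sketch of the updated import (assumes the `langchain-openai` package is installed and `OPENAI_API_KEY` is set; not part of this diff):

```python
# Old location (deprecated): from langchain_community.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)  # reads OPENAI_API_KEY from the environment
```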
@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator, List, Optional
 from uuid import uuid4
 
 import pytest
@@ -7,7 +7,8 @@ from langchain_community.llms.openai import OpenAI
 from langchain_core.messages import BaseMessage, HumanMessage
 from langchain_core.prompts.chat import ChatPromptTemplate
 from langsmith import Client as Client
-from langsmith.schemas import DataType
+from langsmith.evaluation import run_evaluator
+from langsmith.schemas import DataType, Example, Run
 
 from langchain.chains.llm import LLMChain
 from langchain.evaluation import EvaluatorType
@@ -26,7 +27,15 @@ def _check_all_feedback_passed(_project_name: str, client: Client) -> None:
     feedback = list(client.list_feedback(run_ids=[run.id for run in runs]))
     if not feedback:
         return
-    assert all([f.score == 1 for f in feedback])
+    assert all([bool(f.score) for f in feedback])
 
 
+@run_evaluator
+def not_empty(run: Run, example: Optional[Example] = None) -> dict:
+    return {
+        "score": run.outputs and next(iter(run.outputs.values())),
+        "key": "not_empty",
+    }
+
+
 @pytest.fixture
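The `not_empty` evaluator added here replaces the built-in `CRITERIA` evaluator in the tests below, and the relaxed assertion (`bool(f.score)` instead of `f.score == 1`) matches its truthy score. For context, a sketch of how a `@run_evaluator` function plugs into `RunEvalConfig`; the client, dataset name, and factory in the trailing comment are placeholders:

```python
# Sketch: wiring a @run_evaluator-decorated function into RunEvalConfig.
from typing import Optional

from langsmith.evaluation import run_evaluator
from langsmith.schemas import Example, Run

from langchain.smith import RunEvalConfig


@run_evaluator
def not_empty(run: Run, example: Optional[Example] = None) -> dict:
    # Truthy score when the run produced any non-empty output value.
    return {
        "score": run.outputs and next(iter(run.outputs.values())),
        "key": "not_empty",
    }


eval_config = RunEvalConfig(custom_evaluators=[not_empty])
# run_on_dataset(client=client, dataset_name="<dataset>",
#                llm_or_chain_factory=llm, evaluation=eval_config)  # placeholders
```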
@@ -81,7 +90,9 @@ def test_chat_model(
     kv_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = ChatOpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(
+        evaluators=[EvaluatorType.QA], custom_evaluators=[not_empty]
+    )
     with pytest.raises(ValueError, match="Must specify reference_key"):
         run_on_dataset(
             dataset_name=kv_dataset_name,
@@ -90,7 +101,7 @@ def test_chat_model(
             client=client,
         )
     eval_config = RunEvalConfig(
-        evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA],
+        evaluators=[EvaluatorType.QA],
         reference_key="some_output",
     )
     with pytest.raises(
@@ -109,9 +120,8 @@ def test_chat_model(
     run_on_dataset(
         client=client,
         dataset_name=kv_dataset_name,
-        llm_or_chain_factory=llm,
+        llm_or_chain_factory=input_mapper | llm,
         evaluation=eval_config,
-        input_mapper=input_mapper,
         project_name=eval_project_name,
         tags=["shouldpass"],
     )
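Rather than passing `input_mapper=` to `run_on_dataset`, the mapper is now composed directly in front of the model with the runnable `|` operator, which coerces a plain function into a `RunnableLambda`. A minimal sketch of that composition, again with an assumed fake chat model as a stand-in:

```python
# Sketch: input_mapper | llm instead of run_on_dataset(..., input_mapper=...).
from langchain_core.language_models.fake_chat_models import FakeListChatModel
from langchain_core.messages import BaseMessage, HumanMessage


def input_mapper(d: dict) -> list[BaseMessage]:
    # Map a key/value example onto the message list a chat model expects.
    return [HumanMessage(content=d["some_input"])]


llm = FakeListChatModel(responses=["ok"])  # stand-in chat model
target = input_mapper | llm  # the function is coerced into a RunnableLambda

print(target.invoke({"some_input": "hello"}).content)
```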
@@ -120,7 +130,7 @@ def test_chat_model(
 
 def test_llm(kv_dataset_name: str, eval_project_name: str, client: Client) -> None:
     llm = OpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA])
     with pytest.raises(ValueError, match="Must specify reference_key"):
         run_on_dataset(
             dataset_name=kv_dataset_name,
@@ -132,9 +142,7 @@ def test_llm(kv_dataset_name: str, eval_project_name: str, client: Client) -> No
         evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA],
         reference_key="some_output",
     )
-    with pytest.raises(
-        InputFormatError, match="Example inputs do not match language model"
-    ):
+    with pytest.raises(InputFormatError, match="Example inputs"):
         run_on_dataset(
             dataset_name=kv_dataset_name,
             llm_or_chain_factory=llm,
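The shortened `match=` pattern works because `pytest.raises` applies the pattern with `re.search`, so a stable prefix such as "Example inputs" is enough and is less brittle than matching the full message. A small, self-contained illustration (not from this commit):

```python
# Sketch: pytest.raises matches the exception text with re.search,
# so a short substring-style pattern is sufficient.
import pytest


def _raise() -> None:
    raise ValueError("Example inputs do not match language model input format")


def test_partial_match() -> None:
    with pytest.raises(ValueError, match="Example inputs"):
        _raise()
```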
@@ -148,9 +156,8 @@ def test_llm(kv_dataset_name: str, eval_project_name: str, client: Client) -> No
     run_on_dataset(
         client=client,
         dataset_name=kv_dataset_name,
-        llm_or_chain_factory=llm,
+        llm_or_chain_factory=input_mapper | llm,
         evaluation=eval_config,
-        input_mapper=input_mapper,
         project_name=eval_project_name,
         tags=["shouldpass"],
     )
@@ -172,9 +179,7 @@ def test_chain(kv_dataset_name: str, eval_project_name: str, client: Client) ->
         evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA],
         reference_key="some_output",
     )
-    with pytest.raises(
-        InputFormatError, match="Example inputs do not match chain input keys"
-    ):
+    with pytest.raises(InputFormatError, match="Example inputs"):
         run_on_dataset(
             dataset_name=kv_dataset_name,
             llm_or_chain_factory=lambda: chain,
@@ -182,18 +187,8 @@ def test_chain(kv_dataset_name: str, eval_project_name: str, client: Client) ->
             client=client,
         )
 
-    def input_mapper(d: dict) -> dict:
-        return {"input": d["some_input"]}
-
-    with pytest.raises(
-        InputFormatError,
-        match=" match the chain's expected input keys.",
-    ):
-        run_on_dataset(
-            dataset_name=kv_dataset_name,
-            llm_or_chain_factory=lambda: input_mapper | chain,
-            client=client,
-            evaluation=eval_config,
-        )
+    eval_config = RunEvalConfig(
+        custom_evaluators=[not_empty],
+    )
 
     def right_input_mapper(d: dict) -> dict:
@@ -259,7 +254,7 @@ def test_chat_model_on_chat_dataset(
     chat_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = ChatOpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         dataset_name=chat_dataset_name,
         llm_or_chain_factory=llm,
@@ -274,7 +269,7 @@ def test_llm_on_chat_dataset(
     chat_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = OpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         dataset_name=chat_dataset_name,
         llm_or_chain_factory=llm,
@@ -337,7 +332,7 @@ def test_chat_model_on_llm_dataset(
     llm_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = ChatOpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         client=client,
         dataset_name=llm_dataset_name,
@@ -353,7 +348,7 @@ def test_llm_on_llm_dataset(
     llm_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = OpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         client=client,
         dataset_name=llm_dataset_name,
@@ -431,7 +426,7 @@ def test_llm_on_kv_singleio_dataset(
     kv_singleio_dataset_name: str, eval_project_name: str, client: Client
 ) -> None:
     llm = OpenAI(temperature=0)
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         dataset_name=kv_singleio_dataset_name,
         llm_or_chain_factory=llm,
@@ -448,7 +443,7 @@ def test_chain_on_kv_singleio_dataset(
 ) -> None:
     llm = ChatOpenAI(temperature=0)
     chain = LLMChain.from_string(llm, "The answer to the {question} is: ")
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     run_on_dataset(
         dataset_name=kv_singleio_dataset_name,
         llm_or_chain_factory=lambda: chain,
@@ -467,7 +462,7 @@ async def test_runnable_on_kv_singleio_dataset(
         ChatPromptTemplate.from_messages([("human", "{the wackiest input}")])
         | ChatOpenAI()
     )
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     await arun_on_dataset(
         dataset_name=kv_singleio_dataset_name,
         llm_or_chain_factory=runnable,
@@ -496,7 +491,7 @@ async def test_arb_func_on_kv_singleio_dataset(
             f"Expected message with content type string, got {content}"
         )
 
-    eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
+    eval_config = RunEvalConfig(custom_evaluators=[not_empty])
     await arun_on_dataset(
         dataset_name=kv_singleio_dataset_name,
         llm_or_chain_factory=my_func,