langchain[patch]: Update evaluation logic that instantiates a default LLM (#20760)

Favor langchain_openai over langchain_community for evaluation logic.

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
Eugene Yurtsev 2024-04-23 16:09:32 -04:00 committed by GitHub
parent 72f720fa38
commit a7c347ab35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 13 deletions

View File

@ -5,8 +5,6 @@ import logging
import re import re
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
from langchain_community.chat_models.azure_openai import AzureChatOpenAI
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.callbacks.manager import Callbacks from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import BaseOutputParser from langchain_core.output_parsers import BaseOutputParser
@ -254,10 +252,8 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
ValueError: If the input variables are not as expected. ValueError: If the input variables are not as expected.
""" """
if not ( # Check if the model is GPT-4 if not raise a warning
isinstance(llm, (ChatOpenAI, AzureChatOpenAI)) if not hasattr(llm, "model_name") or not llm.model_name.startswith("gpt-4"):
and llm.model_name.startswith("gpt-4")
):
logger.warning( logger.warning(
"This chain was only tested with GPT-4. \ "This chain was only tested with GPT-4. \
Performance may be significantly worse with other models." Performance may be significantly worse with other models."

View File

@ -193,7 +193,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
Examples Examples
-------- --------
>>> from langchain_community.chat_models import ChatAnthropic >>> from langchain_anthropic import ChatAnthropic
>>> from langchain.evaluation.criteria import CriteriaEvalChain >>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = ChatAnthropic(temperature=0) >>> llm = ChatAnthropic(temperature=0)
>>> criteria = {"my-custom-criterion": "Is the submission the most amazing ever?"} >>> criteria = {"my-custom-criterion": "Is the submission the most amazing ever?"}
@ -205,7 +205,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
'score': 0, 'score': 0,
} }
>>> from langchain_community.chat_models import ChatOpenAI >>> from langchain_openai import ChatOpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = ChatOpenAI(model="gpt-4", temperature=0) >>> llm = ChatOpenAI(model="gpt-4", temperature=0)
>>> criteria = "correctness" >>> criteria = "correctness"
@ -344,7 +344,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
Examples Examples
-------- --------
>>> from langchain_community.llms import OpenAI >>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = OpenAI() >>> llm = OpenAI()
>>> criteria = { >>> criteria = {
@ -432,7 +432,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
Examples Examples
-------- --------
>>> from langchain_community.llms import OpenAI >>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import CriteriaEvalChain >>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = OpenAI() >>> llm = OpenAI()
>>> criteria = "conciseness" >>> criteria = "conciseness"
@ -487,7 +487,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
Examples Examples
-------- --------
>>> from langchain_community.llms import OpenAI >>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import CriteriaEvalChain >>> from langchain.evaluation.criteria import CriteriaEvalChain
>>> llm = OpenAI() >>> llm = OpenAI()
>>> criteria = "conciseness" >>> criteria = "conciseness"
@ -568,7 +568,7 @@ class LabeledCriteriaEvalChain(CriteriaEvalChain):
Examples Examples
-------- --------
>>> from langchain_community.llms import OpenAI >>> from langchain_openai import OpenAI
>>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
>>> llm = OpenAI() >>> llm = OpenAI()
>>> criteria = { >>> criteria = {

View File

@ -1,7 +1,6 @@
"""Loading datasets and evaluators.""" """Loading datasets and evaluators."""
from typing import Any, Dict, List, Optional, Sequence, Type, Union from typing import Any, Dict, List, Optional, Sequence, Type, Union
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.language_models import BaseLanguageModel from langchain_core.language_models import BaseLanguageModel
from langchain.chains.base import Chain from langchain.chains.base import Chain
@ -131,6 +130,20 @@ def load_evaluator(
evaluator_cls = _EVALUATOR_MAP[evaluator] evaluator_cls = _EVALUATOR_MAP[evaluator]
if issubclass(evaluator_cls, LLMEvalChain): if issubclass(evaluator_cls, LLMEvalChain):
try: try:
try:
from langchain_openai import ChatOpenAI
except ImportError:
try:
from langchain_community.chat_models.openai import ChatOpenAI
except ImportError:
raise ImportError(
"Could not import langchain_openai or fallback onto "
"langchain_community. Please install langchain_openai "
"or specify a language model explicitly. "
"It's recommended to install langchain_openai AND "
"specify a language model explicitly."
)
llm = llm or ChatOpenAI( # type: ignore[call-arg] llm = llm or ChatOpenAI( # type: ignore[call-arg]
model="gpt-4", model_kwargs={"seed": 42}, temperature=0 model="gpt-4", model_kwargs={"seed": 42}, temperature=0
) )