Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-15 09:23:57 +00:00)
langchain[patch]: Update evaluation logic that instantiates a default LLM (#20760)

Favor langchain_openai over langchain_community for evaluation logic.

Co-authored-by: ccurme <chester.curme@gmail.com>
parent 72f720fa38
commit a7c347ab35
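The error message added in this commit recommends installing langchain_openai and passing a judge model explicitly rather than relying on the default built inside load_evaluator. A minimal sketch of that usage (the model choice and criterion here are illustrative, not part of this commit):

    from langchain_openai import ChatOpenAI
    from langchain.evaluation import load_evaluator

    # Explicit judge model: avoids the default-LLM fallback path entirely.
    llm = ChatOpenAI(model="gpt-4", temperature=0)
    evaluator = load_evaluator("criteria", llm=llm, criteria="conciseness")
    result = evaluator.evaluate_strings(
        prediction="The sky is blue because of Rayleigh scattering.",
        input="Why is the sky blue?",
    )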
@@ -5,8 +5,6 @@ import logging
 import re
 from typing import Any, Dict, List, Optional, Union
 
-from langchain_community.chat_models.azure_openai import AzureChatOpenAI
-from langchain_community.chat_models.openai import ChatOpenAI
 from langchain_core.callbacks.manager import Callbacks
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import BaseOutputParser
@@ -254,10 +252,8 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain):
             ValueError: If the input variables are not as expected.
 
         """
-        if not (
-            isinstance(llm, (ChatOpenAI, AzureChatOpenAI))
-            and llm.model_name.startswith("gpt-4")
-        ):
+        # Check if the model is GPT-4 if not raise a warning
+        if not hasattr(llm, "model_name") or not llm.model_name.startswith("gpt-4"):
             logger.warning(
                 "This chain was only tested with GPT-4. \
 Performance may be significantly worse with other models."
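The new guard duck-types on model_name instead of isinstance-checking the two langchain_community classes, so any provider wrapper that exposes the attribute is handled. A hedged sketch of the behavior (FakeModel is a hypothetical stand-in, not from this commit):

    import logging

    logger = logging.getLogger(__name__)

    class FakeModel:
        """Hypothetical wrapper: only needs a model_name attribute."""
        model_name = "claude-3-opus"

    llm = FakeModel()
    # Same condition as the new guard: warn for anything that is not GPT-4.
    if not hasattr(llm, "model_name") or not llm.model_name.startswith("gpt-4"):
        logger.warning("This chain was only tested with GPT-4.")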
@@ -193,7 +193,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
 
     Examples
     --------
-    >>> from langchain_community.chat_models import ChatAnthropic
+    >>> from langchain_anthropic import ChatAnthropic
     >>> from langchain.evaluation.criteria import CriteriaEvalChain
     >>> llm = ChatAnthropic(temperature=0)
     >>> criteria = {"my-custom-criterion": "Is the submission the most amazing ever?"}
@@ -205,7 +205,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
     'score': 0,
     }
 
-    >>> from langchain_community.chat_models import ChatOpenAI
+    >>> from langchain_openai import ChatOpenAI
     >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
     >>> llm = ChatOpenAI(model="gpt-4", temperature=0)
     >>> criteria = "correctness"
@@ -344,7 +344,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
 
         Examples
         --------
-        >>> from langchain_community.llms import OpenAI
+        >>> from langchain_openai import OpenAI
         >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
         >>> llm = OpenAI()
         >>> criteria = {
@@ -432,7 +432,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
 
         Examples
         --------
-        >>> from langchain_community.llms import OpenAI
+        >>> from langchain_openai import OpenAI
         >>> from langchain.evaluation.criteria import CriteriaEvalChain
         >>> llm = OpenAI()
         >>> criteria = "conciseness"
@@ -487,7 +487,7 @@ class CriteriaEvalChain(StringEvaluator, LLMEvalChain, LLMChain):
 
         Examples
         --------
-        >>> from langchain_community.llms import OpenAI
+        >>> from langchain_openai import OpenAI
         >>> from langchain.evaluation.criteria import CriteriaEvalChain
         >>> llm = OpenAI()
         >>> criteria = "conciseness"
@@ -568,7 +568,7 @@ class LabeledCriteriaEvalChain(CriteriaEvalChain):
 
         Examples
         --------
-        >>> from langchain_community.llms import OpenAI
+        >>> from langchain_openai import OpenAI
         >>> from langchain.evaluation.criteria import LabeledCriteriaEvalChain
        >>> llm = OpenAI()
         >>> criteria = {
@@ -1,7 +1,6 @@
 """Loading datasets and evaluators."""
 from typing import Any, Dict, List, Optional, Sequence, Type, Union
 
-from langchain_community.chat_models.openai import ChatOpenAI
 from langchain_core.language_models import BaseLanguageModel
 
 from langchain.chains.base import Chain
@@ -131,6 +130,20 @@ def load_evaluator(
     evaluator_cls = _EVALUATOR_MAP[evaluator]
     if issubclass(evaluator_cls, LLMEvalChain):
+        try:
+            try:
+                from langchain_openai import ChatOpenAI
+            except ImportError:
+                from langchain_community.chat_models.openai import ChatOpenAI
+        except ImportError:
+            raise ImportError(
+                "Could not import langchain_openai or fallback onto "
+                "langchain_community. Please install langchain_openai "
+                "or specify a language model explicitly. "
+                "It's recommended to install langchain_openai AND "
+                "specify a language model explicitly."
+            )
+
         llm = llm or ChatOpenAI(  # type: ignore[call-arg]
             model="gpt-4", model_kwargs={"seed": 42}, temperature=0
         )
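With the hunk above, calling load_evaluator without an explicit llm resolves the default GPT-4 judge through the tiered import: langchain_openai first, langchain_community as a fallback, and an actionable ImportError if neither is available. A hedged usage sketch (assumes langchain-openai is installed and OPENAI_API_KEY is set):

    from langchain.evaluation import load_evaluator

    # No llm= argument: the default ChatOpenAI judge (gpt-4, seed=42,
    # temperature=0) is built via the import chain added above.
    evaluator = load_evaluator("labeled_criteria", criteria="correctness")
    result = evaluator.evaluate_strings(
        prediction="Paris",
        reference="The capital of France is Paris.",
        input="What is the capital of France?",
    )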