Mirror of https://github.com/hwchase17/langchain.git
Commit 5341b04d68 (parent b82ad19ed2)
--- a/langchain/evaluation/loading.py
+++ b/langchain/evaluation/loading.py
@@ -108,7 +108,6 @@ def load_evaluator(
     >>> from langchain.evaluation import load_evaluator, EvaluatorType
     >>> evaluator = load_evaluator(EvaluatorType.QA)
     """
-    llm = llm or ChatOpenAI(model="gpt-4", temperature=0)
     if evaluator not in _EVALUATOR_MAP:
         raise ValueError(
             f"Unknown evaluator type: {evaluator}"
@@ -116,6 +115,16 @@ def load_evaluator(
         )
     evaluator_cls = _EVALUATOR_MAP[evaluator]
     if issubclass(evaluator_cls, LLMEvalChain):
+        try:
+            llm = llm or ChatOpenAI(model="gpt-4", temperature=0)
+        except Exception as e:
+            raise ValueError(
+                f"Evaluation with the {evaluator_cls} requires a "
+                "language model to function."
+                " Failed to create the default 'gpt-4' model."
+                " Please manually provide an evaluation LLM"
+                " or check your openai credentials."
+            ) from e
         return evaluator_cls.from_llm(llm=llm, **kwargs)
     else:
         return evaluator_cls(**kwargs)
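
Reviewer note: with this change, load_evaluator only builds the default
gpt-4 model when the requested evaluator is actually LLM-backed. A minimal
sketch of the resulting behavior, assuming EvaluatorType.STRING_DISTANCE
maps to a non-LLM evaluator in _EVALUATOR_MAP:

    from langchain.evaluation import load_evaluator, EvaluatorType

    # No LLM-backed chain involved, so no ChatOpenAI instance is created
    # and no OpenAI credentials are required.
    string_distance = load_evaluator(EvaluatorType.STRING_DISTANCE)

    # LLM-backed evaluators still fall back to gpt-4 when llm is omitted;
    # if that default cannot be constructed, the new ValueError explains
    # how to pass an evaluation LLM explicitly.
    qa = load_evaluator(EvaluatorType.QA)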
@@ -154,7 +163,6 @@ def load_evaluators(
     >>> evaluators = [EvaluatorType.QA, EvaluatorType.CRITERIA]
     >>> loaded_evaluators = load_evaluators(evaluators, criteria="helpfulness")
     """
-    llm = llm or ChatOpenAI(model="gpt-4", temperature=0)
     loaded = []
     for evaluator in evaluators:
         _kwargs = config.get(evaluator, {}) if config else {}
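
Reviewer note: load_evaluators now forwards the caller's llm (possibly
None) to load_evaluator instead of resolving a default up front. A hedged
usage sketch; passing an explicit model keeps the old behavior:

    from langchain.chat_models import ChatOpenAI
    from langchain.evaluation import load_evaluators, EvaluatorType

    # Explicit LLM: used by every LLM-backed evaluator, exactly as before.
    loaded = load_evaluators(
        [EvaluatorType.QA, EvaluatorType.CRITERIA],
        llm=ChatOpenAI(model="gpt-4", temperature=0),
        criteria="helpfulness",
    )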
--- a/langchain/smith/evaluation/runner_utils.py
+++ b/langchain/smith/evaluation/runner_utils.py
@@ -35,7 +35,6 @@ from langchain.callbacks.tracers.base import BaseTracer
 from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler
 from langchain.callbacks.tracers.langchain import LangChainTracer
 from langchain.chains.base import Chain
-from langchain.chat_models.openai import ChatOpenAI
 from langchain.evaluation.loading import load_evaluator
 from langchain.evaluation.schema import EvaluatorType, StringEvaluator
 from langchain.schema import ChatResult, LLMResult
@@ -493,7 +492,7 @@ def _determine_reference_key(
 
 def _construct_run_evaluator(
     eval_config: Union[EvaluatorType, str, EvalConfig],
-    eval_llm: BaseLanguageModel,
+    eval_llm: Optional[BaseLanguageModel],
     run_type: str,
     data_type: DataType,
     example_outputs: Optional[List[str]],
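
Reviewer note: the signature now admits None, deferring the gpt-4 fallback
to load_evaluator. An illustrative stand-alone sketch of the pattern (the
names here are invented for illustration, not the library's API):

    from typing import Optional

    def construct(evaluator: str, llm: Optional[str] = None) -> str:
        # The optional value is threaded through unchanged.
        return load(evaluator, llm)

    def load(evaluator: str, llm: Optional[str]) -> str:
        # The lazy default lives in exactly one place: the callee that
        # actually needs a model.
        llm = llm or "gpt-4-default"
        return f"{evaluator}:{llm}"

    print(construct("qa"))            # qa:gpt-4-default
    print(construct("qa", "my-llm"))  # qa:my-llm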
@@ -563,7 +562,6 @@ def _load_run_evaluators(
     Returns:
         A list of run evaluators.
     """
-    eval_llm = config.eval_llm or ChatOpenAI(model="gpt-4", temperature=0.0)
     run_evaluators = []
     input_key, prediction_key, reference_key = None, None, None
     if (
@@ -580,7 +578,7 @@ def _load_run_evaluators(
     for eval_config in config.evaluators:
         run_evaluator = _construct_run_evaluator(
             eval_config,
-            eval_llm,
+            config.eval_llm,
             run_type,
             data_type,
             example_outputs,
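
Reviewer note: taken together, the runner no longer instantiates a
ChatOpenAI model while merely loading run evaluators. A hedged sketch,
assuming RunEvalConfig's eval_llm field defaults to None:

    from langchain.evaluation import EvaluatorType
    from langchain.smith import RunEvalConfig

    # No eval_llm supplied: nothing is constructed here; each LLM-backed
    # evaluator resolves its own gpt-4 fallback inside load_evaluator.
    config = RunEvalConfig(evaluators=[EvaluatorType.QA])
    assert config.eval_llm is None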