From 0ee6ed76ca507b554adc06f312369b1cf23f6e2e Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Thu, 18 Jul 2024 19:12:28 -0700 Subject: [PATCH] [Evaluation] Pass in seed directly (#24403) adding test rn --- libs/langchain/langchain/evaluation/loading.py | 2 +- .../unit_tests/evaluation/qa/test_eval_chain.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/evaluation/loading.py b/libs/langchain/langchain/evaluation/loading.py index 756235f0569..a9a576911bf 100644 --- a/libs/langchain/langchain/evaluation/loading.py +++ b/libs/langchain/langchain/evaluation/loading.py @@ -146,7 +146,7 @@ def load_evaluator( ) llm = llm or ChatOpenAI( # type: ignore[call-arg] - model="gpt-4", model_kwargs={"seed": 42}, temperature=0 + model="gpt-4", seed=42, temperature=0 ) except Exception as e: raise ValueError( diff --git a/libs/langchain/tests/unit_tests/evaluation/qa/test_eval_chain.py b/libs/langchain/tests/unit_tests/evaluation/qa/test_eval_chain.py index 451c5c6cd90..69f95b7575d 100644 --- a/libs/langchain/tests/unit_tests/evaluation/qa/test_eval_chain.py +++ b/libs/langchain/tests/unit_tests/evaluation/qa/test_eval_chain.py @@ -1,11 +1,14 @@ """Test LLM Bash functionality.""" +import os import sys from typing import Type +from unittest.mock import patch import pytest from langchain.chains.llm import LLMChain +from langchain.evaluation.loading import load_evaluator from langchain.evaluation.qa.eval_chain import ( ContextQAEvalChain, CotQAEvalChain, @@ -50,6 +53,18 @@ def test_context_eval_chain(chain_cls: Type[ContextQAEvalChain]) -> None: assert outputs[0]["text"] == "foo" +def test_load_criteria_evaluator() -> None: + """Test loading a criteria evaluator.""" + try: + from langchain_openai import ChatOpenAI # noqa: F401 + except ImportError: + pytest.skip("langchain-openai not installed") + # Patch the env with an openai-api-key + with patch.dict(os.environ, {"OPENAI_API_KEY": "foo"}): + # Check it can load using a string arg (even if that's not how it's typed) + load_evaluator("criteria") # type: ignore + + @pytest.mark.parametrize("chain_cls", [QAEvalChain, ContextQAEvalChain, CotQAEvalChain]) def test_implements_string_evaluator_protocol( chain_cls: Type[LLMChain],