multiple: langchain 0.2 in master (#21191)

0.2rc 

migrations

- [x] Move memory
- [x] Move remaining retrievers
- [x] graph_qa chains
- [x] Resolve the evaluation code's potential dependency on math utils
- [x] Move openapi chain from `langchain.chains.api.openapi` to
`langchain_community.chains.openapi`
- [x] Migrate `langchain.chains.ernie_functions` to
`langchain_community.chains.ernie_functions`
- [x] Migrate `langchain/chains/llm_requests.py` to
`langchain_community.chains.llm_requests` (see the re-export shim sketch
after this checklist)
- [x] Move `langchain_community.cross_encoders.base:BaseCrossEncoder`
->
`langchain.retrievers.document_compressors.cross_encoder:BaseCrossEncoder`
(namespace not ideal, but it needs to be moved to `langchain` to avoid
circular deps)
- [x] unit tests langchain -- add pytest.mark.community to some unit
tests that will stay in langchain
- [x] unit tests community -- move unit tests that depend on community
to community
- [x] Move integration tests that depend on community to community
- [x] mypy checks
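
Each of these moves keeps the old import path working through a thin re-export shim that raises a deprecation warning on access (the actual shims presumably go through the `langchain._api` helpers such as `create_importer`; the sketch below is illustrative and standard-library-only, using the `llm_requests` move as the example):

```python
# Hypothetical shim body for langchain/chains/llm_requests.py after the move.
# The implementation lives in langchain_community; the old path lazily
# re-exports it and warns, so existing imports keep working.
import warnings
from importlib import import_module
from typing import Any

# old attribute name -> new module path (illustrative mapping)
_MOVED = {"LLMRequestsChain": "langchain_community.chains.llm_requests"}


def __getattr__(name: str) -> Any:  # PEP 562 module-level lazy attribute access
    if name in _MOVED:
        new_module = _MOVED[name]
        warnings.warn(
            f"Importing {name} from langchain.chains.llm_requests is deprecated; "
            f"use `from {new_module} import {name}` instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return getattr(import_module(new_module), name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```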

Other todo

- [x] Make deprecation warnings not noisy (use `warn_deprecated` and check
that things are implemented properly); see the sketch after this list
- [x] Update deprecation messages with a timeline for code removal (we
likely won't actually remove things until the 0.4 release), which gives
people more time to transition their code.
- [ ] Add information to deprecation warning to show users how to
migrate their code base using langchain-cli
- [ ] Remove any unnecessary requirements in langchain (e.g., is
SQLAlchemy required?)
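
As a rough sketch of the quieter warning, assuming the `warn_deprecated` helper exported from `langchain_core._api` (the keyword names and the `OpenAPIEndpointChain` example are illustrative, not taken from this PR):

```python
from langchain_core._api import warn_deprecated

# Emit one targeted warning that names the replacement import and the
# release in which the old path is expected to disappear.
warn_deprecated(
    since="0.2.0",
    removal="0.4",  # per the item above, removal is not expected before 0.4
    name="OpenAPIEndpointChain",  # illustrative symbol
    alternative="langchain_community.chains.openapi.chains.OpenAPIEndpointChain",
)
```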

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Eugene Yurtsev, 2024-05-08 16:46:52 -04:00 (committed by GitHub)
parent 6b392d6d12, commit f92006de3c
238 changed files with 7552 additions and 5899 deletions


@@ -0,0 +1,503 @@
from typing import Iterator, List, Optional
from uuid import uuid4
import pytest
from langchain.chains.llm import LLMChain
from langchain.evaluation import EvaluatorType
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.smith.evaluation import InputFormatError
from langchain.smith.evaluation.runner_utils import arun_on_dataset
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.prompts.chat import ChatPromptTemplate
from langsmith import Client as Client
from langsmith.evaluation import run_evaluator
from langsmith.schemas import DataType, Example, Run
from langchain_community.chat_models import ChatOpenAI
from langchain_community.llms.openai import OpenAI
def _check_all_feedback_passed(_project_name: str, client: Client) -> None:
# Assert that all runs completed, all feedback completed, and that the
# chain or llm passes for the feedback provided.
runs = list(client.list_runs(project_name=_project_name, execution_order=1))
if not runs:
# Queue delays. We are mainly just smoke checking rn.
return
feedback = list(client.list_feedback(run_ids=[run.id for run in runs]))
if not feedback:
return
assert all([bool(f.score) for f in feedback])
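
# Custom evaluator: scores the run as passing when its first output value is non-empty.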
@run_evaluator
def not_empty(run: Run, example: Optional[Example] = None) -> dict:
return {
"score": run.outputs and next(iter(run.outputs.values())),
"key": "not_empty",
}
@pytest.fixture
def eval_project_name() -> str:
return f"lcp integration tests - {str(uuid4())[-8:]}"
@pytest.fixture(scope="module")
def client() -> Client:
return Client()
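
### Testing KV Datasets
# Key-value dataset with multiple input and output keys; bare LLMs and chat
# models need an input mapper (or a chain) to consume the examples.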
@pytest.fixture(
scope="module",
)
def kv_dataset_name() -> Iterator[str]:
import pandas as pd
client = Client()
df = pd.DataFrame(
{
"some_input": [
"What's the capital of California?",
"What's the capital of Nevada?",
"What's the capital of Oregon?",
"What's the capital of Washington?",
],
"other_input": [
"a",
"b",
"c",
"d",
],
"some_output": ["Sacramento", "Carson City", "Salem", "Olympia"],
"other_output": ["e", "f", "g", "h"],
}
)
uid = str(uuid4())[-8:]
_dataset_name = f"lcp kv dataset integration tests - {uid}"
client.upload_dataframe(
df,
name=_dataset_name,
input_keys=["some_input", "other_input"],
output_keys=["some_output", "other_output"],
description="Integration test dataset",
)
yield _dataset_name
def test_chat_model(
kv_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = ChatOpenAI(temperature=0)
eval_config = RunEvalConfig(
evaluators=[EvaluatorType.QA], custom_evaluators=[not_empty]
)
with pytest.raises(ValueError, match="Must specify reference_key"):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
)
eval_config = RunEvalConfig(
evaluators=[EvaluatorType.QA],
reference_key="some_output",
)
with pytest.raises(
InputFormatError, match="Example inputs do not match language model"
):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
)
def input_mapper(d: dict) -> List[BaseMessage]:
return [HumanMessage(content=d["some_input"])]
run_on_dataset(
client=client,
dataset_name=kv_dataset_name,
llm_or_chain_factory=input_mapper | llm,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_llm(kv_dataset_name: str, eval_project_name: str, client: Client) -> None:
llm = OpenAI(temperature=0)
eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA])
with pytest.raises(ValueError, match="Must specify reference_key"):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
)
eval_config = RunEvalConfig(
evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA],
reference_key="some_output",
)
with pytest.raises(InputFormatError, match="Example inputs"):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
)
def input_mapper(d: dict) -> str:
return d["some_input"]
run_on_dataset(
client=client,
dataset_name=kv_dataset_name,
llm_or_chain_factory=input_mapper | llm,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_chain(kv_dataset_name: str, eval_project_name: str, client: Client) -> None:
llm = ChatOpenAI(temperature=0)
chain = LLMChain.from_string(llm, "The answer to the {question} is: ")
eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
with pytest.raises(ValueError, match="Must specify reference_key"):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=lambda: chain,
evaluation=eval_config,
client=client,
)
eval_config = RunEvalConfig(
evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA],
reference_key="some_output",
)
with pytest.raises(InputFormatError, match="Example inputs"):
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=lambda: chain,
evaluation=eval_config,
client=client,
)
eval_config = RunEvalConfig(
custom_evaluators=[not_empty],
)
def right_input_mapper(d: dict) -> dict:
return {"question": d["some_input"]}
run_on_dataset(
dataset_name=kv_dataset_name,
llm_or_chain_factory=lambda: right_input_mapper | chain,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
### Testing Chat Datasets
@pytest.fixture(
scope="module",
)
def chat_dataset_name() -> Iterator[str]:
def _create_message(txt: str, role: str = "human") -> List[dict]:
return [{"type": role, "data": {"content": txt}}]
import pandas as pd
client = Client()
df = pd.DataFrame(
{
"input": [
_create_message(txt)
for txt in (
"What's the capital of California?",
"What's the capital of Nevada?",
"What's the capital of Oregon?",
"What's the capital of Washington?",
)
],
"output": [
_create_message(txt, role="ai")[0]
for txt in ("Sacramento", "Carson City", "Salem", "Olympia")
],
}
)
uid = str(uuid4())[-8:]
_dataset_name = f"lcp chat dataset integration tests - {uid}"
ds = client.create_dataset(
_dataset_name, description="Integration test dataset", data_type=DataType.chat
)
for row in df.itertuples():
client.create_example(
dataset_id=ds.id,
inputs={"input": row.input},
outputs={"output": row.output},
)
yield _dataset_name
def test_chat_model_on_chat_dataset(
chat_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = ChatOpenAI(temperature=0)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
dataset_name=chat_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
project_name=eval_project_name,
)
_check_all_feedback_passed(eval_project_name, client)
def test_llm_on_chat_dataset(
chat_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = OpenAI(temperature=0)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
dataset_name=chat_dataset_name,
llm_or_chain_factory=llm,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_chain_on_chat_dataset(chat_dataset_name: str, client: Client) -> None:
llm = ChatOpenAI(temperature=0)
chain = LLMChain.from_string(llm, "The answer to the {question} is: ")
eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
with pytest.raises(
ValueError, match="Cannot evaluate a chain on dataset with data_type=chat"
):
run_on_dataset(
dataset_name=chat_dataset_name,
client=client,
llm_or_chain_factory=lambda: chain,
evaluation=eval_config,
)
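
### Testing LLM Datasets
# Dataset in the single-prompt / single-completion (DataType.llm) format.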
@pytest.fixture(
scope="module",
)
def llm_dataset_name() -> Iterator[str]:
import pandas as pd
client = Client()
df = pd.DataFrame(
{
"input": [
"What's the capital of California?",
"What's the capital of Nevada?",
"What's the capital of Oregon?",
"What's the capital of Washington?",
],
"output": ["Sacramento", "Carson City", "Salem", "Olympia"],
}
)
uid = str(uuid4())[-8:]
_dataset_name = f"lcp llm dataset integration tests - {uid}"
client.upload_dataframe(
df,
name=_dataset_name,
input_keys=["input"],
output_keys=["output"],
description="Integration test dataset",
data_type=DataType.llm,
)
yield _dataset_name
def test_chat_model_on_llm_dataset(
llm_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = ChatOpenAI(temperature=0)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
client=client,
dataset_name=llm_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_llm_on_llm_dataset(
llm_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = OpenAI(temperature=0)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
client=client,
dataset_name=llm_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_chain_on_llm_dataset(llm_dataset_name: str, client: Client) -> None:
llm = ChatOpenAI(temperature=0)
chain = LLMChain.from_string(llm, "The answer to the {question} is: ")
eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
with pytest.raises(
ValueError, match="Cannot evaluate a chain on dataset with data_type=llm"
):
run_on_dataset(
client=client,
dataset_name=llm_dataset_name,
llm_or_chain_factory=lambda: chain,
evaluation=eval_config,
)
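
### Testing Single-Key KV Datasets
# Key-value dataset with exactly one (arbitrarily named) input key and one
# output key, so models and chains can run without an explicit input mapper.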
@pytest.fixture(
scope="module",
)
def kv_singleio_dataset_name() -> Iterator[str]:
import pandas as pd
client = Client()
df = pd.DataFrame(
{
"the wackiest input": [
"What's the capital of California?",
"What's the capital of Nevada?",
"What's the capital of Oregon?",
"What's the capital of Washington?",
],
"unthinkable output": ["Sacramento", "Carson City", "Salem", "Olympia"],
}
)
uid = str(uuid4())[-8:]
_dataset_name = f"lcp singleio kv dataset integration tests - {uid}"
client.upload_dataframe(
df,
name=_dataset_name,
input_keys=["the wackiest input"],
output_keys=["unthinkable output"],
description="Integration test dataset",
)
yield _dataset_name
def test_chat_model_on_kv_singleio_dataset(
kv_singleio_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = ChatOpenAI(temperature=0)
eval_config = RunEvalConfig(evaluators=[EvaluatorType.QA, EvaluatorType.CRITERIA])
run_on_dataset(
dataset_name=kv_singleio_dataset_name,
llm_or_chain_factory=llm,
evaluation=eval_config,
client=client,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_llm_on_kv_singleio_dataset(
kv_singleio_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = OpenAI(temperature=0)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
dataset_name=kv_singleio_dataset_name,
llm_or_chain_factory=llm,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
def test_chain_on_kv_singleio_dataset(
kv_singleio_dataset_name: str, eval_project_name: str, client: Client
) -> None:
llm = ChatOpenAI(temperature=0)
chain = LLMChain.from_string(llm, "The answer to the {question} is: ")
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
run_on_dataset(
dataset_name=kv_singleio_dataset_name,
llm_or_chain_factory=lambda: chain,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
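
# Async variants: evaluate a runnable and an arbitrary function with arun_on_dataset.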
async def test_runnable_on_kv_singleio_dataset(
kv_singleio_dataset_name: str, eval_project_name: str, client: Client
) -> None:
runnable = (
ChatPromptTemplate.from_messages([("human", "{the wackiest input}")])
| ChatOpenAI()
)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
await arun_on_dataset(
dataset_name=kv_singleio_dataset_name,
llm_or_chain_factory=runnable,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)
async def test_arb_func_on_kv_singleio_dataset(
kv_singleio_dataset_name: str, eval_project_name: str, client: Client
) -> None:
runnable = (
ChatPromptTemplate.from_messages([("human", "{the wackiest input}")])
| ChatOpenAI()
)
def my_func(x: dict) -> str:
content = runnable.invoke(x).content
if isinstance(content, str):
return content
else:
raise ValueError(
f"Expected message with content type string, got {content}"
)
eval_config = RunEvalConfig(custom_evaluators=[not_empty])
await arun_on_dataset(
dataset_name=kv_singleio_dataset_name,
llm_or_chain_factory=my_func,
client=client,
evaluation=eval_config,
project_name=eval_project_name,
tags=["shouldpass"],
)
_check_all_feedback_passed(eval_project_name, client)