Files
langchain/tests/regression_tests/multi_input_tools.py
2023-04-25 17:15:55 -07:00

149 lines
5.0 KiB
Python

"""Test the BaseOutputParser class and its sub-classes."""
from collections import defaultdict
import json
from copy import deepcopy
from pathlib import Path
from typing import List, Tuple
from pydantic import ValidationError
import pytest
from langchain.agents import initialize_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.openapi.toolkit import RequestsToolkit
from langchain.agents.agent_types import AgentType
from langchain.llms.openai import OpenAI
from langchain.memory.buffer import ConversationBufferMemory
from langchain.requests import TextRequestsWrapper
from langchain.schema import BaseLanguageModel
from langchain.tools.base import BaseTool
from langchain.tools.json.tool import JsonSpec
def _get_requests_tools_and_questions(**kwargs) -> List[Tuple[BaseTool, List[str]]]:
requests_wrapper = TextRequestsWrapper()
requests_toolkit = RequestsToolkit(requests_wrapper=requests_wrapper)
tools = requests_toolkit.get_tools()
tools_dict = {tool.name: tool for tool in tools}
method_to_questions = {
# "get": ["Get the header of google.com"],
"post": ["Post data {'key': 'value'} to google.com"],
"patch": ["Patch data {'key': 'value'} to google.com"],
"put": ["Put data {'key': 'value'} to google.com"],
"delete": ["Delete data with ID 1234abc from google.com"],
}
results = []
for method, qs in method_to_questions.items():
results.append((tools_dict[f"requests_{method}"], qs))
return results
def _get_json_tools_and_questions(**kwargs) -> List[Tuple[BaseTool, List[str]]]:
spec = JsonSpec.from_file(
Path("tests/unit_tests/tools/openapi/test_specs/apis-guru/apispec.json")
)
json_toolkit = JsonToolkit(spec=spec)
list_keys, get_value = json_toolkit.get_tools()
return [
(list_keys, "What keys are in the JSON spec?"),
(get_value, "What's in the info.description?"),
]
def _get_nla_tools_nad_questions(
*,
llm: BaseLanguageModel,
) -> List[Tuple[BaseTool, List[str]]]:
speak_toolkit = NLAToolkit.from_llm_and_url(
llm, "https://api.speak.com/openapi.yaml"
)
# TODO: make more pointed questions
speak_tools_and_questions = [
(tool, ["Could you help me learn something new in Spanish?"])
for tool in speak_toolkit.get_tools()
]
klarna_toolkit = NLAToolkit.from_llm_and_url(
llm, "https://www.klarna.com/us/shopping/public/openai/v0/api-docs/"
)
klarna_tools_and_questions = [
(tool, ["I want to buy some cheap shoes"])
for tool in klarna_toolkit.get_tools()
]
return speak_tools_and_questions + klarna_tools_and_questions
def generate_tuples() -> (
List[Tuple[BaseTool, List[str], BaseLanguageModel, AgentType, bool]]
):
"""Grid test."""
llms = [
# ChatOpenAI(),
OpenAI(),
]
generators = [
# _get_nla_tools_nad_questions,
# _get_json_tools_and_questions,
_get_requests_tools_and_questions,
]
# These types don't really support arbitrary single tools...
# excluded_types = (AgentType.SELF_ASK_WITH_SEARCH, AgentType.REACT_DOCSTORE)
# agent_types = [
# agent_type for agent_type in AgentType if agent_type not in excluded_types
# ]
agent_types = [
# AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
]
results = []
for llm in llms:
for agent_type in agent_types:
for generator in generators:
tools_and_queries = generator(llm=llm)
for tool, queries in tools_and_queries:
results.append((tool, queries, llm, agent_type))
return results
_AGGREGATE_AXES = ["tool", "llm", "agent_type"]
_FAILURE_COUNT = {k: defaultdict(int) for k in _AGGREGATE_AXES}
@pytest.mark.parametrize("tool, queries, llm, agent_type", generate_tuples())
def test_run_tool(
tool: BaseTool,
queries: List[str],
llm: BaseLanguageModel,
agent_type: AgentType,
) -> None:
global _FAILURE_COUNT
tool = deepcopy(tool)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent = initialize_agent(
llm=llm,
tools=[tool],
agent=agent_type,
memory=memory,
verbose=True,
)
results = []
for query in queries:
try:
result = agent(query)
results.append(result)
except Exception as e:
results.append(e)
type_errors = [r for r in results if isinstance(r, TypeError)]
if type_errors:
print(f"{str(llm)}: {tool.name} failed with: {type_errors}")
_FAILURE_COUNT["tool"][tool.name] += 1
_FAILURE_COUNT["llm"][str(llm)] += 1
_FAILURE_COUNT["agent_type"][str(agent_type)] += 1
assert not type_errors, type_errors
validation_errors = [r for r in results if isinstance(r, ValidationError)]
assert not validation_errors, validation_errors