From 90579021f850234f662ae755d5e51e15ec628b78 Mon Sep 17 00:00:00 2001
From: William FH <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Aug 2023 12:33:00 -0700
Subject: [PATCH 1/5] Update Key Check (#8948)

In the eval loop, the key check needn't be done unless you are
creating the corresponding evaluators.
---
 .../smith/evaluation/runner_utils.py          | 48 ++++++++++++-------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py
index 5b3d5775c49..88cd3f89860 100644
--- a/libs/langchain/langchain/smith/evaluation/runner_utils.py
+++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py
@@ -502,6 +502,18 @@ def _construct_run_evaluator(
     return run_evaluator
 
 
+def _get_keys(
+    config: RunEvalConfig,
+    run_inputs: Optional[List[str]],
+    run_outputs: Optional[List[str]],
+    example_outputs: Optional[List[str]],
+) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+    input_key = _determine_input_key(config, run_inputs)
+    prediction_key = _determine_prediction_key(config, run_outputs)
+    reference_key = _determine_reference_key(config, example_outputs)
+    return input_key, prediction_key, reference_key
+
+
 def _load_run_evaluators(
     config: RunEvalConfig,
     run_type: str,
@@ -521,9 +533,13 @@ def _load_run_evaluators(
     """
     eval_llm = config.eval_llm or ChatOpenAI(model="gpt-4", temperature=0.0)
     run_evaluators = []
-    input_key = _determine_input_key(config, run_inputs)
-    prediction_key = _determine_prediction_key(config, run_outputs)
-    reference_key = _determine_reference_key(config, example_outputs)
+    input_key, prediction_key, reference_key = None, None, None
+    if config.evaluators or any(
+        [isinstance(e, EvaluatorType) for e in config.evaluators]
+    ):
+        input_key, prediction_key, reference_key = _get_keys(
+            config, run_inputs, run_outputs, example_outputs
+        )
     for eval_config in config.evaluators:
         run_evaluator = _construct_run_evaluator(
             eval_config,
@@ -1074,15 +1090,15 @@ def _run_on_examples(
         A dictionary mapping example ids to the model outputs.
""" results: Dict[str, Any] = {} - llm_or_chain_factory = _wrap_in_chain_factory(llm_or_chain_factory) - project_name = _get_project_name(project_name, llm_or_chain_factory) + wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory) + project_name = _get_project_name(project_name, wrapped_model) tracer = LangChainTracer( project_name=project_name, client=client, use_threading=False ) run_evaluators, examples = _setup_evaluation( - llm_or_chain_factory, examples, evaluation, data_type + wrapped_model, examples, evaluation, data_type ) - examples = _validate_example_inputs(examples, llm_or_chain_factory, input_mapper) + examples = _validate_example_inputs(examples, wrapped_model, input_mapper) evalution_handler = EvaluatorCallbackHandler( evaluators=run_evaluators or [], client=client, @@ -1091,7 +1107,7 @@ def _run_on_examples( for i, example in enumerate(examples): result = _run_llm_or_chain( example, - llm_or_chain_factory, + wrapped_model, num_repetitions, tags=tags, callbacks=callbacks, @@ -1114,8 +1130,8 @@ def _prepare_eval_run( llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY, project_name: Optional[str], ) -> Tuple[MCF, str, Dataset, Iterator[Example]]: - llm_or_chain_factory = _wrap_in_chain_factory(llm_or_chain_factory, dataset_name) - project_name = _get_project_name(project_name, llm_or_chain_factory) + wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory, dataset_name) + project_name = _get_project_name(project_name, wrapped_model) try: project = client.create_project(project_name) except ValueError as e: @@ -1130,7 +1146,7 @@ def _prepare_eval_run( ) dataset = client.read_dataset(dataset_name=dataset_name) examples = client.list_examples(dataset_id=str(dataset.id)) - return llm_or_chain_factory, project_name, dataset, examples + return wrapped_model, project_name, dataset, examples async def arun_on_dataset( @@ -1256,13 +1272,13 @@ async def arun_on_dataset( evaluation=evaluation_config, ) """ # noqa: E501 - llm_or_chain_factory, project_name, dataset, examples = _prepare_eval_run( + wrapped_model, project_name, dataset, examples = _prepare_eval_run( client, dataset_name, llm_or_chain_factory, project_name ) results = await _arun_on_examples( client, examples, - llm_or_chain_factory, + wrapped_model, concurrency_level=concurrency_level, num_repetitions=num_repetitions, project_name=project_name, @@ -1423,14 +1439,14 @@ def run_on_dataset( evaluation=evaluation_config, ) """ # noqa: E501 - llm_or_chain_factory, project_name, dataset, examples = _prepare_eval_run( + wrapped_model, project_name, dataset, examples = _prepare_eval_run( client, dataset_name, llm_or_chain_factory, project_name ) if concurrency_level in (0, 1): results = _run_on_examples( client, examples, - llm_or_chain_factory, + wrapped_model, num_repetitions=num_repetitions, project_name=project_name, verbose=verbose, @@ -1444,7 +1460,7 @@ def run_on_dataset( coro = _arun_on_examples( client, examples, - llm_or_chain_factory, + wrapped_model, concurrency_level=concurrency_level, num_repetitions=num_repetitions, project_name=project_name, From a6e6e9bb86d3667803dfe693522a3a4c991f6c7d Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Wed, 9 Aug 2023 16:13:06 -0400 Subject: [PATCH 2/5] Fix airbyte loader (#8998) Fix airbyte loader https://github.com/langchain-ai/langchain/issues/8996 --- libs/langchain/langchain/document_loaders/airbyte.py | 3 +-- .../tests/unit_tests/document_loaders/test_airbyte.py | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 
diff --git a/libs/langchain/langchain/document_loaders/airbyte.py b/libs/langchain/langchain/document_loaders/airbyte.py
index 1a6aca6c0ca..aa670704655 100644
--- a/libs/langchain/langchain/document_loaders/airbyte.py
+++ b/libs/langchain/langchain/document_loaders/airbyte.py
@@ -1,10 +1,9 @@
 """Loads local airbyte json files."""
 from typing import Any, Callable, Iterator, List, Mapping, Optional
 
-from libs.langchain.langchain.utils.utils import guard_import
-
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
+from langchain.utils.utils import guard_import
 
 
 RecordHandler = Callable[[Any, Optional[str]], Document]
diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_airbyte.py b/libs/langchain/tests/unit_tests/document_loaders/test_airbyte.py
new file mode 100644
index 00000000000..8e3b93037fa
--- /dev/null
+++ b/libs/langchain/tests/unit_tests/document_loaders/test_airbyte.py
@@ -0,0 +1,9 @@
+"""Test the airbyte document loader.
+
+Light test to ensure that the airbyte document loader can be imported.
+"""
+
+
+def test_airbyte_import() -> None:
+    """Test that the airbyte document loader can be imported."""
+    from langchain.document_loaders import airbyte  # noqa

From 808248049ddf060732c8bac502d71d5dd04d761c Mon Sep 17 00:00:00 2001
From: Nuno Campos
Date: Wed, 9 Aug 2023 21:17:04 +0100
Subject: [PATCH 3/5] Implement a router for openai functions (#8589)

---
 libs/langchain/langchain/chains/base.py       | 18 +++-
 libs/langchain/langchain/chat_models/base.py  | 16 +++-
 libs/langchain/langchain/llms/base.py         | 16 +++-
 .../langchain/langchain/runnables/__init__.py |  0
 .../langchain/runnables/openai_functions.py   | 46 +++++++++
 libs/langchain/langchain/schema/retriever.py  | 16 +++-
 libs/langchain/langchain/schema/runnable.py   | 13 ++-
 libs/langchain/langchain/tools/base.py        | 16 +++-
 .../__snapshots__/test_openai_functions.ambr  | 31 ++++++
 .../runnables/test_openai_functions.py        | 95 +++++++++++++++++++
 10 files changed, 254 insertions(+), 13 deletions(-)
 create mode 100644 libs/langchain/langchain/runnables/__init__.py
 create mode 100644 libs/langchain/langchain/runnables/openai_functions.py
 create mode 100644 libs/langchain/tests/unit_tests/runnables/__snapshots__/test_openai_functions.ambr
 create mode 100644 libs/langchain/tests/unit_tests/runnables/test_openai_functions.py

diff --git a/libs/langchain/langchain/chains/base.py b/libs/langchain/langchain/chains/base.py
index 301b0143e7b..751dcbd581b 100644
--- a/libs/langchain/langchain/chains/base.py
+++ b/libs/langchain/langchain/chains/base.py
@@ -62,7 +62,14 @@ class Chain(Serializable, Runnable[Dict[str, Any], Dict[str, Any]], ABC):
         config: Optional[RunnableConfig] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
-        return self(input, **(config or {}), **kwargs)
+        config = config or {}
+        return self(
+            input,
+            callbacks=config.get("callbacks"),
+            tags=config.get("tags"),
+            metadata=config.get("metadata"),
+            **kwargs,
+        )
 
     async def ainvoke(
         self,
@@ -76,7 +83,14 @@ class Chain(Serializable, Runnable[Dict[str, Any], Dict[str, Any]], ABC):
             None, partial(self.invoke, input, config, **kwargs)
         )
 
-        return await self.acall(input, **(config or {}), **kwargs)
+        config = config or {}
+        return await self.acall(
+            input,
+            callbacks=config.get("callbacks"),
+            tags=config.get("tags"),
+            metadata=config.get("metadata"),
+            **kwargs,
+        )
 
     memory: Optional[BaseMemory] = None
     """Optional memory object. Defaults to None.
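The callbacks/tags/metadata unpacking in Chain.invoke and Chain.ainvoke above
repeats verbatim in the chat model, LLM, retriever, and tool diffs below: the
change stops forwarding the whole RunnableConfig dict into call signatures
that accept only specific keyword arguments. As a hedged illustration (not
part of the patch), the repeated lookups could be factored into a helper;
_config_kwargs below is hypothetical, and assumes RunnableConfig is the
TypedDict exported from langchain.schema.runnable:

    from typing import Any, Dict, Optional

    from langchain.schema.runnable import RunnableConfig  # assumed import path


    def _config_kwargs(config: Optional[RunnableConfig]) -> Dict[str, Any]:
        """Extract only the keys the legacy __call__/run signatures accept.

        Hypothetical helper: the patch instead inlines these three lookups
        at every call site.
        """
        config = config or {}
        return {
            "callbacks": config.get("callbacks"),
            "tags": config.get("tags"),
            "metadata": config.get("metadata"),
        }

With such a helper, Chain.invoke would reduce to
return self(input, **_config_kwargs(config), **kwargs); either way, the design
decouples the RunnableConfig schema from the legacy signatures, so new config
keys cannot leak into calls that do not accept them.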
diff --git a/libs/langchain/langchain/chat_models/base.py b/libs/langchain/langchain/chat_models/base.py
index b06b99f99d7..0a39dff54ac 100644
--- a/libs/langchain/langchain/chat_models/base.py
+++ b/libs/langchain/langchain/chat_models/base.py
@@ -103,12 +103,18 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
         stop: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> BaseMessageChunk:
+        config = config or {}
         return cast(
             BaseMessageChunk,
             cast(
                 ChatGeneration,
                 self.generate_prompt(
-                    [self._convert_input(input)], stop=stop, **(config or {}), **kwargs
+                    [self._convert_input(input)],
+                    stop=stop,
+                    callbacks=config.get("callbacks"),
+                    tags=config.get("tags"),
+                    metadata=config.get("metadata"),
+                    **kwargs,
                 ).generations[0][0],
             ).message,
         )
@@ -127,8 +133,14 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
             None, partial(self.invoke, input, config, stop=stop, **kwargs)
         )
 
+        config = config or {}
         llm_result = await self.agenerate_prompt(
-            [self._convert_input(input)], stop=stop, **(config or {}), **kwargs
+            [self._convert_input(input)],
+            stop=stop,
+            callbacks=config.get("callbacks"),
+            tags=config.get("tags"),
+            metadata=config.get("metadata"),
+            **kwargs,
         )
         return cast(
             BaseMessageChunk, cast(ChatGeneration, llm_result.generations[0][0]).message
diff --git a/libs/langchain/langchain/llms/base.py b/libs/langchain/langchain/llms/base.py
index 7da494de78b..3fa006ea72b 100644
--- a/libs/langchain/langchain/llms/base.py
+++ b/libs/langchain/langchain/llms/base.py
@@ -219,9 +219,15 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         stop: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> str:
+        config = config or {}
         return (
             self.generate_prompt(
-                [self._convert_input(input)], stop=stop, **(config or {}), **kwargs
+                [self._convert_input(input)],
+                stop=stop,
+                callbacks=config.get("callbacks"),
+                tags=config.get("tags"),
+                metadata=config.get("metadata"),
+                **kwargs,
             )
             .generations[0][0]
             .text
@@ -241,8 +247,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
             None, partial(self.invoke, input, config, stop=stop, **kwargs)
         )
 
+        config = config or {}
         llm_result = await self.agenerate_prompt(
-            [self._convert_input(input)], stop=stop, **(config or {}), **kwargs
+            [self._convert_input(input)],
+            stop=stop,
+            callbacks=config.get("callbacks"),
+            tags=config.get("tags"),
+            metadata=config.get("metadata"),
+            **kwargs,
         )
         return llm_result.generations[0][0].text
diff --git a/libs/langchain/langchain/runnables/__init__.py b/libs/langchain/langchain/runnables/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/libs/langchain/langchain/runnables/openai_functions.py b/libs/langchain/langchain/runnables/openai_functions.py
new file mode 100644
index 00000000000..55c9765d20c
--- /dev/null
+++ b/libs/langchain/langchain/runnables/openai_functions.py
@@ -0,0 +1,46 @@
+from operator import itemgetter
+from typing import Any, Callable, List, Mapping, Optional, Union
+
+from typing_extensions import TypedDict
+
+from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+from langchain.schema.output import ChatGeneration
+from langchain.schema.runnable import RouterRunnable, Runnable, RunnableBinding
+
+
+class OpenAIFunction(TypedDict):
+    """A function description for ChatOpenAI"""
+
+    name: str
+    """The name of the function."""
+    description: str
+    """The description of the function."""
+    parameters: dict
+    """The parameters to the function."""
+
+
+class OpenAIFunctionsRouter(RunnableBinding[ChatGeneration, Any]):
selected function.""" + + functions: Optional[List[OpenAIFunction]] + + def __init__( + self, + runnables: Mapping[ + str, + Union[ + Runnable[dict, Any], + Callable[[dict], Any], + ], + ], + functions: Optional[List[OpenAIFunction]] = None, + ): + if functions is not None: + assert len(functions) == len(runnables) + assert all(func["name"] in runnables for func in functions) + router = ( + JsonOutputFunctionsParser(args_only=False) + | {"key": itemgetter("name"), "input": itemgetter("arguments")} + | RouterRunnable(runnables) + ) + super().__init__(bound=router, kwargs={}, functions=functions) diff --git a/libs/langchain/langchain/schema/retriever.py b/libs/langchain/langchain/schema/retriever.py index 9df3e7a1389..72c5cf6366d 100644 --- a/libs/langchain/langchain/schema/retriever.py +++ b/libs/langchain/langchain/schema/retriever.py @@ -107,7 +107,13 @@ class BaseRetriever(Serializable, Runnable[str, List[Document]], ABC): def invoke( self, input: str, config: Optional[RunnableConfig] = None ) -> List[Document]: - return self.get_relevant_documents(input, **(config or {})) + config = config or {} + return self.get_relevant_documents( + input, + callbacks=config.get("callbacks"), + tags=config.get("tags"), + metadata=config.get("metadata"), + ) async def ainvoke( self, input: str, config: Optional[RunnableConfig] = None @@ -116,7 +122,13 @@ class BaseRetriever(Serializable, Runnable[str, List[Document]], ABC): # If the retriever doesn't implement async, use default implementation return await super().ainvoke(input, config) - return await self.aget_relevant_documents(input, **(config or {})) + config = config or {} + return await self.aget_relevant_documents( + input, + callbacks=config.get("callbacks"), + tags=config.get("tags"), + metadata=config.get("metadata"), + ) @abstractmethod def _get_relevant_documents( diff --git a/libs/langchain/langchain/schema/runnable.py b/libs/langchain/langchain/schema/runnable.py index 8edafe4599e..84399a2c0b9 100644 --- a/libs/langchain/langchain/schema/runnable.py +++ b/libs/langchain/langchain/schema/runnable.py @@ -1254,7 +1254,7 @@ class RunnablePassthrough(Serializable, Runnable[Input, Input]): class RunnableBinding(Serializable, Runnable[Input, Output]): """ - A runnable that binds a runnable to a set of kwargs. + A runnable that delegates calls to another runnable with a set of kwargs. 
""" bound: Runnable[Input, Output] @@ -1339,8 +1339,15 @@ class RouterRunnable( runnables: Mapping[str, Runnable[Input, Output]] - def __init__(self, runnables: Mapping[str, Runnable[Input, Output]]) -> None: - super().__init__(runnables=runnables) + def __init__( + self, + runnables: Mapping[ + str, Union[Runnable[Input, Output], Callable[[Input], Output]] + ], + ) -> None: + super().__init__( + runnables={key: _coerce_to_runnable(r) for key, r in runnables.items()} + ) class Config: arbitrary_types_allowed = True diff --git a/libs/langchain/langchain/tools/base.py b/libs/langchain/langchain/tools/base.py index 651138718b8..f8607d51443 100644 --- a/libs/langchain/langchain/tools/base.py +++ b/libs/langchain/langchain/tools/base.py @@ -203,7 +203,13 @@ class BaseTool(BaseModel, Runnable[Union[str, Dict], Any], metaclass=ToolMetacla **kwargs: Any, ) -> Any: config = config or {} - return self.run(input, **config, **kwargs) + return self.run( + input, + callbacks=config.get("callbacks"), + tags=config.get("tags"), + metadata=config.get("metadata"), + **kwargs, + ) async def ainvoke( self, @@ -216,7 +222,13 @@ class BaseTool(BaseModel, Runnable[Union[str, Dict], Any], metaclass=ToolMetacla return super().ainvoke(input, config, **kwargs) config = config or {} - return await self.arun(input, **config, **kwargs) + return await self.arun( + input, + callbacks=config.get("callbacks"), + tags=config.get("tags"), + metadata=config.get("metadata"), + **kwargs, + ) # --- Tool --- diff --git a/libs/langchain/tests/unit_tests/runnables/__snapshots__/test_openai_functions.ambr b/libs/langchain/tests/unit_tests/runnables/__snapshots__/test_openai_functions.ambr new file mode 100644 index 00000000000..ed3f36e061f --- /dev/null +++ b/libs/langchain/tests/unit_tests/runnables/__snapshots__/test_openai_functions.ambr @@ -0,0 +1,31 @@ +# serializer version: 1 +# name: test_openai_functions_router + list([ + dict({ + 'description': 'Sends the draft for revision.', + 'name': 'revise', + 'parameters': dict({ + 'properties': dict({ + 'notes': dict({ + 'description': "The editor's notes to guide the revision.", + 'type': 'string', + }), + }), + 'type': 'object', + }), + }), + dict({ + 'description': 'Accepts the draft.', + 'name': 'accept', + 'parameters': dict({ + 'properties': dict({ + 'draft': dict({ + 'description': 'The draft to accept.', + 'type': 'string', + }), + }), + 'type': 'object', + }), + }), + ]) +# --- diff --git a/libs/langchain/tests/unit_tests/runnables/test_openai_functions.py b/libs/langchain/tests/unit_tests/runnables/test_openai_functions.py new file mode 100644 index 00000000000..e4cec167d88 --- /dev/null +++ b/libs/langchain/tests/unit_tests/runnables/test_openai_functions.py @@ -0,0 +1,95 @@ +from typing import Any, List, Optional + +from pytest_mock import MockerFixture +from syrupy import SnapshotAssertion + +from langchain.callbacks.manager import CallbackManagerForLLMRun +from langchain.chat_models.base import BaseChatModel +from langchain.runnables.openai_functions import OpenAIFunctionsRouter +from langchain.schema import ChatResult +from langchain.schema.messages import AIMessage, BaseMessage +from langchain.schema.output import ChatGeneration + + +class FakeChatOpenAI(BaseChatModel): + @property + def _llm_type(self) -> str: + return "fake-openai-chat-model" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + return ChatResult( + generations=[ + 
+                ChatGeneration(
+                    message=AIMessage(
+                        content="",
+                        additional_kwargs={
+                            "function_call": {
+                                "name": "accept",
+                                "arguments": '{\n "draft": "turtles"\n}',
+                            }
+                        },
+                    )
+                )
+            ]
+        )
+
+
+def test_openai_functions_router(
+    snapshot: SnapshotAssertion, mocker: MockerFixture
+) -> None:
+    revise = mocker.Mock(
+        side_effect=lambda kw: f'Revised draft: no more {kw["notes"]}!'
+    )
+    accept = mocker.Mock(side_effect=lambda kw: f'Accepted draft: {kw["draft"]}!')
+
+    router = OpenAIFunctionsRouter(
+        {
+            "revise": revise,
+            "accept": accept,
+        },
+        functions=[
+            {
+                "name": "revise",
+                "description": "Sends the draft for revision.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "notes": {
+                            "type": "string",
+                            "description": "The editor's notes to guide the revision.",
+                        },
+                    },
+                },
+            },
+            {
+                "name": "accept",
+                "description": "Accepts the draft.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "draft": {
+                            "type": "string",
+                            "description": "The draft to accept.",
+                        },
+                    },
+                },
+            },
+        ],
+    )
+
+    model = FakeChatOpenAI()
+
+    chain = model.bind(functions=router.functions) | router
+
+    assert router.functions == snapshot
+
+    assert chain.invoke("Something about turtles?") == "Accepted draft: turtles!"
+
+    revise.assert_not_called()
+    accept.assert_called_once_with({"draft": "turtles"})

From c2f46b2cdbad36d9ead3ef6702a75cc6a1edcbb6 Mon Sep 17 00:00:00 2001
From: Michael Shen <71194090+haozhenshen@users.noreply.github.com>
Date: Wed, 9 Aug 2023 16:17:46 -0400
Subject: [PATCH 4/5] Fixed wrong paper reference (#8970)

The ReAct reference pointed to the MRKL paper. Corrected so that it
points to the actual ReAct paper (#8964).
---
 docs/docs_skeleton/docs/modules/agents/agent_types/index.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs_skeleton/docs/modules/agents/agent_types/index.mdx b/docs/docs_skeleton/docs/modules/agents/agent_types/index.mdx
index 7327159eee8..ddb38c4d9c2 100644
--- a/docs/docs_skeleton/docs/modules/agents/agent_types/index.mdx
+++ b/docs/docs_skeleton/docs/modules/agents/agent_types/index.mdx
@@ -12,7 +12,7 @@ Here are the agents available in LangChain.
 
 ### [Zero-shot ReAct](/docs/modules/agents/agent_types/react.html)
 
-This agent uses the [ReAct](https://arxiv.org/pdf/2205.00445.pdf) framework to determine which tool to use
+This agent uses the [ReAct](https://arxiv.org/pdf/2210.03629) framework to determine which tool to use
 based solely on the tool's description. Any number of tools can be
 provided. This agent requires that a description is provided for each
 tool.

From 96d064e30512debb09e4e7c10b3e2a2e45061c95 Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Wed, 9 Aug 2023 13:40:49 -0700
Subject: [PATCH 5/5] bump 260 (#9002)

---
 libs/langchain/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
index 4cc34c99fa5..2d5aef61224 100644
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain"
-version = "0.0.259"
+version = "0.0.260"
 description = "Building applications with LLMs through composability"
 authors = []
 license = "MIT"
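Usage sketch for PATCH 3/5: a minimal end-to-end example of
OpenAIFunctionsRouter, adapted from the unit test above. It is illustrative
rather than part of the patches: it assumes ChatOpenAI is available with a
configured OPENAI_API_KEY, and the model name is an assumption.

    from langchain.chat_models import ChatOpenAI
    from langchain.runnables.openai_functions import OpenAIFunctionsRouter

    # Map each OpenAI function name to the callable (or runnable) that
    # handles the parsed arguments when the model calls that function.
    router = OpenAIFunctionsRouter(
        {
            "revise": lambda kw: f"Revised draft: no more {kw['notes']}!",
            "accept": lambda kw: f"Accepted draft: {kw['draft']}!",
        },
        functions=[
            {
                "name": "revise",
                "description": "Sends the draft for revision.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "notes": {
                            "type": "string",
                            "description": "The editor's notes to guide the revision.",
                        },
                    },
                },
            },
            {
                "name": "accept",
                "description": "Accepts the draft.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "draft": {
                            "type": "string",
                            "description": "The draft to accept.",
                        },
                    },
                },
            },
        ],
    )

    # Bind the function schemas to the model so it emits a function_call,
    # then pipe the generation into the router, which dispatches to the
    # matching handler via JsonOutputFunctionsParser and RouterRunnable.
    model = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)  # assumed model name
    chain = model.bind(functions=router.functions) | router

    print(chain.invoke("Write a one-line draft about turtles."))

The design mirrors the test: router.functions carries the schemas so they can
be bound to the model in one place, while the router itself only sees the
parsed function_call and routes on its name.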