From 11df1bedc3e496a129ab8e72cc79791378a875e0 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 23 Jan 2026 23:07:48 -0500 Subject: [PATCH] style(core): lint (#34862) it looks scary but i promise it is not improving documentation consistency across core. primarily update docstrings and comments for better formatting, readability, and accuracy, as well as add minor clarifications and formatting improvements to user-facing documentation. --- libs/core/Makefile | 2 + libs/core/README.md | 2 +- libs/core/langchain_core/__init__.py | 2 +- libs/core/langchain_core/_api/__init__.py | 2 +- .../langchain_core/_api/beta_decorator.py | 71 +++-- libs/core/langchain_core/_api/deprecation.py | 5 +- libs/core/langchain_core/_import_utils.py | 7 +- libs/core/langchain_core/agents.py | 19 +- libs/core/langchain_core/caches.py | 28 +- .../core/langchain_core/callbacks/__init__.py | 2 +- libs/core/langchain_core/callbacks/base.py | 41 +-- libs/core/langchain_core/callbacks/file.py | 38 +-- libs/core/langchain_core/callbacks/manager.py | 153 +++++------ libs/core/langchain_core/callbacks/stdout.py | 4 +- .../callbacks/streaming_stdout.py | 5 +- libs/core/langchain_core/callbacks/usage.py | 8 +- libs/core/langchain_core/chat_history.py | 41 +-- libs/core/langchain_core/chat_sessions.py | 1 + .../langchain_core/document_loaders/base.py | 21 +- .../document_loaders/blob_loaders.py | 9 +- .../document_loaders/langsmith.py | 28 +- .../core/langchain_core/documents/__init__.py | 1 + libs/core/langchain_core/documents/base.py | 21 +- libs/core/langchain_core/load/serializable.py | 1 + .../langchain_core/output_parsers/base.py | 53 ++-- .../langchain_core/output_parsers/json.py | 18 +- .../langchain_core/output_parsers/list.py | 2 +- .../output_parsers/openai_tools.py | 31 ++- .../langchain_core/output_parsers/pydantic.py | 9 +- .../output_parsers/transform.py | 4 +- .../core/langchain_core/output_parsers/xml.py | 39 +-- libs/core/langchain_core/outputs/__init__.py | 6 +- 
.../langchain_core/outputs/chat_generation.py | 23 +- .../langchain_core/outputs/chat_result.py | 18 +- .../core/langchain_core/outputs/generation.py | 16 +- .../core/langchain_core/outputs/llm_result.py | 22 +- libs/core/langchain_core/outputs/run_info.py | 12 +- libs/core/langchain_core/prompt_values.py | 6 +- libs/core/langchain_core/prompts/__init__.py | 2 +- libs/core/langchain_core/prompts/base.py | 44 ++-- libs/core/langchain_core/prompts/chat.py | 246 ++++++++++-------- libs/core/langchain_core/prompts/dict.py | 9 +- libs/core/langchain_core/prompts/few_shot.py | 82 +++--- .../prompts/few_shot_with_templates.py | 28 +- libs/core/langchain_core/prompts/image.py | 5 +- libs/core/langchain_core/prompts/loading.py | 10 +- libs/core/langchain_core/prompts/message.py | 4 +- libs/core/langchain_core/prompts/prompt.py | 83 +++--- libs/core/langchain_core/prompts/string.py | 30 ++- .../core/langchain_core/prompts/structured.py | 22 +- libs/core/langchain_core/rate_limiters.py | 6 + libs/core/langchain_core/retrievers.py | 8 +- .../core/langchain_core/runnables/__init__.py | 15 +- libs/core/langchain_core/runnables/base.py | 2 +- libs/core/langchain_core/runnables/config.py | 2 +- .../langchain_core/runnables/fallbacks.py | 4 +- libs/core/langchain_core/runnables/graph.py | 2 +- libs/core/langchain_core/runnables/history.py | 2 +- .../langchain_core/runnables/passthrough.py | 2 +- libs/core/langchain_core/runnables/retry.py | 2 +- libs/core/langchain_core/runnables/router.py | 2 +- libs/core/langchain_core/runnables/utils.py | 2 +- libs/core/langchain_core/stores.py | 46 ++-- libs/core/langchain_core/structured_query.py | 6 + libs/core/langchain_core/tools/__init__.py | 6 +- libs/core/langchain_core/tools/base.py | 54 ++-- libs/core/langchain_core/tools/convert.py | 46 ++-- libs/core/langchain_core/tools/retriever.py | 15 +- libs/core/langchain_core/tools/simple.py | 2 + libs/core/langchain_core/tools/structured.py | 12 +- libs/core/langchain_core/tracers/__init__.py 
| 2 +- libs/core/langchain_core/tracers/_compat.py | 20 +- .../core/langchain_core/tracers/_streaming.py | 10 +- libs/core/langchain_core/tracers/base.py | 21 +- libs/core/langchain_core/tracers/context.py | 6 +- libs/core/langchain_core/tracers/core.py | 31 ++- .../core/langchain_core/tracers/evaluation.py | 32 ++- .../langchain_core/tracers/event_stream.py | 35 ++- libs/core/langchain_core/tracers/langchain.py | 21 +- .../core/langchain_core/tracers/log_stream.py | 115 ++++---- .../langchain_core/tracers/memory_stream.py | 43 +-- .../langchain_core/tracers/root_listeners.py | 2 +- .../langchain_core/tracers/run_collector.py | 7 +- libs/core/langchain_core/tracers/stdout.py | 13 +- libs/core/langchain_core/utils/aiter.py | 67 ++--- libs/core/langchain_core/utils/env.py | 5 +- .../langchain_core/utils/function_calling.py | 92 ++++--- libs/core/langchain_core/utils/html.py | 8 +- libs/core/langchain_core/utils/input.py | 1 + .../langchain_core/utils/interactive_env.py | 2 +- libs/core/langchain_core/utils/iter.py | 58 +++-- libs/core/langchain_core/utils/json.py | 11 +- libs/core/langchain_core/utils/json_schema.py | 9 +- libs/core/langchain_core/utils/mustache.py | 53 ++-- libs/core/langchain_core/utils/pydantic.py | 11 +- libs/core/langchain_core/utils/strings.py | 8 +- libs/core/langchain_core/utils/utils.py | 30 ++- libs/core/langchain_core/vectorstores/base.py | 39 +-- .../langchain_core/vectorstores/in_memory.py | 11 +- .../core/langchain_core/vectorstores/utils.py | 16 +- libs/core/scripts/check_version.py | 12 +- .../unit_tests/runnables/test_runnable.py | 1 + 102 files changed, 1333 insertions(+), 1031 deletions(-) diff --git a/libs/core/Makefile b/libs/core/Makefile index 70425ce6d12..44b334d7998 100644 --- a/libs/core/Makefile +++ b/libs/core/Makefile @@ -13,6 +13,7 @@ test tests: env \ -u LANGCHAIN_TRACING_V2 \ -u LANGCHAIN_API_KEY \ + -u LANGSMITH_API_KEY \ -u LANGSMITH_TRACING \ -u LANGCHAIN_PROJECT \ uv run --group test pytest -n auto --disable-socket 
--allow-unix-socket $(TEST_FILE) @@ -21,6 +22,7 @@ test_watch: env \ -u LANGCHAIN_TRACING_V2 \ -u LANGCHAIN_API_KEY \ + -u LANGSMITH_API_KEY \ -u LANGSMITH_TRACING \ -u LANGCHAIN_PROJECT \ uv run --group test ptw --snapshot-update --now . --disable-socket --allow-unix-socket -vv -- $(TEST_FILE) diff --git a/libs/core/README.md b/libs/core/README.md index 10fb97d4301..a58084d1548 100644 --- a/libs/core/README.md +++ b/libs/core/README.md @@ -34,7 +34,7 @@ The LangChain ecosystem is built on top of `langchain-core`. Some of the benefit ## 📖 Documentation -For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/). For conceptual guides, tutorials, and examples on using LangChain, see the [LangChain Docs](https://docs.langchain.com/oss/python/langchain/overview). +For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/). For conceptual guides, tutorials, and examples on using LangChain, see the [LangChain Docs](https://docs.langchain.com/oss/python/langchain/overview). You can also chat with the docs using [Chat LangChain](https://chat.langchain.com). ## 📕 Releases & Versioning diff --git a/libs/core/langchain_core/__init__.py b/libs/core/langchain_core/__init__.py index 6694d36c3d7..0e16b0f744a 100644 --- a/libs/core/langchain_core/__init__.py +++ b/libs/core/langchain_core/__init__.py @@ -2,7 +2,7 @@ The interfaces for core components like chat models, LLMs, vector stores, retrievers, and more are defined here. The universal invocation protocol (Runnables) along with -a syntax for combining components (LangChain Expression Language) are also defined here. +a syntax for combining components are also defined here. **No third-party integrations are defined here.** The dependencies are kept purposefully very lightweight. 
diff --git a/libs/core/langchain_core/_api/__init__.py b/libs/core/langchain_core/_api/__init__.py index 6ba941f3d0f..1028cd366bb 100644 --- a/libs/core/langchain_core/_api/__init__.py +++ b/libs/core/langchain_core/_api/__init__.py @@ -5,7 +5,7 @@ This module is only relevant for LangChain developers, not for users. !!! warning This module and its submodules are for internal use only. Do not use them in your - own code. We may change the API at any time with no warning. + own code. We may change the API at any time with no warning. """ from typing import TYPE_CHECKING diff --git a/libs/core/langchain_core/_api/beta_decorator.py b/libs/core/langchain_core/_api/beta_decorator.py index a095f425fa8..94671a11224 100644 --- a/libs/core/langchain_core/_api/beta_decorator.py +++ b/libs/core/langchain_core/_api/beta_decorator.py @@ -1,8 +1,7 @@ """Helper functions for marking parts of the LangChain API as beta. -This module was loosely adapted from matplotlibs _api/deprecation.py module: - -https://github.com/matplotlib/matplotlib/blob/main/lib/matplotlib/_api/deprecation.py +This module was loosely adapted from matplotlib's [`_api/deprecation.py`](https://github.com/matplotlib/matplotlib/blob/main/lib/matplotlib/_api/deprecation.py) +module. !!! warning @@ -39,38 +38,34 @@ def beta( ) -> Callable[[T], T]: """Decorator to mark a function, a class, or a property as beta. - When marking a classmethod, a staticmethod, or a property, the - `@beta` decorator should go *under* `@classmethod` and - `@staticmethod` (i.e., `beta` should directly decorate the - underlying callable), but *over* `@property`. + When marking a classmethod, a staticmethod, or a property, the `@beta` decorator + should go *under* `@classmethod` and `@staticmethod` (i.e., `beta` should directly + decorate the underlying callable), but *over* `@property`. 
- When marking a class `C` intended to be used as a base class in a - multiple inheritance hierarchy, `C` *must* define an `__init__` method - (if `C` instead inherited its `__init__` from its own base class, then - `@beta` would mess up `__init__` inheritance when installing its - own (annotation-emitting) `C.__init__`). + When marking a class `C` intended to be used as a base class in a multiple + inheritance hierarchy, `C` *must* define an `__init__` method (if `C` instead + inherited its `__init__` from its own base class, then `@beta` would mess up + `__init__` inheritance when installing its own (annotation-emitting) `C.__init__`). Args: - message: - Override the default beta message. The %(since)s, - %(name)s, %(alternative)s, %(obj_type)s, %(addendum)s, - and %(removal)s format specifiers will be replaced by the - values of the respective arguments passed to this function. - name: - The name of the beta object. - obj_type: - The object type being beta. - addendum: - Additional text appended directly to the final message. + message: Override the default beta message. + + The %(since)s, %(name)s, %(alternative)s, %(obj_type)s, %(addendum)s, and + %(removal)s format specifiers will be replaced by the values of the + respective arguments passed to this function. + name: The name of the beta object. + obj_type: The object type being beta. + addendum: Additional text appended directly to the final message. Returns: A decorator which can be used to mark functions or classes as beta. 
- ```python - @beta - def the_function_to_annotate(): - pass - ``` + Example: + ```python + @beta + def the_function_to_annotate(): + pass + ``` """ def beta( @@ -209,7 +204,7 @@ def beta( @contextlib.contextmanager def suppress_langchain_beta_warning() -> Generator[None, None, None]: - """Context manager to suppress LangChainDeprecationWarning.""" + """Context manager to suppress `LangChainBetaWarning`.""" with warnings.catch_warnings(): warnings.simplefilter("ignore", LangChainBetaWarning) yield @@ -225,17 +220,13 @@ def warn_beta( """Display a standardized beta annotation. Args: - message: - Override the default beta message. The - %(name)s, %(obj_type)s, %(addendum)s - format specifiers will be replaced by the - values of the respective arguments passed to this function. - name: - The name of the annotated object. - obj_type: - The object type being annotated. - addendum: - Additional text appended directly to the final message. + message: Override the default beta message. + + The %(name)s, %(obj_type)s, %(addendum)s format specifiers will be replaced + by the values of the respective arguments passed to this function. + name: The name of the annotated object. + obj_type: The object type being annotated. + addendum: Additional text appended directly to the final message. """ if not message: message = "" diff --git a/libs/core/langchain_core/_api/deprecation.py b/libs/core/langchain_core/_api/deprecation.py index 06fa1a2a547..f757aa435fd 100644 --- a/libs/core/langchain_core/_api/deprecation.py +++ b/libs/core/langchain_core/_api/deprecation.py @@ -1,8 +1,7 @@ """Helper functions for deprecating parts of the LangChain API. -This module was adapted from matplotlibs _api/deprecation.py module: - -https://github.com/matplotlib/matplotlib/blob/main/lib/matplotlib/_api/deprecation.py +This module was adapted from matplotlib's [`_api/deprecation.py`](https://github.com/matplotlib/matplotlib/blob/main/lib/matplotlib/_api/deprecation.py) +module. !!!
warning diff --git a/libs/core/langchain_core/_import_utils.py b/libs/core/langchain_core/_import_utils.py index 6225a655e1c..60c6ca0ede9 100644 --- a/libs/core/langchain_core/_import_utils.py +++ b/libs/core/langchain_core/_import_utils.py @@ -8,13 +8,14 @@ def import_attr( ) -> object: """Import an attribute from a module located in a package. - This utility function is used in custom __getattr__ methods within __init__.py + This utility function is used in custom `__getattr__` methods within `__init__.py` files to dynamically import attributes. Args: attr_name: The name of the attribute to import. - module_name: The name of the module to import from. If `None`, the attribute - is imported from the package itself. + module_name: The name of the module to import from. + + If `None`, the attribute is imported from the package itself. package: The name of the package where the module is located. Raises: diff --git a/libs/core/langchain_core/agents.py b/libs/core/langchain_core/agents.py index 4a020a99c95..76f818b06a6 100644 --- a/libs/core/langchain_core/agents.py +++ b/libs/core/langchain_core/agents.py @@ -1,9 +1,11 @@ """Schema definitions for representing agent actions, observations, and return values. !!! warning + The schema definitions are provided for backwards compatibility. !!! warning + New agents should be built using the [`langchain` library](https://pypi.org/project/langchain/), which provides a simpler and more flexible way to define agents. @@ -21,7 +23,7 @@ A basic agent works in the following manner: the next action. 4. When the agent reaches a stopping condition, it returns a final return value. -The schemas for the agents themselves are defined in langchain.agents.agent. +The schemas for the agents themselves are defined in `langchain.agents.agent`. 
""" from __future__ import annotations @@ -47,9 +49,11 @@ class AgentAction(Serializable): """ tool: str - """The name of the Tool to execute.""" + """The name of the `Tool` to execute.""" + tool_input: str | dict - """The input to pass in to the Tool.""" + """The input to pass in to the `Tool`.""" + log: str """Additional information to log about the action. @@ -60,6 +64,7 @@ class AgentAction(Serializable): useful when `(tool, tool_input)` does not contain full information about the LLM prediction (for example, any `thought` before the tool/tool_input). """ + type: Literal["AgentAction"] = "AgentAction" # Override init to support instantiation by position for backward compat. @@ -114,8 +119,8 @@ class AgentActionMessageLog(AgentAction): This is again useful if `(tool, tool_input)` cannot be used to fully recreate the LLM prediction, and you need that LLM prediction (for future agent iteration). - Compared to `log`, this is useful when the underlying LLM is a - chat model (and therefore returns messages rather than a string). + Compared to `log`, this is useful when the underlying LLM is a chat model (and + therefore returns messages rather than a string). """ # Ignoring type because we're overriding the type from AgentAction. # And this is the correct thing to do in this case. @@ -128,6 +133,7 @@ class AgentStep(Serializable): action: AgentAction """The `AgentAction` that was executed.""" + observation: Any """The result of the `AgentAction`.""" @@ -145,6 +151,7 @@ class AgentFinish(Serializable): return_values: dict """Dictionary of return values.""" + log: str """Additional information to log about the return value. @@ -192,7 +199,7 @@ def _convert_agent_action_to_messages( agent_action: Agent action to convert. Returns: - AIMessage that corresponds to the original tool invocation. + `AIMessage` that corresponds to the original tool invocation. 
""" if isinstance(agent_action, AgentActionMessageLog): return agent_action.message_log diff --git a/libs/core/langchain_core/caches.py b/libs/core/langchain_core/caches.py index 5c7b4bc0cec..2f639410983 100644 --- a/libs/core/langchain_core/caches.py +++ b/libs/core/langchain_core/caches.py @@ -54,6 +54,7 @@ class BaseCache(ABC): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. @@ -65,18 +66,19 @@ class BaseCache(ABC): Returns: On a cache miss, return `None`. On a cache hit, return the cached value. - The cached value is a list of `Generation` (or subclasses). + The cached value is a list of `Generation` (or subclasses). """ @abstractmethod def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: """Update cache based on `prompt` and `llm_string`. - The prompt and llm_string are used to generate a key for the cache. - The key should match that of the lookup method. + The `prompt` and `llm_string` are used to generate a key for the cache. The key + should match that of the lookup method. Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. @@ -86,8 +88,9 @@ class BaseCache(ABC): These invocation parameters are serialized into a string representation. - return_val: The value to be cached. The value is a list of `Generation` - (or subclasses). + return_val: The value to be cached. + + The value is a list of `Generation` (or subclasses). """ @abstractmethod @@ -102,6 +105,7 @@ class BaseCache(ABC): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. 
llm_string: A string representation of the LLM configuration. @@ -114,7 +118,7 @@ class BaseCache(ABC): Returns: On a cache miss, return `None`. On a cache hit, return the cached value. - The cached value is a list of `Generation` (or subclasses). + The cached value is a list of `Generation` (or subclasses). """ return await run_in_executor(None, self.lookup, prompt, llm_string) @@ -128,6 +132,7 @@ class BaseCache(ABC): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. @@ -155,7 +160,9 @@ class InMemoryCache(BaseCache): Args: maxsize: The maximum number of items to store in the cache. + If `None`, the cache has no maximum size. + If the cache exceeds the maximum size, the oldest items are removed. Raises: @@ -172,6 +179,7 @@ class InMemoryCache(BaseCache): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. @@ -186,11 +194,13 @@ class InMemoryCache(BaseCache): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. - return_val: The value to be cached. The value is a list of `Generation` - (or subclasses). + return_val: The value to be cached. + + The value is a list of `Generation` (or subclasses). """ if self._maxsize is not None and len(self._cache) == self._maxsize: del self._cache[next(iter(self._cache))] @@ -206,6 +216,7 @@ class InMemoryCache(BaseCache): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. 
llm_string: A string representation of the LLM configuration. @@ -222,6 +233,7 @@ class InMemoryCache(BaseCache): Args: prompt: A string representation of the prompt. + In the case of a chat model, the prompt is a non-trivial serialization of the prompt into the language model. llm_string: A string representation of the LLM configuration. diff --git a/libs/core/langchain_core/callbacks/__init__.py b/libs/core/langchain_core/callbacks/__init__.py index ef42894bacf..e1f1775248f 100644 --- a/libs/core/langchain_core/callbacks/__init__.py +++ b/libs/core/langchain_core/callbacks/__init__.py @@ -1,4 +1,4 @@ -"""**Callback handlers** allow listening to events in LangChain.""" +"""Callback handlers allow listening to events in LangChain.""" from typing import TYPE_CHECKING diff --git a/libs/core/langchain_core/callbacks/base.py b/libs/core/langchain_core/callbacks/base.py index 062cd7f90bc..220df5c77b7 100644 --- a/libs/core/langchain_core/callbacks/base.py +++ b/libs/core/langchain_core/callbacks/base.py @@ -71,9 +71,11 @@ class LLMManagerMixin: tags: list[str] | None = None, **kwargs: Any, ) -> Any: - """Run on new output token. Only available when streaming is enabled. + """Run on new output token. - For both chat models and non-chat models (legacy LLMs). + Only available when streaming is enabled. + + For both chat models and non-chat models (legacy text completion LLMs). Args: token: The new token. @@ -250,8 +252,9 @@ class CallbackManagerMixin: """Run when LLM starts running. !!! warning - This method is called for non-chat models (regular LLMs). If you're - implementing a handler for a chat model, you should use + + This method is called for non-chat models (regular text completion LLMs). If + you're implementing a handler for a chat model, you should use `on_chat_model_start` instead. Args: @@ -278,6 +281,7 @@ class CallbackManagerMixin: """Run when a chat model starts running. !!! warning + This method is called for chat models. 
If you're implementing a handler for a non-chat model, you should use `on_llm_start` instead. @@ -418,8 +422,9 @@ class RunManagerMixin: Args: name: The name of the custom event. - data: The data for the custom event. Format will match the format specified - by the user. + data: The data for the custom event. + + Format will match the format specified by the user. run_id: The ID of the run. tags: The tags associated with the custom event (includes inherited tags). metadata: The metadata associated with the custom event (includes inherited @@ -496,8 +501,9 @@ class AsyncCallbackHandler(BaseCallbackHandler): """Run when the model starts running. !!! warning - This method is called for non-chat models (regular LLMs). If you're - implementing a handler for a chat model, you should use + + This method is called for non-chat models (regular text completion LLMs). If + you're implementing a handler for a chat model, you should use `on_chat_model_start` instead. Args: @@ -524,6 +530,7 @@ class AsyncCallbackHandler(BaseCallbackHandler): """Run when a chat model starts running. !!! warning + This method is called for chat models. If you're implementing a handler for a non-chat model, you should use `on_llm_start` instead. @@ -553,7 +560,7 @@ class AsyncCallbackHandler(BaseCallbackHandler): ) -> None: """Run on new output token. Only available when streaming is enabled. - For both chat models and non-chat models (legacy LLMs). + For both chat models and non-chat models (legacy text completion LLMs). Args: token: The new token. @@ -878,13 +885,13 @@ class AsyncCallbackHandler(BaseCallbackHandler): Args: name: The name of the custom event. - data: The data for the custom event. Format will match - the format specified by the user. + data: The data for the custom event. + + Format will match the format specified by the user. run_id: The ID of the run. - tags: The tags associated with the custom event - (includes inherited tags). 
+ metadata: The metadata associated with the custom event (includes inherited + metadata). """ @@ -945,9 +952,9 @@ class BaseCallbackManager(CallbackManagerMixin): Returns: The merged callback manager of the same type as the current object. - Example: Merging two callback managers. - + Example: ```python + # Merging two callback managers from langchain_core.callbacks.manager import ( CallbackManager, trace_as_chain_group, diff --git a/libs/core/langchain_core/callbacks/file.py b/libs/core/langchain_core/callbacks/file.py index 7f0c82531c1..3ba0e863c73 100644 --- a/libs/core/langchain_core/callbacks/file.py +++ b/libs/core/langchain_core/callbacks/file.py @@ -1,4 +1,4 @@ -"""Callback Handler that writes to a file.""" +"""Callback handler that writes to a file.""" from __future__ import annotations @@ -19,7 +19,7 @@ _GLOBAL_DEPRECATION_WARNED = False class FileCallbackHandler(BaseCallbackHandler): - """Callback Handler that writes to a file. + """Callback handler that writes to a file. This handler supports both context manager usage (recommended) and direct instantiation (deprecated) for backwards compatibility. @@ -50,9 +50,10 @@ class FileCallbackHandler(BaseCallbackHandler): color: Default color for text output. !!! note - When not used as a context manager, a deprecation warning will be issued - on first use. The file will be opened immediately in `__init__` and closed - in `__del__` or when `close()` is called explicitly. + + When not used as a context manager, a deprecation warning will be issued on + first use. The file will be opened immediately in `__init__` and closed in + `__del__` or when `close()` is called explicitly. """ @@ -81,11 +82,12 @@ class FileCallbackHandler(BaseCallbackHandler): """Enter the context manager. Returns: - The FileCallbackHandler instance.
+ The `FileCallbackHandler` instance. !!! note - The file is already opened in `__init__`, so this just marks that - the handler is being used as a context manager. + + The file is already opened in `__init__`, so this just marks that the + handler is being used as a context manager. """ self._file_opened_in_context = True @@ -196,8 +198,9 @@ class FileCallbackHandler(BaseCallbackHandler): Args: action: The agent action containing the log to write. - color: Color override for this specific output. If `None`, uses - `self.color`. + color: Color override for this specific output. + + If `None`, uses `self.color`. **kwargs: Additional keyword arguments. """ @@ -216,8 +219,9 @@ class FileCallbackHandler(BaseCallbackHandler): Args: output: The tool output to write. - color: Color override for this specific output. If `None`, uses - `self.color`. + color: Color override for this specific output. + + If `None`, uses `self.color`. observation_prefix: Optional prefix to write before the output. llm_prefix: Optional prefix to write after the output. **kwargs: Additional keyword arguments. @@ -237,8 +241,9 @@ class FileCallbackHandler(BaseCallbackHandler): Args: text: The text to write. - color: Color override for this specific output. If `None`, uses - `self.color`. + color: Color override for this specific output. + + If `None`, uses `self.color`. end: String appended after the text. **kwargs: Additional keyword arguments. @@ -253,8 +258,9 @@ class FileCallbackHandler(BaseCallbackHandler): Args: finish: The agent finish object containing the log to write. - color: Color override for this specific output. If `None`, uses - `self.color`. + color: Color override for this specific output. + + If `None`, uses `self.color`. **kwargs: Additional keyword arguments. 
""" diff --git a/libs/core/langchain_core/callbacks/manager.py b/libs/core/langchain_core/callbacks/manager.py index cb8fb9970d1..5a3433ef7e6 100644 --- a/libs/core/langchain_core/callbacks/manager.py +++ b/libs/core/langchain_core/callbacks/manager.py @@ -74,8 +74,8 @@ def trace_as_chain_group( ) -> Generator[CallbackManagerForChainGroup, None, None]: """Get a callback manager for a chain group in a context manager. - Useful for grouping different calls together as a single run even if - they aren't composed in a single chain. + Useful for grouping different calls together as a single run even if they aren't + composed in a single chain. Args: group_name: The name of the chain group. @@ -88,6 +88,7 @@ def trace_as_chain_group( metadata: The metadata to apply to all runs. !!! note + Must have `LANGCHAIN_TRACING_V2` env var set to true to see the trace in LangSmith. @@ -149,13 +150,13 @@ async def atrace_as_chain_group( ) -> AsyncGenerator[AsyncCallbackManagerForChainGroup, None]: """Get an async callback manager for a chain group in a context manager. - Useful for grouping different async calls together as a single run even if - they aren't composed in a single chain. + Useful for grouping different async calls together as a single run even if they + aren't composed in a single chain. Args: group_name: The name of the chain group. - callback_manager: The async callback manager to use, - which manages tracing and other callback behavior. + callback_manager: The async callback manager to use, which manages tracing and + other callback behavior. inputs: The inputs to the chain group. project_name: The name of the project. example_id: The ID of the example. @@ -167,6 +168,7 @@ async def atrace_as_chain_group( The async callback manager for the chain group. !!! note + Must have `LANGCHAIN_TRACING_V2` env var set to true to see the trace in LangSmith. 
@@ -258,16 +260,13 @@ def handle_event( *args: Any, **kwargs: Any, ) -> None: - """Generic event handler for CallbackManager. - - !!! note - This function is used by `LangServe` to handle events. + """Generic event handler for `CallbackManager`. Args: handlers: The list of handlers that will handle the event. event_name: The name of the event (e.g., `'on_llm_start'`). - ignore_condition_name: Name of the attribute defined on handler - that if True will cause the handler to be skipped for the given event. + ignore_condition_name: Name of the attribute defined on handler that if `True` + will cause the handler to be skipped for the given event. *args: The arguments to pass to the event handler. **kwargs: The keyword arguments to pass to the event handler @@ -427,14 +426,11 @@ async def ahandle_event( ) -> None: """Async generic event handler for `AsyncCallbackManager`. - !!! note - This function is used by `LangServe` to handle events. - Args: handlers: The list of handlers that will handle the event. event_name: The name of the event (e.g., `'on_llm_start'`). - ignore_condition_name: Name of the attribute defined on handler - that if True will cause the handler to be skipped for the given event. + ignore_condition_name: Name of the attribute defined on handler that if `True` + will cause the handler to be skipped for the given event. *args: The arguments to pass to the event handler. **kwargs: The keyword arguments to pass to the event handler. @@ -515,7 +511,7 @@ class BaseRunManager(RunManagerMixin): class RunManager(BaseRunManager): - """Sync Run Manager.""" + """Synchronous run manager.""" def on_text( self, @@ -568,7 +564,7 @@ class RunManager(BaseRunManager): class ParentRunManager(RunManager): - """Sync Parent Run Manager.""" + """Synchronous parent run manager.""" def get_child(self, tag: str | None = None) -> CallbackManager: """Get a child callback manager. 
@@ -590,14 +586,14 @@ class ParentRunManager(RunManager): class AsyncRunManager(BaseRunManager, ABC): - """Async Run Manager.""" + """Async run manager.""" @abstractmethod def get_sync(self) -> RunManager: - """Get the equivalent sync RunManager. + """Get the equivalent sync `RunManager`. Returns: - The sync RunManager. + The sync `RunManager`. """ @@ -652,7 +648,7 @@ class AsyncRunManager(BaseRunManager, ABC): class AsyncParentRunManager(AsyncRunManager): - """Async Parent Run Manager.""" + """Async parent run manager.""" def get_child(self, tag: str | None = None) -> AsyncCallbackManager: """Get a child callback manager. @@ -736,6 +732,7 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin): Args: error: The error. **kwargs: Additional keyword arguments. + - response (LLMResult): The response which was generated before the error occurred. """ @@ -757,10 +754,10 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin): """Async callback manager for LLM run.""" def get_sync(self) -> CallbackManagerForLLMRun: - """Get the equivalent sync RunManager. + """Get the equivalent sync `RunManager`. Returns: - The sync RunManager. + The sync `RunManager`. """ return CallbackManagerForLLMRun( @@ -836,11 +833,10 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin): Args: error: The error. **kwargs: Additional keyword arguments. + - response (LLMResult): The response which was generated before the error occurred. - - """ if not self.handlers: return @@ -950,10 +946,10 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin): """Async callback manager for chain run.""" def get_sync(self) -> CallbackManagerForChainRun: - """Get the equivalent sync RunManager. + """Get the equivalent sync `RunManager`. Returns: - The sync RunManager. + The sync `RunManager`. 
""" return CallbackManagerForChainRun( run_id=self.run_id, @@ -1113,10 +1109,10 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin): """Async callback manager for tool run.""" def get_sync(self) -> CallbackManagerForToolRun: - """Get the equivalent sync RunManager. + """Get the equivalent sync `RunManager`. Returns: - The sync RunManager. + The sync `RunManager`. """ return CallbackManagerForToolRun( run_id=self.run_id, @@ -1237,10 +1233,10 @@ class AsyncCallbackManagerForRetrieverRun( """Async callback manager for retriever run.""" def get_sync(self) -> CallbackManagerForRetrieverRun: - """Get the equivalent sync RunManager. + """Get the equivalent sync `RunManager`. Returns: - The sync RunManager. + The sync `RunManager`. """ return CallbackManagerForRetrieverRun( @@ -1473,14 +1469,17 @@ class CallbackManager(BaseCallbackManager): Args: serialized: Serialized representation of the tool. input_str: The input to the tool as a string. + Non-string inputs are cast to strings. run_id: ID for the run. parent_run_id: The ID of the parent run. inputs: The original input to the tool if provided. - Recommended for usage instead of input_str when the original - input is needed. - If provided, the inputs are expected to be formatted as a dict. - The keys will correspond to the named-arguments in the tool. + + Recommended for usage instead of input_str when the original input is + needed. + + If provided, the inputs are expected to be formatted as a dict. The keys + will correspond to the named-arguments in the tool. **kwargs: The keyword arguments to pass to the event handler Returns: @@ -1572,9 +1571,9 @@ class CallbackManager(BaseCallbackManager): ) -> None: """Dispatch an adhoc event to the handlers (async version). - This event should NOT be used in any internal LangChain code. The event - is meant specifically for users of the library to dispatch custom - events that are tailored to their application. 
+ This event should NOT be used in any internal LangChain code. The event is meant + specifically for users of the library to dispatch custom events that are + tailored to their application. Args: name: The name of the adhoc event. @@ -1693,17 +1692,16 @@ class CallbackManagerForChainGroup(CallbackManager): ) -> CallbackManagerForChainGroup: """Merge the group callback manager with another callback manager. - Overwrites the merge method in the base class to ensure that the - parent run manager is preserved. Keeps the parent_run_manager - from the current object. + Overwrites the merge method in the base class to ensure that the parent run + manager is preserved. Keeps the `parent_run_manager` from the current object. Returns: A copy of the current object with the handlers, tags, and other attributes merged from the other object. - Example: Merging two callback managers. - + Example: ```python + # Merging two callback managers from langchain_core.callbacks.manager import ( CallbackManager, trace_as_chain_group, @@ -1800,7 +1798,7 @@ class AsyncCallbackManager(BaseCallbackManager): **kwargs: Additional keyword arguments. Returns: - The list of async callback managers, one for each LLM Run corresponding to + The list of async callback managers, one for each LLM run corresponding to each prompt. """ inline_tasks = [] @@ -1888,8 +1886,8 @@ class AsyncCallbackManager(BaseCallbackManager): **kwargs: Additional keyword arguments. Returns: - The list of async callback managers, one for each LLM Run corresponding to - each inner message list. + The list of async callback managers, one for each LLM run corresponding to + each inner message list. """ inline_tasks = [] non_inline_tasks = [] @@ -2045,9 +2043,9 @@ class AsyncCallbackManager(BaseCallbackManager): ) -> None: """Dispatch an adhoc event to the handlers (async version). - This event should NOT be used in any internal LangChain code. 
The event - is meant specifically for users of the library to dispatch custom - events that are tailored to their application. + This event should NOT be used in any internal LangChain code. The event is meant + specifically for users of the library to dispatch custom events that are + tailored to their application. Args: name: The name of the adhoc event. @@ -2213,17 +2211,16 @@ class AsyncCallbackManagerForChainGroup(AsyncCallbackManager): ) -> AsyncCallbackManagerForChainGroup: """Merge the group callback manager with another callback manager. - Overwrites the merge method in the base class to ensure that the - parent run manager is preserved. Keeps the parent_run_manager - from the current object. + Overwrites the merge method in the base class to ensure that the parent run + manager is preserved. Keeps the `parent_run_manager` from the current object. Returns: - A copy of the current AsyncCallbackManagerForChainGroup - with the handlers, tags, etc. of the other callback manager merged in. - - Example: Merging two callback managers. + A copy of the current `AsyncCallbackManagerForChainGroup` with the handlers, + tags, etc. of the other callback manager merged in. + Example: ```python + # Merging two callback managers from langchain_core.callbacks.manager import ( CallbackManager, atrace_as_chain_group, @@ -2482,14 +2479,17 @@ async def adispatch_custom_event( Args: name: The name of the adhoc event. - data: The data for the adhoc event. Free form data. Ideally should be - JSON serializable to avoid serialization issues downstream, but - this is not enforced. - config: Optional config object. Mirrors the async API but not strictly needed. + data: The data for the adhoc event. + + Free form data. Ideally should be JSON serializable to avoid serialization + issues downstream, but this is not enforced. + config: Optional config object. + + Mirrors the async API but not strictly needed. 
Raises: - RuntimeError: If there is no parent run ID available to associate - the event with. + RuntimeError: If there is no parent run ID available to associate the event + with. Example: ```python @@ -2562,11 +2562,11 @@ async def adispatch_custom_event( ``` !!! warning - If using python <= 3.10 and async, you MUST - specify the `config` parameter or the function will raise an error. - This is due to a limitation in asyncio for python <= 3.10 that prevents - LangChain from automatically propagating the config object on the user's - behalf. + + If using python 3.10 and async, you MUST specify the `config` parameter or the + function will raise an error. This is due to a limitation in asyncio for python + 3.10 that prevents LangChain from automatically propagating the config object on + the user's behalf. """ # Import locally to prevent circular imports. from langchain_core.runnables.config import ( # noqa: PLC0415 @@ -2604,14 +2604,17 @@ def dispatch_custom_event( Args: name: The name of the adhoc event. - data: The data for the adhoc event. Free form data. Ideally should be - JSON serializable to avoid serialization issues downstream, but - this is not enforced. - config: Optional config object. Mirrors the async API but not strictly needed. + data: The data for the adhoc event. + + Free form data. Ideally should be JSON serializable to avoid serialization + issues downstream, but this is not enforced. + config: Optional config object. + + Mirrors the async API but not strictly needed. Raises: - RuntimeError: If there is no parent run ID available to associate - the event with. + RuntimeError: If there is no parent run ID available to associate the event + with. 
Example: ```python diff --git a/libs/core/langchain_core/callbacks/stdout.py b/libs/core/langchain_core/callbacks/stdout.py index 95259cfb38a..8cfce2ce7c4 100644 --- a/libs/core/langchain_core/callbacks/stdout.py +++ b/libs/core/langchain_core/callbacks/stdout.py @@ -1,4 +1,4 @@ -"""Callback Handler that prints to std out.""" +"""Callback handler that prints to std out.""" from __future__ import annotations @@ -14,7 +14,7 @@ if TYPE_CHECKING: class StdOutCallbackHandler(BaseCallbackHandler): - """Callback Handler that prints to std out.""" + """Callback handler that prints to std out.""" def __init__(self, color: str | None = None) -> None: """Initialize callback handler. diff --git a/libs/core/langchain_core/callbacks/streaming_stdout.py b/libs/core/langchain_core/callbacks/streaming_stdout.py index 0860123b2e4..920fef80bde 100644 --- a/libs/core/langchain_core/callbacks/streaming_stdout.py +++ b/libs/core/langchain_core/callbacks/streaming_stdout.py @@ -16,7 +16,10 @@ if TYPE_CHECKING: class StreamingStdOutCallbackHandler(BaseCallbackHandler): - """Callback handler for streaming. Only works with LLMs that support streaming.""" + """Callback handler for streaming. + + !!! warning "Only works with LLMs that support streaming." + """ def on_llm_start( self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any diff --git a/libs/core/langchain_core/callbacks/usage.py b/libs/core/langchain_core/callbacks/usage.py index 5590a36fc7c..a83265d549e 100644 --- a/libs/core/langchain_core/callbacks/usage.py +++ b/libs/core/langchain_core/callbacks/usage.py @@ -1,4 +1,4 @@ -"""Callback Handler that tracks AIMessage.usage_metadata.""" +"""Callback Handler that tracks `AIMessage.usage_metadata`.""" import threading from collections.abc import Generator @@ -16,7 +16,7 @@ from langchain_core.tracers.context import register_configure_hook class UsageMetadataCallbackHandler(BaseCallbackHandler): - """Callback Handler that tracks AIMessage.usage_metadata. 
+ """Callback Handler that tracks `AIMessage.usage_metadata`. Example: ```python @@ -31,6 +31,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler): result_2 = llm_2.invoke("Hello", config={"callbacks": [callback]}) callback.usage_metadata ``` + ```txt {'gpt-4o-mini-2024-07-18': {'input_tokens': 8, 'output_tokens': 10, @@ -48,7 +49,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler): """ def __init__(self) -> None: - """Initialize the UsageMetadataCallbackHandler.""" + """Initialize the `UsageMetadataCallbackHandler`.""" super().__init__() self._lock = threading.Lock() self.usage_metadata: dict[str, UsageMetadata] = {} @@ -116,6 +117,7 @@ def get_usage_metadata_callback( llm_2.invoke("Hello") print(cb.usage_metadata) ``` + ```txt { "gpt-4o-mini-2024-07-18": { diff --git a/libs/core/langchain_core/chat_history.py b/libs/core/langchain_core/chat_history.py index 7c315d44d46..a4a76f534a3 100644 --- a/libs/core/langchain_core/chat_history.py +++ b/libs/core/langchain_core/chat_history.py @@ -1,4 +1,4 @@ -"""**Chat message history** stores a history of the message interactions in a chat.""" +"""Chat message history stores a history of the message interactions in a chat.""" from __future__ import annotations @@ -26,19 +26,19 @@ class BaseChatMessageHistory(ABC): Implementations are expected to over-ride all or some of the following methods: - * add_messages: sync variant for bulk addition of messages - * aadd_messages: async variant for bulk addition of messages - * messages: sync variant for getting messages - * aget_messages: async variant for getting messages - * clear: sync variant for clearing messages - * aclear: async variant for clearing messages + * `add_messages`: sync variant for bulk addition of messages + * `aadd_messages`: async variant for bulk addition of messages + * `messages`: sync variant for getting messages + * `aget_messages`: async variant for getting messages + * `clear`: sync variant for clearing messages + * `aclear`: async 
variant for clearing messages - add_messages contains a default implementation that calls add_message + `add_messages` contains a default implementation that calls `add_message` for each message in the sequence. This is provided for backwards compatibility - with existing implementations which only had add_message. + with existing implementations which only had `add_message`. Async variants all have default implementations that call the sync variants. - Implementers can choose to over-ride the async implementations to provide + Implementers can choose to override the async implementations to provide truly async implementations. Usage guidelines: @@ -47,8 +47,7 @@ class BaseChatMessageHistory(ABC): over `add_message` or other variants like `add_user_message` and `add_ai_message` to avoid unnecessary round-trips to the underlying persistence layer. - Example: Shows a default implementation. - + Example: ```python import json import os @@ -93,9 +92,8 @@ class BaseChatMessageHistory(ABC): messages: list[BaseMessage] """A property or attribute that returns a list of messages. - In general, getting the messages may involve IO to the underlying - persistence layer, so this operation is expected to incur some - latency. + In general, getting the messages may involve IO to the underlying persistence + layer, so this operation is expected to incur some latency. """ async def aget_messages(self) -> list[BaseMessage]: @@ -103,8 +101,8 @@ class BaseChatMessageHistory(ABC): Can over-ride this method to provide an efficient async implementation. - In general, fetching messages may involve IO to the underlying - persistence layer. + In general, fetching messages may involve IO to the underlying persistence + layer. Returns: The messages. @@ -115,6 +113,7 @@ class BaseChatMessageHistory(ABC): """Convenience method for adding a human message string to the store. !!! note + This is a convenience method. 
Code should favor the bulk `add_messages` interface instead to save on round-trips to the persistence layer. @@ -132,6 +131,7 @@ class BaseChatMessageHistory(ABC): """Convenience method for adding an `AIMessage` string to the store. !!! note + This is a convenience method. Code should favor the bulk `add_messages` interface instead to save on round-trips to the persistence layer. @@ -149,7 +149,7 @@ class BaseChatMessageHistory(ABC): """Add a Message object to the store. Args: - message: A BaseMessage object to store. + message: A `BaseMessage` object to store. Raises: NotImplementedError: If the sub-class has not implemented an efficient @@ -212,8 +212,9 @@ class InMemoryChatMessageHistory(BaseChatMessageHistory, BaseModel): """Async version of getting messages. Can over-ride this method to provide an efficient async implementation. - In general, fetching messages may involve IO to the underlying - persistence layer. + + In general, fetching messages may involve IO to the underlying persistence + layer. Returns: List of messages. diff --git a/libs/core/langchain_core/chat_sessions.py b/libs/core/langchain_core/chat_sessions.py index 23c60f3fe6c..ed8c6343c50 100644 --- a/libs/core/langchain_core/chat_sessions.py +++ b/libs/core/langchain_core/chat_sessions.py @@ -14,5 +14,6 @@ class ChatSession(TypedDict, total=False): messages: Sequence[BaseMessage] """A sequence of the LangChain chat messages loaded from the source.""" + functions: Sequence[dict] """A sequence of the function calling specs for the messages.""" diff --git a/libs/core/langchain_core/document_loaders/base.py b/libs/core/langchain_core/document_loaders/base.py index e74bc5976c0..448c85988dc 100644 --- a/libs/core/langchain_core/document_loaders/base.py +++ b/libs/core/langchain_core/document_loaders/base.py @@ -24,10 +24,10 @@ except ImportError: class BaseLoader(ABC): # noqa: B024 - """Interface for Document Loader. + """Interface for document loader. 
- Implementations should implement the lazy-loading method using generators - to avoid loading all documents into memory at once. + Implementations should implement the lazy-loading method using generators to avoid + loading all documents into memory at once. `load` is provided just for user convenience and should not be overridden. """ @@ -61,14 +61,15 @@ class BaseLoader(ABC): # noqa: B024 Args: text_splitter: `TextSplitter` instance to use for splitting documents. + Defaults to `RecursiveCharacterTextSplitter`. Raises: - ImportError: If `langchain-text-splitters` is not installed - and no `text_splitter` is provided. + ImportError: If `langchain-text-splitters` is not installed and no + `text_splitter` is provided. Returns: - List of `Document`. + List of `Document` objects. """ if text_splitter is None: if not _HAS_TEXT_SPLITTERS: @@ -116,11 +117,11 @@ class BaseLoader(ABC): # noqa: B024 class BaseBlobParser(ABC): """Abstract interface for blob parsers. - A blob parser provides a way to parse raw data stored in a blob into one - or more `Document` objects. + A blob parser provides a way to parse raw data stored in a blob into one or more + `Document` objects. - The parser can be composed with blob loaders, making it easy to reuse - a parser independent of how the blob was originally loaded. + The parser can be composed with blob loaders, making it easy to reuse a parser + independent of how the blob was originally loaded. """ @abstractmethod diff --git a/libs/core/langchain_core/document_loaders/blob_loaders.py b/libs/core/langchain_core/document_loaders/blob_loaders.py index 8c6832177fd..c8e807a47fc 100644 --- a/libs/core/langchain_core/document_loaders/blob_loaders.py +++ b/libs/core/langchain_core/document_loaders/blob_loaders.py @@ -1,8 +1,7 @@ """Schema for Blobs and Blob Loaders. -The goal is to facilitate decoupling of content loading from content parsing code. - -In addition, content loading code should provide a lazy loading interface by default. 
+The goal is to facilitate decoupling of content loading from content parsing code. In +addition, content loading code should provide a lazy loading interface by default. """ from __future__ import annotations @@ -20,8 +19,8 @@ if TYPE_CHECKING: class BlobLoader(ABC): """Abstract interface for blob loaders implementation. - Implementer should be able to load raw content from a storage system according - to some criteria and return the raw content lazily as a stream of blobs. + Implementer should be able to load raw content from a storage system according to + some criteria and return the raw content lazily as a stream of blobs. """ @abstractmethod diff --git a/libs/core/langchain_core/document_loaders/langsmith.py b/libs/core/langchain_core/document_loaders/langsmith.py index 610222a1ea2..23a44e05d40 100644 --- a/libs/core/langchain_core/document_loaders/langsmith.py +++ b/libs/core/langchain_core/document_loaders/langsmith.py @@ -21,7 +21,7 @@ class LangSmithLoader(BaseLoader): example into the `Document` metadata. This allows you to easily create few-shot example retrievers from the loaded documents. - ??? note "Lazy loading example" + ??? example "Lazy loading" ```python from langchain_core.document_loaders import LangSmithLoader @@ -60,27 +60,35 @@ class LangSmithLoader(BaseLoader): Args: dataset_id: The ID of the dataset to filter by. dataset_name: The name of the dataset to filter by. - content_key: The inputs key to set as Document page content. `'.'` characters - are interpreted as nested keys. E.g. `content_key="first.second"` will - result in + content_key: The inputs key to set as `Document` page content. + + `'.'` characters are interpreted as nested keys, e.g. + `content_key="first.second"` will result in `Document(page_content=format_content(example.inputs["first"]["second"]))` format_content: Function for converting the content extracted from the example - inputs into a string. Defaults to JSON-encoding the contents. + inputs into a string. 
+ + Defaults to JSON-encoding the contents. example_ids: The IDs of the examples to filter by. as_of: The dataset version tag or timestamp to retrieve the examples as of. + Response examples will only be those that were present at the time of the tagged (or timestamped) version. - splits: A list of dataset splits, which are - divisions of your dataset such as `train`, `test`, or `validation`. + splits: A list of dataset splits, which are divisions of your dataset such + as `train`, `test`, or `validation`. + Returns examples only from the specified splits. inline_s3_urls: Whether to inline S3 URLs. offset: The offset to start from. limit: The maximum number of examples to return. metadata: Metadata to filter by. filter: A structured filter string to apply to the examples. - client: LangSmith Client. If not provided will be initialized from below args. - client_kwargs: Keyword args to pass to LangSmith client init. Should only be - specified if `client` isn't. + client: LangSmith Client. + + If not provided will be initialized from below args. + client_kwargs: Keyword args to pass to LangSmith client init. + + Should only be specified if `client` isn't. Raises: ValueError: If both `client` and `client_kwargs` are provided. diff --git a/libs/core/langchain_core/documents/__init__.py b/libs/core/langchain_core/documents/__init__.py index d7aa87f1943..128a9dcfba6 100644 --- a/libs/core/langchain_core/documents/__init__.py +++ b/libs/core/langchain_core/documents/__init__.py @@ -4,6 +4,7 @@ This module provides core abstractions for handling data in retrieval-augmented generation (RAG) pipelines, vector stores, and document processing workflows. !!! warning "Documents vs. message content" + This module is distinct from `langchain_core.messages.content`, which provides multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within messages). 
diff --git a/libs/core/langchain_core/documents/base.py b/libs/core/langchain_core/documents/base.py index 0341c8c184e..1dc9661c061 100644 --- a/libs/core/langchain_core/documents/base.py +++ b/libs/core/langchain_core/documents/base.py @@ -7,6 +7,7 @@ This module contains core abstractions for **data retrieval and processing workf - `Document`: Text content for retrieval (RAG, vector stores, semantic search) !!! note "Not for LLM chat messages" + These classes are for data processing pipelines, not LLM I/O. For multimodal content in chat messages (images, audio in conversations), see `langchain.messages` content blocks instead. @@ -36,6 +37,7 @@ class BaseMedia(Serializable): Provides common fields for content that needs to be stored, indexed, or searched. !!! note + For multimodal content in **chat messages** (images, audio sent to/from LLMs), use `langchain.messages` content blocks instead. """ @@ -113,13 +115,16 @@ class Blob(BaseMedia): data: bytes | str | None = None """Raw data associated with the `Blob`.""" + mimetype: str | None = None """MIME type, not to be confused with a file extension.""" + encoding: str = "utf-8" """Encoding to use if decoding the bytes into a string. Uses `utf-8` as default encoding if decoding to string. """ + path: PathLike | None = None """Location where the original content was found.""" @@ -284,6 +289,7 @@ class Document(BaseMedia): """Class for storing a piece of text and associated metadata. !!! note + `Document` is for **retrieval workflows**, not chat I/O. For sending text to an LLM in a conversation, use message types from `langchain.messages`. @@ -299,13 +305,14 @@ class Document(BaseMedia): page_content: str """String text.""" + type: Literal["Document"] = "Document" def __init__(self, page_content: str, **kwargs: Any) -> None: """Pass page_content in as positional or named arg.""" # my-py is complaining that page_content is not defined on the base class. 
# Here, we're relying on pydantic base class to handle the validation. - super().__init__(page_content=page_content, **kwargs) + super().__init__(page_content=page_content, **kwargs) # type: ignore[call-arg,unused-ignore] @classmethod def is_lc_serializable(cls) -> bool: @@ -317,7 +324,7 @@ class Document(BaseMedia): """Get the namespace of the LangChain object. Returns: - ["langchain", "schema", "document"] + `["langchain", "schema", "document"]` """ return ["langchain", "schema", "document"] @@ -329,12 +336,12 @@ class Document(BaseMedia): """ # The format matches pydantic format for __str__. # - # The purpose of this change is to make sure that user code that - # feeds Document objects directly into prompts remains unchanged - # due to the addition of the id field (or any other fields in the future). + # The purpose of this change is to make sure that user code that feeds + # Document objects directly into prompts remains unchanged due to the addition + # of the id field (or any other fields in the future). # - # This override will likely be removed in the future in favor of - # a more general solution of formatting content directly inside the prompts. + # This override will likely be removed in the future in favor of a more general + # solution of formatting content directly inside the prompts. if self.metadata: return f"page_content='{self.page_content}' metadata={self.metadata}" return f"page_content='{self.page_content}'" diff --git a/libs/core/langchain_core/load/serializable.py b/libs/core/langchain_core/load/serializable.py index 431ff02dbaa..b447ddfc8bc 100644 --- a/libs/core/langchain_core/load/serializable.py +++ b/libs/core/langchain_core/load/serializable.py @@ -103,6 +103,7 @@ class Serializable(BaseModel, ABC): the correct class to instantiate. Please see the `Reviver` class in `langchain_core.load.load` for more details. + During deserialization an additional mapping is handle classes that have moved or been renamed across package versions. 
diff --git a/libs/core/langchain_core/output_parsers/base.py b/libs/core/langchain_core/output_parsers/base.py index 7796e2e59ce..861e8ba7777 100644 --- a/libs/core/langchain_core/output_parsers/base.py +++ b/libs/core/langchain_core/output_parsers/base.py @@ -35,10 +35,13 @@ class BaseLLMOutputParser(ABC, Generic[T]): """Parse a list of candidate model `Generation` objects into a specific format. Args: - result: A list of `Generation` to be parsed. The `Generation` objects are - assumed to be different candidate outputs for a single model input. - partial: Whether to parse the output as a partial result. This is useful - for parsers that can parse partial results. + result: A list of `Generation` to be parsed. + + The `Generation` objects are assumed to be different candidate outputs + for a single model input. + partial: Whether to parse the output as a partial result. + + This is useful for parsers that can parse partial results. Returns: Structured output. @@ -50,10 +53,13 @@ class BaseLLMOutputParser(ABC, Generic[T]): """Parse a list of candidate model `Generation` objects into a specific format. Args: - result: A list of `Generation` to be parsed. The Generations are assumed - to be different candidate outputs for a single model input. - partial: Whether to parse the output as a partial result. This is useful - for parsers that can parse partial results. + result: A list of `Generation` to be parsed. + + The Generations are assumed to be different candidate outputs for a + single model input. + partial: Whether to parse the output as a partial result. + + This is useful for parsers that can parse partial results. Returns: Structured output. @@ -241,13 +247,16 @@ class BaseOutputParser( """Parse a list of candidate model `Generation` objects into a specific format. The return value is parsed from only the first `Generation` in the result, which - is assumed to be the highest-likelihood `Generation`. + is assumed to be the highest-likelihood `Generation`. 
Args: - result: A list of `Generation` to be parsed. The `Generation` objects are - assumed to be different candidate outputs for a single model input. - partial: Whether to parse the output as a partial result. This is useful - for parsers that can parse partial results. + result: A list of `Generation` to be parsed. + + The `Generation` objects are assumed to be different candidate outputs + for a single model input. + partial: Whether to parse the output as a partial result. + + This is useful for parsers that can parse partial results. Returns: Structured output. @@ -271,13 +280,16 @@ class BaseOutputParser( """Parse a list of candidate model `Generation` objects into a specific format. The return value is parsed from only the first `Generation` in the result, which - is assumed to be the highest-likelihood `Generation`. + is assumed to be the highest-likelihood `Generation`. Args: - result: A list of `Generation` to be parsed. The `Generation` objects are - assumed to be different candidate outputs for a single model input. - partial: Whether to parse the output as a partial result. This is useful - for parsers that can parse partial results. + result: A list of `Generation` to be parsed. + + The `Generation` objects are assumed to be different candidate outputs + for a single model input. + partial: Whether to parse the output as a partial result. + + This is useful for parsers that can parse partial results. Returns: Structured output. @@ -303,9 +315,8 @@ class BaseOutputParser( ) -> Any: """Parse the output of an LLM call with the input prompt for context. - The prompt is largely provided in the event the `OutputParser` wants - to retry or fix the output in some way, and needs information from - the prompt to do so. + The prompt is largely provided in the event the `OutputParser` wants to retry or + fix the output in some way, and needs information from the prompt to do so. Args: completion: String output of a language model. 
diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py index fc2b43ee01c..829e042c7c9 100644 --- a/libs/core/langchain_core/output_parsers/json.py +++ b/libs/core/langchain_core/output_parsers/json.py @@ -34,16 +34,18 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): Probably the most reliable output parser for getting structured data that does *not* use function calling. - When used in streaming mode, it will yield partial JSON objects containing - all the keys that have been returned so far. + When used in streaming mode, it will yield partial JSON objects containing all the + keys that have been returned so far. - In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the - difference between the previous and the current object. + In streaming, if `diff` is set to `True`, yields `JSONPatch` operations describing + the difference between the previous and the current object. """ pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type] """The Pydantic object to use for validation. - If `None`, no validation is performed.""" + + If `None`, no validation is performed. + """ @override def _diff(self, prev: Any | None, next: Any) -> Any: @@ -62,8 +64,10 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): Args: result: The result of the LLM call. partial: Whether to parse partial JSON objects. - If `True`, the output will be a JSON object containing - all the keys that have been returned so far. + + If `True`, the output will be a JSON object containing all the keys that + have been returned so far. + If `False`, the output will be the full JSON object. 
Returns: diff --git a/libs/core/langchain_core/output_parsers/list.py b/libs/core/langchain_core/output_parsers/list.py index 16b99b64f6a..834c9ec153a 100644 --- a/libs/core/langchain_core/output_parsers/list.py +++ b/libs/core/langchain_core/output_parsers/list.py @@ -24,7 +24,7 @@ def droplastn( iter: Iterator[T], # noqa: A002 n: int, ) -> Iterator[T]: - """Drop the last n elements of an iterator. + """Drop the last `n` elements of an iterator. Args: iter: The iterator to drop elements from. diff --git a/libs/core/langchain_core/output_parsers/openai_tools.py b/libs/core/langchain_core/output_parsers/openai_tools.py index 037afd4312e..0b756c41491 100644 --- a/libs/core/langchain_core/output_parsers/openai_tools.py +++ b/libs/core/langchain_core/output_parsers/openai_tools.py @@ -82,14 +82,14 @@ def make_invalid_tool_call( raw_tool_call: dict[str, Any], error_msg: str | None, ) -> InvalidToolCall: - """Create an InvalidToolCall from a raw tool call. + """Create an `InvalidToolCall` from a raw tool call. Args: raw_tool_call: The raw tool call. error_msg: The error message. Returns: - An InvalidToolCall instance with the error message. + An `InvalidToolCall` instance with the error message. """ return invalid_tool_call( name=raw_tool_call["function"]["name"], @@ -147,17 +147,20 @@ class JsonOutputToolsParser(BaseCumulativeTransformOutputParser[Any]): Useful when the parsed output may include unicode characters or new lines. """ + return_id: bool = False """Whether to return the tool call id.""" + first_tool_only: bool = False """Whether to return only the first tool call. - If `False`, the result will be a list of tool calls, or an empty list - if no tool calls are found. + If `False`, the result will be a list of tool calls, or an empty list if no tool + calls are found. 
- If true, and multiple tool calls are found, only the first one will be returned, + If `True`, and multiple tool calls are found, only the first one will be returned, and the other tool calls will be ignored. - If no tool calls are found, None will be returned. + + If no tool calls are found, `None` will be returned. """ def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any: @@ -166,8 +169,10 @@ class JsonOutputToolsParser(BaseCumulativeTransformOutputParser[Any]): Args: result: The result of the LLM call. partial: Whether to parse partial JSON. + If `True`, the output will be a JSON object containing all the keys that have been returned so far. + If `False`, the output will be the full JSON object. Returns: @@ -304,16 +309,18 @@ class PydanticToolsParser(JsonOutputToolsParser): tools: Annotated[list[TypeBaseModel], SkipValidation()] """The tools to parse.""" - # TODO: Support more granular streaming of objects. Currently only streams once all - # Pydantic object fields are present. + # TODO: Support more granular streaming of objects. + # Currently only streams once all Pydantic object fields are present. def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any: """Parse the result of an LLM call to a list of Pydantic objects. Args: result: The result of the LLM call. partial: Whether to parse partial JSON. - If `True`, the output will be a JSON object containing - all the keys that have been returned so far. + + If `True`, the output will be a JSON object containing all the keys that + have been returned so far. + If `False`, the output will be the full JSON object. Returns: @@ -321,8 +328,8 @@ class PydanticToolsParser(JsonOutputToolsParser): Raises: ValueError: If the tool call arguments are not a dict. - ValidationError: If the tool call arguments do not conform - to the Pydantic model. + ValidationError: If the tool call arguments do not conform to the Pydantic + model. 
""" json_results = super().parse_result(result, partial=partial) if not json_results: diff --git a/libs/core/langchain_core/output_parsers/pydantic.py b/libs/core/langchain_core/output_parsers/pydantic.py index 9a9f4f13111..7a7eee972df 100644 --- a/libs/core/langchain_core/output_parsers/pydantic.py +++ b/libs/core/langchain_core/output_parsers/pydantic.py @@ -60,12 +60,13 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]): Args: result: The result of the LLM call. partial: Whether to parse partial JSON objects. - If `True`, the output will be a JSON object containing - all the keys that have been returned so far. + + If `True`, the output will be a JSON object containing all the keys that + have been returned so far. Raises: - OutputParserException: If the result is not valid JSON - or does not conform to the Pydantic model. + OutputParserException: If the result is not valid JSON or does not conform + to the Pydantic model. Returns: The parsed Pydantic object. diff --git a/libs/core/langchain_core/output_parsers/transform.py b/libs/core/langchain_core/output_parsers/transform.py index b3cf0ea1d75..f04d66b6851 100644 --- a/libs/core/langchain_core/output_parsers/transform.py +++ b/libs/core/langchain_core/output_parsers/transform.py @@ -100,8 +100,8 @@ class BaseCumulativeTransformOutputParser(BaseTransformOutputParser[T]): """Base class for an output parser that can handle streaming input.""" diff: bool = False - """In streaming mode, whether to yield diffs between the previous and current - parsed output, or just the current parsed output. + """In streaming mode, whether to yield diffs between the previous and current parsed + output, or just the current parsed output. 
""" def _diff( diff --git a/libs/core/langchain_core/output_parsers/xml.py b/libs/core/langchain_core/output_parsers/xml.py index 55e93542d7f..c65a1db3299 100644 --- a/libs/core/langchain_core/output_parsers/xml.py +++ b/libs/core/langchain_core/output_parsers/xml.py @@ -42,16 +42,18 @@ Here are the output tags: class _StreamingParser: """Streaming parser for XML. - This implementation is pulled into a class to avoid implementation - drift between transform and atransform of the `XMLOutputParser`. + This implementation is pulled into a class to avoid implementation drift between + `transform` and `atransform` of the `XMLOutputParser`. """ def __init__(self, parser: Literal["defusedxml", "xml"]) -> None: """Initialize the streaming parser. Args: - parser: Parser to use for XML parsing. Can be either `'defusedxml'` or - `'xml'`. See documentation in `XMLOutputParser` for more information. + parser: Parser to use for XML parsing. + + Can be either `'defusedxml'` or `'xml'`. See documentation in + `XMLOutputParser` for more information. Raises: ImportError: If `defusedxml` is not installed and the `defusedxml` parser is @@ -160,30 +162,35 @@ class XMLOutputParser(BaseTransformOutputParser): For example, with `tags=["foo", "bar", "baz"]`: 1. A well-formatted XML instance: - `"\n \n \n \n"` + `'\n \n \n \n'` 2. A badly-formatted XML instance (missing closing tag for 'bar'): - `"\n \n "` + `'\n \n '` 3. A badly-formatted XML instance (unexpected 'tag' element): - `"\n \n \n"` + `'\n \n \n'` """ encoding_matcher: re.Pattern = re.compile( r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL ) + parser: Literal["defusedxml", "xml"] = "defusedxml" - """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`. + """Parser to use for XML parsing. - * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities - present in some distributions of Python's standard library xml. 
- `defusedxml` is a wrapper around the standard library parser that - sets up the parser with secure defaults. - * `'xml'` is the standard library parser. + Can be either `'defusedxml'` or `'xml'`. - Use `xml` only if you are sure that your distribution of the standard library is not - vulnerable to XML vulnerabilities. + - `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities + present in some distributions of Python's standard library xml. `defusedxml` is + a wrapper around the standard library parser that sets up the parser with secure + defaults. + - `'xml'` is the standard library parser. - Please review the following resources for more information: + !!! warning + + Use `xml` only if you are sure that your distribution of the standard library is + not vulnerable to XML vulnerabilities. + + Review the following resources for more information: * https://docs.python.org/3/library/xml.html#xml-vulnerabilities * https://github.com/tiran/defusedxml diff --git a/libs/core/langchain_core/outputs/__init__.py b/libs/core/langchain_core/outputs/__init__.py index 07b35d95591..d579010ee76 100644 --- a/libs/core/langchain_core/outputs/__init__.py +++ b/libs/core/langchain_core/outputs/__init__.py @@ -12,9 +12,9 @@ When invoking models via the standard runnable methods (e.g. invoke, batch, etc. - LLMs will return regular text strings. In addition, users can access the raw output of either LLMs or chat models via -callbacks. The `on_chat_model_end` and `on_llm_end` callbacks will return an -LLMResult object containing the generated outputs and any additional information -returned by the model provider. +callbacks. The `on_chat_model_end` and `on_llm_end` callbacks will return an `LLMResult` +object containing the generated outputs and any additional information returned by the +model provider. In general, if information is already available in the AIMessage object, it is recommended to access it from there rather than from the `LLMResult` object. 
diff --git a/libs/core/langchain_core/outputs/chat_generation.py b/libs/core/langchain_core/outputs/chat_generation.py index cebbead8ae1..bd61727219b 100644 --- a/libs/core/langchain_core/outputs/chat_generation.py +++ b/libs/core/langchain_core/outputs/chat_generation.py @@ -17,26 +17,26 @@ if TYPE_CHECKING: class ChatGeneration(Generation): """A single chat generation output. - A subclass of `Generation` that represents the response from a chat model - that generates chat messages. + A subclass of `Generation` that represents the response from a chat model that + generates chat messages. - The `message` attribute is a structured representation of the chat message. - Most of the time, the message will be of type `AIMessage`. + The `message` attribute is a structured representation of the chat message. Most of + the time, the message will be of type `AIMessage`. Users working with chat models will usually access information via either - `AIMessage` (returned from runnable interfaces) or `LLMResult` (available - via callbacks). + `AIMessage` (returned from runnable interfaces) or `LLMResult` (available via + callbacks). """ text: str = "" """The text contents of the output message. - !!! warning - SHOULD NOT BE SET DIRECTLY! + !!! warning "SHOULD NOT BE SET DIRECTLY!" 
""" message: BaseMessage """The message output by the chat model.""" + # Override type to be ChatGeneration, ignore mypy error as this is intentional type: Literal["ChatGeneration"] = "ChatGeneration" # type: ignore[assignment] """Type is used exclusively for serialization purposes.""" @@ -82,6 +82,7 @@ class ChatGenerationChunk(ChatGeneration): message: BaseMessageChunk """The message chunk output by the chat model.""" # Override type to be ChatGeneration, ignore mypy error as this is intentional + type: Literal["ChatGenerationChunk"] = "ChatGenerationChunk" # type: ignore[assignment] """Type is used exclusively for serialization purposes.""" @@ -91,8 +92,8 @@ class ChatGenerationChunk(ChatGeneration): """Concatenate two `ChatGenerationChunk`s. Args: - other: The other `ChatGenerationChunk` or list of `ChatGenerationChunk` - to concatenate. + other: The other `ChatGenerationChunk` or list of `ChatGenerationChunk` to + concatenate. Raises: TypeError: If other is not a `ChatGenerationChunk` or list of @@ -134,7 +135,7 @@ def merge_chat_generation_chunks( chunks: A list of `ChatGenerationChunk` to merge. Returns: - A merged `ChatGenerationChunk`, or None if the input list is empty. + A merged `ChatGenerationChunk`, or `None` if the input list is empty. """ if not chunks: return None diff --git a/libs/core/langchain_core/outputs/chat_result.py b/libs/core/langchain_core/outputs/chat_result.py index 7a53830067a..87eb094d27f 100644 --- a/libs/core/langchain_core/outputs/chat_result.py +++ b/libs/core/langchain_core/outputs/chat_result.py @@ -8,14 +8,14 @@ from langchain_core.outputs.chat_generation import ChatGeneration class ChatResult(BaseModel): """Use to represent the result of a chat model call with a single prompt. - This container is used internally by some implementations of chat model, - it will eventually be mapped to a more general `LLMResult` object, and - then projected into an `AIMessage` object. 
+ This container is used internally by some implementations of chat model, it will + eventually be mapped to a more general `LLMResult` object, and then projected into + an `AIMessage` object. LangChain users working with chat models will usually access information via - `AIMessage` (returned from runnable interfaces) or `LLMResult` (available - via callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation - for more information. + `AIMessage` (returned from runnable interfaces) or `LLMResult` (available via + callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation for + more information. """ generations: list[ChatGeneration] @@ -24,13 +24,13 @@ class ChatResult(BaseModel): Generations is a list to allow for multiple candidate generations for a single input prompt. """ + llm_output: dict | None = None """For arbitrary LLM provider specific output. This dictionary is a free-form dictionary that can contain any information that the provider wants to return. It is not standardized and is provider-specific. - Users should generally avoid relying on this field and instead rely on - accessing relevant information from standardized fields present in - AIMessage. + Users should generally avoid relying on this field and instead rely on accessing + relevant information from standardized fields present in `AIMessage`. """ diff --git a/libs/core/langchain_core/outputs/generation.py b/libs/core/langchain_core/outputs/generation.py index 5fbd9c7e1b0..246b68c1fc2 100644 --- a/libs/core/langchain_core/outputs/generation.py +++ b/libs/core/langchain_core/outputs/generation.py @@ -14,13 +14,12 @@ class Generation(Serializable): Generation represents the response from an "old-fashioned" LLM (string-in, string-out) that generates regular text (not chat messages). - This model is used internally by chat model and will eventually - be mapped to a more general `LLMResult` object, and then projected into - an `AIMessage` object. 
+ This model is used internally by chat model and will eventually be mapped to a more + general `LLMResult` object, and then projected into an `AIMessage` object. LangChain users working with chat models will usually access information via - `AIMessage` (returned from runnable interfaces) or `LLMResult` (available - via callbacks). Please refer to `AIMessage` and `LLMResult` for more information. + `AIMessage` (returned from runnable interfaces) or `LLMResult` (available via + callbacks). Please refer to `AIMessage` and `LLMResult` for more information. """ text: str @@ -31,10 +30,11 @@ class Generation(Serializable): May include things like the reason for finishing or token log probabilities. """ + type: Literal["Generation"] = "Generation" """Type is used exclusively for serialization purposes. - Set to "Generation" for this class. + Set to `'Generation'` for this class. """ @classmethod @@ -53,10 +53,10 @@ class Generation(Serializable): class GenerationChunk(Generation): - """`GenerationChunk`, which can be concatenated with other Generation chunks.""" + """`GenerationChunk`, which can be concatenated with other `Generation` chunks.""" def __add__(self, other: GenerationChunk) -> GenerationChunk: - """Concatenate two `GenerationChunk`s. + """Concatenate two `GenerationChunk` objects. Args: other: Another `GenerationChunk` to concatenate with. diff --git a/libs/core/langchain_core/outputs/llm_result.py b/libs/core/langchain_core/outputs/llm_result.py index 4eb3160f397..cf8e47ef3a9 100644 --- a/libs/core/langchain_core/outputs/llm_result.py +++ b/libs/core/langchain_core/outputs/llm_result.py @@ -1,4 +1,4 @@ -"""LLMResult class.""" +"""`LLMResult` class.""" from __future__ import annotations @@ -15,7 +15,7 @@ from langchain_core.outputs.run_info import RunInfo class LLMResult(BaseModel): """A container for results of an LLM call. - Both chat models and LLMs generate an LLMResult object. 
This object contains the + Both chat models and LLMs generate an `LLMResult` object. This object contains the generated outputs and any additional information that the model provider wants to return. """ @@ -33,9 +33,10 @@ class LLMResult(BaseModel): - When returned from **an LLM**, the type is `list[list[Generation]]`. - When returned from a **chat model**, the type is `list[list[ChatGeneration]]`. - ChatGeneration is a subclass of Generation that has a field for a structured chat - message. + `ChatGeneration` is a subclass of `Generation` that has a field for a structured + chat message. """ + llm_output: dict | None = None """For arbitrary LLM provider specific output. @@ -45,6 +46,7 @@ class LLMResult(BaseModel): Users should generally avoid relying on this field and instead rely on accessing relevant information from standardized fields present in AIMessage. """ + run: list[RunInfo] | None = None """List of metadata info for model call for each input. @@ -57,14 +59,14 @@ class LLMResult(BaseModel): def flatten(self) -> list[LLMResult]: """Flatten generations into a single list. - Unpack list[list[Generation]] -> list[LLMResult] where each returned LLMResult - contains only a single Generation. If token usage information is available, - it is kept only for the LLMResult corresponding to the top-choice - Generation, to avoid over-counting of token usage downstream. + Unpack `list[list[Generation]] -> list[LLMResult]` where each returned + `LLMResult` contains only a single `Generation`. If token usage information is + available, it is kept only for the `LLMResult` corresponding to the top-choice + `Generation`, to avoid over-counting of token usage downstream. Returns: - List of LLMResults where each returned LLMResult contains a single - Generation. + List of `LLMResult` objects where each returned `LLMResult` contains a + single `Generation`. 
""" llm_results = [] for i, gen_list in enumerate(self.generations): diff --git a/libs/core/langchain_core/outputs/run_info.py b/libs/core/langchain_core/outputs/run_info.py index c12550977d6..b8bbca14555 100644 --- a/libs/core/langchain_core/outputs/run_info.py +++ b/libs/core/langchain_core/outputs/run_info.py @@ -1,4 +1,4 @@ -"""RunInfo class.""" +"""`RunInfo` class.""" from __future__ import annotations @@ -8,14 +8,14 @@ from pydantic import BaseModel class RunInfo(BaseModel): - """Class that contains metadata for a single execution of a Chain or model. + """Class that contains metadata for a single execution of a chain or model. - Defined for backwards compatibility with older versions of langchain_core. + Defined for backwards compatibility with older versions of `langchain_core`. - This model will likely be deprecated in the future. + !!! warning "This model will likely be deprecated in the future." - Users can acquire the run_id information from callbacks or via run_id - information present in the astream_event API (depending on the use case). + Users can acquire the `run_id` information from callbacks or via `run_id` + information present in the `astream_event` API (depending on the use case). """ run_id: UUID diff --git a/libs/core/langchain_core/prompt_values.py b/libs/core/langchain_core/prompt_values.py index 635fa87f20d..e85fe1efb4c 100644 --- a/libs/core/langchain_core/prompt_values.py +++ b/libs/core/langchain_core/prompt_values.py @@ -1,7 +1,7 @@ """**Prompt values** for language model prompts. -Prompt values are used to represent different pieces of prompts. -They can be used to represent text, images, or chat message pieces. +Prompt values are used to represent different pieces of prompts. They can be used to +represent text, images, or chat message pieces. 
""" from __future__ import annotations @@ -56,6 +56,7 @@ class StringPromptValue(PromptValue): text: str """Prompt text.""" + type: Literal["StringPromptValue"] = "StringPromptValue" @classmethod @@ -136,6 +137,7 @@ class ImagePromptValue(PromptValue): image_url: ImageURL """Image URL.""" + type: Literal["ImagePromptValue"] = "ImagePromptValue" def to_string(self) -> str: diff --git a/libs/core/langchain_core/prompts/__init__.py b/libs/core/langchain_core/prompts/__init__.py index 34a32ba6f55..772ed40d19d 100644 --- a/libs/core/langchain_core/prompts/__init__.py +++ b/libs/core/langchain_core/prompts/__init__.py @@ -1,4 +1,4 @@ -"""**Prompt** is the input to the model. +"""A prompt is the input to the model. Prompt is often constructed from multiple components and prompt values. Prompt classes and functions make constructing and working with prompts easy. diff --git a/libs/core/langchain_core/prompts/base.py b/libs/core/langchain_core/prompts/base.py index 6318d4e58dc..2524120e1fb 100644 --- a/libs/core/langchain_core/prompts/base.py +++ b/libs/core/langchain_core/prompts/base.py @@ -45,27 +45,33 @@ class BasePromptTemplate( """A list of the names of the variables whose values are required as inputs to the prompt. """ + optional_variables: list[str] = Field(default=[]) """A list of the names of the variables for placeholder or `MessagePlaceholder` that are optional. These variables are auto inferred from the prompt and user need not provide them. """ + input_types: builtins.dict[str, Any] = Field(default_factory=dict, exclude=True) """A dictionary of the types of the variables the prompt template expects. If not provided, all variables are assumed to be strings. """ + output_parser: BaseOutputParser | None = None """How to parse the output of calling an LLM on this formatted prompt.""" + partial_variables: Mapping[str, Any] = Field(default_factory=dict) """A dictionary of the partial variables the prompt template carries. 
Partial variables populate the template so that you don't need to pass them in every time you call the prompt. """ + metadata: builtins.dict[str, Any] | None = None """Metadata to be used for tracing.""" + tags: list[str] | None = None """Tags to be used for tracing.""" @@ -410,20 +416,18 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str: First, this pulls information from the document from two sources: - 1. `page_content`: - This takes the information from the `document.page_content` and assigns it to a - variable named `page_content`. - 2. `metadata`: - This takes information from `document.metadata` and assigns it to variables of - the same name. + 1. `page_content`: This takes the information from the `document.page_content` and + assigns it to a variable named `page_content`. + 2. `metadata`: This takes information from `document.metadata` and assigns it to + variables of the same name. Those variables are then passed into the `prompt` to produce a formatted string. Args: - doc: `Document`, the `page_content` and `metadata` will be used to create - the final string. - prompt: `BasePromptTemplate`, will be used to format the `page_content` - and `metadata` into the final string. + doc: `Document`, the `page_content` and `metadata` will be used to create the + final string. + prompt: `BasePromptTemplate`, will be used to format the `page_content` and + `metadata` into the final string. Returns: String of the document formatted. 
@@ -436,7 +440,7 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str: doc = Document(page_content="This is a joke", metadata={"page": "1"}) prompt = PromptTemplate.from_template("Page {page}: {page_content}") format_document(doc, prompt) - >>> "Page 1: This is a joke" + # -> "Page 1: This is a joke" ``` """ return prompt.format(**_get_document_info(doc, prompt)) @@ -447,20 +451,18 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st First, this pulls information from the document from two sources: - 1. `page_content`: - This takes the information from the `document.page_content` and assigns it to a - variable named `page_content`. - 2. `metadata`: - This takes information from `document.metadata` and assigns it to variables of - the same name. + 1. `page_content`: This takes the information from the `document.page_content` and + assigns it to a variable named `page_content`. + 2. `metadata`: This takes information from `document.metadata` and assigns it to + variables of the same name. Those variables are then passed into the `prompt` to produce a formatted string. Args: - doc: `Document`, the `page_content` and `metadata` will be used to create - the final string. - prompt: `BasePromptTemplate`, will be used to format the `page_content` - and `metadata` into the final string. + doc: `Document`, the `page_content` and `metadata` will be used to create the + final string. + prompt: `BasePromptTemplate`, will be used to format the `page_content` and + `metadata` into the final string. Returns: String of the document formatted. diff --git a/libs/core/langchain_core/prompts/chat.py b/libs/core/langchain_core/prompts/chat.py index 607a9be61b6..95910367bcf 100644 --- a/libs/core/langchain_core/prompts/chat.py +++ b/libs/core/langchain_core/prompts/chat.py @@ -54,7 +54,7 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): A placeholder which can be used to pass in a list of messages. - Direct usage: + !!! 
example "Direct usage" ```python from langchain_core.prompts import MessagesPlaceholder @@ -77,7 +77,7 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): # ] ``` - Building a prompt with chat history: + !!! example "Building a prompt with chat history" ```python from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder @@ -103,7 +103,7 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): # ]) ``` - Limiting the number of messages: + !!! example "Limiting the number of messages" ```python from langchain_core.prompts import MessagesPlaceholder @@ -126,12 +126,19 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): """Name of variable to use as messages.""" optional: bool = False - """If `True` format_messages can be called with no arguments and will return an - empty list. If `False` then a named argument with name `variable_name` must be - passed in, even if the value is an empty list.""" + """Whether `format_messages` must be provided. + + If `True` `format_messages` can be called with no arguments and will return an empty + list. + + If `False` then a named argument with name `variable_name` must be passed in, even + if the value is an empty list. + """ n_messages: PositiveInt | None = None - """Maximum number of messages to include. If `None`, then will include all. + """Maximum number of messages to include. + + If `None`, then will include all. """ def __init__( @@ -141,13 +148,17 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): Args: variable_name: Name of variable to use as messages. - optional: If `True` format_messages can be called with no arguments and will - return an empty list. If `False` then a named argument with name - `variable_name` must be passed in, even if the value is an empty list. + optional: Whether `format_messages` must be provided. + + If `True` format_messages can be called with no arguments and will + return an empty list. 
+ + If `False` then a named argument with name `variable_name` must be + passed in, even if the value is an empty list. """ # mypy can't detect the init which is defined in the parent class # b/c these are BaseModel classes. - super().__init__(variable_name=variable_name, optional=optional, **kwargs) + super().__init__(variable_name=variable_name, optional=optional, **kwargs) # type: ignore[call-arg,unused-ignore] def format_messages(self, **kwargs: Any) -> list[BaseMessage]: """Format messages from kwargs. @@ -156,7 +167,7 @@ class MessagesPlaceholder(BaseMessagePromptTemplate): **kwargs: Keyword arguments to use for formatting. Returns: - List of BaseMessage. + List of `BaseMessage` objects. Raises: ValueError: If variable is not a list of messages. @@ -216,6 +227,7 @@ class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): prompt: StringPromptTemplate """String prompt template.""" + additional_kwargs: dict = Field(default_factory=dict) """Additional keyword arguments to pass to the prompt template.""" @@ -233,12 +245,13 @@ class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): template: a template. template_format: format of the template. partial_variables: A dictionary of variables that can be used to partially - fill in the template. For example, if the template is - `"{variable1} {variable2}"`, and `partial_variables` is - `{"variable1": "foo"}`, then the final prompt will be - `"foo {variable2}"`. + fill in the template. - **kwargs: keyword arguments to pass to the constructor. + For example, if the template is `"{variable1} {variable2}"`, and + `partial_variables` is `{"variable1": "foo"}`, then the final prompt + will be `"foo {variable2}"`. + + **kwargs: Keyword arguments to pass to the constructor. Returns: A new instance of this class. @@ -259,8 +272,8 @@ class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): """Create a class from a template file. Args: - template_file: path to a template file. 
String or Path. - **kwargs: keyword arguments to pass to the constructor. + template_file: path to a template file. + **kwargs: Keyword arguments to pass to the constructor. Returns: A new instance of this class. @@ -297,7 +310,7 @@ class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): **kwargs: Keyword arguments to use for formatting. Returns: - List of BaseMessages. + List of `BaseMessage` objects. """ return [self.format(**kwargs)] @@ -308,7 +321,7 @@ class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): **kwargs: Keyword arguments to use for formatting. Returns: - List of BaseMessages. + List of `BaseMessage` objects. """ return [await self.aformat(**kwargs)] @@ -408,10 +421,11 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): Args: template: a template. template_format: format of the template. - Options are: 'f-string', 'mustache', 'jinja2'. + + Options are: `'f-string'`, `'mustache'`, `'jinja2'`. partial_variables: A dictionary of variables that can be used too partially. - **kwargs: keyword arguments to pass to the constructor. + **kwargs: Keyword arguments to pass to the constructor. Returns: A new instance of this class. @@ -524,9 +538,9 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): """Create a class from a template file. Args: - template_file: path to a template file. String or Path. + template_file: path to a template file. input_variables: list of input variables. - **kwargs: keyword arguments to pass to the constructor. + **kwargs: Keyword arguments to pass to the constructor. Returns: A new instance of this class. @@ -541,7 +555,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): **kwargs: Keyword arguments to use for formatting. Returns: - List of BaseMessages. + List of `BaseMessage` objects. 
""" return [self.format(**kwargs)] @@ -552,7 +566,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): **kwargs: Keyword arguments to use for formatting. Returns: - List of BaseMessages. + List of `BaseMessage` objects. """ return [await self.aformat(**kwargs)] @@ -647,13 +661,19 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): class HumanMessagePromptTemplate(_StringImageMessagePromptTemplate): - """Human message prompt template. This is a message sent from the user.""" + """Human message prompt template. + + This is a message sent from the user. + """ _msg_class: type[BaseMessage] = HumanMessage class AIMessagePromptTemplate(_StringImageMessagePromptTemplate): - """AI message prompt template. This is a message sent from the AI.""" + """AI message prompt template. + + This is a message sent from the AI. + """ _msg_class: type[BaseMessage] = AIMessage @@ -679,11 +699,11 @@ class BaseChatPromptTemplate(BasePromptTemplate, ABC): """Format the chat template into a string. Args: - **kwargs: keyword arguments to use for filling in template variables - in all the template messages in this chat template. + **kwargs: Keyword arguments to use for filling in template variables in all + the template messages in this chat template. Returns: - formatted string. + Formatted string. """ return self.format_prompt(**kwargs).to_string() @@ -691,34 +711,32 @@ class BaseChatPromptTemplate(BasePromptTemplate, ABC): """Async format the chat template into a string. Args: - **kwargs: keyword arguments to use for filling in template variables - in all the template messages in this chat template. + **kwargs: Keyword arguments to use for filling in template variables in all + the template messages in this chat template. Returns: - formatted string. + Formatted string. """ return (await self.aformat_prompt(**kwargs)).to_string() def format_prompt(self, **kwargs: Any) -> ChatPromptValue: - """Format prompt. Should return a ChatPromptValue. 
+ """Format prompt. + + Should return a `ChatPromptValue`. Args: **kwargs: Keyword arguments to use for formatting. - - Returns: - ChatPromptValue. """ messages = self.format_messages(**kwargs) return ChatPromptValue(messages=messages) async def aformat_prompt(self, **kwargs: Any) -> ChatPromptValue: - """Async format prompt. Should return a ChatPromptValue. + """Async format prompt. + + Should return a `ChatPromptValue`. Args: **kwargs: Keyword arguments to use for formatting. - - Returns: - PromptValue. """ messages = await self.aformat_messages(**kwargs) return ChatPromptValue(messages=messages) @@ -728,14 +746,14 @@ class BaseChatPromptTemplate(BasePromptTemplate, ABC): """Format kwargs into a list of messages. Returns: - List of messages. + List of `BaseMessage` objects. """ async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]: """Async format kwargs into a list of messages. Returns: - List of messages. + List of `BaseMessage` objects. """ return self.format_messages(**kwargs) @@ -773,34 +791,36 @@ class ChatPromptTemplate(BaseChatPromptTemplate): Use to create flexible templated prompts for chat models. - ```python - from langchain_core.prompts import ChatPromptTemplate + !!! example - template = ChatPromptTemplate( - [ - ("system", "You are a helpful AI bot. Your name is {name}."), - ("human", "Hello, how are you doing?"), - ("ai", "I'm doing well, thanks!"), - ("human", "{user_input}"), - ] - ) + ```python + from langchain_core.prompts import ChatPromptTemplate - prompt_value = template.invoke( - { - "name": "Bob", - "user_input": "What is your name?", - } - ) - # Output: - # ChatPromptValue( - # messages=[ - # SystemMessage(content='You are a helpful AI bot. Your name is Bob.'), - # HumanMessage(content='Hello, how are you doing?'), - # AIMessage(content="I'm doing well, thanks!"), - # HumanMessage(content='What is your name?') - # ] - # ) - ``` + template = ChatPromptTemplate( + [ + ("system", "You are a helpful AI bot. 
Your name is {name}."), + ("human", "Hello, how are you doing?"), + ("ai", "I'm doing well, thanks!"), + ("human", "{user_input}"), + ] + ) + + prompt_value = template.invoke( + { + "name": "Bob", + "user_input": "What is your name?", + } + ) + # Output: + # ChatPromptValue( + # messages=[ + # SystemMessage(content='You are a helpful AI bot. Your name is Bob.'), + # HumanMessage(content='Hello, how are you doing?'), + # AIMessage(content="I'm doing well, thanks!"), + # HumanMessage(content='What is your name?') + # ] + # ) + ``` !!! note "Messages Placeholder" @@ -845,8 +865,8 @@ class ChatPromptTemplate(BaseChatPromptTemplate): !!! note "Single-variable template" - If your prompt has only a single input variable (i.e., 1 instance of - "{variable_nams}"), and you invoke the template with a non-dict object, the + If your prompt has only a single input variable (i.e., one instance of + `'{variable_name}'`), and you invoke the template with a non-dict object, the prompt template will inject the provided argument into that variable location. ```python @@ -875,6 +895,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate): messages: Annotated[list[MessageLike], SkipValidation()] """List of messages consisting of either message prompt templates or messages.""" + validate_template: bool = False """Whether or not to try validating the template.""" @@ -895,10 +916,10 @@ class ChatPromptTemplate(BaseChatPromptTemplate): 1. `BaseMessagePromptTemplate` 2. `BaseMessage` 3. 2-tuple of `(message type, template)`; e.g., - `("human", "{user_input}")` + `('human', '{user_input}')` 4. 2-tuple of `(message class, template)` - 5. A string which is shorthand for `("human", template)`; e.g., - `"{user_input}"` + 5. A string which is shorthand for `('human', template)`; e.g., + `'{user_input}'` template_format: Format of the template.
**kwargs: Additional keyword arguments passed to `BasePromptTemplate`, including (but not limited to): @@ -1027,8 +1048,8 @@ class ChatPromptTemplate(BaseChatPromptTemplate): def validate_input_variables(cls, values: dict) -> Any: """Validate input variables. - If input_variables is not set, it will be set to the union of - all input variables in the messages. + If `input_variables` is not set, it will be set to the union of all input + variables in the messages. Args: values: values to validate. @@ -1080,12 +1101,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate): def from_template(cls, template: str, **kwargs: Any) -> ChatPromptTemplate: """Create a chat prompt template from a template string. - Creates a chat template consisting of a single message assumed to be from - the human. + Creates a chat template consisting of a single message assumed to be from the + human. Args: - template: template string - **kwargs: keyword arguments to pass to the constructor. + template: Template string + **kwargs: Keyword arguments to pass to the constructor. Returns: A new instance of this class. @@ -1133,14 +1154,14 @@ class ChatPromptTemplate(BaseChatPromptTemplate): 1. `BaseMessagePromptTemplate` 2. `BaseMessage` 3. 2-tuple of `(message type, template)`; e.g., - `("human", "{user_input}")` + `('human', '{user_input}')` 4. 2-tuple of `(message class, template)` - 5. A string which is shorthand for `("human", template)`; e.g., - `"{user_input}"` - template_format: format of the template. + 5. A string which is shorthand for `('human', template)`; e.g., + `'{user_input}'` + template_format: Format of the template. Returns: - a chat prompt template. + A chat prompt template. """ return cls(messages, template_format=template_format) @@ -1149,14 +1170,14 @@ class ChatPromptTemplate(BaseChatPromptTemplate): """Format the chat template into a list of finalized messages. 
Args: - **kwargs: keyword arguments to use for filling in template variables + **kwargs: Keyword arguments to use for filling in template variables in all the template messages in this chat template. Raises: - ValueError: if messages are of unexpected types. + ValueError: If messages are of unexpected types. Returns: - list of formatted messages. + List of formatted messages. """ kwargs = self._merge_partial_and_user_variables(**kwargs) result = [] @@ -1177,11 +1198,11 @@ class ChatPromptTemplate(BaseChatPromptTemplate): """Async format the chat template into a list of finalized messages. Args: - **kwargs: keyword arguments to use for filling in template variables + **kwargs: Keyword arguments to use for filling in template variables in all the template messages in this chat template. Returns: - list of formatted messages. + List of formatted messages. Raises: ValueError: If unexpected input. @@ -1202,15 +1223,15 @@ class ChatPromptTemplate(BaseChatPromptTemplate): return result def partial(self, **kwargs: Any) -> ChatPromptTemplate: - """Get a new ChatPromptTemplate with some input variables already filled in. + """Get a new `ChatPromptTemplate` with some input variables already filled in. Args: - **kwargs: keyword arguments to use for filling in template variables. Ought - to be a subset of the input variables. + **kwargs: Keyword arguments to use for filling in template variables. + + Ought to be a subset of the input variables. Returns: - A new ChatPromptTemplate. - + A new `ChatPromptTemplate`. Example: ```python @@ -1265,8 +1286,9 @@ class ChatPromptTemplate(BaseChatPromptTemplate): Returns: If index is an int, returns the message at that index. - If index is a slice, returns a new `ChatPromptTemplate` - containing the messages in that slice. + + If index is a slice, returns a new `ChatPromptTemplate` containing the + messages in that slice. 
""" if isinstance(index, slice): start, stop, step = index.indices(len(self.messages)) @@ -1313,12 +1335,12 @@ def _create_template_from_message_type( """Create a message prompt template from a message type and template string. Args: - message_type: str the type of the message template (e.g., "human", "ai", etc.) - template: str the template string. - template_format: format of the template. + message_type: The type of the message template (e.g., `'human'`, `'ai'`, etc.) + template: The template string. + template_format: Format of the template. Returns: - a message prompt template of the appropriate type. + A message prompt template of the appropriate type. Raises: ValueError: If unexpected message type. @@ -1388,20 +1410,20 @@ def _convert_to_message_template( ) -> BaseMessage | BaseMessagePromptTemplate | BaseChatPromptTemplate: """Instantiate a message from a variety of message formats. - The message format can be one of the following: + A message can be represented using the following formats: - - BaseMessagePromptTemplate - - BaseMessage - - 2-tuple of (role string, template); e.g., ("human", "{user_input}") - - 2-tuple of (message class, template) - - string: shorthand for ("human", template); e.g., "{user_input}" + 1. `BaseMessagePromptTemplate` + 2. `BaseMessage` + 3. 2-tuple of `(message type, template)`; e.g., `('human', '{user_input}')` + 4. 2-tuple of `(message class, template)` + 5. A string which is shorthand for `('human', template)`; e.g., `'{user_input}'` Args: - message: a representation of a message in one of the supported formats. - template_format: format of the template. + message: A representation of a message in one of the supported formats. + template_format: Format of the template. Returns: - an instance of a message or a message template. + An instance of a message or a message template. Raises: ValueError: If unexpected message type. 
diff --git a/libs/core/langchain_core/prompts/dict.py b/libs/core/langchain_core/prompts/dict.py index dca8d9e80a3..7fa85049dfc 100644 --- a/libs/core/langchain_core/prompts/dict.py +++ b/libs/core/langchain_core/prompts/dict.py @@ -1,4 +1,4 @@ -"""Dict prompt template.""" +"""Dictionary prompt template.""" import warnings from functools import cached_property @@ -16,10 +16,11 @@ from langchain_core.runnables.config import ensure_config class DictPromptTemplate(RunnableSerializable[dict, dict]): - """Template represented by a dict. + """Template represented by a dictionary. - Recognizes variables in f-string or mustache formatted string dict values. Does NOT - recognize variables in dict keys. Applies recursively. + Recognizes variables in f-string or mustache formatted string dict values. + + Does NOT recognize variables in dict keys. Applies recursively. """ template: dict[str, Any] diff --git a/libs/core/langchain_core/prompts/few_shot.py b/libs/core/langchain_core/prompts/few_shot.py index 86ceb03bad4..8c3310655e1 100644 --- a/libs/core/langchain_core/prompts/few_shot.py +++ b/libs/core/langchain_core/prompts/few_shot.py @@ -35,11 +35,15 @@ class _FewShotPromptTemplateMixin(BaseModel): examples: list[dict] | None = None """Examples to format into the prompt. - Either this or example_selector should be provided.""" + + Either this or `example_selector` should be provided. + """ example_selector: BaseExampleSelector | None = None - """ExampleSelector to choose the examples to format into the prompt. - Either this or examples should be provided.""" + """`ExampleSelector` to choose the examples to format into the prompt. + + Either this or `examples` should be provided. 
+ """ model_config = ConfigDict( arbitrary_types_allowed=True, @@ -49,7 +53,7 @@ class _FewShotPromptTemplateMixin(BaseModel): @model_validator(mode="before") @classmethod def check_examples_and_selector(cls, values: dict) -> Any: - """Check that one and only one of examples/example_selector are provided. + """Check that one and only one of `examples`/`example_selector` are provided. Args: values: The values to check. @@ -58,8 +62,9 @@ class _FewShotPromptTemplateMixin(BaseModel): The values if they are valid. Raises: - ValueError: If neither or both examples and example_selector are provided. - ValueError: If both examples and example_selector are provided. + ValueError: If neither or both `examples` and `example_selector` are + provided. + ValueError: If both `examples` and `example_selector` are provided. """ examples = values.get("examples") example_selector = values.get("example_selector") @@ -83,7 +88,7 @@ class _FewShotPromptTemplateMixin(BaseModel): List of examples. Raises: - ValueError: If neither examples nor example_selector are provided. + ValueError: If neither `examples` nor `example_selector` are provided. """ if self.examples is not None: return self.examples @@ -102,7 +107,7 @@ class _FewShotPromptTemplateMixin(BaseModel): List of examples. Raises: - ValueError: If neither examples nor example_selector are provided. + ValueError: If neither `examples` nor `example_selector` are provided. 
""" if self.examples is not None: return self.examples @@ -117,14 +122,14 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate): @classmethod def is_lc_serializable(cls) -> bool: - """Return False as this class is not serializable.""" + """Return `False` as this class is not serializable.""" return False validate_template: bool = False """Whether or not to try validating the template.""" example_prompt: PromptTemplate - """PromptTemplate used to format an individual example.""" + """`PromptTemplate` used to format an individual example.""" suffix: str """A prompt template string to put after the examples.""" @@ -136,7 +141,10 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate): """A prompt template string to put before the examples.""" template_format: Literal["f-string", "jinja2"] = "f-string" - """The format of the prompt template. Options are: 'f-string', 'jinja2'.""" + """The format of the prompt template. + + Options are: `'f-string'`, `'jinja2'`. + """ def __init__(self, **kwargs: Any) -> None: """Initialize the few shot prompt template.""" @@ -174,7 +182,7 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate): Use this method to generate a string representation of a prompt. Args: - **kwargs: keyword arguments to use for formatting. + **kwargs: Keyword arguments to use for formatting. Returns: A string representation of the prompt. @@ -202,7 +210,7 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate): Use this method to generate a string representation of a prompt. Args: - **kwargs: keyword arguments to use for formatting. + **kwargs: Keyword arguments to use for formatting. Returns: A string representation of the prompt. @@ -236,7 +244,7 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate): file_path: The path to save the prompt template to. Raises: - ValueError: If example_selector is provided. 
+ ValueError: If `example_selector` is provided. """ if self.example_selector: msg = "Saving an example selector is not currently supported" @@ -254,15 +262,22 @@ class FewShotChatMessagePromptTemplate( This structure enables creating a conversation with intermediate examples like: - System: You are a helpful AI Assistant - Human: What is 2+2? - AI: 4 - Human: What is 2+3? - AI: 5 - Human: What is 4+4? + ```txt + System: You are a helpful AI Assistant - This prompt template can be used to generate a fixed list of examples or else - to dynamically select examples based on the input. + Human: What is 2+2? + + AI: 4 + + Human: What is 2+3? + + AI: 5 + + Human: What is 4+4? + ``` + + This prompt template can be used to generate a fixed list of examples or else to + dynamically select examples based on the input. Examples: Prompt template with a fixed list of examples (matching the sample @@ -355,15 +370,16 @@ class FewShotChatMessagePromptTemplate( """ input_variables: list[str] = Field(default_factory=list) - """A list of the names of the variables the prompt template will use - to pass to the example_selector, if provided.""" + """A list of the names of the variables the prompt template will use to pass to + the `example_selector`, if provided. + """ example_prompt: BaseMessagePromptTemplate | BaseChatPromptTemplate """The class to format each example.""" @classmethod def is_lc_serializable(cls) -> bool: - """Return False as this class is not serializable.""" + """Return `False` as this class is not serializable.""" return False model_config = ConfigDict( @@ -375,7 +391,7 @@ class FewShotChatMessagePromptTemplate( """Format kwargs into a list of messages. Args: - **kwargs: keyword arguments to use for filling in templates in messages. + **kwargs: Keyword arguments to use for filling in templates in messages. Returns: A list of formatted messages with all template variables filled in. 
@@ -396,7 +412,7 @@ class FewShotChatMessagePromptTemplate( """Async format kwargs into a list of messages. Args: - **kwargs: keyword arguments to use for filling in templates in messages. + **kwargs: Keyword arguments to use for filling in templates in messages. Returns: A list of formatted messages with all template variables filled in. @@ -416,13 +432,13 @@ class FewShotChatMessagePromptTemplate( def format(self, **kwargs: Any) -> str: """Format the prompt with inputs generating a string. - Use this method to generate a string representation of a prompt consisting - of chat messages. + Use this method to generate a string representation of a prompt consisting of + chat messages. Useful for feeding into a string-based completion language model or debugging. Args: - **kwargs: keyword arguments to use for formatting. + **kwargs: Keyword arguments to use for formatting. Returns: A string representation of the prompt @@ -433,13 +449,13 @@ class FewShotChatMessagePromptTemplate( async def aformat(self, **kwargs: Any) -> str: """Async format the prompt with inputs generating a string. - Use this method to generate a string representation of a prompt consisting - of chat messages. + Use this method to generate a string representation of a prompt consisting of + chat messages. Useful for feeding into a string-based completion language model or debugging. Args: - **kwargs: keyword arguments to use for formatting. + **kwargs: Keyword arguments to use for formatting. Returns: A string representation of the prompt diff --git a/libs/core/langchain_core/prompts/few_shot_with_templates.py b/libs/core/langchain_core/prompts/few_shot_with_templates.py index 1df73bc3229..cd2d26b7aeb 100644 --- a/libs/core/langchain_core/prompts/few_shot_with_templates.py +++ b/libs/core/langchain_core/prompts/few_shot_with_templates.py @@ -20,27 +20,33 @@ class FewShotPromptWithTemplates(StringPromptTemplate): examples: list[dict] | None = None """Examples to format into the prompt. 
- Either this or example_selector should be provided.""" + + Either this or `example_selector` should be provided. + """ example_selector: BaseExampleSelector | None = None - """ExampleSelector to choose the examples to format into the prompt. - Either this or examples should be provided.""" + """`ExampleSelector` to choose the examples to format into the prompt. + + Either this or `examples` should be provided. + """ example_prompt: PromptTemplate - """PromptTemplate used to format an individual example.""" + """`PromptTemplate` used to format an individual example.""" suffix: StringPromptTemplate - """A PromptTemplate to put after the examples.""" + """A `PromptTemplate` to put after the examples.""" example_separator: str = "\n\n" """String separator used to join the prefix, the examples, and suffix.""" prefix: StringPromptTemplate | None = None - """A PromptTemplate to put before the examples.""" + """A `PromptTemplate` to put before the examples.""" template_format: PromptTemplateFormat = "f-string" """The format of the prompt template. - Options are: 'f-string', 'jinja2', 'mustache'.""" + + Options are: `'f-string'`, `'jinja2'`, `'mustache'`. + """ validate_template: bool = False """Whether or not to try validating the template.""" @@ -123,9 +129,9 @@ class FewShotPromptWithTemplates(StringPromptTemplate): A formatted string. Example: - ```python - prompt.format(variable1="foo") - ``` + ```python + prompt.format(variable1="foo") + ``` """ kwargs = self._merge_partial_and_user_variables(**kwargs) # Get the examples to use. @@ -216,7 +222,7 @@ class FewShotPromptWithTemplates(StringPromptTemplate): file_path: The path to save the prompt to. Raises: - ValueError: If example_selector is provided. + ValueError: If `example_selector` is provided. 
""" if self.example_selector: msg = "Saving an example selector is not currently supported" diff --git a/libs/core/langchain_core/prompts/image.py b/libs/core/langchain_core/prompts/image.py index bdd3db31236..017bd48e21c 100644 --- a/libs/core/langchain_core/prompts/image.py +++ b/libs/core/langchain_core/prompts/image.py @@ -18,9 +18,12 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]): template: dict = Field(default_factory=dict) """Template for the prompt.""" + template_format: PromptTemplateFormat = "f-string" """The format of the prompt template. - Options are: 'f-string', 'mustache', 'jinja2'.""" + + Options are: `'f-string'`, `'mustache'`, `'jinja2'`. + """ def __init__(self, **kwargs: Any) -> None: """Create an image prompt template. diff --git a/libs/core/langchain_core/prompts/loading.py b/libs/core/langchain_core/prompts/loading.py index c1a95f63d36..66dcba0abc9 100644 --- a/libs/core/langchain_core/prompts/loading.py +++ b/libs/core/langchain_core/prompts/loading.py @@ -18,13 +18,13 @@ logger = logging.getLogger(__name__) def load_prompt_from_config(config: dict) -> BasePromptTemplate: - """Load prompt from Config Dict. + """Load prompt from config dict. Args: config: Dict containing the prompt configuration. Returns: - A PromptTemplate object. + A `PromptTemplate` object. Raises: ValueError: If the prompt type is not supported. @@ -135,17 +135,17 @@ def _load_prompt(config: dict) -> PromptTemplate: def load_prompt(path: str | Path, encoding: str | None = None) -> BasePromptTemplate: - """Unified method for loading a prompt from LangChainHub or local fs. + """Unified method for loading a prompt from LangChainHub or local filesystem. Args: path: Path to the prompt file. encoding: Encoding of the file. Returns: - A PromptTemplate object. + A `PromptTemplate` object. Raises: - RuntimeError: If the path is a Lang Chain Hub path. + RuntimeError: If the path is a LangChainHub path. 
""" if isinstance(path, str) and path.startswith("lc://"): msg = ( diff --git a/libs/core/langchain_core/prompts/message.py b/libs/core/langchain_core/prompts/message.py index ae0ccc03530..ce20a493018 100644 --- a/libs/core/langchain_core/prompts/message.py +++ b/libs/core/langchain_core/prompts/message.py @@ -32,7 +32,9 @@ class BaseMessagePromptTemplate(Serializable, ABC): @abstractmethod def format_messages(self, **kwargs: Any) -> list[BaseMessage]: - """Format messages from kwargs. Should return a list of `BaseMessage` objects. + """Format messages from kwargs. + + Should return a list of `BaseMessage` objects. Args: **kwargs: Keyword arguments to use for formatting. diff --git a/libs/core/langchain_core/prompts/prompt.py b/libs/core/langchain_core/prompts/prompt.py index 6d486dc3aea..cef55a5c2ff 100644 --- a/libs/core/langchain_core/prompts/prompt.py +++ b/libs/core/langchain_core/prompts/prompt.py @@ -27,21 +27,22 @@ class PromptTemplate(StringPromptTemplate): A prompt template consists of a string template. It accepts a set of parameters from the user that can be used to generate a prompt for a language model. - The template can be formatted using either f-strings (default), jinja2, - or mustache syntax. + The template can be formatted using either f-strings (default), jinja2, or mustache + syntax. - *Security warning*: - Prefer using `template_format="f-string"` instead of - `template_format="jinja2"`, or make sure to NEVER accept jinja2 templates - from untrusted sources as they may lead to arbitrary Python code execution. + !!! warning "Security" - As of LangChain 0.0.329, Jinja2 templates will be rendered using - Jinja2's SandboxedEnvironment by default. This sand-boxing should - be treated as a best-effort approach rather than a guarantee of security, - as it is an opt-out rather than opt-in approach. 
+ Prefer using `template_format='f-string'` instead of `template_format='jinja2'`, + or make sure to NEVER accept jinja2 templates from untrusted sources as they may + lead to arbitrary Python code execution. - Despite the sand-boxing, we recommend to never use jinja2 templates - from untrusted sources. + As of LangChain 0.0.329, Jinja2 templates will be rendered using Jinja2's + SandboxedEnvironment by default. This sandboxing should be treated as a + best-effort approach rather than a guarantee of security, as it is an opt-out + rather than opt-in approach. + + Despite the sandboxing, we recommend never using jinja2 templates from + untrusted sources. Example: ```python @@ -78,7 +79,9 @@ class PromptTemplate(StringPromptTemplate): template_format: PromptTemplateFormat = "f-string" """The format of the prompt template. - Options are: 'f-string', 'mustache', 'jinja2'.""" + + Options are: `'f-string'`, `'mustache'`, `'jinja2'`. + """ validate_template: bool = False """Whether or not to try validating the template.""" @@ -137,7 +140,7 @@ class PromptTemplate(StringPromptTemplate): return mustache_schema(self.template) def __add__(self, other: Any) -> PromptTemplate: - """Override the + operator to allow for combining prompt templates. + """Override the `+` operator to allow for combining prompt templates. Raises: ValueError: If the template formats are not f-string or if there are @@ -213,14 +216,15 @@ class PromptTemplate(StringPromptTemplate): Args: examples: List of examples to use in the prompt. - suffix: String to go after the list of examples. Should generally - set up the user's input. - input_variables: A list of variable names the final prompt template - will expect. - example_separator: The separator to use in between examples. Defaults - to two new line characters. - prefix: String that should go before any examples. Generally includes - examples. + suffix: String to go after the list of examples. + + Should generally set up the user's input.
+ input_variables: A list of variable names the final prompt template will + expect. + example_separator: The separator to use in between examples. + prefix: String that should go before any examples. + + Generally includes examples. Returns: The final prompt generated. @@ -240,6 +244,7 @@ class PromptTemplate(StringPromptTemplate): Args: template_file: The path to the file containing the prompt template. encoding: The encoding system for opening the template file. + If not provided, will use the OS default. Returns: @@ -259,28 +264,32 @@ class PromptTemplate(StringPromptTemplate): ) -> PromptTemplate: """Load a prompt template from a template. - *Security warning*: - Prefer using `template_format="f-string"` instead of - `template_format="jinja2"`, or make sure to NEVER accept jinja2 templates + !!! warning "Security" + + Prefer using `template_format='f-string'` instead of + `template_format='jinja2'`, or make sure to NEVER accept jinja2 templates from untrusted sources as they may lead to arbitrary Python code execution. - As of LangChain 0.0.329, Jinja2 templates will be rendered using - Jinja2's SandboxedEnvironment by default. This sand-boxing should - be treated as a best-effort approach rather than a guarantee of security, - as it is an opt-out rather than opt-in approach. + As of LangChain 0.0.329, Jinja2 templates will be rendered using Jinja2's + SandboxedEnvironment by default. This sandboxing should be treated as a + best-effort approach rather than a guarantee of security, as it is an + opt-out rather than opt-in approach. - Despite the sand-boxing, we recommend never using jinja2 templates - from untrusted sources. + Despite the sandboxing, we recommend never using jinja2 templates from + untrusted sources. Args: template: The template to load. - template_format: The format of the template. Use `jinja2` for jinja2, - `mustache` for mustache, and `f-string` for f-strings. + template_format: The format of the template.
+ + Use `jinja2` for jinja2, `mustache` for mustache, and `f-string` for + f-strings. partial_variables: A dictionary of variables that can be used to partially - fill in the template. For example, if the template is - `"{variable1} {variable2}"`, and `partial_variables` is - `{"variable1": "foo"}`, then the final prompt will be - `"foo {variable2}"`. + fill in the template. + + For example, if the template is `'{variable1} {variable2}'`, and + `partial_variables` is `{"variable1": "foo"}`, then the final prompt + will be `'foo {variable2}'`. **kwargs: Any other arguments to pass to the prompt template. Returns: diff --git a/libs/core/langchain_core/prompts/string.py b/libs/core/langchain_core/prompts/string.py index bb255f645a9..e0eb642306a 100644 --- a/libs/core/langchain_core/prompts/string.py +++ b/libs/core/langchain_core/prompts/string.py @@ -1,4 +1,4 @@ -"""BasePrompt schema definition.""" +"""`BasePrompt` schema definition.""" from __future__ import annotations @@ -33,14 +33,16 @@ PromptTemplateFormat = Literal["f-string", "mustache", "jinja2"] def jinja2_formatter(template: str, /, **kwargs: Any) -> str: """Format a template using jinja2. - *Security warning*: - As of LangChain 0.0.329, this method uses Jinja2's - SandboxedEnvironment by default. However, this sand-boxing should - be treated as a best-effort approach rather than a guarantee of security. + !!! warning "Security" + + As of LangChain 0.0.329, this method uses Jinja2's `SandboxedEnvironment` by + default. However, this sandboxing should be treated as a best-effort approach + rather than a guarantee of security. + Do not accept jinja2 templates from untrusted sources as they may lead to arbitrary Python code execution. - https://jinja.palletsprojects.com/en/3.1.x/sandbox/ + [More information.](https://jinja.palletsprojects.com/en/3.1.x/sandbox/) Args: template: The template string. 
@@ -123,14 +125,14 @@ def mustache_template_vars( ) -> set[str]: """Get the top-level variables from a mustache template. - For nested variables like `{{person.name}}`, only the top-level - key (`person`) is returned. + For nested variables like `{{person.name}}`, only the top-level key (`person`) is + returned. Args: template: The template string. Returns: - The top-level variables from the template. + The top-level variables from the template. """ variables: set[str] = set() section_depth = 0 @@ -222,7 +224,9 @@ def check_valid_template( Args: template: The template string. - template_format: The template format. Should be one of "f-string" or "jinja2". + template_format: The template format. + + Should be one of `'f-string'` or `'jinja2'`. input_variables: The input variables. Raises: @@ -252,7 +256,9 @@ def get_template_variables(template: str, template_format: str) -> list[str]: Args: template: The template string. - template_format: The template format. Should be one of "f-string" or "jinja2". + template_format: The template format. + + Should be one of `'f-string'` or `'jinja2'`. Returns: The variables from the template. 
@@ -366,7 +372,7 @@ class StringPromptTemplate(BasePromptTemplate, ABC): def is_subsequence(child: Sequence, parent: Sequence) -> bool: - """Return True if child is subsequence of parent.""" + """Return `True` if child is subsequence of parent.""" if len(child) == 0 or len(parent) == 0: return False if len(parent) < len(child): diff --git a/libs/core/langchain_core/prompts/structured.py b/libs/core/langchain_core/prompts/structured.py index 7adb673487b..00ac407fb72 100644 --- a/libs/core/langchain_core/prompts/structured.py +++ b/libs/core/langchain_core/prompts/structured.py @@ -30,6 +30,7 @@ class StructuredPrompt(ChatPromptTemplate): schema_: dict | type """Schema for the structured prompt.""" + structured_output_kwargs: dict[str, Any] = Field(default_factory=dict) def __init__( @@ -44,13 +45,13 @@ class StructuredPrompt(ChatPromptTemplate): """Create a structured prompt template. Args: - messages: sequence of messages. - schema_: schema for the structured prompt. - structured_output_kwargs: additional kwargs for structured output. - template_format: template format for the prompt. + messages: Sequence of messages. + schema_: Schema for the structured prompt. + structured_output_kwargs: Additional kwargs for structured output. + template_format: Template format for the prompt. Raises: - ValueError: if schema is not provided. + ValueError: If schema is not provided. """ schema_ = schema_ or kwargs.pop("schema", None) if not schema_: @@ -74,8 +75,8 @@ class StructuredPrompt(ChatPromptTemplate): def get_lc_namespace(cls) -> list[str]: """Get the namespace of the LangChain object. - For example, if the class is `langchain.llms.openai.OpenAI`, then the - namespace is `["langchain", "llms", "openai"]` + For example, if the class is `langchain.llms.openai.OpenAI`, then the namespace + is `["langchain", "llms", "openai"]` Returns: The namespace of the LangChain object. 
@@ -112,6 +113,7 @@ class StructuredPrompt(ChatPromptTemplate): OutputSchema, ) ``` + Args: messages: Sequence of message representations. @@ -160,11 +162,11 @@ class StructuredPrompt(ChatPromptTemplate): name: The name of the pipeline. Returns: - A RunnableSequence object. + A `RunnableSequence` object. Raises: - NotImplementedError: If the first element of `others` - is not a language model. + NotImplementedError: If the first element of `others` is not a language + model. """ if (others and isinstance(others[0], BaseLanguageModel)) or hasattr( others[0], "with_structured_output" diff --git a/libs/core/langchain_core/rate_limiters.py b/libs/core/langchain_core/rate_limiters.py index 986d3ff63b7..8bc45b6b5b0 100644 --- a/libs/core/langchain_core/rate_limiters.py +++ b/libs/core/langchain_core/rate_limiters.py @@ -147,14 +147,19 @@ class InMemoryRateLimiter(BaseRateLimiter): """ # Number of requests that we can make per second. self.requests_per_second = requests_per_second + # Number of tokens in the bucket. self.available_tokens = 0.0 + self.max_bucket_size = max_bucket_size + # A lock to ensure that tokens can only be consumed by one thread # at a given time. self._consume_lock = threading.Lock() + # The last time we tried to consume tokens. self.last: float | None = None + self.check_every_n_seconds = check_every_n_seconds def _consume(self) -> bool: @@ -211,6 +216,7 @@ class InMemoryRateLimiter(BaseRateLimiter): while not self._consume(): time.sleep(self.check_every_n_seconds) + return True async def aacquire(self, *, blocking: bool = True) -> bool: diff --git a/libs/core/langchain_core/retrievers.py b/libs/core/langchain_core/retrievers.py index 7be6df9a727..caaad5005af 100644 --- a/libs/core/langchain_core/retrievers.py +++ b/libs/core/langchain_core/retrievers.py @@ -1,4 +1,4 @@ -"""**Retriever** class returns Documents given a text **query**. +"""**Retriever** class returns `Document` objects given a text **query**. It is more general than a vector store. 
A retriever does not need to be able to store documents, only to return (or retrieve) it. Vector stores can be used as @@ -41,10 +41,13 @@ class LangSmithRetrieverParams(TypedDict, total=False): ls_retriever_name: str """Retriever name.""" + ls_vector_store_provider: str | None """Vector store provider.""" + ls_embedding_provider: str | None """Embedding provider.""" + ls_embedding_model: str | None """Embedding model.""" @@ -116,7 +119,9 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC): ) _new_arg_supported: bool = False + _expects_other_args: bool = False + tags: list[str] | None = None """Optional list of tags associated with the retriever. @@ -126,6 +131,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC): You can use these to eg identify a specific instance of a retriever with its use case. """ + metadata: dict[str, Any] | None = None """Optional metadata associated with the retriever. diff --git a/libs/core/langchain_core/runnables/__init__.py b/libs/core/langchain_core/runnables/__init__.py index f12a0baf697..70306d89182 100644 --- a/libs/core/langchain_core/runnables/__init__.py +++ b/libs/core/langchain_core/runnables/__init__.py @@ -3,18 +3,19 @@ The LangChain Expression Language (LCEL) offers a declarative method to build production-grade programs that harness the power of LLMs. -Programs created using LCEL and LangChain Runnables inherently support -synchronous, asynchronous, batch, and streaming operations. +Programs created using LCEL and LangChain `Runnable` objects inherently support +synchronous, asynchronous, batch, and streaming operations. -Support for **async** allows servers hosting LCEL based programs to scale better -for higher concurrent loads. +Support for **async** allows servers hosting LCEL based programs to scale better for +higher concurrent loads. **Batch** operations allow for processing multiple inputs in parallel.
-**Streaming** of intermediate outputs, as they're being generated, allows for -creating more responsive UX. +**Streaming** of intermediate outputs, as they're being generated, allows for creating +more responsive UX. -This module contains schema and implementation of LangChain Runnables primitives. +This module contains schema and implementation of LangChain `Runnable` object +primitives. """ from typing import TYPE_CHECKING diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 9bd63e3bca5..e95b7c5f04a 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -1,4 +1,4 @@ -"""Base classes and utilities for `Runnable`s.""" +"""Base classes and utilities for `Runnable` objects.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/config.py b/libs/core/langchain_core/runnables/config.py index 3079f82d943..f24f5603170 100644 --- a/libs/core/langchain_core/runnables/config.py +++ b/libs/core/langchain_core/runnables/config.py @@ -1,4 +1,4 @@ -"""Configuration utilities for Runnables.""" +"""Configuration utilities for `Runnable` objects.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/fallbacks.py b/libs/core/langchain_core/runnables/fallbacks.py index 8d8a3d7524e..0fcacb4420d 100644 --- a/libs/core/langchain_core/runnables/fallbacks.py +++ b/libs/core/langchain_core/runnables/fallbacks.py @@ -1,4 +1,4 @@ -"""Runnable that can fallback to other Runnables if it fails.""" +"""`Runnable` that can fallback to other `Runnable` objects if it fails.""" import asyncio import inspect @@ -34,7 +34,7 @@ if TYPE_CHECKING: class RunnableWithFallbacks(RunnableSerializable[Input, Output]): - """`Runnable` that can fallback to other `Runnable`s if it fails. + """`Runnable` that can fallback to other `Runnable` objects if it fails. 
External APIs (e.g., APIs for a language model) may at times experience degraded performance or even downtime. diff --git a/libs/core/langchain_core/runnables/graph.py b/libs/core/langchain_core/runnables/graph.py index 7e9ead54996..cdab7d48846 100644 --- a/libs/core/langchain_core/runnables/graph.py +++ b/libs/core/langchain_core/runnables/graph.py @@ -1,4 +1,4 @@ -"""Graph used in Runnables.""" +"""Graph used in `Runnable` objects.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/history.py b/libs/core/langchain_core/runnables/history.py index 73c65fe7879..2521cfae753 100644 --- a/libs/core/langchain_core/runnables/history.py +++ b/libs/core/langchain_core/runnables/history.py @@ -1,4 +1,4 @@ -"""Runnable that manages chat message history for another Runnable.""" +"""`Runnable` that manages chat message history for another `Runnable`.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/passthrough.py b/libs/core/langchain_core/runnables/passthrough.py index b27e3d4af58..f5e01cfe20f 100644 --- a/libs/core/langchain_core/runnables/passthrough.py +++ b/libs/core/langchain_core/runnables/passthrough.py @@ -1,4 +1,4 @@ -"""Implementation of the RunnablePassthrough.""" +"""Implementation of the `RunnablePassthrough`.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/retry.py b/libs/core/langchain_core/runnables/retry.py index 03466718274..6b9f5fef2de 100644 --- a/libs/core/langchain_core/runnables/retry.py +++ b/libs/core/langchain_core/runnables/retry.py @@ -1,4 +1,4 @@ -"""Runnable that retries a Runnable if it fails.""" +"""`Runnable` that retries a `Runnable` if it fails.""" from typing import ( TYPE_CHECKING, diff --git a/libs/core/langchain_core/runnables/router.py b/libs/core/langchain_core/runnables/router.py index 4b62beba0c3..a6341da1c16 100644 --- a/libs/core/langchain_core/runnables/router.py +++ b/libs/core/langchain_core/runnables/router.py @@ -1,4 
+1,4 @@ -"""Runnable that routes to a set of Runnables.""" +"""`Runnable` that routes to a set of `Runnable` objects.""" from __future__ import annotations diff --git a/libs/core/langchain_core/runnables/utils.py b/libs/core/langchain_core/runnables/utils.py index 103226c4993..ce91e44116f 100644 --- a/libs/core/langchain_core/runnables/utils.py +++ b/libs/core/langchain_core/runnables/utils.py @@ -1,4 +1,4 @@ -"""Utility code for runnables.""" +"""Utility code for `Runnable` objects.""" from __future__ import annotations diff --git a/libs/core/langchain_core/stores.py b/libs/core/langchain_core/stores.py index e133b9a226d..080fe03225c 100644 --- a/libs/core/langchain_core/stores.py +++ b/libs/core/langchain_core/stores.py @@ -26,25 +26,25 @@ V = TypeVar("V") class BaseStore(ABC, Generic[K, V]): """Abstract interface for a key-value store. - This is an interface that's meant to abstract away the details of - different key-value stores. It provides a simple interface for - getting, setting, and deleting key-value pairs. + This is an interface that's meant to abstract away the details of different + key-value stores. It provides a simple interface for getting, setting, and deleting + key-value pairs. - The basic methods are `mget`, `mset`, and `mdelete` for getting, - setting, and deleting multiple key-value pairs at once. The `yield_keys` - method is used to iterate over keys that match a given prefix. + The basic methods are `mget`, `mset`, and `mdelete` for getting, setting, and + deleting multiple key-value pairs at once. The `yield_keys` method is used to + iterate over keys that match a given prefix. - The async versions of these methods are also provided, which are - meant to be used in async contexts. The async methods are named with - an `a` prefix, e.g., `amget`, `amset`, `amdelete`, and `ayield_keys`. + The async versions of these methods are also provided, which are meant to be used in + async contexts. 
The async methods are named with an `a` prefix, e.g., `amget`, + `amset`, `amdelete`, and `ayield_keys`. - By default, the `amget`, `amset`, `amdelete`, and `ayield_keys` methods - are implemented using the synchronous methods. If the store can natively - support async operations, it should override these methods. + By default, the `amget`, `amset`, `amdelete`, and `ayield_keys` methods are + implemented using the synchronous methods. If the store can natively support async + operations, it should override these methods. - By design the methods only accept batches of keys and values, and not - single keys or values. This is done to force user code to work with batches - which will usually be more efficient by saving on round trips to the store. + By design the methods only accept batches of keys and values, and not single keys or + values. This is done to force user code to work with batches which will usually be + more efficient by saving on round trips to the store. Examples: ```python @@ -86,7 +86,7 @@ class BaseStore(ABC, Generic[K, V]): Returns: A sequence of optional values associated with the keys. - If a key is not found, the corresponding value will be `None`. + If a key is not found, the corresponding value will be `None`. """ async def amget(self, keys: Sequence[K]) -> list[V | None]: @@ -97,7 +97,7 @@ class BaseStore(ABC, Generic[K, V]): Returns: A sequence of optional values associated with the keys. - If a key is not found, the corresponding value will be `None`. + If a key is not found, the corresponding value will be `None`. """ return await run_in_executor(None, self.mget, keys) @@ -142,8 +142,9 @@ class BaseStore(ABC, Generic[K, V]): Yields: An iterator over keys that match the given prefix. - This method is allowed to return an iterator over either K or str - depending on what makes more sense for the given store. + + This method is allowed to return an iterator over either K or str + depending on what makes more sense for the given store. 
""" async def ayield_keys( @@ -156,8 +157,9 @@ class BaseStore(ABC, Generic[K, V]): Yields: The keys that match the given prefix. - This method is allowed to return an iterator over either K or str - depending on what makes more sense for the given store. + + This method is allowed to return an iterator over either K or str + depending on what makes more sense for the given store. """ iterator = await run_in_executor(None, self.yield_keys, prefix=prefix) done = object() @@ -172,7 +174,7 @@ ByteStore = BaseStore[str, bytes] class InMemoryBaseStore(BaseStore[str, V], Generic[V]): - """In-memory implementation of the BaseStore using a dictionary.""" + """In-memory implementation of the `BaseStore` using a dictionary.""" def __init__(self) -> None: """Initialize an empty store.""" diff --git a/libs/core/langchain_core/structured_query.py b/libs/core/langchain_core/structured_query.py index 664b85b50e2..84dcd9a5c28 100644 --- a/libs/core/langchain_core/structured_query.py +++ b/libs/core/langchain_core/structured_query.py @@ -17,6 +17,7 @@ class Visitor(ABC): allowed_comparators: Sequence[Comparator] | None = None """Allowed comparators for the visitor.""" + allowed_operators: Sequence[Operator] | None = None """Allowed operators for the visitor.""" @@ -127,8 +128,10 @@ class Comparison(FilterDirective): comparator: Comparator """The comparator to use.""" + attribute: str """The attribute to compare.""" + value: Any """The value to compare to.""" @@ -153,6 +156,7 @@ class Operation(FilterDirective): operator: Operator """The operator to use.""" + arguments: list[FilterDirective] """The arguments to the operator.""" @@ -174,8 +178,10 @@ class StructuredQuery(Expr): query: str """Query string.""" + filter: FilterDirective | None """Filtering expression.""" + limit: int | None """Limit on the number of results.""" diff --git a/libs/core/langchain_core/tools/__init__.py b/libs/core/langchain_core/tools/__init__.py index 3d4642dc4be..ecfbd5ef8de 100644 --- 
a/libs/core/langchain_core/tools/__init__.py +++ b/libs/core/langchain_core/tools/__init__.py @@ -1,7 +1,7 @@ -"""**Tools** are classes that an Agent uses to interact with the world. +"""Tools are classes that an Agent uses to interact with the world. -Each tool has a **description**. Agent uses the description to choose the right -tool for the job. +Each tool has a description. Agent uses the description to choose the right tool for the +job. """ from __future__ import annotations diff --git a/libs/core/langchain_core/tools/base.py b/libs/core/langchain_core/tools/base.py index e4c7cb3552c..8eb2ba66435 100644 --- a/libs/core/langchain_core/tools/base.py +++ b/libs/core/langchain_core/tools/base.py @@ -87,23 +87,23 @@ _logger = logging.getLogger(__name__) class SchemaAnnotationError(TypeError): - """Raised when args_schema is missing or has an incorrect type annotation.""" + """Raised when `args_schema` is missing or has an incorrect type annotation.""" def _is_annotated_type(typ: type[Any]) -> bool: - """Check if a type is an Annotated type. + """Check if a type is an `Annotated` type. Args: typ: The type to check. Returns: - `True` if the type is an Annotated type, `False` otherwise. + `True` if the type is an `Annotated` type, `False` otherwise. """ return get_origin(typ) in {typing.Annotated, typing_extensions.Annotated} def _get_annotation_description(arg_type: type) -> str | None: - """Extract description from an Annotated type. + """Extract description from an `Annotated` type. Checks for string annotations and `FieldInfo` objects with descriptions. @@ -111,7 +111,7 @@ def _get_annotation_description(arg_type: type) -> str | None: arg_type: The type to extract description from. Returns: - The description string if found, None otherwise. + The description string if found, `None` otherwise.
""" if _is_annotated_type(arg_type): annotated_args = get_args(arg_type) @@ -232,7 +232,7 @@ def _is_pydantic_annotation(annotation: Any, pydantic_version: str = "v2") -> bo Args: annotation: The type annotation to check. - pydantic_version: The Pydantic version to check against ("v1" or "v2"). + pydantic_version: The Pydantic version to check against (`'v1'` or `'v2'`). Returns: `True` if the annotation is a Pydantic model, `False` otherwise. @@ -247,17 +247,17 @@ def _is_pydantic_annotation(annotation: Any, pydantic_version: str = "v2") -> bo def _function_annotations_are_pydantic_v1( signature: inspect.Signature, func: Callable ) -> bool: - """Check if all Pydantic annotations in a function are from V1. + """Check if all Pydantic annotations in a function are from v1. Args: signature: The function signature to check. func: The function being checked. Returns: - True if all Pydantic annotations are from V1, `False` otherwise. + True if all Pydantic annotations are from v1, `False` otherwise. Raises: - NotImplementedError: If the function contains mixed V1 and V2 annotations. + NotImplementedError: If the function contains mixed v1 and v2 annotations. """ any_v1_annotations = any( _is_pydantic_annotation(parameter.annotation, pydantic_version="v1") @@ -281,6 +281,7 @@ class _SchemaConfig: extra: str = "forbid" """Whether to allow extra fields in the model.""" + arbitrary_types_allowed: bool = True """Whether to allow arbitrary types in the model.""" @@ -300,14 +301,16 @@ def create_schema_from_function( model_name: Name to assign to the generated Pydantic schema. func: Function to generate the schema from. filter_args: Optional list of arguments to exclude from the schema. + Defaults to `FILTERED_ARGS`. parse_docstring: Whether to parse the function's docstring for descriptions for each argument. 
- error_on_invalid_docstring: if `parse_docstring` is provided, configure + error_on_invalid_docstring: If `parse_docstring` is provided, configure whether to raise `ValueError` on invalid Google Style docstrings. include_injected: Whether to include injected arguments in the schema. - Defaults to `True`, since we want to include them in the schema - when *validating* tool inputs. + + Defaults to `True`, since we want to include them in the schema when + *validating* tool inputs. Returns: A Pydantic model with the same arguments as the function. @@ -388,8 +391,9 @@ class ToolException(Exception): # noqa: N818 """Exception thrown when a tool execution error occurs. This exception allows tools to signal errors without stopping the agent. - The error is handled according to the tool's handle_tool_error setting, - and the result is returned as an observation to the agent. + + The error is handled according to the tool's `handle_tool_error` setting, and the + result is returned as an observation to the agent. """ @@ -441,6 +445,7 @@ class ChildTool(BaseTool): name: str """The unique name of the tool that clearly communicates its purpose.""" + description: str """Used to tell the model how/when/why to use the tool. @@ -458,12 +463,14 @@ class ChildTool(BaseTool): - A subclass of `pydantic.v1.BaseModel` if accessing v1 namespace in pydantic 2 - A JSON schema dict """ + return_direct: bool = False """Whether to return the tool's output directly. Setting this to `True` means that after the tool is called, the `AgentExecutor` will stop looping. """ + verbose: bool = False """Whether to log the tool's progress.""" @@ -479,14 +486,14 @@ class ChildTool(BaseTool): You can use these to, e.g., identify a specific instance of a tool with its use case. """ + metadata: dict[str, Any] | None = None """Optional metadata associated with the tool. This metadata will be associated with each call to this tool, and passed as arguments to the handlers defined in `callbacks`. 
- You can use these to, e.g., identify a specific instance of a tool with its use - case. + You can use these to, e.g., identify a specific instance of a tool with its usecase. """ handle_tool_error: bool | str | Callable[[ToolException], str] | None = False @@ -603,7 +610,7 @@ class ChildTool(BaseTool): @functools.cached_property def _injected_args_keys(self) -> frozenset[str]: - # base implementation doesn't manage injected args + # Base implementation doesn't manage injected args return _EMPTY_SET # --- Runnable --- @@ -1389,8 +1396,8 @@ class _DirectlyInjectedToolArg: class InjectedToolCallId(InjectedToolArg): """Annotation for injecting the tool call ID. - This annotation is used to mark a tool parameter that should receive - the tool call ID at runtime. + This annotation is used to mark a tool parameter that should receive the tool call + ID at runtime. ```python from typing import Annotated @@ -1408,7 +1415,6 @@ class InjectedToolCallId(InjectedToolArg): name="foo", tool_call_id=tool_call_id ) - ``` """ @@ -1417,10 +1423,12 @@ def _is_directly_injected_arg_type(type_: Any) -> bool: """Check if a type annotation indicates a directly injected argument. This is currently only used for `ToolRuntime`. + Checks if either the annotation itself is a subclass of `_DirectlyInjectedToolArg` or the origin of the annotation is a subclass of `_DirectlyInjectedToolArg`. - Ex: `ToolRuntime` or `ToolRuntime[ContextT, StateT]` would both return `True`. + For example, `ToolRuntime` or `ToolRuntime[ContextT, StateT]` would both return + `True`. """ return ( isinstance(type_, type) and issubclass(type_, _DirectlyInjectedToolArg) @@ -1564,8 +1572,8 @@ def _replace_type_vars( class BaseToolkit(BaseModel, ABC): """Base class for toolkits containing related tools. - A toolkit is a collection of related tools that can be used together - to accomplish a specific task or work with a particular system. 
+ A toolkit is a collection of related tools that can be used together to accomplish a + specific task or work with a particular system. """ @abstractmethod diff --git a/libs/core/langchain_core/tools/convert.py b/libs/core/langchain_core/tools/convert.py index 3f6357698f2..480e30efaeb 100644 --- a/libs/core/langchain_core/tools/convert.py +++ b/libs/core/langchain_core/tools/convert.py @@ -94,6 +94,7 @@ def tool( unless disabled. !!! note "Requirements" + - Functions must have type hints for proper schema inference - When `infer_schema=False`, functions must be `(str) -> str` and have docstrings @@ -101,7 +102,9 @@ def tool( Args: name_or_callable: Optional name of the tool or the `Callable` to be - converted to a tool. Overrides the function's name. + converted to a tool. + + Overrides the function's name. Must be provided as a positional argument. runnable: Optional `Runnable` to convert to a tool. @@ -111,19 +114,22 @@ def tool( Precedence for the tool description value is as follows: - - This `description` argument - (used even if docstring and/or `args_schema` are provided) - - Tool function docstring - (used even if `args_schema` is provided) - - `args_schema` description - (used only if `description` and docstring are not provided) - *args: Extra positional arguments. Must be empty. + - This `description` argument (used even if docstring and/or `args_schema` + are provided) + - Tool function docstring (used even if `args_schema` is provided) + - `args_schema` description (used only if `description` and docstring are + not provided) + *args: Extra positional arguments. + + Must be empty. return_direct: Whether to return directly from the tool rather than continuing the agent loop. args_schema: Optional argument schema for user to specify. infer_schema: Whether to infer the schema of the arguments from the function's - signature. This also makes the resultant tool accept a dictionary input to - its `run()` function. + signature. 
+ + This also makes the resultant tool accept a dictionary input to its `run()` + function. response_format: The tool response format. If `'content'`, then the output of the tool is interpreted as the contents @@ -217,10 +223,10 @@ def tool( } ``` - Note that parsing by default will raise `ValueError` if the docstring - is considered invalid. A docstring is considered invalid if it contains - arguments not in the function signature, or is unable to be parsed into - a summary and `"Args:"` blocks. Examples below: + Note that parsing by default will raise `ValueError` if the docstring is + considered invalid. A docstring is considered invalid if it contains arguments + not in the function signature, or is unable to be parsed into a summary and + `'Args:'` blocks. Examples below: ```python # No args section @@ -259,7 +265,7 @@ def tool( tool_name: The name that will be assigned to the tool. Returns: - A function that takes a callable or Runnable and returns a tool. + A function that takes a callable or `Runnable` and returns a tool. """ def _tool_factory(dec_func: Callable | Runnable) -> BaseTool: @@ -376,7 +382,7 @@ def tool( # def my_tool(): # pass def _partial(func: Callable | Runnable) -> BaseTool: - """Partial function that takes a callable and returns a tool.""" + """Partial function that takes a `Callable` and returns a tool.""" name_ = func.get_name() if isinstance(func, Runnable) else func.__name__ tool_factory = _create_tool_factory(name_) return tool_factory(func) @@ -385,7 +391,7 @@ def tool( def _get_description_from_runnable(runnable: Runnable) -> str: - """Generate a placeholder description of a runnable.""" + """Generate a placeholder description of a `Runnable`.""" input_schema = runnable.input_schema.model_json_schema() return f"Takes {input_schema}." 
@@ -395,7 +401,7 @@ def _get_schema_from_runnable_and_arg_types( name: str, arg_types: dict[str, type] | None = None, ) -> type[BaseModel]: - """Infer args_schema for tool.""" + """Infer `args_schema` for tool.""" if arg_types is None: try: arg_types = get_type_hints(runnable.InputType) @@ -418,10 +424,10 @@ def convert_runnable_to_tool( description: str | None = None, arg_types: dict[str, type] | None = None, ) -> BaseTool: - """Convert a Runnable into a BaseTool. + """Convert a `Runnable` into a `BaseTool`. Args: - runnable: The runnable to convert. + runnable: The `Runnable` to convert. args_schema: The schema for the tool's input arguments. name: The name of the tool. description: The description of the tool. diff --git a/libs/core/langchain_core/tools/retriever.py b/libs/core/langchain_core/tools/retriever.py index 52db07fcaeb..9e2d84dcb0c 100644 --- a/libs/core/langchain_core/tools/retriever.py +++ b/libs/core/langchain_core/tools/retriever.py @@ -41,16 +41,19 @@ def create_retriever_tool( Args: retriever: The retriever to use for the retrieval - name: The name for the tool. This will be passed to the language model, - so should be unique and somewhat descriptive. - description: The description for the tool. This will be passed to the language - model, so should be descriptive. + name: The name for the tool. + + This will be passed to the language model, so should be unique and somewhat + descriptive. + description: The description for the tool. + + This will be passed to the language model, so should be descriptive. document_prompt: The prompt to use for the document. document_separator: The separator to use between documents. response_format: The tool response format. - If `"content"` then the output of the tool is interpreted as the contents of - a `ToolMessage`. If `"content_and_artifact"` then the output is expected to + If `'content'` then the output of the tool is interpreted as the contents of + a `ToolMessage`. 
If `'content_and_artifact'` then the output is expected to be a two-tuple corresponding to the `(content, artifact)` of a `ToolMessage` (artifact being a list of documents in this case). diff --git a/libs/core/langchain_core/tools/simple.py b/libs/core/langchain_core/tools/simple.py index 88af06e598c..ca80164df88 100644 --- a/libs/core/langchain_core/tools/simple.py +++ b/libs/core/langchain_core/tools/simple.py @@ -32,8 +32,10 @@ class Tool(BaseTool): """Tool that takes in function or coroutine directly.""" description: str = "" + func: Callable[..., str] | None """The function to run when the tool is called.""" + coroutine: Callable[..., Awaitable[str]] | None = None """The asynchronous version of the function.""" diff --git a/libs/core/langchain_core/tools/structured.py b/libs/core/langchain_core/tools/structured.py index 01070f78f5f..8b67e3b4547 100644 --- a/libs/core/langchain_core/tools/structured.py +++ b/libs/core/langchain_core/tools/structured.py @@ -41,12 +41,15 @@ class StructuredTool(BaseTool): """Tool that can operate on any number of inputs.""" description: str = "" + args_schema: Annotated[ArgsSchema, SkipValidation()] = Field( ..., description="The tool schema." ) """The input arguments' schema.""" + func: Callable[..., Any] | None = None """The function to run when the tool is called.""" + coroutine: Callable[..., Awaitable[Any]] | None = None """The asynchronous version of the function.""" @@ -149,16 +152,19 @@ class StructuredTool(BaseTool): Args: func: The function from which to create a tool. coroutine: The async function from which to create a tool. - name: The name of the tool. Defaults to the function name. + name: The name of the tool. + + Defaults to the function name. description: The description of the tool. + Defaults to the function docstring. return_direct: Whether to return the result directly or as a callback. args_schema: The schema of the tool's input arguments. 
infer_schema: Whether to infer the schema from the function's signature. response_format: The tool response format. - If `"content"` then the output of the tool is interpreted as the - contents of a `ToolMessage`. If `"content_and_artifact"` then the output + If `'content'` then the output of the tool is interpreted as the + contents of a `ToolMessage`. If `'content_and_artifact'` then the output is expected to be a two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`. parse_docstring: If `infer_schema` and `parse_docstring`, will attempt diff --git a/libs/core/langchain_core/tracers/__init__.py b/libs/core/langchain_core/tracers/__init__.py index b2f41d10f82..c6d35bc2a44 100644 --- a/libs/core/langchain_core/tracers/__init__.py +++ b/libs/core/langchain_core/tracers/__init__.py @@ -1,4 +1,4 @@ -"""**Tracers** are classes for tracing runs.""" +"""Tracers are classes for tracing runs.""" from typing import TYPE_CHECKING diff --git a/libs/core/langchain_core/tracers/_compat.py b/libs/core/langchain_core/tracers/_compat.py index 73ff2b2cbb2..54c2f49da03 100644 --- a/libs/core/langchain_core/tracers/_compat.py +++ b/libs/core/langchain_core/tracers/_compat.py @@ -1,8 +1,10 @@ -"""Compatibility helpers for Pydantic v1/v2 with langsmith Run objects. +"""Compatibility helpers for Pydantic v1/v2 with langsmith `Run` objects. -Note: The generic helpers (`pydantic_to_dict`, `pydantic_copy`) detect Pydantic -version based on the langsmith `Run` model. They're intended for langsmith objects -(`Run`, `Example`) which migrate together. +!!! note + + The generic helpers (`pydantic_to_dict`, `pydantic_copy`) detect Pydantic version + based on the langsmith `Run` model. They're intended for langsmith objects (`Run`, + `Example`) which migrate together. For general Pydantic v1/v2 handling, see `langchain_core.utils.pydantic`. """ @@ -24,7 +26,7 @@ def run_to_dict(run: Run, **kwargs: Any) -> dict[str, Any]: Args: run: The run to convert.
- **kwargs: Additional arguments passed to model_dump/dict. + **kwargs: Additional arguments passed to `model_dump`/`dict`. Returns: Dictionary representation of the run. @@ -39,7 +41,7 @@ def run_copy(run: Run, **kwargs: Any) -> Run: Args: run: The run to copy. - **kwargs: Additional arguments passed to model_copy/copy. + **kwargs: Additional arguments passed to `model_copy`/`copy`. Returns: A copy of the run. @@ -56,7 +58,7 @@ def run_construct(**kwargs: Any) -> Run: **kwargs: Fields to set on the run. Returns: - A new Run instance constructed without validation. + A new `Run` instance constructed without validation. """ if _RUN_IS_PYDANTIC_V2: return Run.model_construct(**kwargs) @@ -68,7 +70,7 @@ def pydantic_to_dict(obj: Any, **kwargs: Any) -> dict[str, Any]: Args: obj: The Pydantic model to convert. - **kwargs: Additional arguments passed to model_dump/dict. + **kwargs: Additional arguments passed to `model_dump`/`dict`. Returns: Dictionary representation of the model. @@ -83,7 +85,7 @@ def pydantic_copy(obj: T, **kwargs: Any) -> T: Args: obj: The Pydantic model to copy. - **kwargs: Additional arguments passed to model_copy/copy. + **kwargs: Additional arguments passed to `model_copy`/`copy`. Returns: A copy of the model. diff --git a/libs/core/langchain_core/tracers/_streaming.py b/libs/core/langchain_core/tracers/_streaming.py index 7ac1eb7c056..7ed7dcf747a 100644 --- a/libs/core/langchain_core/tracers/_streaming.py +++ b/libs/core/langchain_core/tracers/_streaming.py @@ -1,4 +1,4 @@ -"""Internal tracers used for stream_log and astream events implementations.""" +"""Internal tracers used for `stream_log` and `astream` events implementations.""" import typing from collections.abc import AsyncIterator, Iterator @@ -12,11 +12,11 @@ T = typing.TypeVar("T") class _StreamingCallbackHandler(typing.Protocol[T]): """Types for streaming callback handlers. - This is a common mixin that the callback handlers - for both astream events and astream log inherit from. 
+ This is a common mixin that the callback handlers for both astream events and + astream log inherit from. - The `tap_output_aiter` method is invoked in some contexts - to produce callbacks for intermediate results. + The `tap_output_aiter` method is invoked in some contexts to produce callbacks for + intermediate results. """ def tap_output_aiter( diff --git a/libs/core/langchain_core/tracers/base.py b/libs/core/langchain_core/tracers/base.py index 01bc9da0aa8..68579b3ff64 100644 --- a/libs/core/langchain_core/tracers/base.py +++ b/libs/core/langchain_core/tracers/base.py @@ -141,7 +141,9 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC): parent_run_id: UUID | None = None, **kwargs: Any, ) -> Run: - """Run on new LLM token. Only available when streaming is enabled. + """Run on new LLM token. + + Only available when streaming is enabled. Args: token: The token. @@ -440,7 +442,7 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC): name: str | None = None, **kwargs: Any, ) -> Run: - """Run when the Retriever starts running. + """Run when the `Retriever` starts running. Args: serialized: The serialized retriever. @@ -477,7 +479,7 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC): run_id: UUID, **kwargs: Any, ) -> Run: - """Run when Retriever errors. + """Run when `Retriever` errors. Args: error: The error. @@ -499,7 +501,7 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC): def on_retriever_end( self, documents: Sequence[Document], *, run_id: UUID, **kwargs: Any ) -> Run: - """Run when the Retriever ends running. + """Run when the `Retriever` ends running. Args: documents: The documents. 
@@ -527,7 +529,7 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC): class AsyncBaseTracer(_TracerCore, AsyncCallbackHandler, ABC): - """Async Base interface for tracers.""" + """Async base interface for tracers.""" @abstractmethod @override @@ -538,8 +540,9 @@ class AsyncBaseTracer(_TracerCore, AsyncCallbackHandler, ABC): async def _start_trace(self, run: Run) -> None: """Start a trace for a run. - Starting a trace will run concurrently with each _on_[run_type]_start method. - No _on_[run_type]_start callback should depend on operations in _start_trace. + Starting a trace will run concurrently with each `_on_[run_type]_start` method. + No `_on_[run_type]_start` callback should depend on operations in + `_start_trace`. """ super()._start_trace(run) await self._on_run_create(run) @@ -548,8 +551,8 @@ class AsyncBaseTracer(_TracerCore, AsyncCallbackHandler, ABC): async def _end_trace(self, run: Run) -> None: """End a trace for a run. - Ending a trace will run concurrently with each _on_[run_type]_end method. - No _on_[run_type]_end callback should depend on operations in _end_trace. + Ending a trace will run concurrently with each `_on_[run_type]_end` method. + No `_on_[run_type]_end` callback should depend on operations in `_end_trace`. """ if not run.parent_run_id: await self._persist_run(run) diff --git a/libs/core/langchain_core/tracers/context.py b/libs/core/langchain_core/tracers/context.py index fac7b3a0871..2ad17663910 100644 --- a/libs/core/langchain_core/tracers/context.py +++ b/libs/core/langchain_core/tracers/context.py @@ -47,7 +47,9 @@ def tracing_v2_enabled( """Instruct LangChain to log all runs in context to LangSmith. Args: - project_name: The name of the project. Defaults to `'default'`. + project_name: The name of the project. + + Defaults to `'default'`. example_id: The ID of the example. tags: The tags to add to the run. client: The client of the langsmith. @@ -181,7 +183,7 @@ def register_configure_hook( env_var: The environment variable. 
Raises: - ValueError: If env_var is set, handle_class must also be set to a non-None + ValueError: If `env_var` is set, `handle_class` must also be set to a non-`None` value. """ if env_var is not None and handle_class is None: diff --git a/libs/core/langchain_core/tracers/core.py b/libs/core/langchain_core/tracers/core.py index ebd0f989259..654a235acb7 100644 --- a/libs/core/langchain_core/tracers/core.py +++ b/libs/core/langchain_core/tracers/core.py @@ -56,25 +56,30 @@ class _TracerCore(ABC): """Initialize the tracer. Args: - _schema_format: Primarily changes how the inputs and outputs are - handled. For internal use only. This API will change. + _schema_format: Primarily changes how the inputs and outputs are handled. - - 'original' is the format used by all current tracers. - This format is slightly inconsistent with respect to inputs - and outputs. - - 'streaming_events' is used for supporting streaming events, - for internal usage. It will likely change in the future, or - be deprecated entirely in favor of a dedicated async tracer - for streaming events. - - 'original+chat' is a format that is the same as 'original' - except it does NOT raise an attribute error on_chat_model_start - **kwargs: Additional keyword arguments that will be passed to - the superclass. + For internal use only. This API will change. + + - `'original'` is the format used by all current tracers. + + This format is slightly inconsistent with respect to inputs and + outputs. + - `'streaming_events'` is used for supporting streaming events, for + internal usage. It will likely change in the future, or be + deprecated entirely in favor of a dedicated async tracer for + streaming events. + - `'original+chat'` is a format that is the same as `'original'` except + it does NOT raise an attribute error `on_chat_model_start` + **kwargs: Additional keyword arguments that will be passed to the + superclass. 
""" super().__init__(**kwargs) + self._schema_format = _schema_format # For internal use only API will change. + self.run_map: dict[str, Run] = {} """Map of run ID to run. Cleared on run end.""" + self.order_map: dict[UUID, tuple[UUID, str]] = {} """Map of run ID to (trace_id, dotted_order). Cleared when tracer GCed.""" diff --git a/libs/core/langchain_core/tracers/evaluation.py b/libs/core/langchain_core/tracers/evaluation.py index e008b004111..22c6f600f58 100644 --- a/libs/core/langchain_core/tracers/evaluation.py +++ b/libs/core/langchain_core/tracers/evaluation.py @@ -39,26 +39,34 @@ class EvaluatorCallbackHandler(BaseTracer): """Tracer that runs a run evaluator whenever a run is persisted. Attributes: - client : Client - The LangSmith client instance used for evaluating the runs. + client: The LangSmith client instance used for evaluating the runs. """ name: str = "evaluator_callback_handler" + example_id: UUID | None = None """The example ID associated with the runs.""" + client: langsmith.Client """The LangSmith client instance used for evaluating the runs.""" + evaluators: Sequence[langsmith.RunEvaluator] = () """The sequence of run evaluators to be executed.""" + executor: ThreadPoolExecutor | None = None """The thread pool executor used for running the evaluators.""" + futures: weakref.WeakSet[Future] = weakref.WeakSet() """The set of futures representing the running evaluators.""" + skip_unfinished: bool = True """Whether to skip runs that are not finished or raised an error.""" + project_name: str | None = None """The LangSmith project name to be organize eval chain runs under.""" + logged_eval_results: dict[tuple[str, str], list[EvaluationResult]] + lock: threading.Lock def __init__( @@ -74,19 +82,15 @@ class EvaluatorCallbackHandler(BaseTracer): """Create an EvaluatorCallbackHandler. Args: - evaluators : Sequence[RunEvaluator] - The run evaluators to apply to all top level runs. 
- client : LangSmith Client, optional - The LangSmith client instance to use for evaluating the runs. + evaluators: The run evaluators to apply to all top level runs. + client: The LangSmith client instance to use for evaluating the runs. + If not specified, a new instance will be created. - example_id : Union[UUID, str], optional - The example ID to be associated with the runs. - skip_unfinished: bool, optional - Whether to skip unfinished runs. - project_name : str, optional - The LangSmith project name to be organize eval chain runs under. - max_concurrency : int, optional - The maximum number of concurrent evaluators to run. + example_id: The example ID to be associated with the runs. + skip_unfinished: Whether to skip unfinished runs. + project_name: The LangSmith project name to organize eval chain runs + under. + max_concurrency: The maximum number of concurrent evaluators to run. """ super().__init__(**kwargs) self.example_id = ( diff --git a/libs/core/langchain_core/tracers/event_stream.py b/libs/core/langchain_core/tracers/event_stream.py index f4d16a83fb0..399a7c19b6e 100644 --- a/libs/core/langchain_core/tracers/event_stream.py +++ b/libs/core/langchain_core/tracers/event_stream.py @@ -63,16 +63,22 @@ class RunInfo(TypedDict): name: str """The name of the run.""" + tags: list[str] """The tags associated with the run.""" + metadata: dict[str, Any] """The metadata associated with the run.""" + run_type: str """The type of the run.""" + inputs: NotRequired[Any] """The inputs to the run.""" + parent_run_id: UUID | None """The ID of the parent run.""" + tool_call_id: NotRequired[str | None] """The tool call ID associated with the run.""" @@ -176,16 +182,15 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand ) -> AsyncIterator[T]: """Tap the output aiter. - This method is used to tap the output of a Runnable that produces - an async iterator. It is used to generate stream events for the - output of the Runnable.
+ This method is used to tap the output of a `Runnable` that produces an async + iterator. It is used to generate stream events for the output of the `Runnable`. Args: run_id: The ID of the run. - output: The output of the Runnable. + output: The output of the `Runnable`. Yields: - The output of the Runnable. + The output of the `Runnable`. """ sentinel = object() # atomic check and set @@ -232,10 +237,10 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand Args: run_id: The ID of the run. - output: The output of the Runnable. + output: The output of the `Runnable`. Yields: - The output of the Runnable. + The output of the `Runnable`. """ sentinel = object() # atomic check and set @@ -429,9 +434,11 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand parent_run_id: UUID | None = None, **kwargs: Any, ) -> None: - """Run on new output token. Only available when streaming is enabled. + """Run on new output token. - For both chat models and non-chat models (legacy LLMs). + Only available when streaming is enabled. + + For both chat models and non-chat models (legacy text-completion LLMs). Raises: ValueError: If the run type is not `llm` or `chat_model`. @@ -484,7 +491,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand ) -> None: """End a trace for a model run. - For both chat models and non-chat models (legacy LLMs). + For both chat models and non-chat models (legacy text-completion LLMs). Raises: ValueError: If the run type is not `'llm'` or `'chat_model'`. @@ -629,7 +636,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand run_id: The run ID of the tool. Returns: - A tuple of (run_info, inputs). + A tuple of `(run_info, inputs)`. Raises: AssertionError: If the run ID is a tool call and does not have inputs. 
@@ -754,7 +761,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand name: str | None = None, **kwargs: Any, ) -> None: - """Run when Retriever starts running.""" + """Run when `Retriever` starts running.""" name_ = _assign_name(name, serialized) run_type = "retriever" @@ -789,7 +796,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand async def on_retriever_end( self, documents: Sequence[Document], *, run_id: UUID, **kwargs: Any ) -> None: - """Run when Retriever ends running.""" + """Run when `Retriever` ends running.""" run_info = self.run_map.pop(run_id) self._send( @@ -1006,7 +1013,7 @@ async def _astream_events_implementation_v2( exclude_tags: Sequence[str] | None = None, **kwargs: Any, ) -> AsyncIterator[StandardStreamEvent]: - """Implementation of the astream events API for V2 runnables.""" + """Implementation of the astream events API for v2 runnables.""" event_streamer = _AstreamEventsCallbackHandler( include_names=include_names, include_types=include_types, diff --git a/libs/core/langchain_core/tracers/langchain.py b/libs/core/langchain_core/tracers/langchain.py index 6ff14105644..a5122b7ffdc 100644 --- a/libs/core/langchain_core/tracers/langchain.py +++ b/libs/core/langchain_core/tracers/langchain.py @@ -1,4 +1,4 @@ -"""A Tracer implementation that records to LangChain endpoint.""" +"""A tracer implementation that records to LangChain endpoint.""" from __future__ import annotations @@ -80,9 +80,8 @@ def _get_usage_metadata_from_generations( messages. This is typically present in chat model outputs. Args: - generations: List of generation batches, where each batch is a list - of generation dicts that may contain a `'message'` key with - `'usage_metadata'`. + generations: List of generation batches, where each batch is a list of + generation dicts that may contain a `'message'` key with `'usage_metadata'`. Returns: The aggregated `usage_metadata` dict if found, otherwise `None`. 
@@ -98,7 +97,7 @@ def _get_usage_metadata_from_generations( class LangChainTracer(BaseTracer): - """Implementation of the SharedTracer that POSTS to the LangChain endpoint.""" + """Implementation of the `SharedTracer` that `POSTS` to the LangChain endpoint.""" run_inline = True @@ -114,9 +113,15 @@ class LangChainTracer(BaseTracer): Args: example_id: The example ID. - project_name: The project name. Defaults to the tracer project. - client: The client. Defaults to the global client. - tags: The tags. Defaults to an empty list. + project_name: The project name. + + Defaults to the tracer project. + client: The client. + + Defaults to the global client. + tags: The tags. + + Defaults to an empty list. **kwargs: Additional keyword arguments. """ super().__init__(**kwargs) diff --git a/libs/core/langchain_core/tracers/log_stream.py b/libs/core/langchain_core/tracers/log_stream.py index 2bc1a529e79..192c817197e 100644 --- a/libs/core/langchain_core/tracers/log_stream.py +++ b/libs/core/langchain_core/tracers/log_stream.py @@ -42,30 +42,42 @@ class LogEntry(TypedDict): id: str """ID of the sub-run.""" + name: str """Name of the object being run.""" + type: str """Type of the object being run, eg. prompt, chain, llm, etc.""" + tags: list[str] """List of tags for the run.""" + metadata: dict[str, Any] """Key-value pairs of metadata for the run.""" + start_time: str """ISO-8601 timestamp of when the run started.""" streamed_output_str: list[str] """List of LLM tokens streamed by this run, if applicable.""" + streamed_output: list[Any] """List of output chunks streamed by this run, if available.""" + inputs: NotRequired[Any | None] - """Inputs to this run. Not available currently via astream_log.""" + """Inputs to this run. Not available currently via `astream_log`.""" + final_output: Any | None """Final output of this run. - Only available after the run has finished successfully.""" + Only available after the run has finished successfully. 
+ """ + end_time: str | None """ISO-8601 timestamp of when the run ended. - Only available after the run has finished.""" + + Only available after the run has finished. + """ class RunState(TypedDict): @@ -73,34 +85,45 @@ class RunState(TypedDict): id: str """ID of the run.""" + streamed_output: list[Any] - """List of output chunks streamed by Runnable.stream()""" + """List of output chunks streamed by `Runnable.stream()`""" + final_output: Any | None """Final output of the run, usually the result of aggregating (`+`) streamed_output. - Updated throughout the run when supported by the Runnable.""" + + Updated throughout the run when supported by the `Runnable`. + """ name: str """Name of the object being run.""" + type: str - """Type of the object being run, eg. prompt, chain, llm, etc.""" + """Type of the object being run, e.g. prompt, chain, llm, etc.""" # Do we want tags/metadata on the root run? Client kinda knows it in most situations # tags: list[str] logs: dict[str, LogEntry] - """Map of run names to sub-runs. If filters were supplied, this list will - contain only the runs that matched the filters.""" + """Map of run names to sub-runs. + + If filters were supplied, this list will contain only the runs that matched the + filters. + """ class RunLogPatch: """Patch to the run log.""" ops: list[dict[str, Any]] - """List of JSONPatch operations, which describe how to create the run state - from an empty dict. This is the minimal representation of the log, designed to - be serialized as JSON and sent over the wire to reconstruct the log on the other - side. Reconstruction of the state can be done with any JSONPatch-compliant library, - see https://jsonpatch.com for more information.""" + """List of `JSONPatch` operations, which describe how to create the run state + from an empty dict. + + This is the minimal representation of the log, designed to be serialized as JSON and + sent over the wire to reconstruct the log on the other side. 
Reconstruction of the + state can be done with any JSONPatch-compliant library, see https://jsonpatch.com + for more information. + """ def __init__(self, *ops: dict[str, Any]) -> None: """Create a RunLogPatch. @@ -159,7 +182,7 @@ class RunLog(RunLogPatch): self.state = state def __add__(self, other: RunLogPatch | Any) -> RunLog: - """Combine two `RunLog`s. + """Combine two `RunLog` objects. Args: other: The other `RunLog` or `RunLogPatch` to combine with. @@ -226,24 +249,23 @@ class LogStreamCallbackHandler(BaseTracer, _StreamingCallbackHandler): Args: auto_close: Whether to close the stream when the root run finishes. - include_names: Only include runs from Runnables with matching names. - include_types: Only include runs from Runnables with matching types. - include_tags: Only include runs from Runnables with matching tags. - exclude_names: Exclude runs from Runnables with matching names. - exclude_types: Exclude runs from Runnables with matching types. - exclude_tags: Exclude runs from Runnables with matching tags. - _schema_format: Primarily changes how the inputs and outputs are - handled. + include_names: Only include runs from `Runnable` objects with matching + names. + include_types: Only include runs from `Runnable` objects with matching + types. + include_tags: Only include runs from `Runnable` objects with matching tags. + exclude_names: Exclude runs from `Runnable` objects with matching names. + exclude_types: Exclude runs from `Runnable` objects with matching types. + exclude_tags: Exclude runs from `Runnable` objects with matching tags. + _schema_format: Primarily changes how the inputs and outputs are handled. **For internal use only. This API will change.** - - 'original' is the format used by all current tracers. - This format is slightly inconsistent with respect to inputs - and outputs. - - 'streaming_events' is used for supporting streaming events, - for internal usage. 
It will likely change in the future, or - be deprecated entirely in favor of a dedicated async tracer - for streaming events. + - `'original'` is the format used by all current tracers. This format is + slightly inconsistent with respect to inputs and outputs. + - `'streaming_events'` is used for supporting streaming events, for + internal usage. It will likely change in the future, or be deprecated + entirely in favor of a dedicated async tracer for streaming events. Raises: ValueError: If an invalid schema format is provided (internal use only). @@ -285,13 +307,13 @@ class LogStreamCallbackHandler(BaseTracer, _StreamingCallbackHandler): return self.receive_stream.__aiter__() def send(self, *ops: dict[str, Any]) -> bool: - """Send a patch to the stream, return False if the stream is closed. + """Send a patch to the stream, return `False` if the stream is closed. Args: *ops: The operations to send to the stream. Returns: - `True` if the patch was sent successfully, False if the stream is closed. + `True` if the patch was sent successfully, `False` if the stream is closed. """ # We will likely want to wrap this in try / except at some point # to handle exceptions that might arise at run time. @@ -365,13 +387,13 @@ class LogStreamCallbackHandler(BaseTracer, _StreamingCallbackHandler): yield chunk def include_run(self, run: Run) -> bool: - """Check if a Run should be included in the log. + """Check if a `Run` should be included in the log. Args: - run: The Run to check. + run: The `Run` to check. Returns: - `True` if the run should be included, `False` otherwise. + `True` if the `Run` should be included, `False` otherwise.
""" if run.id == self.root_id: return False @@ -466,7 +488,7 @@ class LogStreamCallbackHandler(BaseTracer, _StreamingCallbackHandler): ) def _on_run_update(self, run: Run) -> None: - """Finish a run.""" + """Finish a `Run`.""" try: index = self._key_map_by_run_id.get(run.id) @@ -542,18 +564,17 @@ class LogStreamCallbackHandler(BaseTracer, _StreamingCallbackHandler): def _get_standardized_inputs( run: Run, schema_format: Literal["original", "streaming_events"] ) -> Any: - """Extract standardized inputs from a run. + """Extract standardized inputs from a `Run`. Standardizes the inputs based on the type of the runnable used. Args: - run: Run object + run: `Run` object schema_format: The schema format to use. Returns: - Valid inputs are only dict. By conventions, inputs always represented - invocation using named arguments. - None means that the input is not yet known! + Valid inputs are only dict. By convention, inputs always represent an invocation + using named arguments. `None` means that the input is not yet known! """ if schema_format == "original": msg = ( @@ -593,7 +614,7 @@ def _get_standardized_outputs( schema_format: The schema format to use. Returns: - An output if returned, otherwise a None + An output if returned, otherwise `None`. """ outputs = load(run.outputs, allowed_objects="all") if schema_format == "original": @@ -652,18 +673,18 @@ async def _astream_log_implementation( """Implementation of astream_log for a given runnable. The implementation has been factored out (at least temporarily) as both - astream_log and astream_events relies on it. + `astream_log` and `astream_events` rely on it. Args: runnable: The runnable to run in streaming mode. value: The input to the runnable. config: The config to pass to the runnable. stream: The stream to send the run logs to. - diff: Whether to yield run log patches (True) or full run logs (False). - with_streamed_output_list: Whether to include a list of all streamed - outputs in each patch.
If `False`, only the final output will be included - in the patches. - **kwargs: Additional keyword arguments to pass to the runnable. + diff: Whether to yield run log patches (`True`) or full run logs (`False`). + with_streamed_output_list: Whether to include a list of all streamed outputs in + each patch. If `False`, only the final output will be included in the + patches. + **kwargs: Additional keyword arguments to pass to the `Runnable`. Raises: ValueError: If the callbacks in the config are of an unexpected type. diff --git a/libs/core/langchain_core/tracers/memory_stream.py b/libs/core/langchain_core/tracers/memory_stream.py index 0a9facf17af..42e74fb00d9 100644 --- a/libs/core/langchain_core/tracers/memory_stream.py +++ b/libs/core/langchain_core/tracers/memory_stream.py @@ -2,11 +2,10 @@ This module provides a way to communicate between two co-routines using a memory channel. The writer and reader can be in the same event loop or in different event -loops. When they're in different event loops, they will also be in different -threads. +loops. When they're in different event loops, they will also be in different threads. -Useful in situations when there's a mix of synchronous and asynchronous -used in the code. +Useful in situations when there's a mix of synchronous and asynchronous used in the +code. """ import asyncio @@ -24,9 +23,12 @@ class _SendStream(Generic[T]): """Create a writer for the queue and done object. Args: - reader_loop: The event loop to use for the writer. This loop will be used - to schedule the writes to the queue. - queue: The queue to write to. This is an asyncio queue. + reader_loop: The event loop to use for the writer. + + This loop will be used to schedule the writes to the queue. + queue: The queue to write to. + + This is an asyncio queue. done: Special sentinel object to indicate that the writer is done. """ self._reader_loop = reader_loop @@ -52,8 +54,8 @@ class _SendStream(Generic[T]): item: The item to write to the queue. 
Raises: - RuntimeError: If the event loop is already closed when trying to write - to the queue. + RuntimeError: If the event loop is already closed when trying to write to + the queue. """ try: self._reader_loop.call_soon_threadsafe(self._queue.put_nowait, item) @@ -71,8 +73,8 @@ class _SendStream(Generic[T]): This is a non-blocking call. Raises: - RuntimeError: If the event loop is already closed when trying to write - to the queue. + RuntimeError: If the event loop is already closed when trying to write to + the queue. """ try: self._reader_loop.call_soon_threadsafe(self._queue.put_nowait, self._done) @@ -85,8 +87,8 @@ class _ReceiveStream(Generic[T]): def __init__(self, queue: Queue, done: object) -> None: """Create a reader for the queue and done object. - This reader should be used in the same loop as the loop that was passed - to the channel. + This reader should be used in the same loop as the loop that was passed to the + channel. """ self._queue = queue self._done = done @@ -105,22 +107,23 @@ class _MemoryStream(Generic[T]): """Stream data from a writer to a reader even if they are in different threads. Uses asyncio queues to communicate between two co-routines. This implementation - should work even if the writer and reader co-routines belong to two different - event loops (e.g. one running from an event loop in the main thread - and the other running in an event loop in a background thread). + should work even if the writer and reader co-routines belong to two different event + loops (e.g. one running from an event loop in the main thread and the other running + in an event loop in a background thread). This implementation is meant to be used with a single writer and a single reader. - This is an internal implementation to LangChain. Please do not use it directly. + This is an internal implementation to LangChain. Do not use it directly. """ def __init__(self, loop: AbstractEventLoop) -> None: """Create a channel for the given loop. 
Args: - loop: The event loop to use for the channel. The reader is assumed - to be running in the same loop as the one passed to this constructor. - This will NOT be validated at run time. + loop: The event loop to use for the channel. + + The reader is assumed to be running in the same loop as the one passed + to this constructor. This will NOT be validated at run time. """ self._loop = loop self._queue: asyncio.Queue = asyncio.Queue(maxsize=0) diff --git a/libs/core/langchain_core/tracers/root_listeners.py b/libs/core/langchain_core/tracers/root_listeners.py index 923cd1c16f6..8d1c90612e9 100644 --- a/libs/core/langchain_core/tracers/root_listeners.py +++ b/libs/core/langchain_core/tracers/root_listeners.py @@ -76,7 +76,7 @@ class RootListenersTracer(BaseTracer): class AsyncRootListenersTracer(AsyncBaseTracer): - """Async Tracer that calls listeners on run start, end, and error.""" + """Async tracer that calls listeners on run start, end, and error.""" log_missing_parent = False """Whether to log a warning if the parent is missing.""" diff --git a/libs/core/langchain_core/tracers/run_collector.py b/libs/core/langchain_core/tracers/run_collector.py index da6b5ecc235..3d389af2721 100644 --- a/libs/core/langchain_core/tracers/run_collector.py +++ b/libs/core/langchain_core/tracers/run_collector.py @@ -17,11 +17,10 @@ class RunCollectorCallbackHandler(BaseTracer): name: str = "run-collector_callback_handler" def __init__(self, example_id: UUID | str | None = None, **kwargs: Any) -> None: - """Initialize the RunCollectorCallbackHandler. + """Initialize the `RunCollectorCallbackHandler`. Args: - example_id: The ID of the example being traced. (default: None). - It can be either a UUID or a string. + example_id: The ID of the example being traced. **kwargs: Additional keyword arguments. 
""" super().__init__(**kwargs) @@ -31,7 +30,7 @@ class RunCollectorCallbackHandler(BaseTracer): self.traced_runs: list[Run] = [] def _persist_run(self, run: Run) -> None: - """Persist a run by adding it to the traced_runs list. + """Persist a run by adding it to the `traced_runs` list. Args: run: The run to be persisted. diff --git a/libs/core/langchain_core/tracers/stdout.py b/libs/core/langchain_core/tracers/stdout.py index 119b9127fd6..b47bb512a6e 100644 --- a/libs/core/langchain_core/tracers/stdout.py +++ b/libs/core/langchain_core/tracers/stdout.py @@ -31,11 +31,11 @@ def elapsed(run: Any) -> str: """Get the elapsed time of a run. Args: - run: any object with a start_time and end_time attribute. + run: any object with a `start_time` and `end_time` attribute. Returns: - A string with the elapsed time in seconds or - milliseconds if time is less than a second. + A string with the elapsed time in seconds or milliseconds if time is less than a + second. """ elapsed_time = run.end_time - run.start_time @@ -49,10 +49,13 @@ class FunctionCallbackHandler(BaseTracer): """Tracer that calls a function with a single str parameter.""" name: str = "function_callback_handler" - """The name of the tracer. This is used to identify the tracer in the logs.""" + """The name of the tracer. + + This is used to identify the tracer in the logs. + """ def __init__(self, function: Callable[[str], None], **kwargs: Any) -> None: - """Create a FunctionCallbackHandler. + """Create a `FunctionCallbackHandler`. Args: function: The callback function to call. diff --git a/libs/core/langchain_core/utils/aiter.py b/libs/core/langchain_core/utils/aiter.py index e00dc99a923..e5dc0d1aea2 100644 --- a/libs/core/langchain_core/utils/aiter.py +++ b/libs/core/langchain_core/utils/aiter.py @@ -38,21 +38,23 @@ _no_default = object() def py_anext( iterator: AsyncIterator[T], default: T | Any = _no_default ) -> Awaitable[T | Any | None]: - """Pure-Python implementation of anext() for testing purposes. 
+ """Pure-Python implementation of `anext()` for testing purposes. - Closely matches the builtin anext() C implementation. - Can be used to compare the built-in implementation of the inner - coroutines machinery to C-implementation of __anext__() and send() - or throw() on the returned generator. + Closely matches the builtin `anext()` C implementation. + + Can be used to compare the built-in implementation of the inner coroutines machinery + to C-implementation of `__anext__()` and `send()` or `throw()` on the returned + generator. Args: iterator: The async iterator to advance. default: The value to return if the iterator is exhausted. - If not provided, a StopAsyncIteration exception is raised. + + If not provided, a `StopAsyncIteration` exception is raised. Returns: - The next value from the iterator, or the default value - if the iterator is exhausted. + The next value from the iterator, or the default value if the iterator is + exhausted. Raises: TypeError: If the iterator is not an async iterator. @@ -109,8 +111,10 @@ async def tee_peer( """An individual iterator of a `tee`. This function is a generator that yields items from the shared iterator - `iterator`. It buffers items until the least advanced iterator has - yielded them as well. The buffer is shared with all other peers. + `iterator`. It buffers items until the least advanced iterator has yielded them as + well. + + The buffer is shared with all other peers. Args: iterator: The shared iterator. @@ -159,9 +163,11 @@ class Tee(Generic[T]): This splits a single `iterable` into multiple iterators, each providing the same items in the same order. - All child iterators may advance separately but share the same items - from `iterable` -- when the most advanced iterator retrieves an item, - it is buffered until the least advanced iterator has yielded it as well. 
+ + All child iterators may advance separately but share the same items from `iterable` + -- when the most advanced iterator retrieves an item, it is buffered until the least + advanced iterator has yielded it as well. + A `tee` works lazily and can handle an infinite `iterable`, provided that all iterators advance. @@ -172,23 +178,24 @@ class Tee(Generic[T]): return a.map(operator.sub, previous, current) ``` - Unlike `itertools.tee`, `.tee` returns a custom type instead - of a :py`tuple`. Like a tuple, it can be indexed, iterated and unpacked - to get the child iterators. In addition, its `.tee.aclose` method - immediately closes all children, and it can be used in an `async with` context - for the same effect. + Unlike `itertools.tee`, `.tee` returns a custom type instead of a `tuple`. Like a + tuple, it can be indexed, iterated and unpacked to get the child iterators. In + addition, its `.tee.aclose` method immediately closes all children, and it can be + used in an `async with` context for the same effect. - If `iterable` is an iterator and read elsewhere, `tee` will *not* - provide these items. Also, `tee` must internally buffer each item until the - last iterator has yielded it; if the most and least advanced iterator differ - by most data, using a :py`list` is more efficient (but not lazy). + If `iterable` is an iterator and read elsewhere, `tee` will *not* provide these + items. Also, `tee` must internally buffer each item until the last iterator has + yielded it; if the most and least advanced iterator differ by most data, using a + `list` is more efficient (but not lazy). + + If the underlying iterable is concurrency safe (`anext` may be awaited concurrently) + the resulting iterators are concurrency safe as well. Otherwise, the iterators are + safe if there is only ever one single "most advanced" iterator. - If the underlying iterable is concurrency safe (`anext` may be awaited - concurrently) the resulting iterators are concurrency safe as well. 
Otherwise, - the iterators are safe if there is only ever one single "most advanced" iterator. To enforce sequential use of `anext`, provide a `lock` - - e.g. an :py`asyncio.Lock` instance in an :py:mod:`asyncio` application - - and access is automatically synchronised. + + - e.g. an `asyncio.Lock` instance in an `asyncio` application - and access is + automatically synchronised. """ @@ -256,7 +263,7 @@ class Tee(Generic[T]): """Close all child iterators. Returns: - False, exceptions not suppressed. + `False`, exceptions not suppressed. """ await self.aclose() return False @@ -271,7 +278,7 @@ atee = Tee class aclosing(AbstractAsyncContextManager): # noqa: N801 - """Async context manager to wrap an AsyncGenerator that has a `aclose()` method. + """Async context manager to wrap an `AsyncGenerator` that has a `aclose()` method. Code like this: @@ -280,7 +287,7 @@ class aclosing(AbstractAsyncContextManager): # noqa: N801 ``` - is equivalent to this: + ...is equivalent to this: ```python agen = .fetch() diff --git a/libs/core/langchain_core/utils/env.py b/libs/core/langchain_core/utils/env.py index 2f103ee5403..f8eab221bfe 100644 --- a/libs/core/langchain_core/utils/env.py +++ b/libs/core/langchain_core/utils/env.py @@ -33,8 +33,9 @@ def get_from_dict_or_env( Args: data: The dictionary to look up the key in. - key: The key to look up in the dictionary. This can be a list of keys to try - in order. + key: The key to look up in the dictionary. + + This can be a list of keys to try in order. env_key: The environment variable to look up if the key is not in the dictionary. 
default: The default value to return if the key is not in the dictionary diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 6850a5648ee..c80f43cfd9d 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -67,8 +67,10 @@ class FunctionDescription(TypedDict): name: str """The name of the function.""" + description: str """A description of the function.""" + parameters: dict """The parameters of the function.""" @@ -78,22 +80,23 @@ class ToolDescription(TypedDict): type: Literal["function"] """The type of the tool.""" + function: FunctionDescription """The function description.""" def _rm_titles(kv: dict, prev_key: str = "") -> dict: - """Recursively removes "title" fields from a JSON schema dictionary. + """Recursively removes `'title'` fields from a JSON schema dictionary. - Remove "title" fields from the input JSON schema dictionary, - except when a "title" appears within a property definition under "properties". + Remove `'title'` fields from the input JSON schema dictionary, + except when a `'title'` appears within a property definition under `'properties'`. Args: kv: The input JSON schema as a dictionary. prev_key: The key from the parent dictionary, used to identify context. Returns: - A new dictionary with appropriate "title" fields removed. + A new dictionary with appropriate `'title'` fields removed. """ new_kv = {} @@ -127,10 +130,12 @@ def _convert_json_schema_to_openai_function( Args: schema: The JSON schema to convert. - name: The name of the function. If not provided, the title of the schema will be - used. - description: The description of the function. If not provided, the description - of the schema will be used. + name: The name of the function. + + If not provided, the title of the schema will be used. + description: The description of the function. 
+ + If not provided, the description of the schema will be used. rm_titles: Whether to remove titles from the schema. Returns: @@ -161,10 +166,12 @@ def _convert_pydantic_to_openai_function( Args: model: The Pydantic model to convert. - name: The name of the function. If not provided, the title of the schema will be - used. - description: The description of the function. If not provided, the description - of the schema will be used. + name: The name of the function. + + If not provided, the title of the schema will be used. + description: The description of the function. + + If not provided, the description of the schema will be used. rm_titles: Whether to remove titles from the schema. Raises: @@ -196,8 +203,8 @@ def _convert_python_function_to_openai_function( """Convert a Python function to an OpenAI function-calling API compatible dict. Assumes the Python function has type hints and a docstring with a description. If - the docstring has Google Python style argument descriptions, these will be - included as well. + the docstring has Google Python style argument descriptions, these will be included + as well. Args: function: The Python function to convert. @@ -359,20 +366,20 @@ def convert_to_openai_function( """Convert a raw function/class to an OpenAI function. Args: - function: - A dictionary, Pydantic `BaseModel` class, `TypedDict` class, a LangChain - `Tool` object, or a Python function. If a dictionary is passed in, it is - assumed to already be a valid OpenAI function, a JSON schema with - top-level `title` key specified, an Anthropic format tool, or an Amazon - Bedrock Converse format tool. - strict: - If `True`, model output is guaranteed to exactly match the JSON Schema - provided in the function definition. If `None`, `strict` argument will not - be included in function definition. + function: A dictionary, Pydantic `BaseModel` class, `TypedDict` class, a + LangChain `Tool` object, or a Python function. 
+ + If a dictionary is passed in, it is assumed to already be a valid OpenAI + function, a JSON schema with top-level `title` key specified, an Anthropic + format tool, or an Amazon Bedrock Converse format tool. + strict: If `True`, model output is guaranteed to exactly match the JSON Schema + provided in the function definition. + + If `None`, `strict` argument will not be included in function definition. Returns: A dict version of the passed in function which is compatible with the OpenAI - function-calling API. + function-calling API. Raises: ValueError: If function is not in a supported format. @@ -498,19 +505,20 @@ def convert_to_openai_tool( [OpenAI tool schema reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-tools) Args: - tool: - Either a dictionary, a `pydantic.BaseModel` class, Python function, or - `BaseTool`. If a dictionary is passed in, it is assumed to already be a - valid OpenAI function, a JSON schema with top-level `title` key specified, - an Anthropic format tool, or an Amazon Bedrock Converse format tool. - strict: - If `True`, model output is guaranteed to exactly match the JSON Schema - provided in the function definition. If `None`, `strict` argument will not - be included in tool definition. + tool: Either a dictionary, a `pydantic.BaseModel` class, Python function, or + `BaseTool`. + + If a dictionary is passed in, it is assumed to already be a valid OpenAI + function, a JSON schema with top-level `title` key specified, an Anthropic + format tool, or an Amazon Bedrock Converse format tool. + strict: If `True`, model output is guaranteed to exactly match the JSON Schema + provided in the function definition. + + If `None`, `strict` argument will not be included in tool definition. Returns: - A dict version of the passed in tool which is compatible with the - OpenAI tool-calling API. + A dict version of the passed in tool which is compatible with the OpenAI + tool-calling API. !!! 
warning "Behavior changed in `langchain-core` 0.3.16" @@ -559,8 +567,9 @@ def convert_to_json_schema( Args: schema: The schema to convert. strict: If `True`, model output is guaranteed to exactly match the JSON Schema - provided in the function definition. If `None`, `strict` argument will not - be included in function definition. + provided in the function definition. + + If `None`, `strict` argument will not be included in function definition. Raises: ValueError: If the input is not a valid OpenAI-format tool. @@ -621,8 +630,10 @@ def tool_example_to_messages( input: The user input tool_calls: Tool calls represented as Pydantic BaseModels tool_outputs: Tool call outputs. - Does not need to be provided. If not provided, a placeholder value - will be inserted. + + Does not need to be provided. + + If not provided, a placeholder value will be inserted. ai_response: If provided, content for a final `AIMessage`. Returns: @@ -664,6 +675,7 @@ def tool_example_to_messages( ``` """ messages: list[BaseMessage] = [HumanMessage(content=input)] + openai_tool_calls = [ { "id": str(uuid.uuid4()), diff --git a/libs/core/langchain_core/utils/html.py b/libs/core/langchain_core/utils/html.py index e928cbd51d2..4798b02ce78 100644 --- a/libs/core/langchain_core/utils/html.py +++ b/libs/core/langchain_core/utils/html.py @@ -8,6 +8,7 @@ from urllib.parse import urljoin, urlparse logger = logging.getLogger(__name__) PREFIXES_TO_IGNORE = ("javascript:", "mailto:", "#") + SUFFIXES_TO_IGNORE = ( ".css", ".js", @@ -28,12 +29,15 @@ SUFFIXES_TO_IGNORE = ( ".pptx", ".pptm", ) + SUFFIXES_TO_IGNORE_REGEX = ( "(?!" + "|".join([re.escape(s) + r"[\#'\"]" for s in SUFFIXES_TO_IGNORE]) + ")" ) + PREFIXES_TO_IGNORE_REGEX = ( "(?!" 
+ "|".join([re.escape(s) for s in PREFIXES_TO_IGNORE]) + ")" ) + DEFAULT_LINK_REGEX = ( rf"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)[\#'\"]" ) @@ -68,8 +72,8 @@ def extract_sub_links( """Extract all links from a raw HTML string and convert into absolute paths. Args: - raw_html: original HTML. - url: the url of the HTML. + raw_html: Original HTML. + url: The url of the HTML. base_url: the base URL to check for outside links against. pattern: Regex to use for extracting links from raw HTML. prevent_outside: If `True`, ignore external links which are not children diff --git a/libs/core/langchain_core/utils/input.py b/libs/core/langchain_core/utils/input.py index 9a058a3604b..d97d4006d33 100644 --- a/libs/core/langchain_core/utils/input.py +++ b/libs/core/langchain_core/utils/input.py @@ -67,6 +67,7 @@ def print_text( """Print text with highlighting and no end characters. If a color is provided, the text will be printed in that color. + If a file is provided, the text will be written to that file. Args: diff --git a/libs/core/langchain_core/utils/interactive_env.py b/libs/core/langchain_core/utils/interactive_env.py index f86fe0763b2..f0e7ea8b202 100644 --- a/libs/core/langchain_core/utils/interactive_env.py +++ b/libs/core/langchain_core/utils/interactive_env.py @@ -7,6 +7,6 @@ def is_interactive_env() -> bool: """Determine if running within IPython or Jupyter. Returns: - True if running in an interactive environment, `False` otherwise. + `True` if running in an interactive environment, `False` otherwise. """ return hasattr(sys, "ps2") diff --git a/libs/core/langchain_core/utils/iter.py b/libs/core/langchain_core/utils/iter.py index a4f9b0e1ade..b24c5f213ad 100644 --- a/libs/core/langchain_core/utils/iter.py +++ b/libs/core/langchain_core/utils/iter.py @@ -42,9 +42,9 @@ def tee_peer( ) -> Generator[T, None, None]: """An individual iterator of a `.tee`. - This function is a generator that yields items from the shared iterator - `iterator`. 
It buffers items until the least advanced iterator has - yielded them as well. The buffer is shared with all other peers. + This function is a generator that yields items from the shared iterator `iterator`. + It buffers items until the least advanced iterator has yielded them as well. The + buffer is shared with all other peers. Args: iterator: The shared iterator. @@ -91,13 +91,13 @@ def tee_peer( class Tee(Generic[T]): """Create `n` separate asynchronous iterators over `iterable`. - This splits a single `iterable` into multiple iterators, each providing - the same items in the same order. - All child iterators may advance separately but share the same items - from `iterable` -- when the most advanced iterator retrieves an item, - it is buffered until the least advanced iterator has yielded it as well. - A `tee` works lazily and can handle an infinite `iterable`, provided - that all iterators advance. + This splits a single `iterable` into multiple iterators, each providing the same + items in the same order. + + All child iterators may advance separately but share the same items from `iterable` + -- when the most advanced iterator retrieves an item, it is buffered until the least + advanced iterator has yielded it as well. A `tee` works lazily and can handle an + infinite `iterable`, provided that all iterators advance. ```python async def derivative(sensor_data): @@ -106,23 +106,23 @@ class Tee(Generic[T]): return a.map(operator.sub, previous, current) ``` - Unlike `itertools.tee`, `.tee` returns a custom type instead - of a :py`tuple`. Like a tuple, it can be indexed, iterated and unpacked - to get the child iterators. In addition, its `.tee.aclose` method - immediately closes all children, and it can be used in an `async with` context - for the same effect. + Unlike `itertools.tee`, `.tee` returns a custom type instead of a `tuple`. Like a + tuple, it can be indexed, iterated and unpacked to get the child iterators. 
In + addition, its `.tee.aclose` method immediately closes all children, and it can be + used in an `async with` context for the same effect. - If `iterable` is an iterator and read elsewhere, `tee` will *not* - provide these items. Also, `tee` must internally buffer each item until the - last iterator has yielded it; if the most and least advanced iterator differ - by most data, using a :py`list` is more efficient (but not lazy). + If `iterable` is an iterator and read elsewhere, `tee` will *not* provide these + items. Also, `tee` must internally buffer each item until the last iterator has + yielded it; if the most and least advanced iterator differ by most data, using a + `list` is more efficient (but not lazy). - If the underlying iterable is concurrency safe (`anext` may be awaited - concurrently) the resulting iterators are concurrency safe as well. Otherwise, - the iterators are safe if there is only ever one single "most advanced" iterator. - To enforce sequential use of `anext`, provide a `lock` - - e.g. an :py`asyncio.Lock` instance in an :py:mod:`asyncio` application - - and access is automatically synchronised. + If the underlying iterable is concurrency safe (`anext` may be awaited concurrently) + the resulting iterators are concurrency safe as well. Otherwise, the iterators are + safe if there is only ever one single "most advanced" iterator. To enforce + sequential use of `anext`, provide a `lock` + + - e.g., an `asyncio.Lock` instance in an `asyncio` application - and access is + automatically synchronised. """ @@ -176,7 +176,7 @@ class Tee(Generic[T]): yield from self._children def __enter__(self) -> "Tee[T]": - """Return Tee instance.""" + """Return `Tee` instance.""" return self def __exit__( @@ -188,7 +188,7 @@ class Tee(Generic[T]): """Close all child iterators. Returns: - False (exception not suppressed). + `False` (exception not suppressed). 
""" self.close() return False @@ -207,7 +207,9 @@ def batch_iterate(size: int | None, iterable: Iterable[T]) -> Iterator[list[T]]: """Utility batching function. Args: - size: The size of the batch. If `None`, returns a single batch. + size: The size of the batch. + + If `None`, returns a single batch. iterable: The iterable to batch. Yields: diff --git a/libs/core/langchain_core/utils/json.py b/libs/core/langchain_core/utils/json.py index 14ff8a4239d..a836ffc4e61 100644 --- a/libs/core/langchain_core/utils/json.py +++ b/libs/core/langchain_core/utils/json.py @@ -33,10 +33,9 @@ def _replace_new_line(match: re.Match[str]) -> str: def _custom_parser(multiline_string: str | bytes | bytearray) -> str: r"""Custom parser for multiline strings. - The LLM response for `action_input` may be a multiline - string containing unescaped newlines, tabs or quotes. This function - replaces those characters with their escaped counterparts. - (newlines in JSON must be double-escaped: `\\n`). + The LLM response for `action_input` may be a multiline string containing unescaped + newlines, tabs or quotes. This function replaces those characters with their escaped + counterparts. (newlines in JSON must be double-escaped: `\\n`). Returns: The modified string with escaped newlines, tabs and quotes. @@ -147,7 +146,7 @@ def parse_json_markdown( Args: json_string: The Markdown string. - parser: The parser to use. Defaults to `parse_partial_json`. + parser: The parser to use. Returns: The parsed JSON object as a Python dictionary. @@ -179,8 +178,6 @@ def _parse_json( json_str: The JSON string to parse. parser: Optional custom parser function. - Defaults to `parse_partial_json`. - Returns: Parsed JSON object. 
""" diff --git a/libs/core/langchain_core/utils/json_schema.py b/libs/core/langchain_core/utils/json_schema.py index f07450556de..d1ff1de5fcb 100644 --- a/libs/core/langchain_core/utils/json_schema.py +++ b/libs/core/langchain_core/utils/json_schema.py @@ -93,8 +93,8 @@ def _dereference_refs_helper( This function processes JSON Schema objects containing $ref properties by resolving the references and merging any additional properties. It handles: - - Pure $ref objects: {"$ref": "#/path/to/definition"} - - Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...} + - Pure `$ref` objects: `{"$ref": "#/path/to/definition"}` + - Mixed `$ref` objects: `{"$ref": "#/path", "title": "Custom Title", ...}` - Circular references by breaking cycles and preserving non-ref properties Args: @@ -102,10 +102,10 @@ def _dereference_refs_helper( full_schema: The complete schema containing all definitions processed_refs: Set tracking currently processing refs (for cycle detection) skip_keys: Keys under which to skip recursion - shallow_refs: If `True`, only break cycles; if False, deep-inline all refs + shallow_refs: If `True`, only break cycles; if `False`, deep-inline all refs Returns: - The object with $ref properties resolved and merged with other properties. + The object with `$ref` properties resolved and merged with other properties. """ if processed_refs is None: processed_refs = set() @@ -217,6 +217,7 @@ def dereference_refs( Returns: A new dictionary with all $ref references resolved and inlined. + The original `schema_obj` is not modified. Examples: diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py index cce84753a2f..54674c262fb 100644 --- a/libs/core/langchain_core/utils/mustache.py +++ b/libs/core/langchain_core/utils/mustache.py @@ -201,30 +201,29 @@ def tokenize( ) -> Iterator[tuple[str, str]]: """Tokenize a mustache template. - Tokenizes a mustache template in a generator fashion, - using file-like objects. 
It also accepts a string containing - the template. + Tokenizes a mustache template in a generator fashion, using file-like objects. It + also accepts a string containing the template. Args: template: a file-like object, or a string of a mustache template def_ldel: The default left delimiter - ("{{" by default, as in spec compliant mustache) + (`'{{'` by default, as in spec compliant mustache) def_rdel: The default right delimiter - ("}}" by default, as in spec compliant mustache) + (`'}}'` by default, as in spec compliant mustache) Yields: - Mustache tags in the form of a tuple (tag_type, tag_key) - where tag_type is one of: + Mustache tags in the form of a tuple `(tag_type, tag_key)` where `tag_type` is + one of: - * literal - * section - * inverted section - * end - * partial - * no escape + * literal + * section + * inverted section + * end + * partial + * no escape - and tag_key is either the key or in the case of a literal tag, - the literal itself. + ...and `tag_key` is either the key or in the case of a literal tag, the + literal itself. Raises: ChevronError: If there is a syntax error in the template. @@ -483,21 +482,27 @@ def render( template: A file-like object or a string containing the template. data: A python dictionary with your data scope. partials_path: The path to where your partials are stored. - If set to None, then partials won't be loaded from the file system - (defaults to '.'). + + If set to None, then partials won't be loaded from the file system + + Defaults to `'.'`. partials_ext: The extension that you want the parser to look for - (defaults to 'mustache'). + + Defaults to `'mustache'`. partials_dict: A python dictionary which will be search for partials - before the filesystem is. {'include': 'foo'} is the same - as a file called include.mustache - (defaults to {}). + before the filesystem is. + + `{'include': 'foo'}` is the same as a file called include.mustache + (defaults to `{}`). 
padding: This is for padding partials, and shouldn't be used (but can be if you really want to). def_ldel: The default left delimiter - ("{{" by default, as in spec compliant mustache). + + (`'{{'` by default, as in spec compliant mustache). def_rdel: The default right delimiter - ("}}" by default, as in spec compliant mustache). - scopes: The list of scopes that get_key will look through. + + (`'}}'` by default, as in spec compliant mustache). + scopes: The list of scopes that `get_key` will look through. warn: Log a warning when a template substitution isn't found in the data keep: Keep unreplaced tags when a substitution isn't found in the data. diff --git a/libs/core/langchain_core/utils/pydantic.py b/libs/core/langchain_core/utils/pydantic.py index fcf70615a9d..aa47ae37f5d 100644 --- a/libs/core/langchain_core/utils/pydantic.py +++ b/libs/core/langchain_core/utils/pydantic.py @@ -55,7 +55,7 @@ PYDANTIC_VERSION = version.parse(pydantic.__version__) def get_pydantic_major_version() -> int: """DEPRECATED - Get the major version of Pydantic. - Use PYDANTIC_VERSION.major instead. + Use `PYDANTIC_VERSION.major` instead. Returns: The major version of Pydantic. @@ -447,6 +447,7 @@ def create_model( Args: model_name: The name of the model. module_name: The name of the module where the model is defined. + This is used by Pydantic to resolve any forward references. **field_definitions: The field definitions for the model. @@ -515,13 +516,15 @@ def create_model_v2( ) -> type[BaseModel]: """Create a Pydantic model with the given field definitions. - Attention: - Please do not use outside of langchain packages. This API - is subject to change at any time. + !!! warning + + Do not use outside of langchain packages. This API is subject to change at any + time. Args: model_name: The name of the model. module_name: The name of the module where the model is defined. + This is used by Pydantic to resolve any forward references. 
field_definitions: The field definitions for the model. root: Type for a root model (`RootModel`) diff --git a/libs/core/langchain_core/utils/strings.py b/libs/core/langchain_core/utils/strings.py index 00c565a9360..357b16f8e16 100644 --- a/libs/core/langchain_core/utils/strings.py +++ b/libs/core/langchain_core/utils/strings.py @@ -49,16 +49,16 @@ def comma_list(items: Iterable[Any]) -> str: def sanitize_for_postgres(text: str, replacement: str = "") -> str: r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL. - PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause - psycopg.DataError when inserting documents. This function removes or replaces + PostgreSQL text fields cannot contain `NUL (0x00)` bytes, which can cause + `psycopg.DataError` when inserting documents. This function removes or replaces such characters to ensure compatibility. Args: text: The text to sanitize. - replacement: String to replace NUL bytes with. + replacement: String to replace `NUL` bytes with. Returns: - The sanitized text with NUL bytes removed or replaced. + The sanitized text with `NUL` bytes removed or replaced. Example: >>> sanitize_for_postgres("Hello\\x00world") diff --git a/libs/core/langchain_core/utils/utils.py b/libs/core/langchain_core/utils/utils.py index cb22c049dd8..e8a5ed999a2 100644 --- a/libs/core/langchain_core/utils/utils.py +++ b/libs/core/langchain_core/utils/utils.py @@ -28,8 +28,7 @@ def xor_args(*arg_groups: tuple[str, ...]) -> Callable: *arg_groups: Groups of mutually exclusive keyword args. Returns: - Decorator that validates the specified keyword args - are mutually exclusive. + Decorator that validates the specified keyword args are mutually exclusive. """ def decorator(func: Callable) -> Callable: @@ -268,6 +267,7 @@ def build_extra_kwargs( """Build extra kwargs from values and extra_kwargs. !!! danger "DON'T USE" + Kept for backwards-compatibility but should never have been public. 
Use the internal `_build_model_kwargs` function instead. @@ -311,13 +311,13 @@ def build_extra_kwargs( def convert_to_secret_str(value: SecretStr | str) -> SecretStr: - """Convert a string to a SecretStr if needed. + """Convert a string to a `SecretStr` if needed. Args: value: The value to convert. Returns: - The SecretStr value. + The `SecretStr` value. """ if isinstance(value, SecretStr): return value @@ -375,17 +375,19 @@ def from_env( """Create a factory method that gets a value from an environment variable. Args: - key: The environment variable to look up. If a list of keys is provided, - the first key found in the environment will be used. - If no key is found, the default value will be used if set, + key: The environment variable to look up. + + If a list of keys is provided, the first key found in the environment will + be used. If no key is found, the default value will be used if set, otherwise an error will be raised. default: The default value to return if the environment variable is not set. - error_message: the error message which will be raised if the key is not found + error_message: The error message which will be raised if the key is not found and no default value is provided. + This will be raised as a ValueError. Returns: - factory method that will look up the value from the environment. + Factory method that will look up the value from the environment. """ def get_from_env_fn() -> str | None: @@ -449,12 +451,13 @@ def secret_from_env( Args: key: The environment variable to look up. default: The default value to return if the environment variable is not set. - error_message: the error message which will be raised if the key is not found + error_message: The error message which will be raised if the key is not found and no default value is provided. - This will be raised as a ValueError. + + This will be raised as a `ValueError`. Returns: - factory method that will look up the secret from the environment. 
+ Factory method that will look up the secret from the environment. """ def get_secret_from_env() -> SecretStr | None: @@ -496,8 +499,9 @@ LC_ID_PREFIX = "lc_run-" """Internal tracing/callback system identifier. Used for: + - Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) + gets a unique run_id (UUID) - Enables tracking parent-child relationships between operations """ diff --git a/libs/core/langchain_core/vectorstores/base.py b/libs/core/langchain_core/vectorstores/base.py index f9fccff6711..827a05cc90d 100644 --- a/libs/core/langchain_core/vectorstores/base.py +++ b/libs/core/langchain_core/vectorstores/base.py @@ -1,4 +1,4 @@ -"""**Vector store** stores embedded data and performs vector search. +"""A vector store stores embedded data and performs vector search. One of the most common ways to store and search over unstructured data is to embed it and store the resulting embedding vectors, and then query the store @@ -58,6 +58,7 @@ class VectorStore(ABC): metadatas: Optional list of metadatas associated with the texts. ids: Optional list of IDs associated with the texts. **kwargs: `VectorStore` specific parameters. + One of the kwargs should be `ids` which is a list of ids associated with the texts. @@ -402,11 +403,11 @@ class VectorStore(ABC): def _select_relevance_score_fn(self) -> Callable[[float], float]: """The 'correct' relevance function. - may differ depending on a few things, including: + May differ depending on a few things, including: - - the distance / similarity metric used by the VectorStore - - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) - - embedding dimensionality + - The distance / similarity metric used by the VectorStore + - The scale of your embeddings (OpenAI's are unit normed. Many others are not!) + - Embedding dimensionality - etc. Vectorstores should define their own selection-based method of relevance. 
@@ -672,9 +673,9 @@ class VectorStore(ABC): query: Text to look up documents similar to. k: Number of `Document` objects to return. fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm. - lambda_mult: Number between `0` and `1` that determines the degree - of diversity among the results with `0` corresponding - to maximum diversity and `1` to minimum diversity. + lambda_mult: Number between `0` and `1` that determines the degree of + diversity among the results with `0` corresponding to maximum diversity + and `1` to minimum diversity. **kwargs: Arguments to pass to the search method. Returns: @@ -699,9 +700,9 @@ class VectorStore(ABC): query: Text to look up documents similar to. k: Number of `Document` objects to return. fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm. - lambda_mult: Number between `0` and `1` that determines the degree - of diversity among the results with `0` corresponding - to maximum diversity and `1` to minimum diversity. + lambda_mult: Number between `0` and `1` that determines the degree of + diversity among the results with `0` corresponding to maximum diversity + and `1` to minimum diversity. **kwargs: Arguments to pass to the search method. Returns: @@ -737,9 +738,9 @@ class VectorStore(ABC): embedding: Embedding to look up documents similar to. k: Number of `Document` objects to return. fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm. - lambda_mult: Number between `0` and `1` that determines the degree - of diversity among the results with `0` corresponding - to maximum diversity and `1` to minimum diversity. + lambda_mult: Number between `0` and `1` that determines the degree of + diversity among the results with `0` corresponding to maximum diversity + and `1` to minimum diversity. **kwargs: Arguments to pass to the search method. Returns: @@ -764,9 +765,9 @@ class VectorStore(ABC): embedding: Embedding to look up documents similar to. 
k: Number of `Document` objects to return. fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm. - lambda_mult: Number between `0` and `1` that determines the degree - of diversity among the results with `0` corresponding - to maximum diversity and `1` to minimum diversity. + lambda_mult: Number between `0` and `1` that determines the degree of + diversity among the results with `0` corresponding to maximum diversity + and `1` to minimum diversity. **kwargs: Arguments to pass to the search method. Returns: @@ -965,10 +966,13 @@ class VectorStoreRetriever(BaseRetriever): vectorstore: VectorStore """VectorStore to use for retrieval.""" + search_type: str = "similarity" """Type of search to perform.""" + search_kwargs: dict = Field(default_factory=dict) """Keyword arguments to pass to the search function.""" + allowed_search_types: ClassVar[Collection[str]] = ( "similarity", "similarity_score_threshold", @@ -1016,6 +1020,7 @@ class VectorStoreRetriever(BaseRetriever): kwargs_ = self.search_kwargs | kwargs ls_params = super()._get_ls_params(**kwargs_) + ls_params["ls_vector_store_provider"] = self.vectorstore.__class__.__name__ if self.vectorstore.embeddings: diff --git a/libs/core/langchain_core/vectorstores/in_memory.py b/libs/core/langchain_core/vectorstores/in_memory.py index ad893a02637..769715d116d 100644 --- a/libs/core/langchain_core/vectorstores/in_memory.py +++ b/libs/core/langchain_core/vectorstores/in_memory.py @@ -44,7 +44,8 @@ class InMemoryVectorStore(VectorStore): ``` Key init args — indexing params: - embedding_function: Embeddings + + * embedding_function: Embeddings Embedding function to use. 
Instantiate: @@ -293,7 +294,7 @@ class InMemoryVectorStore(VectorStore): k: int = 4, filter: Callable[[Document], bool] | None = None, # noqa: A002 ) -> list[tuple[Document, float, list[float]]]: - # get all docs with fixed order in list + # Get all docs with fixed order in list docs = list(self.store.values()) if filter is not None: @@ -312,7 +313,7 @@ class InMemoryVectorStore(VectorStore): similarity = cosine_similarity([embedding], [doc["vector"] for doc in docs])[0] - # get the indices ordered by similarity score + # Get the indices ordered by similarity score top_k_idx = similarity.argsort()[::-1][:k] return [ @@ -345,7 +346,7 @@ class InMemoryVectorStore(VectorStore): filter: A function to filter the documents. Returns: - A list of tuples of Document objects and their similarity scores. + A list of tuples of `Document` objects and their similarity scores. """ return [ (doc, similarity) @@ -524,7 +525,7 @@ class InMemoryVectorStore(VectorStore): **kwargs: Additional arguments to pass to the constructor. Returns: - A VectorStore object. + A `VectorStore` object. """ path_: Path = Path(path) with path_.open("r", encoding="utf-8") as f: diff --git a/libs/core/langchain_core/vectorstores/utils.py b/libs/core/langchain_core/vectorstores/utils.py index 23f0c40e6b8..551524beb3b 100644 --- a/libs/core/langchain_core/vectorstores/utils.py +++ b/libs/core/langchain_core/vectorstores/utils.py @@ -1,7 +1,9 @@ """Internal utilities for the in memory implementation of `VectorStore`. -These are part of a private API, and users should not use them directly -as they can change without notice. +!!! warning + + These are part of a private API, and users should not use them directly as they can + change without notice. """ from __future__ import annotations @@ -34,15 +36,15 @@ def _cosine_similarity(x: Matrix, y: Matrix) -> np.ndarray: """Row-wise cosine similarity between two equal-width matrices. Args: - x: A matrix of shape (n, m). - y: A matrix of shape (k, m). 
+ x: A matrix of shape `(n, m)`. + y: A matrix of shape `(k, m)`. Returns: - A matrix of shape (n, k) where each element (i, j) is the cosine similarity - between the ith row of X and the jth row of Y. + A matrix of shape `(n, k)` where each element `(i, j)` is the cosine similarity + between the `i`th row of `x` and the `j`th row of `y`. Raises: - ValueError: If the number of columns in X and Y are not the same. + ValueError: If the number of columns in `x` and `y` are not the same. ImportError: If numpy is not installed. """ if not _HAS_NUMPY: diff --git a/libs/core/scripts/check_version.py b/libs/core/scripts/check_version.py index b0f47a0296d..535deb58a3f 100644 --- a/libs/core/scripts/check_version.py +++ b/libs/core/scripts/check_version.py @@ -1,8 +1,8 @@ -"""Check version consistency between pyproject.toml and version.py. +"""Check version consistency between `pyproject.toml` and `version.py`. -This script validates that the version defined in pyproject.toml matches -the VERSION variable in langchain_core/version.py. Intended for use as -a pre-commit hook to prevent version mismatches. +This script validates that the version defined in pyproject.toml matches the `VERSION` +variable in `langchain_core/version.py`. Intended for use as a pre-commit hook to +prevent version mismatches.
""" import re @@ -11,14 +11,14 @@ from pathlib import Path def get_pyproject_version(pyproject_path: Path) -> str | None: - """Extract version from pyproject.toml.""" + """Extract version from `pyproject.toml`.""" content = pyproject_path.read_text() match = re.search(r'^version\s*=\s*"([^"]+)"', content, re.MULTILINE) return match.group(1) if match else None def get_version_py_version(version_path: Path) -> str | None: - """Extract VERSION from version.py.""" + """Extract `VERSION` from `version.py`.""" content = version_path.read_text() match = re.search(r'^VERSION\s*=\s*"([^"]+)"', content, re.MULTILINE) return match.group(1) if match else None diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index 91702130ade..b6848c2cd85 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -320,6 +320,7 @@ def test_schemas(snapshot: SnapshotAssertion) -> None: "associated metadata.\n" "\n" "!!! note\n" + "\n" " `Document` is for **retrieval workflows**, not chat I/O. For " "sending text\n" " to an LLM in a conversation, use message types from "