Compare commits

...

3 Commits

Author SHA1 Message Date
jacoblee93
a208128236 Fix 2026-04-18 18:57:44 -07:00
jacoblee93
8e63b39a56 efficiency 2026-04-18 18:39:00 -07:00
jacoblee93
9171e29c21 Treat langsmith metadata keys as trace only 2026-04-18 18:16:33 -07:00
5 changed files with 286 additions and 9 deletions

View File

@@ -7,7 +7,15 @@ import asyncio
# Cannot move uuid to TYPE_CHECKING as RunnableConfig is used in Pydantic models
import uuid # noqa: TC003
import warnings
from collections.abc import Awaitable, Callable, Generator, Iterable, Iterator, Sequence
from collections.abc import (
Awaitable,
Callable,
Generator,
Iterable,
Iterator,
Mapping,
Sequence,
)
from concurrent.futures import Executor, Future, ThreadPoolExecutor
from contextlib import contextmanager
from contextvars import Context, ContextVar, Token, copy_context
@@ -29,6 +37,7 @@ from langchain_core.runnables.utils import (
accepts_config,
accepts_run_manager,
)
from langchain_core.tracers._constants import LANGSMITH_INHERITABLE_METADATA_KEYS
if TYPE_CHECKING:
from langchain_core.callbacks.base import BaseCallbackManager, Callbacks
@@ -146,17 +155,30 @@ CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS = frozenset(("api_key",))
def _get_langsmith_inheritable_metadata_from_config(
    config: RunnableConfig,
    *,
    extra_metadata: Mapping[str, Any] | None = None,
) -> dict[str, Any] | None:
    """Get LangSmith-only inheritable metadata defaults derived from config.

    Entries of ``config["configurable"]`` are copied when the value is a
    ``str``, ``int``, ``float`` or ``bool``, unless the key starts with
    ``__``, is already present in ``config["metadata"]``, or is listed in
    ``CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS``.

    Args:
        config: The config.
        extra_metadata: Additional LangSmith-only metadata to merge in (takes
            precedence over configurable-derived entries). Intended for entries
            already extracted from ``config["metadata"]`` by the caller so we
            don't iterate over it twice.

    Returns:
        The merged metadata, or ``None`` when there is nothing to forward.
    """
    # `or {}` (rather than a `.get` default) also covers keys that are present
    # but explicitly set to `None`, which would otherwise break the `in` test.
    configurable = config.get("configurable") or {}
    config_metadata = config.get("metadata") or {}
    metadata = {
        key: value
        for key, value in configurable.items()
        if not key.startswith("__")
        and isinstance(value, (str, int, float, bool))
        and key not in config_metadata
        and key not in CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS
    }
    if extra_metadata:
        # Applied last so caller-supplied entries win over configurable ones.
        metadata.update(extra_metadata)
    return metadata or None
@@ -520,6 +542,36 @@ def acall_func_with_variable_args(
return func(input, **kwargs) # type: ignore[call-arg]
def _split_inheritable_metadata(
    config: RunnableConfig,
) -> tuple[dict[str, Any] | None, dict[str, Any]]:
    """Partition ``config["metadata"]`` into general and LangSmith-only parts.

    Every key found in
    :data:`langchain_core.tracers._constants.LANGSMITH_INHERITABLE_METADATA_KEYS`
    is routed to the LangSmith-only dict; all remaining keys go to the
    general dict. Each key ends up in exactly one of the two, so allowlisted
    keys are removed from the general inheritable metadata.

    Returns:
        A ``(general_metadata, langsmith_only_metadata)`` tuple. When the
        config has no ``metadata`` value the first element is ``None`` (not
        an empty dict) and the second is an empty dict.
    """
    raw_metadata = config.get("metadata")
    if raw_metadata is None:
        # Keep `None` distinguishable from "metadata present but empty".
        return None, {}

    tracer_only: dict[str, Any] = {}
    shared: dict[str, Any] = {}
    for name, item in raw_metadata.items():
        is_tracer_only = name in LANGSMITH_INHERITABLE_METADATA_KEYS
        bucket = tracer_only if is_tracer_only else shared
        bucket[name] = item
    return shared, tracer_only
def get_callback_manager_for_config(config: RunnableConfig) -> CallbackManager:
"""Get a callback manager for a config.
@@ -529,12 +581,13 @@ def get_callback_manager_for_config(config: RunnableConfig) -> CallbackManager:
Returns:
The callback manager.
"""
general_metadata, langsmith_only_metadata = _split_inheritable_metadata(config)
return CallbackManager.configure(
inheritable_callbacks=config.get("callbacks"),
inheritable_tags=config.get("tags"),
inheritable_metadata=config.get("metadata"),
inheritable_metadata=general_metadata,
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
config
config, extra_metadata=langsmith_only_metadata
),
)
@@ -550,12 +603,13 @@ def get_async_callback_manager_for_config(
Returns:
The async callback manager.
"""
general_metadata, langsmith_only_metadata = _split_inheritable_metadata(config)
return AsyncCallbackManager.configure(
inheritable_callbacks=config.get("callbacks"),
inheritable_tags=config.get("tags"),
inheritable_metadata=config.get("metadata"),
inheritable_metadata=general_metadata,
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
config
config, extra_metadata=langsmith_only_metadata
),
)

View File

@@ -0,0 +1,25 @@
"""Private constants shared across tracer and runnable-config code.
These live in their own module so they can be imported without pulling in
the heavier ``langchain_core.tracers.langchain`` dependency (which loads
``langsmith`` transitively).
"""
from __future__ import annotations
LANGSMITH_INHERITABLE_METADATA_KEYS: frozenset[str] = frozenset({"ls_agent_type"})
"""Metadata keys that are delivered to LangSmith tracers only.

Membership in this set has two effects:

1. ``langchain_core.runnables.config._split_inheritable_metadata`` removes
   the key from the general ``inheritable_metadata`` and places it in the
   LangSmith-only metadata instead, keeping it away from non-tracer callback
   handlers.
2. ``LangChainTracer.copy_with_metadata_defaults`` lets an incoming value for
   the key overwrite one the tracer already holds (last-wins), whereas any
   other key is only filled in when absent (first-wins). This allows a nested
   config to rescope the value, e.g. ``ls_agent_type``, to the innermost run.
"""
# TODO: Expand this to cover all `ls_`-prefixed metadata keys.

View File

@@ -23,6 +23,7 @@ from langchain_core.env import get_runtime_environment
from langchain_core.load import dumpd
from langchain_core.messages.ai import UsageMetadata, add_usage
from langchain_core.tracers._compat import run_construct, run_to_dict
from langchain_core.tracers._constants import LANGSMITH_INHERITABLE_METADATA_KEYS
from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.schemas import Run
@@ -167,7 +168,16 @@ class LangChainTracer(BaseTracer):
metadata: Mapping[str, str] | None = None,
tags: list[str] | None = None,
) -> LangChainTracer:
"""Return a new tracer with merged tracer-only defaults."""
"""Return a new tracer with merged tracer-only defaults.
By default, keys already present on this tracer take precedence over
``metadata`` (first-wins). Keys in
:data:`langchain_core.tracers._constants.LANGSMITH_INHERITABLE_METADATA_KEYS`
are the exception: they are treated as narrowly scoped, overridable
defaults so that a nested ``RunnableConfig`` /
``CallbackManager.configure`` call can rescope them to the innermost
run (e.g. ``ls_agent_type``).
"""
base_metadata = self.tracing_metadata
if metadata is None:
merged_metadata = dict(base_metadata) if base_metadata is not None else None
@@ -176,7 +186,9 @@ class LangChainTracer(BaseTracer):
else:
merged_metadata = dict(base_metadata)
for key, value in metadata.items():
if key not in merged_metadata:
if key in LANGSMITH_INHERITABLE_METADATA_KEYS or (
key not in merged_metadata
):
merged_metadata[key] = value
merged_tags = sorted(set(self.tags + tags)) if tags else self.tags

View File

@@ -19,6 +19,8 @@ from langchain_core.runnables.config import (
_get_langsmith_inheritable_metadata_from_config,
_set_config_context,
ensure_config,
get_async_callback_manager_for_config,
get_callback_manager_for_config,
merge_configs,
run_in_executor,
)
@@ -230,6 +232,101 @@ def test_get_langsmith_inheritable_metadata_from_config_uses_previous_copy_rules
}
def test_get_langsmith_inheritable_metadata_pulls_allowlisted_metadata() -> None:
    # The caller extracts allowlisted metadata up front and hands it over via
    # `extra_metadata`, so `config["metadata"]` only needs a single pass.
    config: RunnableConfig = {
        "configurable": {"baz": "qux"},
        "metadata": {
            "foo": "bar",
            "ls_agent_type": "react",
            "ls_provider": "openai",  # not on the allowlist
        },
    }

    result = _get_langsmith_inheritable_metadata_from_config(
        config, extra_metadata={"ls_agent_type": "react"}
    )

    # Only the configurable entry and the explicitly passed extra survive.
    assert result == {"baz": "qux", "ls_agent_type": "react"}
def test_get_langsmith_inheritable_metadata_handles_missing_metadata() -> None:
    # Config without any "metadata" entry: configurable values still come
    # through.
    without_metadata = _get_langsmith_inheritable_metadata_from_config(
        {"configurable": {"baz": "qux"}}
    )
    assert without_metadata == {"baz": "qux"}

    # Metadata exists, but no extras are passed and configurable contributes
    # nothing, so the helper returns None.
    only_general = _get_langsmith_inheritable_metadata_from_config(
        {"metadata": {"foo": "bar"}}
    )
    assert only_general is None
def test_get_langsmith_inheritable_metadata_extra_metadata_overrides_configurable() -> (
    None
):
    # Entries passed through `extra_metadata` are merged after the
    # configurable-derived dict is built, so they win on a name clash. On top
    # of that, the `key not in config_metadata` guard already keeps a
    # configurable entry from shadowing a key that exists in the config's
    # metadata and was routed into `extra_metadata` by the caller.
    config: RunnableConfig = {
        "configurable": {"ls_agent_type": "from-configurable"},
        "metadata": {"ls_agent_type": "from-metadata"},
    }

    result = _get_langsmith_inheritable_metadata_from_config(
        config, extra_metadata={"ls_agent_type": "from-metadata"}
    )

    assert result == {"ls_agent_type": "from-metadata"}
def test_get_callback_manager_for_config_filters_allowlisted_metadata() -> None:
    # LangSmith-only (allowlisted) keys must be removed from the general
    # inheritable metadata so non-tracer callback handors never see them;
    # tracers receive them through `langsmith_inheritable_metadata` instead.
    metadata = {
        "foo": "bar",
        "ls_agent_type": "react",
        "ls_provider": "openai",  # not on the allowlist
    }
    config: RunnableConfig = {"metadata": metadata}

    manager = get_callback_manager_for_config(config)

    # Only the allowlisted key is dropped; ordinary keys and `ls_*` keys that
    # are not on the allowlist are passed through untouched.
    expected = {"foo": "bar", "ls_provider": "openai"}
    assert manager.inheritable_metadata == expected
def test_get_callback_manager_for_config_preserves_empty_metadata() -> None:
    # A config with no "metadata" key must not make the split raise, and the
    # resulting manager must carry no inheritable metadata.
    empty_config: RunnableConfig = {}
    manager = get_callback_manager_for_config(empty_config)
    assert not manager.inheritable_metadata
async def test_get_async_callback_manager_for_config_filters_allowlisted_metadata() -> (
    None
):
    # Async counterpart of the sync filter test: the allowlisted key is
    # removed from the general inheritable metadata, the regular key stays.
    metadata = {"foo": "bar", "ls_agent_type": "react"}
    config: RunnableConfig = {"metadata": metadata}

    manager = get_async_callback_manager_for_config(config)

    assert manager.inheritable_metadata == {"foo": "bar"}
async def test_merge_config_callbacks() -> None:
manager: RunnableConfig = {
"callbacks": CallbackManager(handlers=[StdOutCallbackHandler()])

View File

@@ -1040,6 +1040,95 @@ class TestLangsmithInheritableTracingDefaultsInConfigure:
md = posts[0].get("extra", {}).get("metadata", {})
assert md["env"] == "prod"
def test_nested_ls_agent_type_is_scoped_to_each_runnable(self) -> None:
    """Each run reports the allowlisted `ls_` metadata of its own runnable.

    The outer runnable is configured with `ls_agent_type="root"` and calls
    an inner runnable configured with `ls_agent_type="subagent"`. Two runs
    are expected to be posted, and the inner one must carry `"subagent"`
    rather than inheriting `"root"` from its parent.
    """
    tracer = _create_tracer_with_mocked_client()

    @RunnableLambda
    def inner(x: int) -> int:
        return x + 1

    scoped_inner = inner.with_config(
        {"metadata": {"ls_agent_type": "subagent"}, "run_name": "inner"}
    )

    @RunnableLambda
    def outer(x: int) -> int:
        return scoped_inner.invoke(x)

    scoped_outer = outer.with_config(
        {"metadata": {"ls_agent_type": "root"}, "run_name": "outer"}
    )
    scoped_outer.invoke(1, {"callbacks": [tracer]})

    # Index the posted runs by name so each can be inspected on its own.
    posts_by_name = {post.get("name"): post for post in _get_posts(tracer.client)}
    assert set(posts_by_name) >= {"outer", "inner"}, (
        f"expected both outer and inner runs, got {list(posts_by_name)}"
    )

    metadata_by_name = {
        name: posts_by_name[name].get("extra", {}).get("metadata", {})
        for name in ("outer", "inner")
    }
    assert metadata_by_name["outer"].get("ls_agent_type") == "root"
    assert metadata_by_name["inner"].get("ls_agent_type") == "subagent"
def test_ls_agent_type_not_visible_to_non_tracer_handlers(self) -> None:
    """Allowlisted `ls_` metadata never reaches a non-tracer handler.

    A plain ``BaseCallbackHandler`` records the metadata passed to every
    ``on_chain_start`` call; none of those records may contain
    ``ls_agent_type``, while ordinary metadata keys must still show up.
    """
    recorded: list[dict[str, Any]] = []

    class RecordingHandler(BaseCallbackHandler):
        def on_chain_start(
            self,
            serialized: dict[str, Any] | None,
            inputs: dict[str, Any] | Any,
            *,
            metadata: dict[str, Any] | None = None,
            **kwargs: Any,
        ) -> None:
            # Store a copy so later mutation by the framework can't affect it.
            recorded.append(dict(metadata or {}))

    @RunnableLambda
    def inner(x: int) -> int:
        return x + 1

    scoped_inner = inner.with_config(
        {"metadata": {"ls_agent_type": "subagent", "visible": "inner"}}
    )

    @RunnableLambda
    def outer(x: int) -> int:
        return scoped_inner.invoke(x)

    scoped_outer = outer.with_config(
        {"metadata": {"ls_agent_type": "root", "visible": "outer"}}
    )
    scoped_outer.invoke(1, {"callbacks": [RecordingHandler()]})

    # No recorded invocation may expose the allowlisted key.
    assert recorded, "expected on_chain_start to fire"
    for md in recorded:
        assert "ls_agent_type" not in md, (
            f"ls_agent_type leaked to non-tracer handler: {md}"
        )

    # Non-allowlisted keys must still be delivered, proving the filter only
    # targets the allowlist.
    visible_values = [md.get("visible") for md in recorded]
    assert "outer" in visible_values
    assert "inner" in visible_values
def test_runnable_config_copies_configurable_values_to_tracing_metadata(
self,
) -> None: