Compare commits

..

1 Commits

Author SHA1 Message Date
Open SWE Agent
136aed4dc9 feat(openai): support tool search and defer_loading for openai responses api 2026-03-05 19:37:07 +00:00
318 changed files with 8342 additions and 23947 deletions

View File

@@ -6,8 +6,6 @@ body:
- type: markdown
attributes:
value: |
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
Thank you for taking the time to file a bug report.
For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
@@ -22,7 +20,7 @@ body:
- type: checkboxes
id: checks
attributes:
label: Submission checklist
label: Checked other resources
description: Please confirm and check all the following options.
options:
- label: This is a bug, not a usage question.

View File

@@ -6,8 +6,6 @@ body:
- type: markdown
attributes:
value: |
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
Thank you for taking the time to request a new feature.
Use this to request NEW FEATURES or ENHANCEMENTS in LangChain. For bug reports, please use the bug report template. For usage questions and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
@@ -20,12 +18,10 @@ body:
* [LangChain ChatBot](https://chat.langchain.com/)
* [GitHub search](https://github.com/langchain-ai/langchain),
* [LangChain Forum](https://forum.langchain.com/),
**Note:** Do not begin work on a PR unless explicitly assigned to this issue by a maintainer.
- type: checkboxes
id: checks
attributes:
label: Submission checklist
label: Checked other resources
description: Please confirm and check all the following options.
options:
- label: This is a feature request, not a bug report or usage question.

View File

@@ -1,13 +1,6 @@
Fixes #
---
<!-- Keep the `Fixes #xx` keyword at the very top and update the issue number — this auto-closes the issue on merge. Replace this comment with a 1-2 sentence description of your change. No `# Summary` header; the description is the summary. -->
(Replace this entire block of text)
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
If you paste a large clearly AI generated description here your PR may be IGNORED or CLOSED!
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
@@ -23,7 +16,7 @@ Thank you for contributing to LangChain! Follow these steps to have your pull re
2. PR description:
- Write 1-2 sentences summarizing the change.
- The `Fixes #xx` line at the top is **required** for external contributions — update the issue number and keep the keyword. This links your PR to the approved issue and auto-closes it on merge.
- If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
- If there are any breaking changes, please clearly describe them.
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.
@@ -35,7 +28,7 @@ Thank you for contributing to LangChain! Follow these steps to have your pull re
Additional guidelines:
- All external PRs must link to an issue or discussion where a solution has been approved by a maintainer, and you must be assigned to that issue. PRs without prior approval will be closed.
- We ask that if you use generative AI for your contribution, you include a disclaimer.
- PRs should not touch more than one package unless absolutely necessary.
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so by a maintainer.

View File

@@ -27,7 +27,7 @@ runs:
using: composite
steps:
- name: Install uv and set the python version
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
uses: astral-sh/setup-uv@v7
with:
version: ${{ env.UV_VERSION }}
python-version: ${{ inputs.python-version }}

View File

@@ -8,19 +8,12 @@ updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
interval: "weekly"
day: "monday"
groups:
minor-and-patch:
github-actions:
patterns:
- "*"
update-types:
- "minor"
- "patch"
major:
patterns:
- "*"
update-types:
- "major"
- package-ecosystem: "uv"
directories:
@@ -28,19 +21,12 @@ updates:
- "/libs/langchain/"
- "/libs/langchain_v1/"
schedule:
interval: "monthly"
interval: "weekly"
day: "monday"
groups:
minor-and-patch:
langchain-deps:
patterns:
- "*"
update-types:
- "minor"
- "patch"
major:
patterns:
- "*"
update-types:
- "major"
- package-ecosystem: "uv"
directories:
@@ -60,19 +46,12 @@ updates:
- "/libs/partners/qdrant/"
- "/libs/partners/xai/"
schedule:
interval: "monthly"
interval: "weekly"
day: "monday"
groups:
minor-and-patch:
partner-deps:
patterns:
- "*"
update-types:
- "minor"
- "patch"
major:
patterns:
- "*"
update-types:
- "major"
- package-ecosystem: "uv"
directories:
@@ -80,16 +59,9 @@ updates:
- "/libs/standard-tests/"
- "/libs/model-profiles/"
schedule:
interval: "monthly"
interval: "weekly"
day: "monday"
groups:
minor-and-patch:
other-deps:
patterns:
- "*"
update-types:
- "minor"
- "patch"
major:
patterns:
- "*"
update-types:
- "major"

128
.github/pr-file-labeler.yml vendored Normal file
View File

@@ -0,0 +1,128 @@
# Label PRs (config)
# Automatically applies labels based on changed files and branch patterns
# Core packages
core:
- changed-files:
- any-glob-to-any-file:
- "libs/core/**/*"
langchain-classic:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain/**/*"
langchain:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain_v1/**/*"
standard-tests:
- changed-files:
- any-glob-to-any-file:
- "libs/standard-tests/**/*"
model-profiles:
- changed-files:
- any-glob-to-any-file:
- "libs/model-profiles/**/*"
text-splitters:
- changed-files:
- any-glob-to-any-file:
- "libs/text-splitters/**/*"
# Partner integrations
integration:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/**/*"
anthropic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/anthropic/**/*"
chroma:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/chroma/**/*"
deepseek:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/deepseek/**/*"
exa:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/exa/**/*"
fireworks:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/fireworks/**/*"
groq:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/groq/**/*"
huggingface:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/huggingface/**/*"
mistralai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/mistralai/**/*"
nomic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/nomic/**/*"
ollama:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/ollama/**/*"
openai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/openai/**/*"
openrouter:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/openrouter/**/*"
perplexity:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/perplexity/**/*"
qdrant:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/qdrant/**/*"
xai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/xai/**/*"
github_actions:
- changed-files:
- any-glob-to-any-file:
- ".github/workflows/**/*"
- ".github/actions/**/*"
dependencies:
- changed-files:
- any-glob-to-any-file:
- "**/pyproject.toml"
- "uv.lock"
- "**/requirements*.txt"
- "**/poetry.lock"

View File

@@ -33,22 +33,18 @@ LANGCHAIN_DIRS = [
"libs/model-profiles",
]
# Packages with VCR cassette-backed integration tests.
# These get a playback-only CI check to catch stale cassettes.
VCR_PACKAGES = {
"libs/partners/openai",
}
# When set to True, we are ignoring core dependents
# in order to be able to get CI to pass for each individual
# package that depends on core
# e.g. if you touch core, we don't then add textsplitters/etc to CI
IGNORE_CORE_DEPENDENTS = False
# Ignored partners are removed from dependents but still run if directly edited
# ignored partners are removed from dependents
# but still run if directly edited
IGNORED_PARTNERS = [
# remove huggingface from dependents because of CI instability
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
"huggingface",
]
@@ -132,23 +128,12 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
return _get_pydantic_test_configs(dir_)
if job == "codspeed":
# CPU simulation (<1% variance, Valgrind-based) is the default.
# Partners with heavy SDK inits use walltime instead to keep CI fast.
CODSPEED_WALLTIME_DIRS = {
"libs/core",
"libs/partners/fireworks", # ~328s under simulation
"libs/partners/openai", # 6 benchmarks, ~6 min under simulation
}
mode = "walltime" if dir_ in CODSPEED_WALLTIME_DIRS else "simulation"
return [
{
"working-directory": dir_,
"python-version": "3.13",
"codspeed-mode": mode,
}
]
if dir_ == "libs/core":
py_versions = ["3.13"]
elif dir_ == "libs/core":
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
# custom logic for specific directories
elif dir_ in {"libs/partners/chroma"}:
py_versions = ["3.10", "3.13"]
else:
py_versions = ["3.10", "3.14"]
@@ -226,14 +211,6 @@ def _get_configs_for_multi_dirs(
dirs = list(dirs_to_run["extended-test"])
elif job == "codspeed":
dirs = list(dirs_to_run["codspeed"])
elif job == "vcr-tests":
# Only run VCR tests for packages that have cassettes and are affected
all_affected = set(
add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
)
)
dirs = [d for d in VCR_PACKAGES if d in all_affected]
else:
raise ValueError(f"Unknown job: {job}")
@@ -282,8 +259,6 @@ if __name__ == "__main__":
if file.startswith("libs/core"):
dirs_to_run["codspeed"].add("libs/core")
if file.startswith("libs/langchain_v1"):
dirs_to_run["codspeed"].add("libs/langchain_v1")
if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
# add that dir and all dirs after in LANGCHAIN_DIRS
# for extended testing
@@ -316,13 +291,8 @@ if __name__ == "__main__":
if not filename.startswith(".")
] != ["README.md"]:
dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
# Only add to codspeed if the partner has benchmarks and is not ignored
if (
partner_dir not in IGNORED_PARTNERS
and os.path.isdir(
f"libs/partners/{partner_dir}/tests/benchmarks"
)
):
# Skip codspeed for partners without benchmarks or in IGNORED_PARTNERS
if partner_dir not in IGNORED_PARTNERS:
dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}")
# Skip if the directory was deleted or is just a tombstone readme
elif file.startswith("libs/"):
@@ -355,7 +325,6 @@ if __name__ == "__main__":
"dependencies",
"test-pydantic",
"codspeed",
"vcr-tests",
]
}

View File

@@ -48,7 +48,7 @@ def get_pypi_versions(package_name: str) -> List[str]:
KeyError: If package not found or response format unexpected
"""
pypi_url = f"https://pypi.org/pypi/{package_name}/json"
response = requests.get(pypi_url, timeout=10.0)
response = requests.get(pypi_url)
response.raise_for_status()
return list(response.json()["releases"].keys())

View File

@@ -1,84 +0,0 @@
{
"trustedThreshold": 5,
"labelColor": "b76e79",
"sizeThresholds": [
{ "label": "size: XS", "max": 50 },
{ "label": "size: S", "max": 200 },
{ "label": "size: M", "max": 500 },
{ "label": "size: L", "max": 1000 },
{ "label": "size: XL" }
],
"excludedFiles": ["uv.lock"],
"excludedPaths": ["docs/"],
"typeToLabel": {
"feat": "feature",
"fix": "fix",
"docs": "documentation",
"style": "linting",
"refactor": "refactor",
"perf": "performance",
"test": "tests",
"build": "infra",
"ci": "infra",
"chore": "infra",
"revert": "revert",
"release": "release",
"hotfix": "hotfix",
"breaking": "breaking"
},
"scopeToLabel": {
"core": "core",
"langchain": "langchain",
"langchain-classic": "langchain-classic",
"model-profiles": "model-profiles",
"standard-tests": "standard-tests",
"text-splitters": "text-splitters",
"anthropic": "anthropic",
"chroma": "chroma",
"deepseek": "deepseek",
"exa": "exa",
"fireworks": "fireworks",
"groq": "groq",
"huggingface": "huggingface",
"mistralai": "mistralai",
"nomic": "nomic",
"ollama": "ollama",
"openai": "openai",
"openrouter": "openrouter",
"perplexity": "perplexity",
"qdrant": "qdrant",
"xai": "xai",
"deps": "dependencies",
"docs": "documentation",
"infra": "infra"
},
"fileRules": [
{ "label": "core", "prefix": "libs/core/", "skipExcludedFiles": true },
{ "label": "langchain-classic", "prefix": "libs/langchain/", "skipExcludedFiles": true },
{ "label": "langchain", "prefix": "libs/langchain_v1/", "skipExcludedFiles": true },
{ "label": "standard-tests", "prefix": "libs/standard-tests/", "skipExcludedFiles": true },
{ "label": "model-profiles", "prefix": "libs/model-profiles/", "skipExcludedFiles": true },
{ "label": "text-splitters", "prefix": "libs/text-splitters/", "skipExcludedFiles": true },
{ "label": "integration", "prefix": "libs/partners/", "skipExcludedFiles": true },
{ "label": "anthropic", "prefix": "libs/partners/anthropic/", "skipExcludedFiles": true },
{ "label": "chroma", "prefix": "libs/partners/chroma/", "skipExcludedFiles": true },
{ "label": "deepseek", "prefix": "libs/partners/deepseek/", "skipExcludedFiles": true },
{ "label": "exa", "prefix": "libs/partners/exa/", "skipExcludedFiles": true },
{ "label": "fireworks", "prefix": "libs/partners/fireworks/", "skipExcludedFiles": true },
{ "label": "groq", "prefix": "libs/partners/groq/", "skipExcludedFiles": true },
{ "label": "huggingface", "prefix": "libs/partners/huggingface/", "skipExcludedFiles": true },
{ "label": "mistralai", "prefix": "libs/partners/mistralai/", "skipExcludedFiles": true },
{ "label": "nomic", "prefix": "libs/partners/nomic/", "skipExcludedFiles": true },
{ "label": "ollama", "prefix": "libs/partners/ollama/", "skipExcludedFiles": true },
{ "label": "openai", "prefix": "libs/partners/openai/", "skipExcludedFiles": true },
{ "label": "openrouter", "prefix": "libs/partners/openrouter/", "skipExcludedFiles": true },
{ "label": "perplexity", "prefix": "libs/partners/perplexity/", "skipExcludedFiles": true },
{ "label": "qdrant", "prefix": "libs/partners/qdrant/", "skipExcludedFiles": true },
{ "label": "xai", "prefix": "libs/partners/xai/", "skipExcludedFiles": true },
{ "label": "github_actions", "prefix": ".github/workflows/" },
{ "label": "github_actions", "prefix": ".github/actions/" },
{ "label": "dependencies", "suffix": "pyproject.toml" },
{ "label": "dependencies", "exact": "uv.lock" },
{ "label": "dependencies", "pattern": "(?:^|/)requirements[^/]*\\.txt$" }
]
}

View File

@@ -1,278 +0,0 @@
// Shared helpers for pr_labeler.yml and tag-external-issues.yml.
//
// Usage from actions/github-script (requires actions/checkout first):
// const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const fs = require('fs');
const path = require('path');
function loadConfig() {
const configPath = path.join(__dirname, 'pr-labeler-config.json');
let raw;
try {
raw = fs.readFileSync(configPath, 'utf8');
} catch (e) {
throw new Error(`Failed to read ${configPath}: ${e.message}`);
}
let config;
try {
config = JSON.parse(raw);
} catch (e) {
throw new Error(`Failed to parse pr-labeler-config.json: ${e.message}`);
}
const required = [
'labelColor', 'sizeThresholds', 'fileRules',
'typeToLabel', 'scopeToLabel', 'trustedThreshold',
'excludedFiles', 'excludedPaths',
];
const missing = required.filter(k => !(k in config));
if (missing.length > 0) {
throw new Error(`pr-labeler-config.json missing required keys: ${missing.join(', ')}`);
}
return config;
}
function init(github, owner, repo, config, core) {
if (!core) {
throw new Error('init() requires a `core` parameter (e.g., from actions/github-script)');
}
const {
trustedThreshold,
labelColor,
sizeThresholds,
scopeToLabel,
typeToLabel,
fileRules: fileRulesDef,
excludedFiles,
excludedPaths,
} = config;
const sizeLabels = sizeThresholds.map(t => t.label);
const allTypeLabels = [...new Set(Object.values(typeToLabel))];
const tierLabels = ['new-contributor', 'trusted-contributor'];
// ── Label management ──────────────────────────────────────────────
async function ensureLabel(name, color = labelColor) {
try {
await github.rest.issues.getLabel({ owner, repo, name });
} catch (e) {
if (e.status !== 404) throw e;
try {
await github.rest.issues.createLabel({ owner, repo, name, color });
} catch (createErr) {
// 422 = label created by a concurrent run between our get and create
if (createErr.status !== 422) throw createErr;
core.info(`Label "${name}" creation returned 422 (likely already exists)`);
}
}
}
// ── Size calculation ──────────────────────────────────────────────
function getSizeLabel(totalChanged) {
for (const t of sizeThresholds) {
if (t.max != null && totalChanged < t.max) return t.label;
}
// Last entry has no max — it's the catch-all
return sizeThresholds[sizeThresholds.length - 1].label;
}
function computeSize(files) {
const excluded = new Set(excludedFiles);
const totalChanged = files.reduce((sum, f) => {
const p = f.filename ?? '';
const base = p.split('/').pop();
if (excluded.has(base)) return sum;
for (const prefix of excludedPaths) {
if (p.startsWith(prefix)) return sum;
}
return sum + (f.additions ?? 0) + (f.deletions ?? 0);
}, 0);
return { totalChanged, sizeLabel: getSizeLabel(totalChanged) };
}
// ── File-based labels ─────────────────────────────────────────────
function buildFileRules() {
return fileRulesDef.map((rule, i) => {
let test;
if (rule.prefix) test = p => p.startsWith(rule.prefix);
else if (rule.suffix) test = p => p.endsWith(rule.suffix);
else if (rule.exact) test = p => p === rule.exact;
else if (rule.pattern) {
const re = new RegExp(rule.pattern);
test = p => re.test(p);
} else {
throw new Error(
`fileRules[${i}] (label: "${rule.label}") has no recognized matcher ` +
`(expected one of: prefix, suffix, exact, pattern)`
);
}
return { label: rule.label, test, skipExcluded: !!rule.skipExcludedFiles };
});
}
function matchFileLabels(files, fileRules) {
const rules = fileRules || buildFileRules();
const excluded = new Set(excludedFiles);
const labels = new Set();
for (const rule of rules) {
// skipExcluded: ignore files whose basename is in the top-level
// "excludedFiles" list (e.g. uv.lock) so lockfile-only changes
// don't trigger package labels.
const candidates = rule.skipExcluded
? files.filter(f => !excluded.has((f.filename ?? '').split('/').pop()))
: files;
if (candidates.some(f => rule.test(f.filename ?? ''))) {
labels.add(rule.label);
}
}
return labels;
}
// ── Title-based labels ────────────────────────────────────────────
function matchTitleLabels(title) {
const labels = new Set();
const m = (title ?? '').match(/^(\w+)(?:\(([^)]+)\))?(!)?:/);
if (!m) return { labels, type: null, typeLabel: null, scopes: [], breaking: false };
const type = m[1].toLowerCase();
const scopeStr = m[2] ?? '';
const breaking = !!m[3];
const typeLabel = typeToLabel[type] || null;
if (typeLabel) labels.add(typeLabel);
if (breaking) labels.add('breaking');
const scopes = scopeStr.split(',').map(s => s.trim()).filter(Boolean);
for (const scope of scopes) {
const sl = scopeToLabel[scope];
if (sl) labels.add(sl);
}
return { labels, type, typeLabel, scopes, breaking };
}
// ── Org membership ────────────────────────────────────────────────
async function checkMembership(author, userType) {
if (userType === 'Bot') {
console.log(`${author} is a Bot — treating as internal`);
return { isExternal: false };
}
try {
const membership = await github.rest.orgs.getMembershipForUser({
org: 'langchain-ai',
username: author,
});
const isExternal = membership.data.state !== 'active';
console.log(
isExternal
? `${author} has pending membership — treating as external`
: `${author} is an active member of langchain-ai`,
);
return { isExternal };
} catch (e) {
if (e.status === 404) {
console.log(`${author} is not a member of langchain-ai`);
return { isExternal: true };
}
// Non-404 errors (rate limit, auth failure, server error) must not
// silently default to external — rethrow to fail the step.
throw new Error(
`Membership check failed for ${author} (${e.status}): ${e.message}`,
);
}
}
// ── Contributor analysis ──────────────────────────────────────────
async function getContributorInfo(contributorCache, author, userType) {
if (contributorCache.has(author)) return contributorCache.get(author);
const { isExternal } = await checkMembership(author, userType);
let mergedCount = null;
if (isExternal) {
try {
const result = await github.rest.search.issuesAndPullRequests({
q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
per_page: 1,
});
mergedCount = result?.data?.total_count ?? null;
} catch (e) {
if (e?.status !== 422) throw e;
core.warning(`Search failed for ${author}; skipping tier.`);
}
}
const info = { isExternal, mergedCount };
contributorCache.set(author, info);
return info;
}
// ── Tier label resolution ───────────────────────────────────────────
async function applyTierLabel(issueNumber, author, { skipNewContributor = false } = {}) {
let mergedCount;
try {
const result = await github.rest.search.issuesAndPullRequests({
q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
per_page: 1,
});
mergedCount = result?.data?.total_count;
} catch (error) {
if (error?.status !== 422) throw error;
core.warning(`Search failed for ${author}; skipping tier label.`);
return;
}
if (mergedCount == null) {
core.warning(`Search response missing total_count for ${author}; skipping tier label.`);
return;
}
let tierLabel = null;
if (mergedCount >= trustedThreshold) tierLabel = 'trusted-contributor';
else if (mergedCount === 0 && !skipNewContributor) tierLabel = 'new-contributor';
if (tierLabel) {
await ensureLabel(tierLabel);
await github.rest.issues.addLabels({
owner, repo, issue_number: issueNumber, labels: [tierLabel],
});
console.log(`Applied '${tierLabel}' to #${issueNumber} (${mergedCount} merged PRs)`);
} else {
console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
}
return tierLabel;
}
return {
ensureLabel,
getSizeLabel,
computeSize,
buildFileRules,
matchFileLabels,
matchTitleLabels,
allTypeLabels,
checkMembership,
getContributorInfo,
applyTierLabel,
sizeLabels,
tierLabels,
trustedThreshold,
labelColor,
};
}
function loadAndInit(github, owner, repo, core) {
const config = loadConfig();
return { config, h: init(github, owner, repo, config, core) };
}
module.exports = { loadConfig, init, loadAndInit };

View File

@@ -1,48 +0,0 @@
"""Verify _release.yml dropdown options match actual package directories."""
from pathlib import Path
import yaml
REPO_ROOT = Path(__file__).resolve().parents[2]
def _get_release_options() -> list[str]:
workflow = REPO_ROOT / ".github" / "workflows" / "_release.yml"
with open(workflow) as f:
data = yaml.safe_load(f)
try:
# PyYAML (YAML 1.1) parses the bare key `on` as boolean True
return data[True]["workflow_dispatch"]["inputs"]["working-directory"]["options"]
except (KeyError, TypeError) as e:
msg = f"Could not find workflow_dispatch options in {workflow}: {e}"
raise AssertionError(msg) from e
def _get_package_dirs() -> set[str]:
libs = REPO_ROOT / "libs"
dirs: set[str] = set()
# Top-level packages (libs/core, libs/langchain, etc.)
for p in libs.iterdir():
if p.is_dir() and (p / "pyproject.toml").exists():
dirs.add(f"libs/{p.name}")
# Partner packages (libs/partners/*)
partners = libs / "partners"
if partners.exists():
for p in partners.iterdir():
if p.is_dir() and (p / "pyproject.toml").exists():
dirs.add(f"libs/partners/{p.name}")
return dirs
def test_release_options_match_packages() -> None:
options = set(_get_release_options())
packages = _get_package_dirs()
missing_from_dropdown = packages - options
extra_in_dropdown = options - packages
assert not missing_from_dropdown, (
f"Packages on disk missing from _release.yml dropdown: {missing_from_dropdown}"
)
assert not extra_in_dropdown, (
f"Dropdown options with no matching package directory: {extra_in_dropdown}"
)

View File

@@ -35,7 +35,7 @@ jobs:
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -38,7 +38,7 @@ jobs:
timeout-minutes: 20
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -1,202 +0,0 @@
# Reusable workflow: refreshes model profile data for any repo that uses the
# `langchain-profiles` CLI. Creates (or updates) a pull request with the
# resulting changes.
#
# Callers MUST set `permissions: { contents: write, pull-requests: write }` —
# reusable workflows cannot escalate the caller's token permissions.
#
# ── Example: external repo (langchain-google) ──────────────────────────
#
# jobs:
# refresh-profiles:
# uses: langchain-ai/langchain/.github/workflows/_refresh_model_profiles.yml@master
# with:
# providers: >-
# [
# {"provider":"google", "data_dir":"libs/genai/langchain_google_genai/data"},
# ]
# secrets:
# MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
# MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
name: "Refresh Model Profiles (reusable)"
on:
workflow_call:
inputs:
providers:
description: >-
JSON array of objects, each with `provider` (models.dev provider ID)
and `data_dir` (path relative to repo root where `_profiles.py` and
`profile_augmentations.toml` live).
required: true
type: string
cli-path:
description: >-
Path (relative to workspace) to an existing `libs/model-profiles`
checkout. When set the workflow skips cloning the langchain repo and
uses this directory for the CLI instead. Useful when the caller IS
the langchain monorepo.
required: false
type: string
default: ""
cli-ref:
description: >-
Git ref of langchain-ai/langchain to checkout for the CLI.
Ignored when `cli-path` is set.
required: false
type: string
default: master
add-paths:
description: "Glob for files to stage in the PR commit."
required: false
type: string
default: "**/_profiles.py"
pr-branch:
description: "Branch name for the auto-created PR."
required: false
type: string
default: bot/refresh-model-profiles
pr-title:
description: "PR / commit title."
required: false
type: string
default: "chore(model-profiles): refresh model profile data"
pr-body:
description: "PR body."
required: false
type: string
default: |
Automated refresh of model profile data via `langchain-profiles refresh`.
🤖 Generated by the `refresh_model_profiles` workflow.
pr-labels:
description: "Comma-separated labels to apply to the PR."
required: false
type: string
default: bot
secrets:
MODEL_PROFILE_BOT_APP_ID:
required: true
MODEL_PROFILE_BOT_PRIVATE_KEY:
required: true
permissions:
contents: write
pull-requests: write
jobs:
refresh-profiles:
name: refresh model profiles
runs-on: ubuntu-latest
steps:
- name: "📋 Checkout"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "📋 Checkout langchain-profiles CLI"
if: inputs.cli-path == ''
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
repository: langchain-ai/langchain
ref: ${{ inputs.cli-ref }}
sparse-checkout: libs/model-profiles
path: _langchain-cli
- name: "🔧 Resolve CLI directory"
id: cli
env:
CLI_PATH: ${{ inputs.cli-path }}
run: |
if [ -n "${CLI_PATH}" ]; then
resolved="${GITHUB_WORKSPACE}/${CLI_PATH}"
if [ ! -d "${resolved}" ]; then
echo "::error::cli-path '${CLI_PATH}' does not exist at ${resolved}"
exit 1
fi
echo "dir=${CLI_PATH}" >> "$GITHUB_OUTPUT"
else
echo "dir=_langchain-cli/libs/model-profiles" >> "$GITHUB_OUTPUT"
fi
- name: "🐍 Set up Python + uv"
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
with:
version: "0.5.25"
python-version: "3.12"
enable-cache: true
cache-dependency-glob: "**/model-profiles/uv.lock"
- name: "📦 Install langchain-profiles CLI"
working-directory: ${{ steps.cli.outputs.dir }}
run: uv sync --frozen --no-group test --no-group dev --no-group lint
- name: "✅ Validate providers input"
env:
PROVIDERS_JSON: ${{ inputs.providers }}
run: |
echo "${PROVIDERS_JSON}" | jq -e 'type == "array" and length > 0' > /dev/null || {
echo "::error::providers input must be a non-empty JSON array"
exit 1
}
echo "${PROVIDERS_JSON}" | jq -e 'all(has("provider") and has("data_dir"))' > /dev/null || {
echo "::error::every entry in providers must have 'provider' and 'data_dir' keys"
exit 1
}
- name: "🔄 Refresh profiles"
env:
PROVIDERS_JSON: ${{ inputs.providers }}
run: |
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
failed=""
mapfile -t rows < <(echo "${PROVIDERS_JSON}" | jq -c '.[]')
for row in "${rows[@]}"; do
provider=$(echo "${row}" | jq -r '.provider')
data_dir=$(echo "${row}" | jq -r '.data_dir')
echo "--- Refreshing ${provider} -> ${data_dir} ---"
if ! echo y | uv run --frozen --project "${cli_dir}" \
langchain-profiles refresh \
--provider "${provider}" \
--data-dir "${GITHUB_WORKSPACE}/${data_dir}"; then
echo "::error::Failed to refresh provider: ${provider}"
failed="${failed} ${provider}"
fi
done
if [ -n "${failed}" ]; then
echo "::error::The following providers failed:${failed}"
exit 1
fi
- name: "🔑 Generate GitHub App token"
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
- name: "🔀 Create pull request"
id: create-pr
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
with:
token: ${{ steps.app-token.outputs.token }}
branch: ${{ inputs.pr-branch }}
commit-message: ${{ inputs.pr-title }}
title: ${{ inputs.pr-title }}
body: ${{ inputs.pr-body }}
labels: ${{ inputs.pr-labels }}
add-paths: ${{ inputs.add-paths }}
- name: "📝 Summary"
if: always()
env:
PR_OP: ${{ steps.create-pr.outputs.pull-request-operation }}
PR_URL: ${{ steps.create-pr.outputs.pull-request-url }}
JOB_STATUS: ${{ job.status }}
run: |
if [ "${PR_OP}" = "created" ] || [ "${PR_OP}" = "updated" ]; then
echo "### ✅ PR ${PR_OP}: ${PR_URL}" >> "$GITHUB_STEP_SUMMARY"
elif [ -z "${PR_OP}" ] && [ "${JOB_STATUS}" = "success" ]; then
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
elif [ "${JOB_STATUS}" = "failure" ]; then
echo "### ❌ Job failed — check step logs for details" >> "$GITHUB_STEP_SUMMARY"
fi

View File

@@ -5,7 +5,7 @@
# Handles version bumping, building, and publishing to PyPI with authentication.
name: "🚀 Package Release"
run-name: "Release ${{ inputs.working-directory-override || inputs.working-directory }} ${{ inputs.release-version }}"
run-name: "Release ${{ inputs.working-directory }} ${{ inputs.release-version }}"
on:
workflow_call:
inputs:
@@ -17,35 +17,9 @@ on:
inputs:
working-directory:
required: true
type: choice
type: string
description: "From which folder this pipeline executes"
default: "libs/langchain_v1"
options:
- libs/core
- libs/langchain
- libs/langchain_v1
- libs/text-splitters
- libs/standard-tests
- libs/model-profiles
- libs/partners/anthropic
- libs/partners/chroma
- libs/partners/deepseek
- libs/partners/exa
- libs/partners/fireworks
- libs/partners/groq
- libs/partners/huggingface
- libs/partners/mistralai
- libs/partners/nomic
- libs/partners/ollama
- libs/partners/openai
- libs/partners/openrouter
- libs/partners/perplexity
- libs/partners/qdrant
- libs/partners/xai
working-directory-override:
required: false
type: string
description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
release-version:
required: true
type: string
@@ -61,10 +35,9 @@ env:
PYTHON_VERSION: "3.11"
UV_FROZEN: "true"
UV_NO_SYNC: "true"
EFFECTIVE_WORKING_DIR: ${{ inputs.working-directory-override || inputs.working-directory }}
permissions:
contents: read # Job-level overrides grant write only where needed (mark-release)
contents: write # Required for creating GitHub releases
jobs:
# Build the distribution package and extract version info
@@ -81,7 +54,7 @@ jobs:
version: ${{ steps.check-version.outputs.version }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
@@ -91,7 +64,6 @@ jobs:
# We want to keep this build stage *separate* from the release stage,
# so that there's no sharing of permissions between them.
# (Release stage has trusted publishing and GitHub repo contents write access,
# which the build stage must not have access to.)
#
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
# could get access to our GitHub or PyPI credentials.
@@ -102,18 +74,18 @@ jobs:
# https://github.com/pypa/gh-action-pypi-publish#non-goals
- name: Build project for distribution
run: uv build
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
- name: Upload build
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
uses: actions/upload-artifact@v7
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Check version
id: check-version
shell: python
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
run: |
import os
import tomllib
@@ -125,8 +97,6 @@ jobs:
f.write(f"pkg-name={pkg_name}\n")
f.write(f"version={version}\n")
release-notes:
# release-notes must run before publishing because its check-tags step
# validates version/tag state — do not remove this dependency.
needs:
- build
runs-on: ubuntu-latest
@@ -135,18 +105,18 @@ jobs:
outputs:
release-body: ${{ steps.generate-release-body.outputs.release-body }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain
path: langchain
sparse-checkout: | # this only grabs files for relevant dir
${{ env.EFFECTIVE_WORKING_DIR }}
${{ inputs.working-directory }}
ref: ${{ github.ref }} # this scopes to just ref'd branch
fetch-depth: 0 # this fetches entire commit history
- name: Check tags
id: check-tags
shell: bash
working-directory: langchain/${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: langchain/${{ inputs.working-directory }}
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
@@ -203,7 +173,7 @@ jobs:
id: generate-release-body
working-directory: langchain
env:
WORKING_DIR: ${{ env.EFFECTIVE_WORKING_DIR }}
WORKING_DIR: ${{ inputs.working-directory }}
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
TAG: ${{ steps.check-tags.outputs.tag }}
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
@@ -223,8 +193,6 @@ jobs:
} >> "$GITHUB_OUTPUT"
test-pypi-publish:
# release-notes must run before publishing because its check-tags step
# validates version/tag state — do not remove this dependency.
needs:
- build
- release-notes
@@ -238,17 +206,17 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
- uses: actions/download-artifact@v8
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Publish to test PyPI
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
packages-dir: ${{ inputs.working-directory }}/dist/
verbose: true
print-hash: true
repository-url: https://test.pypi.org/legacy/
@@ -269,7 +237,7 @@ jobs:
contents: read
timeout-minutes: 20
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
# We explicitly *don't* set up caching here. This ensures our tests are
# maximally sensitive to catching breakage.
@@ -290,18 +258,26 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
- uses: actions/download-artifact@v8
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Import dist package
shell: bash
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
# Install directly from the locally-built wheel (no index resolution needed)
# Here we use:
# - The default regular PyPI index as the *primary* index, meaning
# that it takes priority (https://pypi.org/simple)
# - The test PyPI index as an extra index, so that any dependencies that
# are not found on test PyPI can be resolved and installed anyway.
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
# package because VERSION will not have been uploaded to regular PyPI yet.
# - attempt install again after 5 seconds if it fails because there is
# sometimes a delay in availability on test pypi
run: |
uv venv
VIRTUAL_ENV=.venv uv pip install dist/*.whl
@@ -315,11 +291,11 @@ jobs:
- name: Import test dependencies
run: uv sync --group test
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
# Overwrite the local version of the package with the built version
- name: Import published package (again)
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
shell: bash
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
@@ -330,17 +306,17 @@ jobs:
- name: Check for prerelease versions
# Block release if any dependencies allow prerelease versions
# (unless this is itself a prerelease version)
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
run: |
uv run python $GITHUB_WORKSPACE/.github/scripts/check_prerelease_dependencies.py pyproject.toml
- name: Run unit tests
run: make tests
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
- name: Get minimum versions
# Find the minimum published versions that satisfies the given constraints
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
id: min-version
run: |
VIRTUAL_ENV=.venv uv pip install packaging requests
@@ -356,16 +332,16 @@ jobs:
run: |
VIRTUAL_ENV=.venv uv pip install --force-reinstall --editable .
VIRTUAL_ENV=.venv uv pip install --force-reinstall $MIN_VERSIONS
make tests PYTEST_EXTRA="-q -k 'not test_serdes'"
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
make tests
working-directory: ${{ inputs.working-directory }}
- name: Import integration test dependencies
run: uv sync --group test --group test_integration
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
- name: Run integration tests
# Uses the Makefile's `integration_tests` target for the specified package
if: ${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/partners/') }}
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
@@ -405,7 +381,7 @@ jobs:
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
run: make integration_tests
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
# Test select published packages against new core
# Done when code changes are made to langchain-core
@@ -439,32 +415,32 @@ jobs:
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
# We implement this conditional as Github Actions does not have good support
# for conditionally needing steps. https://github.com/actions/runner/issues/491
# TODO: this seems to be resolved upstream, so we can probably remove this workaround
- name: Check if libs/core
run: |
if [ "${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core') }}" != "true" ]; then
if [ "${{ startsWith(inputs.working-directory, 'libs/core') }}" != "true" ]; then
echo "Not in libs/core. Exiting successfully."
exit 0
fi
- name: Set up Python + uv
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
if: startsWith(inputs.working-directory, 'libs/core')
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
- uses: actions/download-artifact@v8
if: startsWith(inputs.working-directory, 'libs/core')
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Test against ${{ matrix.partner }}
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
if: startsWith(inputs.working-directory, 'libs/core')
run: |
# Identify latest tag, excluding pre-releases
LATEST_PACKAGE_TAG="$(
@@ -509,11 +485,8 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: read
# Only run for core or langchain_v1 releases.
# Job-level 'if' does not support env context; must use inputs directly.
if: >-
startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/core')
|| startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/langchain_v1')
# Only run for core or langchain_v1 releases
if: startsWith(inputs.working-directory, 'libs/core') || startsWith(inputs.working-directory, 'libs/langchain_v1')
strategy:
fail-fast: false
matrix:
@@ -525,11 +498,11 @@ jobs:
# No API keys needed for now - deepagents `make test` only runs unit tests
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
path: langchain
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: ${{ matrix.package.repo }}
path: ${{ matrix.package.name }}
@@ -539,7 +512,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
- uses: actions/download-artifact@v8
with:
name: dist
path: dist/
@@ -583,25 +556,25 @@ jobs:
defaults:
run:
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
- uses: actions/download-artifact@v8
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
packages-dir: ${{ inputs.working-directory }}/dist/
verbose: true
print-hash: true
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
@@ -615,7 +588,7 @@ jobs:
- test-pypi-publish
- pre-release-checks
- publish
# Run if all needed jobs succeeded or were skipped
# Run if all needed jobs succeeded or were skipped (test-dependents only runs for core/langchain_v1)
if: ${{ !cancelled() && !failure() }}
runs-on: ubuntu-latest
permissions:
@@ -625,23 +598,23 @@ jobs:
defaults:
run:
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
- uses: actions/download-artifact@v8
with:
name: dist
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
path: ${{ inputs.working-directory }}/dist/
- name: Create Tag
uses: ncipollo/release-action@339a81892b84b4eeb0f6e744e4574d79d0d9b8dd # v1
uses: ncipollo/release-action@v1
with:
artifacts: "dist/*"
token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -33,7 +33,7 @@ jobs:
name: "Python ${{ inputs.python-version }}"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
@@ -50,7 +50,7 @@ jobs:
- name: "🧪 Run Core Unit Tests"
shell: bash
run: |
make test PYTEST_EXTRA=-q
make test
- name: "🔍 Calculate Minimum Dependency Versions"
working-directory: ${{ inputs.working-directory }}
@@ -69,7 +69,7 @@ jobs:
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
run: |
VIRTUAL_ENV=.venv uv pip install $MIN_VERSIONS
make tests PYTEST_EXTRA=-q
make tests
working-directory: ${{ inputs.working-directory }}
- name: "🧹 Verify Clean Working Directory"

View File

@@ -36,7 +36,7 @@ jobs:
name: "Pydantic ~=${{ inputs.pydantic-version }}"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -1,66 +0,0 @@
# Runs VCR cassette-backed integration tests in playback-only mode.
#
# No API keys needed — catches stale cassettes caused by test input
# changes without re-recording.
#
# Called as part of check_diffs.yml workflow.
name: "📼 VCR Cassette Tests"
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: true
type: string
description: "Python version to use"
permissions:
contents: read
env:
UV_FROZEN: "true"
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ inputs.python-version }}
cache-suffix: test-vcr-${{ inputs.working-directory }}
working-directory: ${{ inputs.working-directory }}
- name: "📦 Install Test Dependencies"
shell: bash
run: uv sync --group test
- name: "📼 Run VCR Cassette Tests (playback-only)"
shell: bash
env:
OPENAI_API_KEY: sk-fake
run: make test_vcr
- name: "🧹 Verify Clean Working Directory"
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'

View File

@@ -4,9 +4,6 @@ on:
issues:
types: [opened, edited]
permissions:
contents: read
jobs:
label-by-package:
permissions:
@@ -15,20 +12,14 @@ jobs:
steps:
- name: Sync package labels
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@v8
with:
script: |
const body = context.payload.issue.body || "";
// Extract text under "## Package" or "### Package" (handles " (Required)" suffix and being last section)
const match = body.match(/#{2,3} Package[^\n]*\n([\s\S]*?)(?:\n#{2,3} |$)/i);
if (!match) {
core.setFailed(
`Could not find "## Package" section in issue #${context.issue.number} body. ` +
`The issue template may have changed — update the regex in this workflow.`
);
return;
}
// Extract text under "### Package" (handles " (Required)" suffix and being last section)
const match = body.match(/### Package[^\n]*\n([\s\S]*?)(?:\n###|$)/i);
if (!match) return;
const packageSection = match[1].trim();

View File

@@ -26,7 +26,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
uses: actions/checkout@v6
- name: "🔍 Check CLAUDE.md and AGENTS.md are in sync"
run: |

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: "✅ Verify pyproject.toml & version.py Match"
run: |

View File

@@ -8,6 +8,7 @@
# - Pydantic compatibility tests (_test_pydantic.yml)
# - Integration test compilation checks (_compile_integration_test.yml)
# - Extended test suites that require additional dependencies
# - Codspeed benchmarks (if not labeled 'codspeed-ignore')
#
# Reports status to GitHub checks and PR status.
@@ -46,14 +47,14 @@ jobs:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
uses: actions/checkout@v6
- name: "🐍 Setup Python 3.11"
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: "📂 Get Changed Files"
id: files
uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
uses: Ana06/get-changed-files@v2.3.0
- name: "🔍 Analyze Changed Files & Generate Build Matrix"
id: set-matrix
run: |
@@ -66,7 +67,7 @@ jobs:
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
codspeed: ${{ steps.set-matrix.outputs.codspeed }}
# Run linting only on packages that have changed files
lint:
needs: [build]
@@ -124,21 +125,6 @@ jobs:
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
# Run VCR cassette-backed integration tests in playback-only mode (no API keys)
vcr-tests:
name: "VCR Cassette Tests"
needs: [build]
if: ${{ needs.build.outputs.vcr-tests != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
fail-fast: false
uses: ./.github/workflows/_test_vcr.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
# Run extended test suites that require additional dependencies
extended-tests:
name: "Extended Tests"
@@ -155,7 +141,7 @@ jobs:
run:
working-directory: ${{ matrix.job-configs.working-directory }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
@@ -185,20 +171,72 @@ jobs:
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
# Verify _release.yml dropdown options stay in sync with package directories
check-release-options:
name: "Validate Release Options"
# Run codspeed benchmarks only on packages that have changed files
codspeed:
name: "⚡ CodSpeed Benchmarks"
needs: [build]
if: ${{ needs.build.outputs.codspeed != '[]' && !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
runs-on: ubuntu-latest
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
fail-fast: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "🐍 Setup Python 3.11"
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
- uses: actions/checkout@v6
- name: "📦 Install UV Package Manager"
uses: astral-sh/setup-uv@v7
with:
python-version: "3.11"
- name: "📦 Install Dependencies"
run: python -m pip install pyyaml pytest
- name: "🔍 Check release dropdown matches packages"
run: python -m pytest .github/scripts/test_release_options.py -v
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
python-version: "3.13.11"
- uses: actions/setup-python@v6
with:
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
python-version: "3.13.11"
- name: "📦 Install Test Dependencies"
run: uv sync --group test
working-directory: ${{ matrix.job-configs.working-directory }}
- name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
uses: CodSpeedHQ/action@v4
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: |
cd ${{ matrix.job-configs.working-directory }}
if [ "${{ matrix.job-configs.working-directory }}" = "libs/core" ]; then
uv run --no-sync pytest ./tests/benchmarks --codspeed
else
uv run --no-sync pytest ./tests/ --codspeed
fi
mode: ${{ matrix.job-configs.working-directory == 'libs/core' && 'walltime' || 'instrumentation' }}
# Final status check - ensures all required jobs passed before allowing merge
ci_success:
@@ -209,10 +247,9 @@ jobs:
lint,
test,
compile-integration-tests,
vcr-tests,
extended-tests,
test-pydantic,
check-release-options,
codspeed,
]
if: |
always()

View File

@@ -1,196 +0,0 @@
# Auto-close issues that bypass or ignore the issue template checkboxes.
#
# GitHub issue forms enforce `required: true` checkboxes in the web UI,
# but the API bypasses form validation entirely — bots/scripts can open
# issues with every box unchecked or skip the template altogether.
#
# Rules:
# 0. No issue type -> close unless author is an org member
# 1. No checkboxes at all -> close unless author is an org member or bot
# 2. Checkboxes present but none checked -> close
# 3. "Submission checklist" section incomplete -> close
# 4. "Package (Required)" section has no selection -> close
#
# Org membership check reuses the shared helper from pr-labeler.js and
# the same GitHub App used by tag-external-issues.yml.
name: Close Unchecked Issues
on:
issues:
types: [opened]
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number }}
cancel-in-progress: true
jobs:
check-boxes:
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Validate issue checkboxes
if: steps.app-token.outcome == 'success'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const issue_number = context.payload.issue.number;
const body = context.payload.issue.body ?? '';
const allChecked = (body.match(/- \[x\]/gi) || []).length;
const allUnchecked = (body.match(/- \[ \]/g) || []).length;
const total = allChecked + allUnchecked;
// ── Helpers ─────────────────────────────────────────────────
// Extract checkboxes under a markdown H2/H3 heading.
// Returns { checked, unchecked } counts, or null if the
// section heading is not found in the body.
function parseSection(heading) {
const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
// Find the heading line
const headingRe = new RegExp(`^#{2,3}\\s+${escaped}\\s*$`, 'm');
const headingMatch = headingRe.exec(body);
if (!headingMatch) return null;
// Slice from after the heading to the next heading or end
const rest = body.slice(headingMatch.index + headingMatch[0].length);
const nextHeading = rest.search(/\n#{2,3}\s/);
const block = nextHeading === -1 ? rest : rest.slice(0, nextHeading);
return {
checked: (block.match(/- \[x\]/gi) || []).length,
unchecked: (block.match(/- \[ \]/g) || []).length,
};
}
let _cachedMember;
async function isOrgMember() {
if (_cachedMember) return _cachedMember;
const { h } = require('./.github/scripts/pr-labeler.js')
.loadAndInit(github, owner, repo, core);
const author = context.payload.sender.login;
const { isExternal } = await h.checkMembership(
author, context.payload.sender.type,
);
_cachedMember = { internal: !isExternal, author };
return _cachedMember;
}
async function closeWithComment(lines) {
const templateUrl = `https://github.com/${owner}/${repo}/issues/new/choose`;
lines.push(
'',
`Please use one of the [issue templates](${templateUrl}).`,
);
// Post comment first so the author sees the reason even if
// the subsequent close call fails.
await github.rest.issues.createComment({
owner, repo, issue_number,
body: lines.join('\n'),
});
await github.rest.issues.update({
owner, repo, issue_number,
state: 'closed',
state_reason: 'not_planned',
});
}
// ── Rule 0: no issue type (API/CLI bypass) ──────────────────
// Issue types are set automatically when using web UI templates.
// External users cannot set issue types via the API (requires
// write/triage permissions), so a missing type reliably indicates
// programmatic submission.
if (!context.payload.issue.type) {
let membership;
try {
membership = await isOrgMember();
} catch (e) {
// Org membership check failed — skip Rule 0 and let
// Rules 1-4 handle validation via checkboxes.
core.warning(`Rule 0: org membership check failed, skipping: ${e.message}`);
}
if (membership?.internal) {
console.log(`No issue type, but ${membership.author} is internal — OK`);
} else if (membership) {
console.log(`No issue type and ${membership.author} is external — closing`);
await closeWithComment([
'This issue was automatically closed because it appears to have been submitted programmatically — issue types are automatically set when using the GitHub web interface, and this issue has none.',
'',
'We do not allow automated issue submission at this time.',
]);
return;
}
}
// ── Rule 1: no checkboxes at all ────────────────────────────
if (total === 0) {
const { internal, author } = await isOrgMember();
if (internal) {
console.log(`No checkboxes, but ${author} is internal — OK`);
return;
}
console.log(`No checkboxes and ${author} is external — closing`);
await closeWithComment([
'This issue was automatically closed because no issue template was used.',
]);
return;
}
// ── Rule 2: checkboxes present but none checked ─────────────
if (allChecked === 0) {
console.log(`${allUnchecked} checkbox(es) present, none checked — closing`);
await closeWithComment([
'This issue was automatically closed because none of the required checkboxes were checked. Please re-file using an issue template and complete the checklist.',
]);
return;
}
// ── Rules 34: parse sections for targeted feedback ─────────
const checklist = parseSection('Submission checklist');
const pkg = parseSection('Package (Required)');
console.log(`Section parse — checklist: ${JSON.stringify(checklist)}, pkg: ${JSON.stringify(pkg)}`);
const problems = [];
if (checklist && checklist.unchecked > 0) {
problems.push(
'the submission checklist is incomplete — please confirm you searched for duplicates, included a reproduction, etc.'
);
}
if (pkg !== null && pkg.checked === 0) {
problems.push(
'no package was selected (e.g. langchain-core, langchain, langgraph) — this helps us route the issue to the right team'
);
} else if (pkg === null) {
problems.push(
'the package selection is missing (e.g. langchain-core, langchain, langgraph) — this helps us route the issue to the right team'
);
}
if (problems.length === 0) {
console.log(`All section checks passed (${allChecked} checked) — OK`);
return;
}
console.log(`Closing — problems: ${problems.join('; ')}`);
await closeWithComment([
'Thanks for opening an issue! It was automatically closed because:',
'',
...problems.map(p => `- ${p}`),
]);

View File

@@ -1,81 +0,0 @@
# CodSpeed performance benchmarks.
#
# Runs benchmarks on changed packages and uploads results to CodSpeed.
# Separated from the main CI workflow so that push-to-master baseline runs
# are never cancelled by subsequent merges (cancel-in-progress is only
# enabled for pull_request events).
name: "⚡ CodSpeed"
on:
push:
branches: [master]
pull_request:
# On PRs, cancel stale runs when new commits are pushed.
# On push-to-master, never cancel — these runs populate CodSpeed baselines.
concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'push' && github.sha || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
permissions:
contents: read
env:
UV_FROZEN: "true"
UV_NO_SYNC: "true"
jobs:
build:
name: "Detect Changes"
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "🐍 Setup Python 3.11"
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.11"
- name: "📂 Get Changed Files"
id: files
uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
- name: "🔍 Analyze Changed Files"
id: set-matrix
run: |
python -m pip install packaging requests
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
outputs:
codspeed: ${{ steps.set-matrix.outputs.codspeed }}
benchmarks:
name: "⚡ CodSpeed Benchmarks"
needs: [build]
if: ${{ needs.build.outputs.codspeed != '[]' }}
runs-on: codspeed-macro
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
fail-fast: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "📦 Install UV Package Manager"
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
with:
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
python-version: "3.13.11"
- name: "📦 Install Test Dependencies"
run: uv sync --group test
working-directory: ${{ matrix.job-configs.working-directory }}
- name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
uses: CodSpeedHQ/action@a50965600eafa04edcd6717761f55b77e52aafbd # v4
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: |
cd ${{ matrix.job-configs.working-directory }}
uv run --no-sync pytest ./tests/benchmarks --codspeed
mode: ${{ matrix.job-configs.codspeed-mode }}

View File

@@ -5,44 +5,17 @@
# Runs daily with the option to trigger manually.
name: "⏰ Integration Tests"
run-name: "Run Integration Tests - ${{ inputs.working-directory-override || (inputs.working-directory != 'all' && inputs.working-directory) || 'all libs' }} (Python ${{ inputs.python-version-override || '3.10, 3.13' }})"
run-name: "Run Integration Tests - ${{ inputs.working-directory-force || 'all libs' }} (Python ${{ inputs.python-version-force || '3.10, 3.13' }})"
on:
workflow_dispatch:
inputs:
working-directory:
type: choice
description: "Library to test (select from dropdown)"
default: "all"
options:
- "all"
- "libs/core"
- "libs/langchain"
- "libs/langchain_v1"
- "libs/text-splitters"
- "libs/standard-tests"
- "libs/model-profiles"
- "libs/partners/anthropic"
- "libs/partners/chroma"
- "libs/partners/deepseek"
- "libs/partners/exa"
- "libs/partners/fireworks"
- "libs/partners/groq"
- "libs/partners/huggingface"
- "libs/partners/mistralai"
- "libs/partners/nomic"
- "libs/partners/ollama"
- "libs/partners/openai"
- "libs/partners/openrouter"
- "libs/partners/perplexity"
- "libs/partners/qdrant"
- "libs/partners/xai"
working-directory-override:
working-directory-force:
type: string
description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
python-version-override:
description: "From which folder this pipeline executes - defaults to all in matrix - example value: libs/partners/anthropic"
python-version-force:
type: string
description: "Python version override defaults to 3.10 and 3.13 in matrix (e.g. 3.11)"
description: "Python version to use - defaults to 3.10 and 3.13 in matrix - example value: 3.11"
schedule:
- cron: "0 13 * * *" # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
@@ -79,32 +52,29 @@ jobs:
id: set-matrix
env:
DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
WORKING_DIRECTORY_OVERRIDE: ${{ github.event.inputs.working-directory-override || '' }}
WORKING_DIRECTORY_CHOICE: ${{ github.event.inputs.working-directory || 'all' }}
PYTHON_VERSION_OVERRIDE: ${{ github.event.inputs.python-version-override || '' }}
WORKING_DIRECTORY_FORCE: ${{ github.event.inputs.working-directory-force || '' }}
PYTHON_VERSION_FORCE: ${{ github.event.inputs.python-version-force || '' }}
run: |
# echo "matrix=..." where matrix is a json formatted str with keys python-version and working-directory
# python-version defaults to 3.10 and 3.13, overridden to [PYTHON_VERSION_OVERRIDE] if set
# working-directory priority: override string > dropdown choice > DEFAULT_LIBS
# python-version should default to 3.10 and 3.13, but is overridden to [PYTHON_VERSION_FORCE] if set
# working-directory should default to DEFAULT_LIBS, but is overridden to [WORKING_DIRECTORY_FORCE] if set
python_version='["3.10", "3.13"]'
python_version_min_3_11='["3.11", "3.13"]'
working_directory="$DEFAULT_LIBS"
if [ -n "$PYTHON_VERSION_OVERRIDE" ]; then
python_version="[\"$PYTHON_VERSION_OVERRIDE\"]"
# Bound override version to >= 3.11 for packages requiring it
if [ "$(echo "$PYTHON_VERSION_OVERRIDE >= 3.11" | bc -l)" -eq 1 ]; then
python_version_min_3_11="[\"$PYTHON_VERSION_OVERRIDE\"]"
if [ -n "$PYTHON_VERSION_FORCE" ]; then
python_version="[\"$PYTHON_VERSION_FORCE\"]"
# Bound forced version to >= 3.11 for packages requiring it
if [ "$(echo "$PYTHON_VERSION_FORCE >= 3.11" | bc -l)" -eq 1 ]; then
python_version_min_3_11="[\"$PYTHON_VERSION_FORCE\"]"
else
python_version_min_3_11='["3.11"]'
fi
fi
if [ -n "$WORKING_DIRECTORY_OVERRIDE" ]; then
working_directory="[\"$WORKING_DIRECTORY_OVERRIDE\"]"
elif [ "$WORKING_DIRECTORY_CHOICE" != "all" ]; then
working_directory="[\"$WORKING_DIRECTORY_CHOICE\"]"
if [ -n "$WORKING_DIRECTORY_FORCE" ]; then
working_directory="[\"$WORKING_DIRECTORY_FORCE\"]"
fi
matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
echo "$matrix"
echo $matrix
echo "matrix=$matrix" >> $GITHUB_OUTPUT
echo "python-version-min-3-11=$python_version_min_3_11" >> $GITHUB_OUTPUT
@@ -122,26 +92,26 @@ jobs:
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
path: langchain
# These libraries exist outside of the monorepo and need to be checked out separately
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-google
path: langchain-google
- name: "🔐 Authenticate to Google Cloud"
id: "auth"
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
uses: google-github-actions/auth@v3
with:
credentials_json: "${{ secrets.GOOGLE_CREDENTIALS }}"
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-aws
path: langchain-aws
- name: "🔐 Configure AWS Credentials"
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6
uses: aws-actions/configure-aws-credentials@v6
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -264,11 +234,11 @@ jobs:
path: libs/deepagents
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
path: langchain
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: ${{ matrix.package.repo }}
path: ${{ matrix.package.name }}

View File

@@ -1,213 +0,0 @@
# Unified PR labeler — applies size, file-based, title-based, and
# contributor classification labels in a single sequential workflow.
#
# Consolidates pr_labeler_file.yml, pr_labeler_title.yml,
# pr_size_labeler.yml, and PR-handling from tag-external-contributions.yml
# into one workflow to eliminate race conditions from concurrent label
# mutations. tag-external-issues.yml remains active for issue-only
# labeling. Backfill lives in pr_labeler_backfill.yml.
#
# Config and shared logic live in .github/scripts/pr-labeler-config.json
# and .github/scripts/pr-labeler.js — update those when adding partners.
#
# Setup Requirements:
# 1. Create a GitHub App with permissions:
# - Repository: Pull requests (write)
# - Repository: Issues (write)
# - Organization: Members (read)
# 2. Install the app on your organization and this repository
# 3. Add these repository secrets:
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
#
# The GitHub App token is required to check private organization membership
# and to propagate label events to downstream workflows.
name: "🏷️ PR Labeler"
on:
# Safe since we're not checking out or running the PR's code.
# NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
# Doing so would allow attackers to execute arbitrary code in the context of your repository.
pull_request_target:
types: [opened, synchronize, reopened, edited]
permissions:
contents: read
concurrency:
# Separate opened events so external/tier labels are never lost to cancellation
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'opened' && 'opened' || 'update' }}
cancel-in-progress: ${{ github.event.action != 'opened' }}
jobs:
label:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
issues: write
steps:
# Checks out the BASE branch (safe for pull_request_target — never
# the PR head). Needed to load .github/scripts/pr-labeler*.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Generate GitHub App token
if: github.event.action == 'opened'
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Verify App token
if: github.event.action == 'opened'
run: |
if [ -z "${{ steps.app-token.outputs.token }}" ]; then
echo "::error::GitHub App token generation failed — cannot classify contributor"
exit 1
fi
- name: Check org membership
if: github.event.action == 'opened'
id: check-membership
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const author = context.payload.sender.login;
const { isExternal } = await h.checkMembership(
author, context.payload.sender.type,
);
core.setOutput('is-external', isExternal ? 'true' : 'false');
- name: Apply PR labels
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
env:
IS_EXTERNAL: ${{ steps.check-membership.outputs.is-external }}
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const pr = context.payload.pull_request;
if (!pr) return;
const prNumber = pr.number;
const action = context.payload.action;
const toAdd = new Set();
const toRemove = new Set();
const currentLabels = (await github.paginate(
github.rest.issues.listLabelsOnIssue,
{ owner, repo, issue_number: prNumber, per_page: 100 },
)).map(l => l.name ?? '');
// ── Size + file labels (skip on 'edited' — files unchanged) ──
if (action !== 'edited') {
for (const sl of h.sizeLabels) await h.ensureLabel(sl);
const files = await github.paginate(github.rest.pulls.listFiles, {
owner, repo, pull_number: prNumber, per_page: 100,
});
const { totalChanged, sizeLabel } = h.computeSize(files);
toAdd.add(sizeLabel);
for (const sl of h.sizeLabels) {
if (currentLabels.includes(sl) && sl !== sizeLabel) toRemove.add(sl);
}
console.log(`Size: ${totalChanged} changed lines → ${sizeLabel}`);
for (const label of h.matchFileLabels(files)) {
toAdd.add(label);
}
}
// ── Title-based labels ──
const { labels: titleLabels, typeLabel } = h.matchTitleLabels(pr.title || '');
for (const label of titleLabels) toAdd.add(label);
// Remove stale type labels only when a type was detected
if (typeLabel) {
for (const tl of h.allTypeLabels) {
if (currentLabels.includes(tl) && !titleLabels.has(tl)) toRemove.add(tl);
}
}
// ── Internal label (only on open, non-external contributors) ──
// IS_EXTERNAL is empty string on non-opened events (step didn't
// run), so this guard is only true for opened + internal.
if (action === 'opened' && process.env.IS_EXTERNAL === 'false') {
toAdd.add('internal');
}
// ── Apply changes ──
// Ensure all labels we're about to add exist (addLabels returns
// 422 if any label in the batch is missing, which would prevent
// ALL labels from being applied).
for (const name of toAdd) {
await h.ensureLabel(name);
}
for (const name of toRemove) {
if (toAdd.has(name)) continue;
try {
await github.rest.issues.removeLabel({
owner, repo, issue_number: prNumber, name,
});
} catch (e) {
if (e.status !== 404) throw e;
}
}
const addList = [...toAdd];
if (addList.length > 0) {
await github.rest.issues.addLabels({
owner, repo, issue_number: prNumber, labels: addList,
});
}
const removed = [...toRemove].filter(r => !toAdd.has(r));
console.log(`PR #${prNumber}: +[${addList.join(', ')}] -[${removed.join(', ')}]`);
# Apply tier label BEFORE the external label so that
# "trusted-contributor" is already present when the "external" labeled
# event fires and triggers require_issue_link.yml.
- name: Apply contributor tier label
if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const pr = context.payload.pull_request;
await h.applyTierLabel(pr.number, pr.user.login);
- name: Add external label
if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
# Use App token so the "labeled" event propagates to downstream
# workflows (e.g. require_issue_link.yml). Events created by the
# default GITHUB_TOKEN do not trigger additional workflow runs.
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
await h.ensureLabel('external');
await github.rest.issues.addLabels({
owner, repo,
issue_number: prNumber,
labels: ['external'],
});
console.log(`Added 'external' label to PR #${prNumber}`);

View File

@@ -1,130 +0,0 @@
# Backfill PR labels on all open PRs.
#
# Manual-only workflow that applies the same labels as pr_labeler.yml
# (size, file, title, contributor classification) to existing open PRs.
# Reuses shared logic from .github/scripts/pr-labeler.js.
name: "🏷️ PR Labeler Backfill"
on:
workflow_dispatch:
inputs:
max_items:
description: "Maximum number of open PRs to process"
default: "100"
type: string
permissions:
contents: read
jobs:
backfill:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Backfill labels on open PRs
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const rawMax = '${{ inputs.max_items }}';
const maxItems = parseInt(rawMax, 10);
if (isNaN(maxItems) || maxItems <= 0) {
core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
return;
}
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
for (const name of [...h.sizeLabels, ...h.tierLabels]) {
await h.ensureLabel(name);
}
const contributorCache = new Map();
const fileRules = h.buildFileRules();
const prs = await github.paginate(github.rest.pulls.list, {
owner, repo, state: 'open', per_page: 100,
});
let processed = 0;
let failures = 0;
for (const pr of prs) {
if (processed >= maxItems) break;
try {
const author = pr.user.login;
const info = await h.getContributorInfo(contributorCache, author, pr.user.type);
const labels = new Set();
labels.add(info.isExternal ? 'external' : 'internal');
if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
labels.add('trusted-contributor');
} else if (info.isExternal && info.mergedCount === 0) {
labels.add('new-contributor');
}
// Size + file labels
const files = await github.paginate(github.rest.pulls.listFiles, {
owner, repo, pull_number: pr.number, per_page: 100,
});
const { sizeLabel } = h.computeSize(files);
labels.add(sizeLabel);
for (const label of h.matchFileLabels(files, fileRules)) {
labels.add(label);
}
// Title labels
const { labels: titleLabels } = h.matchTitleLabels(pr.title ?? '');
for (const tl of titleLabels) labels.add(tl);
// Ensure all labels exist before batch add
for (const name of labels) {
await h.ensureLabel(name);
}
// Remove stale managed labels
const currentLabels = (await github.paginate(
github.rest.issues.listLabelsOnIssue,
{ owner, repo, issue_number: pr.number, per_page: 100 },
)).map(l => l.name ?? '');
const managed = [...h.sizeLabels, ...h.tierLabels, ...h.allTypeLabels];
for (const name of currentLabels) {
if (managed.includes(name) && !labels.has(name)) {
try {
await github.rest.issues.removeLabel({
owner, repo, issue_number: pr.number, name,
});
} catch (e) {
if (e.status !== 404) throw e;
}
}
}
await github.rest.issues.addLabels({
owner, repo, issue_number: pr.number, labels: [...labels],
});
console.log(`PR #${pr.number} (${author}): ${[...labels].join(', ')}`);
processed++;
} catch (e) {
failures++;
core.warning(`Failed to process PR #${pr.number}: ${e.message}`);
}
}
console.log(`\nBackfill complete. Processed ${processed} PRs, ${failures} failures. ${contributorCache.size} unique authors.`);

28
.github/workflows/pr_labeler_file.yml vendored Normal file
View File

@@ -0,0 +1,28 @@
# Label PRs based on changed files.
#
# See `.github/pr-file-labeler.yml` to see rules for each label/directory.
name: "🏷️ Pull Request Labeler"
on:
# Safe since we're not checking out or running the PR's code
# Never check out the PR's head in a pull_request_target job
pull_request_target:
types: [opened, synchronize, reopened]
jobs:
labeler:
name: "label"
permissions:
contents: read
pull-requests: write
issues: write
runs-on: ubuntu-latest
steps:
- name: Label Pull Request
uses: actions/labeler@v6
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/pr-file-labeler.yml
sync-labels: false

44
.github/workflows/pr_labeler_title.yml vendored Normal file
View File

@@ -0,0 +1,44 @@
# Label PRs based on their titles.
#
# Uses conventional commit types from PR titles to apply labels.
# Note: Scope-based labeling (e.g., integration labels) is handled by pr_labeler_file.yml
name: "🏷️ PR Title Labeler"
on:
# Safe since we're not checking out or running the PR's code
# Never check out the PR's head in a pull_request_target job
pull_request_target:
types: [opened, edited]
jobs:
pr-title-labeler:
name: "label"
permissions:
contents: read
pull-requests: write
issues: write
runs-on: ubuntu-latest
steps:
- name: Label PR based on title
uses: bcoe/conventional-release-labels@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
type_labels: >-
{
"feat": "feature",
"fix": "fix",
"docs": "documentation",
"style": "linting",
"refactor": "refactor",
"perf": "performance",
"test": "tests",
"build": "infra",
"ci": "infra",
"chore": "infra",
"revert": "revert",
"release": "release",
"breaking": "breaking"
}
ignored_types: '[]'

View File

@@ -25,13 +25,12 @@
# * chore — other changes that don't modify source or test files
# * revert — reverts a previous commit
# * release — prepare a new release
# * hotfix — urgent fix
#
# Allowed Scope(s) (optional):
# core, langchain, langchain-classic, model-profiles,
# standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
# fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
# perplexity, qdrant, xai, infra, deps, partners
# perplexity, qdrant, xai, infra, deps
#
# Multiple scopes can be used by separating them with a comma. For example:
#
@@ -66,17 +65,8 @@ jobs:
name: "validate format"
runs-on: ubuntu-latest
steps:
- name: "🚫 Reject empty scope"
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: |
if [[ "$PR_TITLE" =~ ^[a-z]+\(\)[!]?: ]]; then
echo "::error::PR title has empty scope parentheses: '$PR_TITLE'"
echo "Either remove the parentheses or provide a scope (e.g., 'fix(core): ...')."
exit 1
fi
- name: "✅ Validate Conventional Commits Format"
uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6
uses: amannn/action-semantic-pull-request@v6
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
@@ -93,7 +83,6 @@ jobs:
chore
revert
release
hotfix
scopes: |
core
langchain
@@ -119,7 +108,6 @@ jobs:
xai
infra
deps
partners
requireScope: false
disallowScopes: |
release

View File

@@ -18,28 +18,76 @@ permissions:
jobs:
refresh-profiles:
uses: ./.github/workflows/_refresh_model_profiles.yml
with:
providers: >-
[
{"provider":"anthropic", "data_dir":"libs/partners/anthropic/langchain_anthropic/data"},
{"provider":"deepseek", "data_dir":"libs/partners/deepseek/langchain_deepseek/data"},
{"provider":"fireworks-ai", "data_dir":"libs/partners/fireworks/langchain_fireworks/data"},
{"provider":"groq", "data_dir":"libs/partners/groq/langchain_groq/data"},
{"provider":"huggingface", "data_dir":"libs/partners/huggingface/langchain_huggingface/data"},
{"provider":"mistral", "data_dir":"libs/partners/mistralai/langchain_mistralai/data"},
{"provider":"openai", "data_dir":"libs/partners/openai/langchain_openai/data"},
{"provider":"openrouter", "data_dir":"libs/partners/openrouter/langchain_openrouter/data"},
{"provider":"perplexity", "data_dir":"libs/partners/perplexity/langchain_perplexity/data"},
{"provider":"xai", "data_dir":"libs/partners/xai/langchain_xai/data"}
]
cli-path: libs/model-profiles
add-paths: libs/partners/**/data/_profiles.py
pr-body: |
Automated refresh of model profile data for all in-monorepo partner
integrations via `langchain-profiles refresh`.
name: "refresh all partner profiles"
runs-on: ubuntu-latest
steps:
- name: "📋 Checkout"
uses: actions/checkout@v6
🤖 Generated by the `refresh_model_profiles` workflow.
secrets:
MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
- name: "🐍 Set up Python + uv"
uses: ./.github/actions/uv_setup
with:
python-version: "3.12"
working-directory: libs/model-profiles
- name: "📦 Install langchain-profiles CLI"
working-directory: libs/model-profiles
run: uv sync
- name: "🔄 Refresh profiles"
working-directory: libs/model-profiles
run: |
declare -A PROVIDERS=(
[anthropic]=anthropic
[deepseek]=deepseek
[fireworks]=fireworks-ai
[groq]=groq
[huggingface]=huggingface
[mistralai]=mistral
[openai]=openai
[openrouter]=openrouter
[perplexity]=perplexity
[xai]=xai
)
for partner in "${!PROVIDERS[@]}"; do
provider="${PROVIDERS[$partner]}"
data_dir="../../libs/partners/${partner}/langchain_${partner//-/_}/data"
echo "--- Refreshing ${partner} (provider: ${provider}) ---"
echo y | uv run langchain-profiles refresh \
--provider "$provider" \
--data-dir "$data_dir"
done
- name: "🔑 Generate GitHub App token"
id: app-token
uses: actions/create-github-app-token@v2
with:
app-id: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
- name: "🔀 Create pull request"
id: create-pr
uses: peter-evans/create-pull-request@v8
with:
token: ${{ steps.app-token.outputs.token }}
branch: bot/refresh-model-profiles
commit-message: "chore(model-profiles): refresh model profile data"
title: "chore(model-profiles): refresh model profile data"
body: |
Automated refresh of model profile data for all in-monorepo partner
integrations via `langchain-profiles refresh`.
🤖 Generated by the `refresh_model_profiles` workflow.
labels: bot
add-paths: libs/partners/**/data/_profiles.py
- name: "📝 Summary"
run: |
op="${{ steps.create-pr.outputs.pull-request-operation }}"
url="${{ steps.create-pr.outputs.pull-request-url }}"
if [ "$op" = "created" ] || [ "$op" = "updated" ]; then
echo "### ✅ PR ${op}: ${url}" >> "$GITHUB_STEP_SUMMARY"
else
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
fi

View File

@@ -1,195 +0,0 @@
# Reopen PRs that were auto-closed by require_issue_link.yml when the
# contributor was not assigned to the linked issue. When a maintainer
# assigns the contributor to the issue, this workflow finds matching
# closed PRs, verifies the issue link, and reopens them.
#
# Uses the default GITHUB_TOKEN (not a PAT or app token) so that the
# reopen and label-removal events do NOT re-trigger other workflows.
# GitHub suppresses events created by the default GITHUB_TOKEN within
# workflow runs to prevent infinite loops.
name: Reopen PR on Issue Assignment
on:
issues:
types: [assigned]
permissions:
contents: read
jobs:
reopen-linked-prs:
runs-on: ubuntu-latest
permissions:
actions: write
pull-requests: write
steps:
- name: Find and reopen matching PRs
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const { owner, repo } = context.repo;
const issueNumber = context.payload.issue.number;
const assignee = context.payload.assignee.login;
console.log(
`Issue #${issueNumber} assigned to ${assignee} — searching for closed PRs to reopen`,
);
const q = [
`is:pr`,
`is:closed`,
`author:${assignee}`,
`label:missing-issue-link`,
`repo:${owner}/${repo}`,
].join(' ');
let data;
try {
({ data } = await github.rest.search.issuesAndPullRequests({
q,
per_page: 30,
}));
} catch (e) {
throw new Error(
`Failed to search for closed PRs to reopen after assigning ${assignee} ` +
`to #${issueNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`,
);
}
if (data.total_count === 0) {
console.log('No matching closed PRs found');
return;
}
console.log(`Found ${data.total_count} candidate PR(s)`);
// Must stay in sync with the identical pattern in require_issue_link.yml
const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;
for (const item of data.items) {
const prNumber = item.number;
const body = item.body || '';
const matches = [...body.matchAll(pattern)];
const referencedIssues = matches.map(m => parseInt(m[1], 10));
if (!referencedIssues.includes(issueNumber)) {
console.log(`PR #${prNumber} does not reference #${issueNumber} — skipping`);
continue;
}
// Skip if already bypassed
const labels = item.labels.map(l => l.name);
if (labels.includes('bypass-issue-check')) {
console.log(`PR #${prNumber} already has bypass-issue-check — skipping`);
continue;
}
// Reopen first, remove label second — a closed PR that still has
// missing-issue-link is recoverable; a closed PR with the label
// stripped is invisible to both workflows.
try {
await github.rest.pulls.update({
owner,
repo,
pull_number: prNumber,
state: 'open',
});
console.log(`Reopened PR #${prNumber}`);
} catch (e) {
if (e.status === 422) {
// Head branch deleted — PR is unrecoverable. Notify the
// contributor so they know to open a new PR.
core.warning(`Cannot reopen PR #${prNumber}: head branch was likely deleted`);
try {
await github.rest.issues.createComment({
owner,
repo,
issue_number: prNumber,
body:
`You have been assigned to #${issueNumber}, but this PR could not be ` +
`reopened because the head branch has been deleted. Please open a new ` +
`PR referencing the issue.`,
});
} catch (commentErr) {
core.warning(
`Also failed to post comment on PR #${prNumber}: ${commentErr.message}`,
);
}
continue;
}
// Transient errors (rate limit, 5xx) should fail the job so
// the label is NOT removed and the run can be retried.
throw e;
}
// Remove missing-issue-link label only after successful reopen
try {
await github.rest.issues.removeLabel({
owner,
repo,
issue_number: prNumber,
name: 'missing-issue-link',
});
console.log(`Removed missing-issue-link from PR #${prNumber}`);
} catch (e) {
if (e.status !== 404) throw e;
}
// Minimize stale enforcement comment (best-effort;
// sync w/ require_issue_link.yml minimize blocks)
try {
const marker = '<!-- require-issue-link -->';
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 },
);
const stale = comments.find(c => c.body && c.body.includes(marker));
if (stale) {
await github.graphql(`
mutation($id: ID!) {
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
minimizedComment { isMinimized }
}
}
`, { id: stale.node_id });
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
}
} catch (e) {
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
}
// Re-run the failed require_issue_link check so it picks up the
// new assignment. The re-run uses the original event payload but
// fetches live issue data, so the assignment check will pass.
//
// Limitation: we look up runs by the PR's current head SHA. If the
// contributor pushed new commits while the PR was closed, head.sha
// won't match the SHA of the original failed run and the query will
// return 0 results. This is acceptable because any push after reopen
// triggers a fresh require_issue_link run against the new SHA.
try {
const { data: pr } = await github.rest.pulls.get({
owner, repo, pull_number: prNumber,
});
const { data: runs } = await github.rest.actions.listWorkflowRuns({
owner, repo,
workflow_id: 'require_issue_link.yml',
head_sha: pr.head.sha,
status: 'failure',
per_page: 1,
});
if (runs.workflow_runs.length > 0) {
await github.rest.actions.reRunWorkflowFailedJobs({
owner, repo,
run_id: runs.workflow_runs[0].id,
});
console.log(`Re-ran failed require_issue_link run ${runs.workflow_runs[0].id} for PR #${prNumber}`);
} else {
console.log(`No failed require_issue_link runs found for PR #${prNumber} — skipping re-run`);
}
} catch (e) {
core.warning(`Could not re-run require_issue_link check for PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`);
}
}

View File

@@ -1,467 +0,0 @@
# Require external PRs to reference an approved issue (e.g. Fixes #NNN) and
# the PR author to be assigned to that issue. On failure the PR is
# labeled "missing-issue-link", commented on, and closed.
#
# Maintainer override: an org member can reopen the PR or remove
# "missing-issue-link" — both add "bypass-issue-check" and reopen.
#
# Dependency: pr_labeler.yml must apply the "external" label first. This
# workflow does NOT trigger on "opened" (new PRs have no labels yet, so the
# gate would always skip).
name: Require Issue Link
on:
pull_request_target:
# NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
# Doing so would allow attackers to execute arbitrary code in the context of your repository.
types: [edited, reopened, labeled, unlabeled]
# ──────────────────────────────────────────────────────────────────────────────
# Enforcement gate: set to 'true' to activate the issue link requirement.
# When 'false', the workflow still runs the check logic (useful for dry-run
# visibility) but will NOT label, comment, close, or fail PRs.
# ──────────────────────────────────────────────────────────────────────────────
env:
ENFORCE_ISSUE_LINK: "true"
permissions:
contents: read
jobs:
check-issue-link:
# Run when the "external" label is added, on edit/reopen if already labeled,
# or when "missing-issue-link" is removed (triggers maintainer override check).
# Skip entirely when the PR already carries "trusted-contributor" or
# "bypass-issue-check".
if: >-
!contains(github.event.pull_request.labels.*.name, 'trusted-contributor') &&
!contains(github.event.pull_request.labels.*.name, 'bypass-issue-check') &&
(
(github.event.action == 'labeled' && github.event.label.name == 'external') ||
(github.event.action == 'unlabeled' && github.event.label.name == 'missing-issue-link' && contains(github.event.pull_request.labels.*.name, 'external')) ||
(github.event.action != 'labeled' && github.event.action != 'unlabeled' && contains(github.event.pull_request.labels.*.name, 'external'))
)
runs-on: ubuntu-latest
permissions:
actions: write
pull-requests: write
steps:
- name: Check for issue link and assignee
id: check-link
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
const action = context.payload.action;
// ── Helper: ensure a label exists, then add it to the PR ────────
async function ensureAndAddLabel(labelName, color) {
try {
await github.rest.issues.getLabel({ owner, repo, name: labelName });
} catch (e) {
if (e.status !== 404) throw e;
try {
await github.rest.issues.createLabel({ owner, repo, name: labelName, color });
} catch (createErr) {
// 422 = label was created by a concurrent run between our
// GET and POST — safe to ignore.
if (createErr.status !== 422) throw createErr;
}
}
await github.rest.issues.addLabels({
owner, repo, issue_number: prNumber, labels: [labelName],
});
}
// ── Helper: check if the user who triggered this event (reopened
// the PR / removed the label) has write+ access on the repo ───
// Uses the repo collaborator permission endpoint instead of the
// org membership endpoint. The org endpoint requires the caller
// to be an org member, which GITHUB_TOKEN (an app installation
// token) never is — so it always returns 403.
async function senderIsOrgMember() {
const sender = context.payload.sender?.login;
if (!sender) {
throw new Error('Event has no sender — cannot check permissions');
}
try {
const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
owner, repo, username: sender,
});
const perm = data.permission;
if (['admin', 'maintain', 'write'].includes(perm)) {
console.log(`${sender} has ${perm} permission — treating as maintainer`);
return { isMember: true, login: sender };
}
console.log(`${sender} has ${perm} permission — not a maintainer`);
return { isMember: false, login: sender };
} catch (e) {
if (e.status === 404) {
console.log(`Cannot check permissions for ${sender} — treating as non-maintainer`);
return { isMember: false, login: sender };
}
const status = e.status ?? 'unknown';
throw new Error(
`Permission check failed for ${sender} (HTTP ${status}): ${e.message}`,
);
}
}
// ── Helper: apply maintainer bypass (shared by both override paths) ──
async function applyMaintainerBypass(reason) {
console.log(reason);
// Remove missing-issue-link if present
try {
await github.rest.issues.removeLabel({
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
});
} catch (e) {
if (e.status !== 404) throw e;
}
// Reopen before adding bypass label — a failed reopen is more
// actionable than a closed PR with a bypass label stuck on it.
if (context.payload.pull_request.state === 'closed') {
try {
await github.rest.pulls.update({
owner, repo, pull_number: prNumber, state: 'open',
});
console.log(`Reopened PR #${prNumber}`);
} catch (e) {
// 422 if head branch deleted; 403 if permissions insufficient.
// Bypass labels still apply — maintainer can reopen manually.
core.warning(
`Could not reopen PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
`Bypass labels were applied — a maintainer may need to reopen manually.`,
);
}
}
// Add bypass-issue-check so future triggers skip enforcement
await ensureAndAddLabel('bypass-issue-check', '0e8a16');
// Minimize stale enforcement comment (best-effort; must not
// abort bypass — sync w/ reopen_on_assignment.yml & step below)
try {
const marker = '<!-- require-issue-link -->';
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 },
);
const stale = comments.find(c => c.body && c.body.includes(marker));
if (stale) {
await github.graphql(`
mutation($id: ID!) {
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
minimizedComment { isMinimized }
}
}
`, { id: stale.node_id });
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
}
} catch (e) {
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
}
core.setOutput('has-link', 'true');
core.setOutput('is-assigned', 'true');
}
// ── Maintainer override: removed "missing-issue-link" label ─────
if (action === 'unlabeled') {
const { isMember, login } = await senderIsOrgMember();
if (isMember) {
await applyMaintainerBypass(
`Maintainer ${login} removed missing-issue-link from PR #${prNumber} — bypassing enforcement`,
);
return;
}
// Non-member removed the label — re-add it defensively and
// set failure outputs so downstream steps (comment, close) fire.
// NOTE: addLabels fires a "labeled" event, but the job-level gate
// only matches labeled events for "external", so no re-trigger.
console.log(`Non-member ${login} removed missing-issue-link — re-adding`);
try {
await ensureAndAddLabel('missing-issue-link', 'b76e79');
} catch (e) {
core.warning(
`Failed to re-add missing-issue-link (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
`Downstream step will retry.`,
);
}
core.setOutput('has-link', 'false');
core.setOutput('is-assigned', 'false');
return;
}
// ── Maintainer override: reopened PR with "missing-issue-link" ──
const prLabels = context.payload.pull_request.labels.map(l => l.name);
if (action === 'reopened' && prLabels.includes('missing-issue-link')) {
const { isMember, login } = await senderIsOrgMember();
if (isMember) {
await applyMaintainerBypass(
`Maintainer ${login} reopened PR #${prNumber} — bypassing enforcement`,
);
return;
}
console.log(`Non-member ${login} reopened PR — proceeding with check`);
}
// ── Fetch live labels (race guard) ──────────────────────────────
const { data: liveLabels } = await github.rest.issues.listLabelsOnIssue({
owner, repo, issue_number: prNumber,
});
const liveNames = liveLabels.map(l => l.name);
if (liveNames.includes('trusted-contributor') || liveNames.includes('bypass-issue-check')) {
console.log('PR has trusted-contributor or bypass-issue-check label — bypassing');
core.setOutput('has-link', 'true');
core.setOutput('is-assigned', 'true');
return;
}
const body = context.payload.pull_request.body || '';
const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;
const matches = [...body.matchAll(pattern)];
if (matches.length === 0) {
console.log('No issue link found in PR body');
core.setOutput('has-link', 'false');
core.setOutput('is-assigned', 'false');
return;
}
const issues = matches.map(m => `#${m[1]}`).join(', ');
console.log(`Found issue link(s): ${issues}`);
core.setOutput('has-link', 'true');
// Check whether the PR author is assigned to at least one linked issue
const prAuthor = context.payload.pull_request.user.login;
const MAX_ISSUES = 5;
const allIssueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
const issueNumbers = allIssueNumbers.slice(0, MAX_ISSUES);
if (allIssueNumbers.length > MAX_ISSUES) {
core.warning(
`PR references ${allIssueNumbers.length} issues — only checking the first ${MAX_ISSUES}`,
);
}
let assignedToAny = false;
for (const num of issueNumbers) {
try {
const { data: issue } = await github.rest.issues.get({
owner, repo, issue_number: num,
});
const assignees = issue.assignees.map(a => a.login.toLowerCase());
if (assignees.includes(prAuthor.toLowerCase())) {
console.log(`PR author "${prAuthor}" is assigned to #${num}`);
assignedToAny = true;
break;
} else {
console.log(`PR author "${prAuthor}" is NOT assigned to #${num} (assignees: ${assignees.join(', ') || 'none'})`);
}
} catch (error) {
if (error.status === 404) {
console.log(`Issue #${num} not found — skipping`);
} else {
// Non-404 errors (rate limit, server error) must not be
// silently skipped — they could cause false enforcement
// (closing a legitimate PR whose assignment can't be verified).
throw new Error(
`Cannot verify assignee for issue #${num} (${error.status}): ${error.message}`,
);
}
}
}
core.setOutput('is-assigned', assignedToAny ? 'true' : 'false');
- name: Add missing-issue-link label
if: >-
env.ENFORCE_ISSUE_LINK == 'true' &&
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
const labelName = 'missing-issue-link';
// Ensure the label exists (no checkout/shared helper available)
try {
await github.rest.issues.getLabel({ owner, repo, name: labelName });
} catch (e) {
if (e.status !== 404) throw e;
try {
await github.rest.issues.createLabel({
owner, repo, name: labelName, color: 'b76e79',
});
} catch (createErr) {
if (createErr.status !== 422) throw createErr;
}
}
await github.rest.issues.addLabels({
owner, repo, issue_number: prNumber, labels: [labelName],
});
- name: Remove missing-issue-link label and reopen PR
if: >-
env.ENFORCE_ISSUE_LINK == 'true' &&
steps.check-link.outputs.has-link == 'true' && steps.check-link.outputs.is-assigned == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
try {
await github.rest.issues.removeLabel({
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
});
} catch (error) {
if (error.status !== 404) throw error;
}
// Reopen if this workflow previously closed the PR. We check the
// event payload labels (not live labels) because we already removed
// missing-issue-link above; the payload still reflects pre-step state.
const labels = context.payload.pull_request.labels.map(l => l.name);
if (context.payload.pull_request.state === 'closed' && labels.includes('missing-issue-link')) {
await github.rest.pulls.update({
owner,
repo,
pull_number: prNumber,
state: 'open',
});
console.log(`Reopened PR #${prNumber}`);
}
// Minimize stale enforcement comment (best-effort;
// sync w/ applyMaintainerBypass above & reopen_on_assignment.yml)
try {
const marker = '<!-- require-issue-link -->';
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 },
);
const stale = comments.find(c => c.body && c.body.includes(marker));
if (stale) {
await github.graphql(`
mutation($id: ID!) {
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
minimizedComment { isMinimized }
}
}
`, { id: stale.node_id });
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
}
} catch (e) {
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
}
- name: Post comment, close PR, and fail
if: >-
env.ENFORCE_ISSUE_LINK == 'true' &&
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
script: |
const { owner, repo } = context.repo;
const prNumber = context.payload.pull_request.number;
const hasLink = '${{ steps.check-link.outputs.has-link }}' === 'true';
const isAssigned = '${{ steps.check-link.outputs.is-assigned }}' === 'true';
const marker = '<!-- require-issue-link -->';
let lines;
if (!hasLink) {
lines = [
marker,
'**This PR has been automatically closed** because it does not link to an approved issue.',
'',
'All external contributions must reference an approved issue or discussion. Please:',
'1. Find or [open an issue](https://github.com/' + owner + '/' + repo + '/issues/new/choose) describing the change',
'2. Wait for a maintainer to approve and assign you',
'3. Add `Fixes #<issue_number>`, `Closes #<issue_number>`, or `Resolves #<issue_number>` to your PR description and the PR will be reopened automatically',
'',
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
];
} else {
lines = [
marker,
'**This PR has been automatically closed** because you are not assigned to the linked issue.',
'',
'External contributors must be assigned to an issue before opening a PR for it. Please:',
'1. Comment on the linked issue to request assignment from a maintainer',
'2. Once assigned, your PR will be reopened automatically',
'',
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
];
}
const body = lines.join('\n');
// Deduplicate: check for existing comment with the marker
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 },
);
const existing = comments.find(c => c.body && c.body.includes(marker));
if (!existing) {
await github.rest.issues.createComment({
owner,
repo,
issue_number: prNumber,
body,
});
console.log('Posted requirement comment');
} else if (existing.body !== body) {
await github.rest.issues.updateComment({
owner,
repo,
comment_id: existing.id,
body,
});
console.log('Updated existing comment with new message');
} else {
console.log('Comment already exists — skipping');
}
// Close the PR
if (context.payload.pull_request.state === 'open') {
await github.rest.pulls.update({
owner,
repo,
pull_number: prNumber,
state: 'closed',
});
console.log(`Closed PR #${prNumber}`);
}
// Cancel all other in-progress and queued workflow runs for this PR
const headSha = context.payload.pull_request.head.sha;
for (const status of ['in_progress', 'queued']) {
const runs = await github.paginate(
github.rest.actions.listWorkflowRunsForRepo,
{ owner, repo, head_sha: headSha, status, per_page: 100 },
);
for (const run of runs) {
if (run.id === context.runId) continue;
try {
await github.rest.actions.cancelWorkflowRun({
owner, repo, run_id: run.id,
});
console.log(`Cancelled ${status} run ${run.id} (${run.name})`);
} catch (err) {
console.log(`Could not cancel run ${run.id}: ${err.message}`);
}
}
}
const reason = !hasLink
? 'PR must reference an issue using auto-close keywords (e.g., "Fixes #123").'
: 'PR author must be assigned to the linked issue.';
core.setFailed(reason);

View File

@@ -0,0 +1,148 @@
# Automatically tag issues and pull requests as "external" or "internal"
# based on whether the author is a member of the langchain-ai
# GitHub organization.
#
# Setup Requirements:
# 1. Create a GitHub App with permissions:
# - Repository: Issues (write), Pull requests (write)
# - Organization: Members (read)
# 2. Install the app on your organization and this repository
# 3. Add these repository secrets:
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
#
# The GitHub App token is required to check private organization membership.
# Without it, the workflow will fail.
name: Tag External Contributions
on:
issues:
types: [opened]
pull_request_target:
types: [opened]
jobs:
tag-external:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@v2
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Check if contributor is external
id: check-membership
uses: actions/github-script@v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const author = context.payload.sender.login;
try {
// Check if the author is a member of the langchain-ai organization
// This requires org:read permissions to see private memberships
const membership = await github.rest.orgs.getMembershipForUser({
org: 'langchain-ai',
username: author
});
// Check if membership is active (not just pending invitation)
if (membership.data.state === 'active') {
console.log(`User ${author} is an active member of langchain-ai organization`);
core.setOutput('is-external', 'false');
} else {
console.log(`User ${author} has pending membership in langchain-ai organization`);
core.setOutput('is-external', 'true');
}
} catch (error) {
if (error.status === 404) {
console.log(`User ${author} is not a member of langchain-ai organization`);
core.setOutput('is-external', 'true');
} else {
console.error('Error checking membership:', error);
console.log('Status:', error.status);
console.log('Message:', error.message);
// If we can't determine membership due to API error, assume external for safety
core.setOutput('is-external', 'true');
}
}
- name: Add external label to issue
if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'issues'
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const issue_number = context.payload.issue.number;
await github.rest.issues.addLabels({
owner,
repo,
issue_number,
labels: ['external']
});
console.log(`Added 'external' label to issue #${issue_number}`);
- name: Add external label to pull request
if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'pull_request_target'
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const pull_number = context.payload.pull_request.number;
await github.rest.issues.addLabels({
owner,
repo,
issue_number: pull_number,
labels: ['external']
});
console.log(`Added 'external' label to pull request #${pull_number}`);
- name: Add internal label to issue
if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'issues'
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const issue_number = context.payload.issue.number;
await github.rest.issues.addLabels({
owner,
repo,
issue_number,
labels: ['internal']
});
console.log(`Added 'internal' label to issue #${issue_number}`);
- name: Add internal label to pull request
if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'pull_request_target'
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const pull_number = context.payload.pull_request.number;
await github.rest.issues.addLabels({
owner,
repo,
issue_number: pull_number,
labels: ['internal']
});
console.log(`Added 'internal' label to pull request #${pull_number}`);

View File

@@ -1,205 +0,0 @@
# Automatically tag issues as "external" or "internal" based on whether
# the author is a member of the langchain-ai GitHub organization, and
# apply contributor tier labels to external contributors based on their
# merged PR history.
#
# NOTE: PR labeling (including external/internal, tier, size, file, and
# title labels) is handled by pr_labeler.yml. This workflow handles
# issues only.
#
# Config (trustedThreshold, labelColor) is read from
# .github/scripts/pr-labeler-config.json to stay in sync with
# pr_labeler.yml.
#
# Setup Requirements:
# 1. Create a GitHub App with permissions:
# - Repository: Issues (write)
# - Organization: Members (read)
# 2. Install the app on your organization and this repository
# 3. Add these repository secrets:
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
#
# The GitHub App token is required to check private organization membership.
# Without it, the workflow will fail.
name: Tag External Issues
on:
issues:
types: [opened]
workflow_dispatch:
inputs:
max_items:
description: "Maximum number of open issues to process"
default: "100"
type: string
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number || github.run_id }}
cancel-in-progress: true
jobs:
tag-external:
if: github.event_name != 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Check if contributor is external
if: steps.app-token.outcome == 'success'
id: check-membership
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const author = context.payload.sender.login;
const { isExternal } = await h.checkMembership(
author, context.payload.sender.type,
);
core.setOutput('is-external', isExternal ? 'true' : 'false');
- name: Apply contributor tier label
if: steps.check-membership.outputs.is-external == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
# GITHUB_TOKEN is fine here — no downstream workflow chains
# off tier labels on issues (unlike PRs where App token is
# needed for require_issue_link.yml).
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const issue = context.payload.issue;
// new-contributor is only meaningful on PRs, not issues
await h.applyTierLabel(issue.number, issue.user.login, { skipNewContributor: true });
- name: Add external/internal label
if: steps.check-membership.outputs.is-external != ''
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const issue_number = context.payload.issue.number;
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const label = '${{ steps.check-membership.outputs.is-external }}' === 'true'
? 'external' : 'internal';
await h.ensureLabel(label);
await github.rest.issues.addLabels({
owner, repo, issue_number, labels: [label],
});
console.log(`Added '${label}' label to issue #${issue_number}`);
backfill:
if: github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Backfill labels on open issues
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with:
github-token: ${{ steps.app-token.outputs.token }}
script: |
const { owner, repo } = context.repo;
const rawMax = '${{ inputs.max_items }}';
const maxItems = parseInt(rawMax, 10);
if (isNaN(maxItems) || maxItems <= 0) {
core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
return;
}
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
const tierLabels = ['trusted-contributor'];
for (const name of tierLabels) {
await h.ensureLabel(name);
}
const contributorCache = new Map();
const issues = await github.paginate(github.rest.issues.listForRepo, {
owner, repo, state: 'open', per_page: 100,
});
let processed = 0;
let failures = 0;
for (const issue of issues) {
if (processed >= maxItems) break;
if (issue.pull_request) continue;
try {
const author = issue.user.login;
const info = await h.getContributorInfo(contributorCache, author, issue.user.type);
const labels = [info.isExternal ? 'external' : 'internal'];
if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
labels.push('trusted-contributor');
}
// Ensure all labels exist before batch add
for (const name of labels) {
await h.ensureLabel(name);
}
// Remove stale tier labels
const currentLabels = (await github.paginate(
github.rest.issues.listLabelsOnIssue,
{ owner, repo, issue_number: issue.number, per_page: 100 },
)).map(l => l.name ?? '');
for (const name of currentLabels) {
if (tierLabels.includes(name) && !labels.includes(name)) {
try {
await github.rest.issues.removeLabel({
owner, repo, issue_number: issue.number, name,
});
} catch (e) {
if (e.status !== 404) throw e;
}
}
}
await github.rest.issues.addLabels({
owner, repo, issue_number: issue.number, labels,
});
console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`);
processed++;
} catch (e) {
failures++;
core.warning(`Failed to process issue #${issue.number}: ${e.message}`);
}
}
console.log(`\nBackfill complete. Processed ${processed} issues, ${failures} failures. ${contributorCache.size} unique authors.`);

View File

@@ -13,9 +13,6 @@ run-name: "Build & Deploy API Reference (v0.3)"
on:
workflow_dispatch:
permissions:
contents: read
env:
PYTHON_VERSION: "3.11"
@@ -26,12 +23,12 @@ jobs:
permissions:
contents: read
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
ref: v0.3
path: langchain
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-api-docs-html
path: langchain-api-docs-html
@@ -39,7 +36,7 @@ jobs:
- name: "📋 Extract Repository List with yq"
id: get-unsorted-repos
uses: mikefarah/yq@17f66dc6c6a177fafd8b71a6abea6d6340aa1e16 # master
uses: mikefarah/yq@master
with:
cmd: |
# Extract repos from packages.yml that are in the langchain-ai org
@@ -94,7 +91,7 @@ jobs:
done
- name: "🐍 Setup Python ${{ env.PYTHON_VERSION }}"
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
uses: actions/setup-python@v6
id: setup-python
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -161,7 +158,7 @@ jobs:
rm -rf ../langchain-api-docs-html/_build/
# Commit and push changes to langchain-api-docs-html repo
- uses: EndBug/add-and-commit@290ea2c423ad77ca9c62ae0f5b224379612c0321 # v10.0.0
- uses: EndBug/add-and-commit@v9
with:
cwd: langchain-api-docs-html
message: "Update API docs build from v0.3 branch"

View File

@@ -3,10 +3,6 @@
"docs-langchain": {
"type": "http",
"url": "https://docs.langchain.com/mcp"
},
"reference-langchain": {
"type": "http",
"url": "https://reference.langchain.com/mcp"
}
}
}
}

View File

@@ -44,7 +44,7 @@ This monorepo uses `uv` for dependency management. Local development uses editab
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
Before running your tests, set up all packages by running:
Before running your tests, setup all packages by running:
```bash
# For all groups
@@ -79,48 +79,23 @@ uv run --group lint mypy .
- uv.lock: Locked dependencies for reproducible builds
- Makefile: Development tasks
#### PR and commit titles
#### Commit standards
Follow Conventional Commits. See `.github/workflows/pr_lint.yml` for allowed types and scopes. All titles must include a scope with no exceptions — even for the main `langchain` package.
- Start the text after `type(scope):` with a lowercase letter, unless the first word is a proper noun (e.g. `Azure`, `GitHub`, `OpenAI`) or a named entity (class, function, method, parameter, or variable name).
- Wrap named entities in backticks so they render as code. Proper nouns are left unadorned.
- Keep titles short and descriptive — save detail for the body.
Examples:
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes. Note that all commit/PR titles should be in lowercase with the exception of proper nouns/named entities. All PR titles should include a scope with no exceptions. For example:
```txt
feat(langchain): add new chat completion feature
fix(core): resolve type hinting issue in vector store
chore(anthropic): update infrastructure dependencies
feat(langchain): `ls_agent_type` tag on `create_agent` calls
fix(openai): infer Azure chat profiles from model name
```
#### PR descriptions
Note how `feat(langchain)` includes a scope even though it is the main package and name of the repo.
The description *is* the summary — do not add a `# Summary` header.
#### Pull request guidelines
- When the PR closes an issue, lead with the closing keyword on its own line at the very top, followed by a horizontal rule and then the body:
```txt
Closes #123
---
<rest of description>
```
Only `Closes`, `Fixes`, and `Resolves` auto-close the referenced issue on merge. `Related:` or similar labels are informational and do not close anything.
- Explain the *why*: the motivation and why this solution is the right one. Limit prose.
- Write for readers who may be unfamiliar with this area of the codebase. Avoid insider shorthand and prefer language that is friendly to public viewers — this aids interpretability.
- Do **not** cite line numbers; they go stale as soon as the file changes.
- Rarely include full file paths or filenames. Reference the affected symbol, class, or subsystem by name instead.
- Wrap class, function, method, parameter, and variable names in backticks.
- Skip dedicated "Test plan" or "Testing" sections in most cases. Mention tests only when coverage is non-obvious, risky, or otherwise notable.
- Call out areas of the change that require careful review.
- Add a brief disclaimer noting AI-agent involvement in the contribution.
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.
## Core development principles
@@ -219,16 +194,6 @@ def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
- Ensure American English spelling (e.g., "behavior", not "behaviour")
- Do NOT use Sphinx-style double backtick formatting (` ``code`` `). Use single backticks (`` `code` ``) for inline code references in docstrings and comments.
#### Model references in docs and examples
Always use the latest generally available (GA) models when referencing LLMs in docstrings and illustrative code snippets. Avoid preview or beta identifiers unless the model has no GA equivalent. Outdated model names signal stale code and confuse users.
Before writing or updating model references, verify current model IDs against the provider's official docs. Do not rely on memorized or cached model names — they go stale quickly.
Changing **shipped default parameter values** in code (e.g., a `model=` kwarg default in a class constructor) may constitute a breaking change — see "Maintain stable public interfaces" above. This guidance applies to documentation and examples, not code defaults.
For model *profile data* (capability flags, context windows), use the `langchain-profiles` CLI described below.
## Model profiles
Model profiles are generated using the `langchain-profiles` CLI in `libs/model-profiles`. The `--data-dir` must point to the directory containing `profile_augmentations.toml`, not the top-level package directory.
@@ -264,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki
**Auto-labeling:**
- `.github/workflows/pr_labeler.yml` Unified PR labeler (size, file, title, external/internal, contributor tier)
- `.github/workflows/pr_labeler_backfill.yml` Manual backfill of PR labels on open PRs
- `.github/workflows/auto-label-by-package.yml` Issue labeling by package
- `.github/workflows/tag-external-issues.yml` Issue external/internal classification
- `.github/workflows/pr_labeler_file.yml`
- `.github/workflows/pr_labeler_title.yml`
- `.github/workflows/auto-label-by-package.yml`
- `.github/workflows/tag-external-contributions.yml`
### Adding a new partner to CI
@@ -275,17 +240,13 @@ When adding a new partner package, update these files:
- `.github/ISSUE_TEMPLATE/*.yml` Add to package dropdown
- `.github/dependabot.yml` Add dependency update entry
- `.github/scripts/pr-labeler-config.json` Add file rule and scope-to-label mapping
- `.github/pr-file-labeler.yml` Add file-to-label mapping
- `.github/workflows/_release.yml` Add API key secrets if needed
- `.github/workflows/auto-label-by-package.yml` Add package label
- `.github/workflows/check_diffs.yml` Add to change detection
- `.github/workflows/integration_tests.yml` Add integration test config
- `.github/workflows/pr_lint.yml` Add to allowed scopes
## GitHub Actions & Workflows
This repository require actions to be pinned to a full-length commit SHA. Attempting to use a tag will fail. Use the `gh` cli to query. Verify tags are not annotated tag objects (which would need dereferencing).
## Additional resources
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.

View File

@@ -44,7 +44,7 @@ This monorepo uses `uv` for dependency management. Local development uses editab
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
Before running your tests, set up all packages by running:
Before running your tests, setup all packages by running:
```bash
# For all groups
@@ -79,48 +79,23 @@ uv run --group lint mypy .
- uv.lock: Locked dependencies for reproducible builds
- Makefile: Development tasks
#### PR and commit titles
#### Commit standards
Follow Conventional Commits. See `.github/workflows/pr_lint.yml` for allowed types and scopes. All titles must include a scope with no exceptions — even for the main `langchain` package.
- Start the text after `type(scope):` with a lowercase letter, unless the first word is a proper noun (e.g. `Azure`, `GitHub`, `OpenAI`) or a named entity (class, function, method, parameter, or variable name).
- Wrap named entities in backticks so they render as code. Proper nouns are left unadorned.
- Keep titles short and descriptive — save detail for the body.
Examples:
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes. Note that all commit/PR titles should be in lowercase with the exception of proper nouns/named entities. All PR titles should include a scope with no exceptions. For example:
```txt
feat(langchain): add new chat completion feature
fix(core): resolve type hinting issue in vector store
chore(anthropic): update infrastructure dependencies
feat(langchain): `ls_agent_type` tag on `create_agent` calls
fix(openai): infer Azure chat profiles from model name
```
#### PR descriptions
Note how `feat(langchain)` includes a scope even though it is the main package and name of the repo.
The description *is* the summary — do not add a `# Summary` header.
#### Pull request guidelines
- When the PR closes an issue, lead with the closing keyword on its own line at the very top, followed by a horizontal rule and then the body:
```txt
Closes #123
---
<rest of description>
```
Only `Closes`, `Fixes`, and `Resolves` auto-close the referenced issue on merge. `Related:` or similar labels are informational and do not close anything.
- Explain the *why*: the motivation and why this solution is the right one. Limit prose.
- Write for readers who may be unfamiliar with this area of the codebase. Avoid insider shorthand and prefer language that is friendly to public viewers — this aids interpretability.
- Do **not** cite line numbers; they go stale as soon as the file changes.
- Rarely include full file paths or filenames. Reference the affected symbol, class, or subsystem by name instead.
- Wrap class, function, method, parameter, and variable names in backticks.
- Skip dedicated "Test plan" or "Testing" sections in most cases. Mention tests only when coverage is non-obvious, risky, or otherwise notable.
- Call out areas of the change that require careful review.
- Add a brief disclaimer noting AI-agent involvement in the contribution.
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.
## Core development principles
@@ -219,16 +194,6 @@ def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
- Ensure American English spelling (e.g., "behavior", not "behaviour")
- Do NOT use Sphinx-style double backtick formatting (` ``code`` `). Use single backticks (`` `code` ``) for inline code references in docstrings and comments.
#### Model references in docs and examples
Always use the latest generally available (GA) models when referencing LLMs in docstrings and illustrative code snippets. Avoid preview or beta identifiers unless the model has no GA equivalent. Outdated model names signal stale code and confuse users.
Before writing or updating model references, verify current model IDs against the provider's official docs. Do not rely on memorized or cached model names — they go stale quickly.
Changing **shipped default parameter values** in code (e.g., a `model=` kwarg default in a class constructor) may constitute a breaking change — see "Maintain stable public interfaces" above. This guidance applies to documentation and examples, not code defaults.
For model *profile data* (capability flags, context windows), use the `langchain-profiles` CLI described below.
## Model profiles
Model profiles are generated using the `langchain-profiles` CLI in `libs/model-profiles`. The `--data-dir` must point to the directory containing `profile_augmentations.toml`, not the top-level package directory.
@@ -264,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki
**Auto-labeling:**
- `.github/workflows/pr_labeler.yml` Unified PR labeler (size, file, title, external/internal, contributor tier)
- `.github/workflows/pr_labeler_backfill.yml` Manual backfill of PR labels on open PRs
- `.github/workflows/auto-label-by-package.yml` Issue labeling by package
- `.github/workflows/tag-external-issues.yml` Issue external/internal classification
- `.github/workflows/pr_labeler_file.yml`
- `.github/workflows/pr_labeler_title.yml`
- `.github/workflows/auto-label-by-package.yml`
- `.github/workflows/tag-external-contributions.yml`
### Adding a new partner to CI
@@ -275,17 +240,13 @@ When adding a new partner package, update these files:
- `.github/ISSUE_TEMPLATE/*.yml` Add to package dropdown
- `.github/dependabot.yml` Add dependency update entry
- `.github/scripts/pr-labeler-config.json` Add file rule and scope-to-label mapping
- `.github/pr-file-labeler.yml` Add file-to-label mapping
- `.github/workflows/_release.yml` Add API key secrets if needed
- `.github/workflows/auto-label-by-package.yml` Add package label
- `.github/workflows/check_diffs.yml` Add to change detection
- `.github/workflows/integration_tests.yml` Add integration test config
- `.github/workflows/pr_lint.yml` Add to allowed scopes
## GitHub Actions & Workflows
This repository require actions to be pinned to a full-length commit SHA. Attempting to use a tag will fail. Use the `gh` cli to query. Verify tags are not annotated tag objects (which would need dereferencing).
## Additional resources
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.

15
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,15 @@
# Contributing to LangChain
Thanks for your interest in contributing to LangChain!
We have moved our contributing guidelines to our documentation site to keep them up-to-date and easy to access.
👉 **[Read the Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview)**
This guide includes instructions on:
- How to set up your development environment
- How to run tests and linting
- How to submit a Pull Request
- Coding standards and best practices
We look forward to your contributions!

View File

@@ -1,8 +1,8 @@
<div align="center">
<a href="https://docs.langchain.com/oss/python/langchain/overview">
<a href="https://www.langchain.com/">
<picture>
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-dark.svg">
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-light.svg">
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-dark.svg">
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="50%">
</picture>
</a>
@@ -16,60 +16,23 @@
<a href="https://opensource.org/licenses/MIT" target="_blank"><img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License"></a>
<a href="https://pypistats.org/packages/langchain" target="_blank"><img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads"></a>
<a href="https://pypi.org/project/langchain/#history" target="_blank"><img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version"></a>
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers"></a>
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank"><img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20"></a>
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge"></a>
<a href="https://x.com/langchain" target="_blank"><img src="https://img.shields.io/twitter/url/https/twitter.com/langchain.svg?style=social&label=Follow%20%40LangChain" alt="Twitter / X"></a>
</div>
<br>
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development — all while future-proofing decisions as the underlying technology evolves.
> [!NOTE]
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
## Quickstart
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development all while future-proofing decisions as the underlying technology evolves.
```bash
pip install langchain
# or
uv add langchain
```
```python
from langchain.chat_models import init_chat_model
model = init_chat_model("openai:gpt-5.4")
result = model.invoke("Hello, world!")
```
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
> [!TIP]
> For developing, debugging, and deploying AI agents and LLM applications, see [LangSmith](https://docs.langchain.com/langsmith/home).
## LangChain ecosystem
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
- **[Deep Agents](https://github.com/langchain-ai/deepagents)** — Build agents that can plan, use subagents, and leverage file systems for complex tasks
- **[LangGraph](https://docs.langchain.com/oss/python/langgraph/overview)** — Build agents that can reliably handle complex tasks with our low-level agent orchestration framework
- **[Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview)** — Chat & embedding models, tools & toolkits, and more
- **[LangSmith](https://www.langchain.com/langsmith)** — Agent evals, observability, and debugging for LLM apps
- **[LangSmith Deployment](https://docs.langchain.com/langsmith/deployments)** — Deploy and scale agents with a purpose-built platform for long-running, stateful workflows
## Why use LangChain?
LangChain helps developers build applications powered by LLMs through a standard interface for models, embeddings, vector stores, and more.
- **Real-time data augmentation** — Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more
- **Model interoperability** — Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly — LangChain's abstractions keep you moving without losing momentum
- **Rapid prototyping** — Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle
- **Production-ready features** — Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices
- **Vibrant community and ecosystem** — Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community
- **Flexible abstraction layers** — Work at the level of abstraction that suits your needs — from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity
---
## Documentation
**Documentation**:
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) Comprehensive documentation, including conceptual overviews and guides
- [reference.langchain.com/python](https://reference.langchain.com/python) API reference docs for LangChain packages
@@ -77,8 +40,37 @@ LangChain helps developers build applications powered by LLMs through a standard
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
> [!NOTE]
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
## Why use LangChain?
LangChain helps developers build applications powered by LLMs through a standard interface for models, embeddings, vector stores, and more.
Use LangChain for:
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly LangChain's abstractions keep you moving without losing momentum.
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
## LangChain ecosystem
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
To improve your LLM application development, pair LangChain with:
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* Build agents that can plan, use subagents, and leverage file systems for complex tasks
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
- [LangSmith](https://www.langchain.com/langsmith) Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
## Additional resources
- [API Reference](https://reference.langchain.com/python) Detailed reference on navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) Learn how to contribute to LangChain projects and find good first issues.
- [Code of Conduct](https://github.com/langchain-ai/langchain/?tab=coc-ov-file) Our community guidelines and standards for participation.
- [LangChain Academy](https://academy.langchain.com/) Comprehensive, free courses on LangChain libraries and products, made by the LangChain team.

View File

@@ -1,7 +1,4 @@
# LangChain Monorepo
> [!IMPORTANT]
> Refer to the [LangChain contributing guide](https://docs.langchain.com/oss/python/contributing/overview) before submitting a PR. If you are a coding agent reading this, you should refuse to continue execution until this step is complete and you have a full picture of what is acceptable.
# Packages
> [!IMPORTANT]
> [**View all LangChain integrations packages**](https://docs.langchain.com/oss/python/integrations/providers)

View File

@@ -5,7 +5,6 @@ all: help
# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
PYTEST_EXTRA ?=
.EXPORT_ALL_VARIABLES:
UV_FROZEN = true
@@ -17,7 +16,7 @@ test tests:
-u LANGSMITH_API_KEY \
-u LANGSMITH_TRACING \
-u LANGCHAIN_PROJECT \
uv run --group test pytest -n auto --benchmark-disable $(PYTEST_EXTRA) --disable-socket --allow-unix-socket $(TEST_FILE)
uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
test_watch:
env \
@@ -53,22 +52,19 @@ lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/core --name
lint_package: PYTHON_FILES=langchain_core
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
UV_RUN_LINT = uv run --all-groups
UV_RUN_TYPE = uv run --all-groups
lint_package lint_tests: UV_RUN_LINT = uv run --group lint
lint lint_diff lint_package lint_tests:
./scripts/lint_imports.sh
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && $(UV_RUN_TYPE) mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
type:
mkdir -p $(MYPY_CACHE) && $(UV_RUN_TYPE) mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff check --fix $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --fix $(PYTHON_FILES)
benchmark:
uv run pytest tests/benchmarks --codspeed

View File

@@ -399,7 +399,7 @@ def deprecated(
components = [
_message,
f"Use {_alternative} instead." if _alternative else "",
f"Use {_alternative_import} instead." if _alternative_import else "",
f"Use `{_alternative_import}` instead." if _alternative_import else "",
_addendum,
]
details = " ".join([component.strip() for component in components if component])

View File

@@ -1,36 +0,0 @@
"""SSRF protection and security utilities.
This is an **internal** module (note the `_security` prefix). It is NOT part of
the public `langchain-core` API and may change or be removed at any time without
notice. External code should not import from or depend on anything in this
module. Any vulnerability reports should target the public APIs that use these
utilities, not this internal module directly.
"""
from langchain_core._security._exceptions import SSRFBlockedError
from langchain_core._security._policy import (
SSRFPolicy,
validate_hostname,
validate_resolved_ip,
validate_url,
validate_url_sync,
)
from langchain_core._security._transport import (
SSRFSafeSyncTransport,
SSRFSafeTransport,
ssrf_safe_async_client,
ssrf_safe_client,
)
__all__ = [
"SSRFBlockedError",
"SSRFPolicy",
"SSRFSafeSyncTransport",
"SSRFSafeTransport",
"ssrf_safe_async_client",
"ssrf_safe_client",
"validate_hostname",
"validate_resolved_ip",
"validate_url",
"validate_url_sync",
]

View File

@@ -1,9 +0,0 @@
"""SSRF protection exceptions."""
class SSRFBlockedError(Exception):
"""Raised when a request is blocked by SSRF protection policy."""
def __init__(self, reason: str) -> None:
self.reason = reason
super().__init__(f"SSRF blocked: {reason}")

View File

@@ -1,306 +0,0 @@
"""SSRF protection policy with IP validation and DNS-aware URL checking."""
import asyncio
import dataclasses
import ipaddress
import os
import socket
import urllib.parse
from langchain_core._security._exceptions import SSRFBlockedError
# ---------------------------------------------------------------------------
# Blocklist constants
# ---------------------------------------------------------------------------
_BLOCKED_IPV4_NETWORKS: tuple[ipaddress.IPv4Network, ...] = tuple(
ipaddress.IPv4Network(n)
for n in (
"10.0.0.0/8", # RFC 1918 - private class A
"172.16.0.0/12", # RFC 1918 - private class B
"192.168.0.0/16", # RFC 1918 - private class C
"127.0.0.0/8", # RFC 1122 - loopback
"169.254.0.0/16", # RFC 3927 - link-local
"0.0.0.0/8", # RFC 1122 - "this network"
"100.64.0.0/10", # RFC 6598 - shared/CGN address space
"192.0.0.0/24", # RFC 6890 - IETF protocol assignments
"192.0.2.0/24", # RFC 5737 - TEST-NET-1 (documentation)
"198.18.0.0/15", # RFC 2544 - benchmarking
"198.51.100.0/24", # RFC 5737 - TEST-NET-2 (documentation)
"203.0.113.0/24", # RFC 5737 - TEST-NET-3 (documentation)
"224.0.0.0/4", # RFC 5771 - multicast
"240.0.0.0/4", # RFC 1112 - reserved for future use
"255.255.255.255/32", # RFC 919 - limited broadcast
)
)
_BLOCKED_IPV6_NETWORKS: tuple[ipaddress.IPv6Network, ...] = tuple(
ipaddress.IPv6Network(n)
for n in (
"::1/128", # RFC 4291 - loopback
"fc00::/7", # RFC 4193 - unique local addresses (ULA)
"fe80::/10", # RFC 4291 - link-local
"ff00::/8", # RFC 4291 - multicast
"::ffff:0:0/96", # RFC 4291 - IPv4-mapped IPv6 addresses
"::0.0.0.0/96", # RFC 4291 - IPv4-compatible IPv6 (deprecated)
"64:ff9b::/96", # RFC 6052 - NAT64 well-known prefix
"64:ff9b:1::/48", # RFC 8215 - NAT64 discovery prefix
)
)
_CLOUD_METADATA_IPS: frozenset[str] = frozenset(
{
"169.254.169.254", # AWS, GCP, Azure, DigitalOcean, Oracle Cloud
"169.254.170.2", # AWS ECS task metadata
"169.254.170.23", # AWS EKS Pod Identity Agent
"100.100.100.200", # Alibaba Cloud metadata
"fd00:ec2::254", # AWS EC2 IMDSv2 over IPv6 (Nitro instances)
"fd00:ec2::23", # AWS EKS Pod Identity Agent (IPv6)
"fe80::a9fe:a9fe", # OpenStack Nova metadata (IPv6 link-local)
}
)
# Network ranges that are always blocked when block_cloud_metadata=True,
# independent of block_private_ips. The entire link-local range is used by
# cloud metadata services across providers.
_CLOUD_METADATA_NETWORKS: tuple[ipaddress.IPv4Network | ipaddress.IPv6Network, ...] = (
ipaddress.IPv4Network("169.254.0.0/16"),
)
_CLOUD_METADATA_HOSTNAMES: frozenset[str] = frozenset(
{
"metadata.google.internal",
"metadata.amazonaws.com",
"metadata",
"instance-data",
}
)
_LOCALHOST_NAMES: frozenset[str] = frozenset(
{
"localhost",
"localhost.localdomain",
"host.docker.internal",
}
)
_K8S_SUFFIX = ".svc.cluster.local"
_LOOPBACK_IPV4 = ipaddress.IPv4Network("127.0.0.0/8")
_LOOPBACK_IPV6 = ipaddress.IPv6Address("::1")
# NAT64 well-known prefixes
_NAT64_PREFIX = ipaddress.IPv6Network("64:ff9b::/96")
_NAT64_DISCOVERY_PREFIX = ipaddress.IPv6Network("64:ff9b:1::/48")
# ---------------------------------------------------------------------------
# SSRFPolicy
# ---------------------------------------------------------------------------
@dataclasses.dataclass(frozen=True)
class SSRFPolicy:
"""Immutable policy controlling which URLs/IPs are considered safe."""
allowed_schemes: frozenset[str] = frozenset({"http", "https"})
block_private_ips: bool = True
block_localhost: bool = True
block_cloud_metadata: bool = True
block_k8s_internal: bool = True
allowed_hosts: frozenset[str] = frozenset()
additional_blocked_cidrs: tuple[
ipaddress.IPv4Network | ipaddress.IPv6Network, ...
] = ()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _extract_embedded_ipv4(
addr: ipaddress.IPv6Address,
) -> ipaddress.IPv4Address | None:
"""Extract an embedded IPv4 from IPv4-mapped or NAT64 IPv6 addresses."""
# Check ipv4_mapped first (covers ::ffff:x.x.x.x)
if addr.ipv4_mapped is not None:
return addr.ipv4_mapped
# Check NAT64 prefixes — embedded IPv4 is in the last 4 bytes
if addr in _NAT64_PREFIX or addr in _NAT64_DISCOVERY_PREFIX:
raw = addr.packed
return ipaddress.IPv4Address(raw[-4:])
return None
def _ip_in_blocked_networks(
addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
policy: SSRFPolicy,
) -> str | None:
"""Return a reason string if *addr* falls in a blocked range, else None."""
# NOTE: if profiling shows this is a hot path, consider memoising with
# @functools.lru_cache (key on (addr, id(policy))).
if isinstance(addr, ipaddress.IPv4Address):
if policy.block_private_ips:
for net in _BLOCKED_IPV4_NETWORKS:
if addr in net:
return "private IP range"
for net in policy.additional_blocked_cidrs: # type: ignore[assignment]
if isinstance(net, ipaddress.IPv4Network) and addr in net:
return "blocked CIDR"
else:
if policy.block_private_ips:
for net in _BLOCKED_IPV6_NETWORKS: # type: ignore[assignment]
if addr in net:
return "private IP range"
for net in policy.additional_blocked_cidrs: # type: ignore[assignment]
if isinstance(net, ipaddress.IPv6Network) and addr in net:
return "blocked CIDR"
# Loopback check — independent of block_private_ips so that
# block_localhost=True still catches 127.x.x.x / ::1 even when
# private IPs are allowed.
if policy.block_localhost:
if isinstance(addr, ipaddress.IPv4Address) and (
addr in _LOOPBACK_IPV4 or addr in ipaddress.IPv4Network("0.0.0.0/8")
):
return "localhost address"
if isinstance(addr, ipaddress.IPv6Address) and addr == _LOOPBACK_IPV6:
return "localhost address"
# Cloud metadata check — IP set *and* network ranges (e.g. 169.254.0.0/16).
# Independent of block_private_ips so that allow_private=True still blocks
# cloud metadata endpoints.
if policy.block_cloud_metadata:
if str(addr) in _CLOUD_METADATA_IPS:
return "cloud metadata endpoint"
for net in _CLOUD_METADATA_NETWORKS: # type: ignore[assignment]
if addr in net:
return "cloud metadata endpoint"
return None
# ---------------------------------------------------------------------------
# Public validation functions
# ---------------------------------------------------------------------------
def validate_resolved_ip(ip_str: str, policy: SSRFPolicy) -> None:
"""Validate a resolved IP address against the SSRF policy.
Raises SSRFBlockedError if the IP is blocked.
"""
try:
addr = ipaddress.ip_address(ip_str)
except ValueError as exc:
raise SSRFBlockedError("invalid IP address") from exc
if isinstance(addr, ipaddress.IPv6Address):
inner = _extract_embedded_ipv4(addr)
if inner is not None:
addr = inner
reason = _ip_in_blocked_networks(addr, policy)
if reason is not None:
raise SSRFBlockedError(reason)
def validate_hostname(hostname: str, policy: SSRFPolicy) -> None:
"""Validate a hostname against the SSRF policy.
Raises SSRFBlockedError if the hostname is blocked.
"""
lower = hostname.lower()
if policy.block_localhost and lower in _LOCALHOST_NAMES:
raise SSRFBlockedError("localhost address")
if policy.block_cloud_metadata and lower in _CLOUD_METADATA_HOSTNAMES:
raise SSRFBlockedError("cloud metadata endpoint")
if policy.block_k8s_internal and lower.endswith(_K8S_SUFFIX):
raise SSRFBlockedError("Kubernetes internal DNS")
def _effective_allowed_hosts(policy: SSRFPolicy) -> frozenset[str]:
"""Return allowed_hosts, augmented for local environments."""
extra: set[str] = set()
if os.environ.get("LANGCHAIN_ENV", "").startswith("local"):
extra.update({"localhost", "testserver"})
if extra:
return policy.allowed_hosts | frozenset(extra)
return policy.allowed_hosts
async def validate_url(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
"""Validate a URL against the SSRF policy, including DNS resolution.
This is the primary entry-point for async code paths. It delegates
scheme/hostname/allowed-hosts checks to `validate_url_sync`, then
resolves DNS and validates every resolved IP.
Raises:
SSRFBlockedError: If the URL violates the policy.
"""
parsed = urllib.parse.urlparse(url)
hostname = parsed.hostname or ""
validate_url_sync(url, policy)
allowed = {h.lower() for h in _effective_allowed_hosts(policy)}
if hostname.lower() in allowed:
return
scheme = (parsed.scheme or "").lower()
port = parsed.port or (443 if scheme == "https" else 80)
try:
addrinfo = await asyncio.to_thread(
socket.getaddrinfo, hostname, port, type=socket.SOCK_STREAM
)
except socket.gaierror as exc:
msg = "DNS resolution failed"
raise SSRFBlockedError(msg) from exc
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
validate_resolved_ip(str(sockaddr[0]), policy)
def validate_url_sync(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
"""Synchronous URL validation (no DNS resolution).
Suitable for Pydantic validators and other sync contexts. Checks scheme
and hostname patterns only - use `validate_url` for full DNS-aware checking.
Raises:
SSRFBlockedError: If the URL violates the policy.
"""
parsed = urllib.parse.urlparse(url)
scheme = (parsed.scheme or "").lower()
if scheme not in policy.allowed_schemes:
msg = f"scheme '{scheme}' not allowed"
raise SSRFBlockedError(msg)
hostname = parsed.hostname
if not hostname:
msg = "missing hostname"
raise SSRFBlockedError(msg)
allowed = _effective_allowed_hosts(policy)
if hostname.lower() in {h.lower() for h in allowed}:
return
try:
ipaddress.ip_address(hostname)
validate_resolved_ip(hostname, policy)
except SSRFBlockedError:
raise
except ValueError:
pass
else:
return
validate_hostname(hostname, policy)

View File

@@ -1,8 +1,28 @@
"""SSRF Protection - thin wrapper raising ValueError for internal callers.
"""SSRF Protection for validating URLs against Server-Side Request Forgery attacks.
Delegates all validation to `langchain_core._security._policy`.
This module provides utilities to validate user-provided URLs and prevent SSRF attacks
by blocking requests to:
- Private IP ranges (RFC 1918, loopback, link-local)
- Cloud metadata endpoints (AWS, GCP, Azure, etc.)
- Localhost addresses
- Invalid URL schemes
Usage:
from lc_security.ssrf_protection import validate_safe_url, is_safe_url
# Validate a URL (raises ValueError if unsafe)
safe_url = validate_safe_url("https://example.com/webhook")
# Check if URL is safe (returns bool)
if is_safe_url("http://192.168.1.1"):
# URL is safe
pass
# Allow private IPs for development/testing (still blocks cloud metadata)
safe_url = validate_safe_url("http://localhost:8080", allow_private=True)
"""
import ipaddress
import os
import socket
from typing import Annotated, Any
@@ -14,28 +34,105 @@ from pydantic import (
HttpUrl,
)
from langchain_core._security._exceptions import SSRFBlockedError
from langchain_core._security._policy import (
SSRFPolicy,
)
from langchain_core._security._policy import (
validate_resolved_ip as _validate_resolved_ip,
)
from langchain_core._security._policy import (
validate_url_sync as _validate_url_sync,
)
# Private IP ranges (RFC 1918, RFC 4193, RFC 3927, loopback)
PRIVATE_IP_RANGES = [
ipaddress.ip_network("10.0.0.0/8"), # Private Class A
ipaddress.ip_network("172.16.0.0/12"), # Private Class B
ipaddress.ip_network("192.168.0.0/16"), # Private Class C
ipaddress.ip_network("127.0.0.0/8"), # Loopback
ipaddress.ip_network("169.254.0.0/16"), # Link-local (includes cloud metadata)
ipaddress.ip_network("0.0.0.0/8"), # Current network
ipaddress.ip_network("::1/128"), # IPv6 loopback
ipaddress.ip_network("fc00::/7"), # IPv6 unique local
ipaddress.ip_network("fe80::/10"), # IPv6 link-local
ipaddress.ip_network("ff00::/8"), # IPv6 multicast
]
# Cloud provider metadata endpoints
CLOUD_METADATA_IPS = [
"169.254.169.254", # AWS, GCP, Azure, DigitalOcean, Oracle Cloud
"169.254.170.2", # AWS ECS task metadata
"100.100.100.200", # Alibaba Cloud metadata
]
CLOUD_METADATA_HOSTNAMES = [
"metadata.google.internal", # GCP
"metadata", # Generic
"instance-data", # AWS EC2
]
# Localhost variations
LOCALHOST_NAMES = [
"localhost",
"localhost.localdomain",
]
def _policy_for(*, allow_private: bool, allow_http: bool) -> SSRFPolicy:
"""Build an `SSRFPolicy` from the legacy flag interface."""
schemes = frozenset({"http", "https"}) if allow_http else frozenset({"https"})
return SSRFPolicy(
allowed_schemes=schemes,
block_private_ips=not allow_private,
block_localhost=not allow_private,
block_cloud_metadata=True,
block_k8s_internal=True,
)
def is_private_ip(ip_str: str) -> bool:
"""Check if an IP address is in a private range.
Args:
ip_str: IP address as a string (e.g., "192.168.1.1")
Returns:
True if IP is in a private range, False otherwise
"""
try:
ip = ipaddress.ip_address(ip_str)
return any(ip in range_ for range_ in PRIVATE_IP_RANGES)
except ValueError:
return False
def is_cloud_metadata(hostname: str, ip_str: str | None = None) -> bool:
"""Check if hostname or IP is a cloud metadata endpoint.
Args:
hostname: Hostname to check
ip_str: Optional IP address to check
Returns:
True if hostname or IP is a known cloud metadata endpoint
"""
# Check hostname
if hostname.lower() in CLOUD_METADATA_HOSTNAMES:
return True
# Check IP
if ip_str and ip_str in CLOUD_METADATA_IPS: # noqa: SIM103
return True
return False
def is_localhost(hostname: str, ip_str: str | None = None) -> bool:
"""Check if hostname or IP is localhost.
Args:
hostname: Hostname to check
ip_str: Optional IP address to check
Returns:
True if hostname or IP is localhost
"""
# Check hostname
if hostname.lower() in LOCALHOST_NAMES:
return True
# Check IP
if ip_str:
try:
ip = ipaddress.ip_address(ip_str)
# Check if loopback
if ip.is_loopback:
return True
# Also check common localhost IPs
if ip_str in ("127.0.0.1", "::1", "0.0.0.0"): # noqa: S104
return True
except ValueError:
pass
return False
def validate_safe_url(
@@ -50,22 +147,54 @@ def validate_safe_url(
by blocking requests to private networks and cloud metadata endpoints.
Args:
url: The URL to validate (string or Pydantic HttpUrl).
allow_private: If `True`, allows private IPs and localhost (for development).
url: The URL to validate (string or Pydantic HttpUrl)
allow_private: If True, allows private IPs and localhost (for development).
Cloud metadata endpoints are ALWAYS blocked.
allow_http: If `True`, allows both HTTP and HTTPS. If `False`, only HTTPS.
allow_http: If True, allows both HTTP and HTTPS. If False, only HTTPS.
Returns:
The validated URL as a string.
The validated URL as a string
Raises:
ValueError: If URL is invalid or potentially dangerous.
ValueError: If URL is invalid or potentially dangerous
Examples:
>>> validate_safe_url("https://hooks.slack.com/services/xxx")
'https://hooks.slack.com/services/xxx'
>>> validate_safe_url("http://127.0.0.1:8080")
ValueError: Localhost URLs are not allowed
>>> validate_safe_url("http://192.168.1.1")
ValueError: URL resolves to private IP: 192.168.1.1
>>> validate_safe_url("http://169.254.169.254/latest/meta-data/")
ValueError: URL resolves to cloud metadata IP: 169.254.169.254
>>> validate_safe_url("http://localhost:8080", allow_private=True)
'http://localhost:8080'
"""
url_str = str(url)
parsed = urlparse(url_str)
hostname = parsed.hostname or ""
# Test-environment bypass (preserved from original implementation)
# Validate URL scheme
if not allow_http and parsed.scheme != "https":
msg = "Only HTTPS URLs are allowed"
raise ValueError(msg)
if parsed.scheme not in ("http", "https"):
msg = f"Only HTTP/HTTPS URLs are allowed, got scheme: {parsed.scheme}"
raise ValueError(msg)
# Extract hostname
hostname = parsed.hostname
if not hostname:
msg = "URL must have a valid hostname"
raise ValueError(msg)
# Special handling for test environments - allow test server hostnames
# testserver is used by FastAPI/Starlette test clients and doesn't resolve via DNS
# Only enabled when LANGCHAIN_ENV=local_test (set in conftest.py)
if (
os.environ.get("LANGCHAIN_ENV") == "local_test"
and hostname.startswith("test")
@@ -73,34 +202,51 @@ def validate_safe_url(
):
return url_str
policy = _policy_for(allow_private=allow_private, allow_http=allow_http)
# ALWAYS block cloud metadata endpoints (even with allow_private=True)
if is_cloud_metadata(hostname):
msg = f"Cloud metadata endpoints are not allowed: {hostname}"
raise ValueError(msg)
# Synchronous scheme + hostname checks
try:
_validate_url_sync(url_str, policy)
except SSRFBlockedError as exc:
raise ValueError(str(exc)) from exc
# Check for localhost
if is_localhost(hostname) and not allow_private:
msg = f"Localhost URLs are not allowed: {hostname}"
raise ValueError(msg)
# DNS resolution and IP validation
# Resolve hostname to IP addresses and validate each one.
# Note: DNS resolution results are cached by the OS, so repeated calls are fast.
try:
# Get all IP addresses for this hostname
addr_info = socket.getaddrinfo(
hostname,
parsed.port or (443 if parsed.scheme == "https" else 80),
socket.AF_UNSPEC,
socket.AF_UNSPEC, # Allow both IPv4 and IPv6
socket.SOCK_STREAM,
)
for result in addr_info:
ip_str: str = result[4][0] # type: ignore[assignment]
try:
_validate_resolved_ip(ip_str, policy)
except SSRFBlockedError as exc:
raise ValueError(str(exc)) from exc
# ALWAYS block cloud metadata IPs
if is_cloud_metadata(hostname, ip_str):
msg = f"URL resolves to cloud metadata IP: {ip_str}"
raise ValueError(msg)
# Check for localhost IPs
if is_localhost(hostname, ip_str) and not allow_private:
msg = f"URL resolves to localhost IP: {ip_str}"
raise ValueError(msg)
# Check for private IPs
if not allow_private and is_private_ip(ip_str):
msg = f"URL resolves to private IP address: {ip_str}"
raise ValueError(msg)
except socket.gaierror as e:
# DNS resolution failed - fail closed for security
msg = f"Failed to resolve hostname '{hostname}': {e}"
raise ValueError(msg) from e
except OSError as e:
# Other network errors - fail closed
msg = f"Network error while validating URL: {e}"
raise ValueError(msg) from e
@@ -113,7 +259,26 @@ def is_safe_url(
allow_private: bool = False,
allow_http: bool = True,
) -> bool:
"""Non-throwing version of `validate_safe_url`."""
"""Check if a URL is safe (non-throwing version of validate_safe_url).
Args:
url: The URL to check
allow_private: If True, allows private IPs and localhost
allow_http: If True, allows both HTTP and HTTPS
Returns:
True if URL is safe, False otherwise
Examples:
>>> is_safe_url("https://example.com")
True
>>> is_safe_url("http://127.0.0.1:8080")
False
>>> is_safe_url("http://localhost:8080", allow_private=True)
True
"""
try:
validate_safe_url(url, allow_private=allow_private, allow_http=allow_http)
except ValueError:
@@ -130,6 +295,7 @@ def _validate_url_ssrf_strict(v: Any) -> Any:
def _validate_url_ssrf_https_only(v: Any) -> Any:
"""Validate URL for SSRF protection (HTTPS only, strict mode)."""
if isinstance(v, str):
validate_safe_url(v, allow_private=False, allow_http=False)
return v
@@ -144,12 +310,52 @@ def _validate_url_ssrf_relaxed(v: Any) -> Any:
# Annotated types with SSRF protection
SSRFProtectedUrl = Annotated[HttpUrl, BeforeValidator(_validate_url_ssrf_strict)]
"""A Pydantic HttpUrl type with built-in SSRF protection.
This blocks private IPs, localhost, and cloud metadata endpoints.
Example:
class WebhookSchema(BaseModel):
url: SSRFProtectedUrl # Automatically validated for SSRF
headers: dict[str, str] | None = None
"""
SSRFProtectedUrlRelaxed = Annotated[
HttpUrl, BeforeValidator(_validate_url_ssrf_relaxed)
]
"""A Pydantic HttpUrl with relaxed SSRF protection (allows private IPs).
Use this for development/testing webhooks where localhost/private IPs are needed.
Cloud metadata endpoints are still blocked.
Example:
class DevWebhookSchema(BaseModel):
url: SSRFProtectedUrlRelaxed # Allows localhost, blocks cloud metadata
"""
SSRFProtectedHttpsUrl = Annotated[
HttpUrl, BeforeValidator(_validate_url_ssrf_https_only)
]
"""A Pydantic HttpUrl with SSRF protection that only allows HTTPS.
This blocks private IPs, localhost, cloud metadata endpoints, and HTTP URLs.
Example:
class SecureWebhookSchema(BaseModel):
url: SSRFProtectedHttpsUrl # Only HTTPS, blocks private IPs
"""
SSRFProtectedHttpsUrlStr = Annotated[
str, BeforeValidator(_validate_url_ssrf_https_only)
]
"""A string type with SSRF protection that only allows HTTPS URLs.
Same as SSRFProtectedHttpsUrl but returns a string instead of HttpUrl.
Useful for FastAPI query parameters where you need a string URL.
Example:
@router.get("/proxy")
async def proxy_get(url: SSRFProtectedHttpsUrlStr):
async with httpx.AsyncClient() as client:
resp = await client.get(url)
"""

View File

@@ -1,252 +0,0 @@
"""SSRF-safe httpx transport with DNS resolution and IP pinning."""
import asyncio
import socket
import httpx
from langchain_core._security._exceptions import SSRFBlockedError
from langchain_core._security._policy import (
SSRFPolicy,
_effective_allowed_hosts,
validate_resolved_ip,
validate_url_sync,
)
# Keys that AsyncHTTPTransport accepts (forwarded from factory kwargs).
_TRANSPORT_KWARGS = frozenset(
{
"verify",
"cert",
"trust_env",
"http1",
"http2",
"limits",
"retries",
}
)
class SSRFSafeTransport(httpx.AsyncBaseTransport):
"""httpx async transport that validates DNS results against an SSRF policy.
For every outgoing request the transport:
1. Checks the URL scheme against `policy.allowed_schemes`.
2. Validates the hostname against blocked patterns.
3. Resolves DNS and validates **all** returned IPs.
4. Rewrites the request to connect to the first valid IP while
preserving the original `Host` header and TLS SNI hostname.
Redirects are re-validated on each hop because `follow_redirects`
is set on the *client*, causing `handle_async_request` to be called
again for each redirect target.
"""
def __init__(
self,
policy: SSRFPolicy = SSRFPolicy(),
**transport_kwargs: object,
) -> None:
self._policy = policy
self._inner = httpx.AsyncHTTPTransport(**transport_kwargs) # type: ignore[arg-type]
# ------------------------------------------------------------------ #
# Core request handler
# ------------------------------------------------------------------ #
async def handle_async_request(
self,
request: httpx.Request,
) -> httpx.Response:
hostname = request.url.host or ""
scheme = request.url.scheme.lower()
# 1-3. Scheme, hostname, and pattern checks (reuse sync validator).
try:
validate_url_sync(str(request.url), self._policy)
except SSRFBlockedError:
raise
# Allowed-hosts bypass - skip DNS/IP validation entirely.
allowed = {h.lower() for h in _effective_allowed_hosts(self._policy)}
if hostname.lower() in allowed:
return await self._inner.handle_async_request(request)
# 4. DNS resolution
port = request.url.port or (443 if scheme == "https" else 80)
try:
addrinfo = await asyncio.to_thread(
socket.getaddrinfo,
hostname,
port,
type=socket.SOCK_STREAM,
)
except socket.gaierror as exc:
raise SSRFBlockedError("DNS resolution failed") from exc
if not addrinfo:
raise SSRFBlockedError("DNS resolution returned no results")
# 5. Validate ALL resolved IPs - any blocked means reject.
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
ip_str: str = sockaddr[0] # type: ignore[assignment]
validate_resolved_ip(ip_str, self._policy)
# 6. Pin to first resolved IP.
pinned_ip = addrinfo[0][4][0]
# 7. Rewrite URL to use pinned IP, preserving Host header and SNI.
pinned_url = request.url.copy_with(host=pinned_ip)
# Build extensions dict, adding sni_hostname for HTTPS so TLS
# certificate validation uses the original hostname.
extensions = dict(request.extensions)
if scheme == "https":
extensions["sni_hostname"] = hostname.encode("ascii")
pinned_request = httpx.Request(
method=request.method,
url=pinned_url,
headers=request.headers, # Host header already set to original
content=request.content,
extensions=extensions,
)
return await self._inner.handle_async_request(pinned_request)
# ------------------------------------------------------------------ #
# Lifecycle
# ------------------------------------------------------------------ #
async def aclose(self) -> None:
await self._inner.aclose()
# ---------------------------------------------------------------------- #
# Factory
# ---------------------------------------------------------------------- #
class SSRFSafeSyncTransport(httpx.BaseTransport):
"""httpx sync transport that validates DNS results against an SSRF policy.
Sync mirror of `SSRFSafeTransport`. See that class for full documentation.
"""
def __init__(
self,
policy: SSRFPolicy = SSRFPolicy(),
**transport_kwargs: object,
) -> None:
self._policy = policy
self._inner = httpx.HTTPTransport(**transport_kwargs) # type: ignore[arg-type]
def handle_request(
self,
request: httpx.Request,
) -> httpx.Response:
hostname = request.url.host or ""
scheme = request.url.scheme.lower()
validate_url_sync(str(request.url), self._policy)
allowed = {h.lower() for h in _effective_allowed_hosts(self._policy)}
if hostname.lower() in allowed:
return self._inner.handle_request(request)
port = request.url.port or (443 if scheme == "https" else 80)
try:
addrinfo = socket.getaddrinfo(
hostname,
port,
type=socket.SOCK_STREAM,
)
except socket.gaierror as exc:
raise SSRFBlockedError("DNS resolution failed") from exc
if not addrinfo:
raise SSRFBlockedError("DNS resolution returned no results")
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
ip_str: str = sockaddr[0] # type: ignore[assignment]
validate_resolved_ip(ip_str, self._policy)
pinned_ip = addrinfo[0][4][0]
pinned_url = request.url.copy_with(host=pinned_ip)
extensions = dict(request.extensions)
if scheme == "https":
extensions["sni_hostname"] = hostname.encode("ascii")
pinned_request = httpx.Request(
method=request.method,
url=pinned_url,
headers=request.headers,
content=request.content,
extensions=extensions,
)
return self._inner.handle_request(pinned_request)
def close(self) -> None:
self._inner.close()
# ---------------------------------------------------------------------- #
# Factories
# ---------------------------------------------------------------------- #
def ssrf_safe_client(
policy: SSRFPolicy = SSRFPolicy(),
**kwargs: object,
) -> httpx.Client:
"""Create an `httpx.Client` with SSRF protection."""
transport_kwargs: dict[str, object] = {}
client_kwargs: dict[str, object] = {}
for key, value in kwargs.items():
if key in _TRANSPORT_KWARGS:
transport_kwargs[key] = value
else:
client_kwargs[key] = value
transport = SSRFSafeSyncTransport(policy=policy, **transport_kwargs)
client_kwargs.setdefault("follow_redirects", True)
client_kwargs.setdefault("max_redirects", 10)
return httpx.Client(
transport=transport,
**client_kwargs, # type: ignore[arg-type]
)
def ssrf_safe_async_client(
policy: SSRFPolicy = SSRFPolicy(),
**kwargs: object,
) -> httpx.AsyncClient:
"""Create an `httpx.AsyncClient` with SSRF protection.
Drop-in replacement for `httpx.AsyncClient(...)` - callers just swap
the constructor call. Transport-specific kwargs (`verify`, `cert`,
`retries`, etc.) are forwarded to the inner `AsyncHTTPTransport`;
everything else goes to the `AsyncClient`.
"""
transport_kwargs: dict[str, object] = {}
client_kwargs: dict[str, object] = {}
for key, value in kwargs.items():
if key in _TRANSPORT_KWARGS:
transport_kwargs[key] = value
else:
client_kwargs[key] = value
transport = SSRFSafeTransport(policy=policy, **transport_kwargs)
# Apply defaults only if not overridden by caller.
client_kwargs.setdefault("follow_redirects", True)
client_kwargs.setdefault("max_redirects", 10)
return httpx.AsyncClient(
transport=transport,
**client_kwargs, # type: ignore[arg-type]
)

View File

@@ -166,14 +166,14 @@ class InMemoryCache(BaseCache):
# Update cache
cache.update(
prompt="What is the capital of France?",
llm_string="model='gpt-5.4-mini',
llm_string="model='gpt-3.5-turbo', temperature=0.1",
return_val=[Generation(text="Paris")],
)
# Lookup cache
result = cache.lookup(
prompt="What is the capital of France?",
llm_string="model='gpt-5.4-mini',
llm_string="model='gpt-3.5-turbo', temperature=0.1",
)
# result is [Generation(text="Paris")]
```

View File

@@ -7,7 +7,7 @@ import atexit
import functools
import logging
from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager, contextmanager
from contextvars import copy_context
@@ -1614,9 +1614,6 @@ class CallbackManager(BaseCallbackManager):
local_tags: list[str] | None = None,
inheritable_metadata: dict[str, Any] | None = None,
local_metadata: dict[str, Any] | None = None,
*,
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
langsmith_inheritable_tags: list[str] | None = None,
) -> CallbackManager:
"""Configure the callback manager.
@@ -1628,10 +1625,6 @@ class CallbackManager(BaseCallbackManager):
local_tags: The local tags.
inheritable_metadata: The inheritable metadata.
local_metadata: The local metadata.
langsmith_inheritable_metadata: Default inheritable metadata applied
to any `LangChainTracer` handlers via `set_defaults`.
langsmith_inheritable_tags: Default inheritable tags applied to any
`LangChainTracer` handlers via `set_defaults`.
Returns:
The configured callback manager.
@@ -1645,8 +1638,6 @@ class CallbackManager(BaseCallbackManager):
inheritable_metadata,
local_metadata,
verbose=verbose,
langsmith_inheritable_metadata=langsmith_inheritable_metadata,
langsmith_inheritable_tags=langsmith_inheritable_tags,
)
@@ -2143,9 +2134,6 @@ class AsyncCallbackManager(BaseCallbackManager):
local_tags: list[str] | None = None,
inheritable_metadata: dict[str, Any] | None = None,
local_metadata: dict[str, Any] | None = None,
*,
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
langsmith_inheritable_tags: list[str] | None = None,
) -> AsyncCallbackManager:
"""Configure the async callback manager.
@@ -2157,10 +2145,6 @@ class AsyncCallbackManager(BaseCallbackManager):
local_tags: The local tags.
inheritable_metadata: The inheritable metadata.
local_metadata: The local metadata.
langsmith_inheritable_metadata: Default inheritable metadata applied
to any `LangChainTracer` handlers via `set_defaults`.
langsmith_inheritable_tags: Default inheritable tags applied to any
`LangChainTracer` handlers via `set_defaults`.
Returns:
The configured async callback manager.
@@ -2174,8 +2158,6 @@ class AsyncCallbackManager(BaseCallbackManager):
inheritable_metadata,
local_metadata,
verbose=verbose,
langsmith_inheritable_metadata=langsmith_inheritable_metadata,
langsmith_inheritable_tags=langsmith_inheritable_tags,
)
@@ -2322,8 +2304,6 @@ def _configure(
local_metadata: dict[str, Any] | None = None,
*,
verbose: bool = False,
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
langsmith_inheritable_tags: list[str] | None = None,
) -> T:
"""Configure the callback manager.
@@ -2336,10 +2316,6 @@ def _configure(
inheritable_metadata: The inheritable metadata.
local_metadata: The local metadata.
verbose: Whether to enable verbose mode.
langsmith_inheritable_metadata: Default inheritable metadata applied to
any `LangChainTracer` handlers via `set_defaults`.
langsmith_inheritable_tags: Default inheritable tags applied to any
`LangChainTracer` handlers via `set_defaults`.
Raises:
RuntimeError: If `LANGCHAIN_TRACING` is set but `LANGCHAIN_TRACING_V2` is not.
@@ -2411,6 +2387,8 @@ def _configure(
if inheritable_metadata or local_metadata:
callback_manager.add_metadata(inheritable_metadata or {})
callback_manager.add_metadata(local_metadata or {}, inherit=False)
if tracing_metadata:
callback_manager.add_metadata(tracing_metadata.copy())
if tracing_tags:
callback_manager.add_tags(tracing_tags.copy())
@@ -2462,7 +2440,6 @@ def _configure(
else tracing_context["client"]
),
tags=tracing_tags,
metadata=tracing_metadata,
)
callback_manager.add_handler(handler)
except Exception as e:
@@ -2480,12 +2457,7 @@ def _configure(
run_tree.trace_id,
run_tree.dotted_order,
)
run_id_str = str(run_tree.id)
if run_id_str not in handler.run_map:
handler.run_map[run_id_str] = run_tree
handler._external_run_ids.setdefault( # noqa: SLF001
run_id_str, 0
)
handler.run_map[str(run_tree.id)] = run_tree
for var, inheritable, handler_class, env_var in _configure_hooks:
create_one = (
env_var is not None
@@ -2507,32 +2479,6 @@ def _configure(
for handler in callback_manager.handlers
):
callback_manager.add_handler(var_handler, inheritable)
if tracing_metadata:
langsmith_inheritable_metadata = {
**tracing_metadata,
**(langsmith_inheritable_metadata or {}),
}
if langsmith_inheritable_metadata or langsmith_inheritable_tags:
callback_manager.handlers = [
handler.copy_with_metadata_defaults(
metadata=langsmith_inheritable_metadata,
tags=langsmith_inheritable_tags,
)
if isinstance(handler, LangChainTracer)
else handler
for handler in callback_manager.handlers
]
callback_manager.inheritable_handlers = [
handler.copy_with_metadata_defaults(
metadata=langsmith_inheritable_metadata,
tags=langsmith_inheritable_tags,
)
if isinstance(handler, LangChainTracer)
else handler
for handler in callback_manager.inheritable_handlers
]
return callback_manager

View File

@@ -1,18 +0,0 @@
"""Cross Encoder interface."""
from abc import ABC, abstractmethod
class BaseCrossEncoder(ABC):
"""Interface for cross encoder models."""
@abstractmethod
def score(self, text_pairs: list[tuple[str, str]]) -> list[float]:
"""Score pairs' similarity.
Args:
text_pairs: List of pairs of texts.
Returns:
List of scores.
"""

View File

@@ -2,7 +2,6 @@ import re
from collections.abc import Sequence
from typing import (
TYPE_CHECKING,
Any,
Literal,
TypedDict,
TypeVar,
@@ -15,21 +14,6 @@ from langchain_core.messages.content import (
)
def _filter_invocation_params_for_tracing(params: dict[str, Any]) -> dict[str, Any]:
"""Filter out large/inappropriate fields from invocation params for tracing.
Removes fields like tools, functions, messages, response_format that can be large.
Args:
params: The invocation parameters to filter.
Returns:
The filtered parameters with large fields removed.
"""
excluded_keys = {"tools", "functions", "messages", "response_format"}
return {k: v for k, v in params.items() if k not in excluded_keys}
def is_openai_data_block(
block: dict, filter_: Literal["image", "audio", "file"] | None = None
) -> bool:

View File

@@ -69,8 +69,6 @@ class LangSmithParams(TypedDict, total=False):
ls_stop: list[str] | None
"""Stop words for generation."""
ls_integration: str
"""Integration that created the trace."""
@cache # Cache the tokenizer
@@ -301,22 +299,6 @@ class BaseLanguageModel(
# generate responses that match a given schema.
raise NotImplementedError
def _get_ls_params(
self,
stop: list[str] | None = None, # noqa: ARG002
**kwargs: Any, # noqa: ARG002
) -> LangSmithParams:
"""Get standard params for tracing."""
return LangSmithParams()
def _get_ls_params_with_defaults(
self,
stop: list[str] | None = None,
**kwargs: Any,
) -> LangSmithParams:
"""Wrap _get_ls_params to include any additional default parameters."""
return self._get_ls_params(stop=stop, **kwargs)
@property
def _identifying_params(self) -> Mapping[str, Any]:
"""Get the identifying parameters."""

View File

@@ -3,7 +3,6 @@
from __future__ import annotations
import asyncio
import contextlib
import inspect
import json
from abc import ABC, abstractmethod
@@ -12,8 +11,8 @@ from functools import cached_property
from operator import itemgetter
from typing import TYPE_CHECKING, Any, Literal, cast
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self, override
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import override
from langchain_core.caches import BaseCache
from langchain_core.callbacks import (
@@ -25,7 +24,6 @@ from langchain_core.callbacks import (
)
from langchain_core.globals import get_llm_cache
from langchain_core.language_models._utils import (
_filter_invocation_params_for_tracing,
_normalize_messages,
_update_message_content_to_blocks,
)
@@ -34,10 +32,7 @@ from langchain_core.language_models.base import (
LangSmithParams,
LanguageModelInput,
)
from langchain_core.language_models.model_profile import (
ModelProfile,
_warn_unknown_profile_keys,
)
from langchain_core.language_models.model_profile import ModelProfile
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
AIMessage,
@@ -362,54 +357,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
arbitrary_types_allowed=True,
)
def _resolve_model_profile(self) -> ModelProfile | None:
"""Return the default model profile, or `None` if unavailable.
Override this in subclasses instead of `_set_model_profile`. The base
validator calls it automatically and handles assignment. This avoids
coupling partner code to Pydantic validator mechanics.
Each partner needs its own override because things can vary per-partner,
such as the attribute that identifies the model (e.g., `model`,
`model_name`, `model_id`, `deployment_name`) and the partner-local
`_get_default_model_profile` function that reads from each partner's own
profile data.
"""
# TODO: consider adding a `_model_identifier` property on BaseChatModel
# to standardize how partners identify their model, which could allow a
# default implementation here that calls a shared
# profile-loading mechanism.
return None
@model_validator(mode="after")
def _set_model_profile(self) -> Self:
"""Populate `profile` from `_resolve_model_profile` if not provided.
Partners should override `_resolve_model_profile` rather than this
validator. Overriding this with a new `@model_validator` replaces the
base validator (Pydantic v2 behavior), bypassing the standard resolution
path. A plain method override does not prevent the base validator from
running.
"""
if self.profile is None:
# Suppress errors from partner overrides (e.g., missing profile
# files, broken imports) so model construction never fails over an
# optional field.
with contextlib.suppress(Exception):
self.profile = self._resolve_model_profile()
return self
# NOTE: _check_profile_keys must be defined AFTER _set_model_profile.
# Pydantic v2 runs mode="after" validators in definition order.
@model_validator(mode="after")
def _check_profile_keys(self) -> Self:
"""Warn on unrecognized profile keys."""
# isinstance guard: ModelProfile is a TypedDict (always a dict), but
# protects against unexpected types from partner overrides.
if self.profile and isinstance(self.profile, dict):
_warn_unknown_profile_keys(self.profile)
return self
@cached_property
def _serialized(self) -> dict[str, Any]:
# self is always a Serializable object in this case, thus the result is
@@ -558,7 +505,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
inheritable_metadata = {
**(config.get("metadata") or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = CallbackManager.configure(
config.get("callbacks"),
@@ -568,9 +515,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
(run_manager,) = callback_manager.on_chat_model_start(
self._serialized,
@@ -689,7 +633,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
inheritable_metadata = {
**(config.get("metadata") or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = AsyncCallbackManager.configure(
config.get("callbacks"),
@@ -699,9 +643,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
(run_manager,) = await callback_manager.on_chat_model_start(
self._serialized,
@@ -886,16 +827,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
return ls_params
def _get_ls_params_with_defaults(
self,
stop: list[str] | None = None,
**kwargs: Any,
) -> LangSmithParams:
"""Wrap _get_ls_params to always include ls_integration."""
ls_params = self._get_ls_params(stop=stop, **kwargs)
ls_params["ls_integration"] = "langchain_chat_model"
return ls_params
def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:
if self.is_lc_serializable():
params = {**kwargs, "stop": stop}
@@ -968,7 +899,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
options = {"stop": stop, **ls_structured_output_format_dict}
inheritable_metadata = {
**(metadata or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = CallbackManager.configure(
@@ -979,9 +910,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
messages_to_trace = [
_format_for_tracing(message_list) for message_list in messages
@@ -1094,7 +1022,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
options = {"stop": stop, **ls_structured_output_format_dict}
inheritable_metadata = {
**(metadata or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = AsyncCallbackManager.configure(
@@ -1105,9 +1033,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
messages_to_trace = [

View File

@@ -42,7 +42,6 @@ from langchain_core.callbacks import (
Callbacks,
)
from langchain_core.globals import get_llm_cache
from langchain_core.language_models._utils import _filter_invocation_params_for_tracing
from langchain_core.language_models.base import (
BaseLanguageModel,
LangSmithParams,
@@ -528,7 +527,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
options = {"stop": stop}
inheritable_metadata = {
**(config.get("metadata") or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = CallbackManager.configure(
config.get("callbacks"),
@@ -538,9 +537,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
(run_manager,) = callback_manager.on_llm_start(
self._serialized,
@@ -601,7 +597,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
options = {"stop": stop}
inheritable_metadata = {
**(config.get("metadata") or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
callback_manager = AsyncCallbackManager.configure(
config.get("callbacks"),
@@ -611,9 +607,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
inheritable_metadata,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
(run_manager,) = await callback_manager.on_llm_start(
self._serialized,
@@ -913,14 +906,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
metadata = [
{
**(meta or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
for meta in metadata
]
elif isinstance(metadata, dict):
metadata = {
**(metadata or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
if (
isinstance(callbacks, list)
@@ -957,8 +950,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_name_list = run_name or cast(
"list[str | None]", ([None] * len(prompts))
)
params = self.dict()
params["stop"] = stop
callback_managers = [
CallbackManager.configure(
callback,
@@ -968,9 +959,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
meta,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
for callback, tag, meta in zip(
callbacks, tags_list, metadata_list, strict=False
@@ -978,8 +966,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
]
else:
# We've received a single callbacks arg to apply to all inputs
params = self.dict()
params["stop"] = stop
callback_managers = [
CallbackManager.configure(
cast("Callbacks", callbacks),
@@ -989,13 +975,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
cast("dict[str, Any]", metadata),
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
] * len(prompts)
run_name_list = [cast("str | None", run_name)] * len(prompts)
run_ids_list = self._get_run_ids_list(run_id, prompts)
params = self.dict()
params["stop"] = stop
options = {"stop": stop}
(
existing_prompts,
@@ -1188,14 +1173,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
metadata = [
{
**(meta or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
for meta in metadata
]
elif isinstance(metadata, dict):
metadata = {
**(metadata or {}),
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
**self._get_ls_params(stop=stop, **kwargs),
}
# Create callback managers
if isinstance(callbacks, list) and (
@@ -1229,8 +1214,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_name_list = run_name or cast(
"list[str | None]", ([None] * len(prompts))
)
params = self.dict()
params["stop"] = stop
callback_managers = [
AsyncCallbackManager.configure(
callback,
@@ -1240,9 +1223,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
meta,
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
for callback, tag, meta in zip(
callbacks, tags_list, metadata_list, strict=False
@@ -1250,8 +1230,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
]
else:
# We've received a single callbacks arg to apply to all inputs
params = self.dict()
params["stop"] = stop
callback_managers = [
AsyncCallbackManager.configure(
cast("Callbacks", callbacks),
@@ -1261,13 +1239,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
self.tags,
cast("dict[str, Any]", metadata),
self.metadata,
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
params
),
)
] * len(prompts)
run_name_list = [cast("str | None", run_name)] * len(prompts)
run_ids_list = self._get_run_ids_list(run_id, prompts)
params = self.dict()
params["stop"] = stop
options = {"stop": stop}
(
existing_prompts,

View File

@@ -1,14 +1,7 @@
"""Model profile types and utilities."""
import logging
import warnings
from typing import get_type_hints
from pydantic import ConfigDict
from typing_extensions import TypedDict
logger = logging.getLogger(__name__)
class ModelProfile(TypedDict, total=False):
"""Model profile.
@@ -21,25 +14,6 @@ class ModelProfile(TypedDict, total=False):
and supported features.
"""
__pydantic_config__ = ConfigDict(extra="allow") # type: ignore[misc]
# --- Model metadata ---
name: str
"""Human-readable model name."""
status: str
"""Model status (e.g., `'active'`, `'deprecated'`)."""
release_date: str
"""Model release date (ISO 8601 format, e.g., `'2025-06-01'`)."""
last_updated: str
"""Date the model was last updated (ISO 8601 format)."""
open_weights: bool
"""Whether the model weights are openly available."""
# --- Input constraints ---
max_input_tokens: int
@@ -112,45 +86,6 @@ class ModelProfile(TypedDict, total=False):
"""Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
feature"""
# --- Other capabilities ---
attachment: bool
"""Whether the model supports file attachments."""
temperature: bool
"""Whether the model supports a temperature parameter."""
ModelProfileRegistry = dict[str, ModelProfile]
"""Registry mapping model identifiers or names to their ModelProfile."""
def _warn_unknown_profile_keys(profile: ModelProfile) -> None:
"""Warn if `profile` contains keys not declared on `ModelProfile`.
Args:
profile: The model profile dict to check for undeclared keys.
"""
if not isinstance(profile, dict):
return
try:
declared = frozenset(get_type_hints(ModelProfile).keys())
except (TypeError, NameError):
# get_type_hints raises NameError on unresolvable forward refs and
# TypeError when annotations evaluate to non-type objects.
logger.debug(
"Could not resolve type hints for ModelProfile; "
"skipping unknown-key check.",
exc_info=True,
)
return
extra = sorted(set(profile) - declared)
if extra:
warnings.warn(
f"Unrecognized keys in model profile: {extra}. "
f"This may indicate a version mismatch between langchain-core "
f"and your provider package. Consider upgrading langchain-core.",
stacklevel=2,
)

View File

@@ -109,7 +109,6 @@ from langchain_core.load.mapping import (
SERIALIZABLE_MAPPING,
)
from langchain_core.load.serializable import Serializable
from langchain_core.load.validators import CLASS_INIT_VALIDATORS
DEFAULT_NAMESPACES = [
"langchain",
@@ -481,19 +480,6 @@ class Reviver:
msg = f"Invalid namespace: {value}"
raise ValueError(msg)
# We don't need to recurse on kwargs
# as json.loads will do that for us.
kwargs = value.get("kwargs", {})
# Run class-specific validators before the general init_validator.
# These run before importing to fail fast on security violations.
if mapping_key in CLASS_INIT_VALIDATORS:
CLASS_INIT_VALIDATORS[mapping_key](mapping_key, kwargs)
# Also run general init_validator (e.g., jinja2 blocking)
if self.init_validator is not None:
self.init_validator(mapping_key, kwargs)
mod = importlib.import_module(".".join(import_dir))
cls = getattr(mod, name)
@@ -503,6 +489,13 @@ class Reviver:
msg = f"Invalid namespace: {value}"
raise ValueError(msg)
# We don't need to recurse on kwargs
# as json.loads will do that for us.
kwargs = value.get("kwargs", {})
if self.init_validator is not None:
self.init_validator(mapping_key, kwargs)
return cls(**kwargs)
return value

View File

@@ -283,11 +283,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
"chat_models",
"ChatXAI",
),
("langchain_baseten", "chat_models", "ChatBaseten"): (
"langchain_baseten",
"chat_models",
"ChatBaseten",
),
("langchain", "chat_models", "fireworks", "ChatFireworks"): (
"langchain_fireworks",
"chat_models",
@@ -321,12 +316,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
"bedrock",
"ChatBedrock",
),
("langchain_aws", "chat_models", "ChatBedrockConverse"): (
"langchain_aws",
"chat_models",
"bedrock_converse",
"ChatBedrockConverse",
),
("langchain_google_genai", "chat_models", "ChatGoogleGenerativeAI"): (
"langchain_google_genai",
"chat_models",
@@ -386,12 +375,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
"bedrock",
"BedrockLLM",
),
("langchain", "llms", "bedrock", "BedrockLLM"): (
"langchain_aws",
"llms",
"bedrock",
"BedrockLLM",
),
("langchain", "llms", "fireworks", "Fireworks"): (
"langchain_fireworks",
"llms",

View File

@@ -1,77 +0,0 @@
"""Init validators for deserialization security.
This module contains extra validators that are called during deserialization,
ex. to prevent security issues such as SSRF attacks.
Each validator is a callable matching the `InitValidator` protocol: it takes a
class path tuple and kwargs dict, returns `None` on success, and raises
`ValueError` if the deserialization should be blocked.
"""
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from langchain_core.load.load import InitValidator
def _bedrock_validator(class_path: tuple[str, ...], kwargs: dict[str, Any]) -> None:
"""Constructor kwargs validator for AWS Bedrock integrations.
Blocks deserialization if `endpoint_url` or `base_url` parameters are
present, which could enable SSRF attacks.
Args:
class_path: The class path tuple being deserialized.
kwargs: The kwargs dict for the class constructor.
Raises:
ValueError: If `endpoint_url` or `base_url` parameters are present.
"""
dangerous_params = ["endpoint_url", "base_url"]
found_params = [p for p in dangerous_params if p in kwargs]
if found_params:
class_name = class_path[-1] if class_path else "Unknown"
param_str = ", ".join(found_params)
msg = (
f"Deserialization of {class_name} with {param_str} is not allowed "
f"for security reasons. These parameters can enable Server-Side Request "
f"Forgery (SSRF) attacks by directing network requests to arbitrary "
f"endpoints during initialization. If you need to use a custom endpoint, "
f"instantiate {class_name} directly rather than deserializing it."
)
raise ValueError(msg)
# Keys must cover both serialized IDs (SERIALIZABLE_MAPPING keys) and resolved
# import paths (SERIALIZABLE_MAPPING values) to prevent bypass via direct paths.
CLASS_INIT_VALIDATORS: dict[tuple[str, ...], "InitValidator"] = {
# Serialized (legacy) keys
("langchain", "chat_models", "bedrock", "BedrockChat"): _bedrock_validator,
("langchain", "chat_models", "bedrock", "ChatBedrock"): _bedrock_validator,
(
"langchain",
"chat_models",
"anthropic_bedrock",
"ChatAnthropicBedrock",
): _bedrock_validator,
("langchain_aws", "chat_models", "ChatBedrockConverse"): _bedrock_validator,
("langchain", "llms", "bedrock", "Bedrock"): _bedrock_validator,
("langchain", "llms", "bedrock", "BedrockLLM"): _bedrock_validator,
# Resolved import paths (from ALL_SERIALIZABLE_MAPPINGS values) to defend
# against payloads that use the target tuple directly as the "id".
(
"langchain_aws",
"chat_models",
"bedrock_converse",
"ChatBedrockConverse",
): _bedrock_validator,
(
"langchain_aws",
"chat_models",
"anthropic",
"ChatAnthropicBedrock",
): _bedrock_validator,
("langchain_aws", "chat_models", "ChatBedrock"): _bedrock_validator,
("langchain_aws", "llms", "bedrock", "BedrockLLM"): _bedrock_validator,
}

View File

@@ -103,13 +103,11 @@ def convert_to_openai_data_block(
# Backward compat
file["filename"] = extras["filename"]
else:
# Can't infer filename; set a placeholder default for compatibility.
file["filename"] = "LC_AUTOGENERATED"
# Can't infer filename
warnings.warn(
"OpenAI may require a filename for file uploads. Specify a filename"
" in the content block, e.g.: {'type': 'file', 'mime_type': "
"'...', 'base64': '...', 'filename': 'my-file.pdf'}. "
"Using placeholder filename 'LC_AUTOGENERATED'.",
"'...', 'base64': '...', 'filename': 'my-file.pdf'}",
stacklevel=1,
)
formatted_block = {"type": "file", "file": file}
@@ -335,9 +333,10 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage:
# Reasoning
if reasoning := message.additional_kwargs.get("reasoning"):
if "type" not in reasoning:
reasoning = {**reasoning, "type": "reasoning"}
buckets["reasoning"].append(reasoning)
if isinstance(message, AIMessageChunk) and message.chunk_position != "last":
buckets["reasoning"].append({**reasoning, "type": "reasoning"})
else:
buckets["reasoning"].append(reasoning)
# Refusal
if refusal := message.additional_kwargs.get("refusal"):
@@ -732,11 +731,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
tool_call_block["extras"]["item_id"] = block["id"]
if "index" in block:
tool_call_block["index"] = f"lc_tc_{block['index']}"
for extra_key in ("status", "namespace"):
if extra_key in block:
if "extras" not in tool_call_block:
tool_call_block["extras"] = {}
tool_call_block["extras"][extra_key] = block[extra_key]
yield tool_call_block
elif block_type == "web_search_call":
@@ -985,51 +979,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
mcp_list_tools_result["index"] = f"lc_mltr_{block['index'] + 1}"
yield cast("types.ServerToolResult", mcp_list_tools_result)
elif (
block_type == "tool_search_call" and block.get("execution") == "server"
):
tool_search_call: dict[str, Any] = {
"type": "server_tool_call",
"name": "tool_search",
"id": block["id"],
"args": block.get("arguments", {}),
}
if "index" in block:
tool_search_call["index"] = f"lc_tsc_{block['index']}"
extras: dict[str, Any] = {}
known = {"type", "id", "arguments", "index"}
for key in block:
if key not in known:
extras[key] = block[key]
if extras:
tool_search_call["extras"] = extras
yield cast("types.ServerToolCall", tool_search_call)
elif (
block_type == "tool_search_output"
and block.get("execution") == "server"
):
tool_search_output: dict[str, Any] = {
"type": "server_tool_result",
"tool_call_id": block["id"],
"output": {"tools": block.get("tools", [])},
}
status = block.get("status")
if status == "failed":
tool_search_output["status"] = "error"
elif status == "completed":
tool_search_output["status"] = "success"
if "index" in block and isinstance(block["index"], int):
tool_search_output["index"] = f"lc_tso_{block['index']}"
extras_out: dict[str, Any] = {"name": "tool_search"}
known_out = {"type", "id", "status", "tools", "index"}
for key in block:
if key not in known_out:
extras_out[key] = block[key]
if extras_out:
tool_search_output["extras"] = extras_out
yield cast("types.ServerToolResult", tool_search_output)
elif block_type in types.KNOWN_BLOCK_TYPES:
yield cast("types.ContentBlock", block)
else:

View File

@@ -874,9 +874,9 @@ def filter_messages(
filter_messages(
messages,
include_names=("example_user", "example_assistant"),
include_types=("system",),
exclude_ids=("bar",),
incl_names=("example_user", "example_assistant"),
incl_types=("system",),
excl_ids=("bar",),
)
```
@@ -1551,7 +1551,7 @@ def convert_to_openai_messages(
{
"role": "user",
"content": [
{"type": "text", "text": "what's in this"},
{"type": "text", "text": "whats in this"},
{
"type": "image_url",
"image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
@@ -1570,15 +1570,15 @@ def convert_to_openai_messages(
],
),
ToolMessage("foobar", tool_call_id="1", name="bar"),
{"role": "assistant", "content": "that's nice"},
{"role": "assistant", "content": "thats nice"},
]
oai_messages = convert_to_openai_messages(messages)
# -> [
# {'role': 'system', 'content': 'foo'},
# {'role': 'user', 'content': [{'type': 'text', 'text': 'what's in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
# {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
# {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
# {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
# {'role': 'assistant', 'content': 'that's nice'}
# {'role': 'assistant', 'content': 'thats nice'}
# ]
```

View File

@@ -15,7 +15,6 @@ import yaml
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self, override
from langchain_core._api import deprecated
from langchain_core.exceptions import ErrorCode, create_message
from langchain_core.load import dumpd
from langchain_core.output_parsers.base import BaseOutputParser # noqa: TC001
@@ -351,12 +350,6 @@ class BasePromptTemplate(
prompt_dict["_type"] = self._prompt_type
return prompt_dict
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def save(self, file_path: Path | str) -> None:
"""Save the prompt.
@@ -389,12 +382,11 @@ class BasePromptTemplate(
directory_path = save_path.parent
directory_path.mkdir(parents=True, exist_ok=True)
resolved_path = save_path.resolve()
if resolved_path.suffix == ".json":
with resolved_path.open("w", encoding="utf-8") as f:
if save_path.suffix == ".json":
with save_path.open("w", encoding="utf-8") as f:
json.dump(prompt_dict, f, indent=4)
elif resolved_path.suffix.endswith((".yaml", ".yml")):
with resolved_path.open("w", encoding="utf-8") as f:
elif save_path.suffix.endswith((".yaml", ".yml")):
with save_path.open("w", encoding="utf-8") as f:
yaml.dump(prompt_dict, f, default_flow_style=False)
else:
msg = f"{save_path} must be json or yaml"

View File

@@ -22,7 +22,6 @@ from pydantic import (
)
from typing_extensions import Self, override
from langchain_core._api import deprecated
from langchain_core.messages import (
AIMessage,
AnyMessage,
@@ -1306,12 +1305,6 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
"""Name of prompt type. Used for serialization."""
return "chat"
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def save(self, file_path: Path | str) -> None:
"""Save prompt to file.

View File

@@ -4,7 +4,6 @@ import warnings
from functools import cached_property
from typing import Any, Literal, cast
from pydantic import model_validator
from typing_extensions import override
from langchain_core.load import dumpd
@@ -22,35 +21,11 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
Recognizes variables in f-string or mustache formatted string dict values.
Does NOT recognize variables in dict keys. Applies recursively.
Example:
```python
prompt = DictPromptTemplate(
template={
"type": "text",
"text": "Hello {name}",
"metadata": {"source": "{source}"},
},
template_format="f-string",
)
prompt.format(name="Alice", source="docs")
# {
# "type": "text",
# "text": "Hello Alice",
# "metadata": {"source": "docs"},
# }
```
"""
template: dict[str, Any]
template_format: Literal["f-string", "mustache"]
@model_validator(mode="after")
def validate_template(self) -> "DictPromptTemplate":
"""Validate that the template structure contains only safe variables."""
_get_input_variables(self.template, self.template_format)
return self
@property
def input_variables(self) -> list[str]:
"""Template input variables."""

View File

@@ -12,7 +12,6 @@ from pydantic import (
)
from typing_extensions import override
from langchain_core._api import deprecated
from langchain_core.example_selectors import BaseExampleSelector
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.prompts.chat import BaseChatPromptTemplate
@@ -238,12 +237,6 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate):
"""Return the prompt type key."""
return "few_shot"
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def save(self, file_path: Path | str) -> None:
"""Save the prompt template to a file.

View File

@@ -6,7 +6,6 @@ from typing import Any
from pydantic import ConfigDict, model_validator
from typing_extensions import Self
from langchain_core._api import deprecated
from langchain_core.example_selectors import BaseExampleSelector
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.prompts.string import (
@@ -216,12 +215,6 @@ class FewShotPromptWithTemplates(StringPromptTemplate):
"""Return the prompt type key."""
return "few_shot_with_templates"
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def save(self, file_path: Path | str) -> None:
"""Save the prompt to a file.

View File

@@ -9,25 +9,12 @@ from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.string import (
DEFAULT_FORMATTER_MAPPING,
PromptTemplateFormat,
get_template_variables,
)
from langchain_core.runnables import run_in_executor
class ImagePromptTemplate(BasePromptTemplate[ImageURL]):
"""Image prompt template for a multimodal model.
Example:
```python
prompt = ImagePromptTemplate(
input_variables=["image_id"],
template={"url": "https://example.com/{image_id}.png", "detail": "high"},
template_format="f-string",
)
prompt.format(image_id="cat")
# {"url": "https://example.com/cat.png", "detail": "high"}
```
"""
"""Image prompt template for a multimodal model."""
template: dict = Field(default_factory=dict)
"""Template for the prompt."""
@@ -56,13 +43,6 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]):
f" Found: {overlap}"
)
raise ValueError(msg)
template = kwargs.get("template", {})
template_format = kwargs.get("template_format", "f-string")
for value in template.values():
if isinstance(value, str):
get_template_variables(value, template_format)
super().__init__(**kwargs)
@property

View File

@@ -7,7 +7,6 @@ from pathlib import Path
import yaml
from langchain_core._api import deprecated
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate
@@ -18,51 +17,11 @@ URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/pro
logger = logging.getLogger(__name__)
def _validate_path(path: Path) -> None:
"""Reject absolute paths and ``..`` traversal components.
Args:
path: The path to validate.
Raises:
ValueError: If the path is absolute or contains ``..`` components.
"""
if path.is_absolute():
msg = (
f"Path '{path}' is absolute. Absolute paths are not allowed "
f"when loading prompt configurations to prevent path traversal "
f"attacks. Use relative paths instead, or pass "
f"`allow_dangerous_paths=True` if you trust the input."
)
raise ValueError(msg)
if ".." in path.parts:
msg = (
f"Path '{path}' contains '..' components. Directory traversal "
f"sequences are not allowed when loading prompt configurations. "
f"Use direct relative paths instead, or pass "
f"`allow_dangerous_paths=True` if you trust the input."
)
raise ValueError(msg)
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def load_prompt_from_config(
config: dict, *, allow_dangerous_paths: bool = False
) -> BasePromptTemplate:
def load_prompt_from_config(config: dict) -> BasePromptTemplate:
"""Load prompt from config dict.
Args:
config: Dict containing the prompt configuration.
allow_dangerous_paths: If ``False`` (default), file paths in the
config (such as ``template_path``, ``examples``, and
``example_prompt_path``) are validated to reject absolute paths
and directory traversal (``..``) sequences. Set to ``True`` only
if you trust the source of the config.
Returns:
A `PromptTemplate` object.
@@ -79,12 +38,10 @@ def load_prompt_from_config(
raise ValueError(msg)
prompt_loader = type_to_loader_dict[config_type]
return prompt_loader(config, allow_dangerous_paths=allow_dangerous_paths)
return prompt_loader(config)
def _load_template(
var_name: str, config: dict, *, allow_dangerous_paths: bool = False
) -> dict:
def _load_template(var_name: str, config: dict) -> dict:
"""Load template from the path if applicable."""
# Check if template_path exists in config.
if f"{var_name}_path" in config:
@@ -94,14 +51,9 @@ def _load_template(
raise ValueError(msg)
# Pop the template path from the config.
template_path = Path(config.pop(f"{var_name}_path"))
if not allow_dangerous_paths:
_validate_path(template_path)
# Resolve symlinks before checking the suffix so that a symlink named
# "exploit.txt" pointing to a non-.txt file is caught.
resolved_path = template_path.resolve()
# Load the template.
if resolved_path.suffix == ".txt":
template = resolved_path.read_text(encoding="utf-8")
if template_path.suffix == ".txt":
template = template_path.read_text(encoding="utf-8")
else:
raise ValueError
# Set the template variable to the extracted variable.
@@ -109,14 +61,12 @@ def _load_template(
return config
def _load_examples(config: dict, *, allow_dangerous_paths: bool = False) -> dict:
def _load_examples(config: dict) -> dict:
"""Load examples if necessary."""
if isinstance(config["examples"], list):
pass
elif isinstance(config["examples"], str):
path = Path(config["examples"])
if not allow_dangerous_paths:
_validate_path(path)
with path.open(encoding="utf-8") as f:
if path.suffix == ".json":
examples = json.load(f)
@@ -142,17 +92,11 @@ def _load_output_parser(config: dict) -> dict:
return config
def _load_few_shot_prompt(
config: dict, *, allow_dangerous_paths: bool = False
) -> FewShotPromptTemplate:
def _load_few_shot_prompt(config: dict) -> FewShotPromptTemplate:
"""Load the "few shot" prompt from the config."""
# Load the suffix and prefix templates.
config = _load_template(
"suffix", config, allow_dangerous_paths=allow_dangerous_paths
)
config = _load_template(
"prefix", config, allow_dangerous_paths=allow_dangerous_paths
)
config = _load_template("suffix", config)
config = _load_template("prefix", config)
# Load the example prompt.
if "example_prompt_path" in config:
if "example_prompt" in config:
@@ -161,30 +105,19 @@ def _load_few_shot_prompt(
"be specified."
)
raise ValueError(msg)
example_prompt_path = Path(config.pop("example_prompt_path"))
if not allow_dangerous_paths:
_validate_path(example_prompt_path)
config["example_prompt"] = load_prompt(
example_prompt_path, allow_dangerous_paths=allow_dangerous_paths
)
config["example_prompt"] = load_prompt(config.pop("example_prompt_path"))
else:
config["example_prompt"] = load_prompt_from_config(
config["example_prompt"], allow_dangerous_paths=allow_dangerous_paths
)
config["example_prompt"] = load_prompt_from_config(config["example_prompt"])
# Load the examples.
config = _load_examples(config, allow_dangerous_paths=allow_dangerous_paths)
config = _load_examples(config)
config = _load_output_parser(config)
return FewShotPromptTemplate(**config)
def _load_prompt(
config: dict, *, allow_dangerous_paths: bool = False
) -> PromptTemplate:
def _load_prompt(config: dict) -> PromptTemplate:
"""Load the prompt template from config."""
# Load the template from disk if necessary.
config = _load_template(
"template", config, allow_dangerous_paths=allow_dangerous_paths
)
config = _load_template("template", config)
config = _load_output_parser(config)
template_format = config.get("template_format", "f-string")
@@ -201,28 +134,12 @@ def _load_prompt(
return PromptTemplate(**config)
@deprecated(
since="1.2.21",
removal="2.0.0",
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
"prompts and `load`/`loads` to deserialize them.",
)
def load_prompt(
path: str | Path,
encoding: str | None = None,
*,
allow_dangerous_paths: bool = False,
) -> BasePromptTemplate:
def load_prompt(path: str | Path, encoding: str | None = None) -> BasePromptTemplate:
"""Unified method for loading a prompt from LangChainHub or local filesystem.
Args:
path: Path to the prompt file.
encoding: Encoding of the file.
allow_dangerous_paths: If ``False`` (default), file paths referenced
inside the loaded config (such as ``template_path``, ``examples``,
and ``example_prompt_path``) are validated to reject absolute paths
and directory traversal (``..``) sequences. Set to ``True`` only
if you trust the source of the config.
Returns:
A `PromptTemplate` object.
@@ -237,16 +154,11 @@ def load_prompt(
"instead."
)
raise RuntimeError(msg)
return _load_prompt_from_file(
path, encoding, allow_dangerous_paths=allow_dangerous_paths
)
return _load_prompt_from_file(path, encoding)
def _load_prompt_from_file(
file: str | Path,
encoding: str | None = None,
*,
allow_dangerous_paths: bool = False,
file: str | Path, encoding: str | None = None
) -> BasePromptTemplate:
"""Load prompt from file."""
# Convert file to a Path object.
@@ -262,14 +174,10 @@ def _load_prompt_from_file(
msg = f"Got unsupported file type {file_path.suffix}"
raise ValueError(msg)
# Load the prompt from the config now.
return load_prompt_from_config(config, allow_dangerous_paths=allow_dangerous_paths)
return load_prompt_from_config(config)
def _load_chat_prompt(
config: dict,
*,
allow_dangerous_paths: bool = False, # noqa: ARG001
) -> ChatPromptTemplate:
def _load_chat_prompt(config: dict) -> ChatPromptTemplate:
"""Load chat prompt from config."""
messages = config.pop("messages")
template = messages[0]["prompt"].pop("template") if messages else None
@@ -282,7 +190,7 @@ def _load_chat_prompt(
return ChatPromptTemplate.from_template(template=template, **config)
type_to_loader_dict: dict[str, Callable[..., BasePromptTemplate]] = {
type_to_loader_dict: dict[str, Callable[[dict], BasePromptTemplate]] = {
"prompt": _load_prompt,
"few_shot": _load_few_shot_prompt,
"chat": _load_chat_prompt,

View File

@@ -219,46 +219,6 @@ DEFAULT_VALIDATOR_MAPPING: dict[str, Callable] = {
}
def _parse_f_string_fields(template: str) -> list[tuple[str, str | None]]:
fields: list[tuple[str, str | None]] = []
for _, field_name, format_spec, _ in Formatter().parse(template):
if field_name is not None:
fields.append((field_name, format_spec))
return fields
def validate_f_string_template(template: str) -> list[str]:
"""Validate an f-string template and return its input variables."""
input_variables = set()
for var, format_spec in _parse_f_string_fields(template):
if "." in var or "[" in var or "]" in var:
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot contain attribute "
f"access (.) or indexing ([])."
)
raise ValueError(msg)
if var.isdigit():
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot be all digits as they are interpreted "
f"as positional arguments."
)
raise ValueError(msg)
if format_spec and ("{" in format_spec or "}" in format_spec):
msg = (
"Invalid format specifier in f-string template. "
"Nested replacement fields are not allowed."
)
raise ValueError(msg)
input_variables.add(var)
return sorted(input_variables)
def check_valid_template(
template: str, template_format: str, input_variables: list[str]
) -> None:
@@ -283,8 +243,6 @@ def check_valid_template(
f" {list(DEFAULT_FORMATTER_MAPPING)}."
)
raise ValueError(msg) from exc
if template_format == "f-string":
validate_f_string_template(template)
try:
validator_func(template, input_variables)
except (KeyError, IndexError) as exc:
@@ -310,18 +268,43 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
Raises:
ValueError: If the template format is not supported.
"""
input_variables: list[str] | set[str]
if template_format == "jinja2":
# Get the variables for the template
input_variables = sorted(_get_jinja2_variables_from_template(template))
input_variables = _get_jinja2_variables_from_template(template)
elif template_format == "f-string":
input_variables = validate_f_string_template(template)
input_variables = {
v for _, v, _, _ in Formatter().parse(template) if v is not None
}
elif template_format == "mustache":
input_variables = mustache_template_vars(template)
else:
msg = f"Unsupported template format: {template_format}"
raise ValueError(msg)
# For f-strings, block attribute access and indexing syntax
# This prevents template injection attacks via accessing dangerous attributes
if template_format == "f-string":
for var in input_variables:
# Formatter().parse() returns field names with dots/brackets if present
# e.g., "obj.attr" or "obj[0]" - we need to block these
if "." in var or "[" in var or "]" in var:
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot contain attribute "
f"access (.) or indexing ([])."
)
raise ValueError(msg)
# Block variable names that are all digits (e.g., "0", "100")
# These are interpreted as positional arguments, not keyword arguments
if var.isdigit():
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot be all digits as they are interpreted "
f"as positional arguments."
)
raise ValueError(msg)
return sorted(input_variables)

View File

@@ -3,7 +3,7 @@
The LangChain Expression Language (LCEL) offers a declarative method to build
production-grade programs that harness the power of LLMs.
Programs created using LCEL and LangChain `Runnable` objects inherently support
Programs created using LCEL and LangChain `Runnable` objects inherently suppor
synchronous asynchronous, batch, and streaming operations.
Support for **async** allows servers hosting LCEL based programs to scale bette for

View File

@@ -138,28 +138,6 @@ COPIABLE_KEYS = [
"configurable",
]
# Users are expected to use the `context` API with a context object
# (which does not get traced)
CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS = frozenset(("api_key",))
def _get_langsmith_inheritable_metadata_from_config(
config: RunnableConfig,
) -> dict[str, Any] | None:
"""Get LangSmith-only inheritable metadata defaults derived from config."""
configurable = config.get("configurable") or {}
metadata = {
key: value
for key, value in configurable.items()
if not key.startswith("__")
and isinstance(value, (str, int, float, bool))
and key not in config.get("metadata", {})
and key not in CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS
}
return metadata or None
DEFAULT_RECURSION_LIMIT = 25
@@ -286,17 +264,14 @@ def ensure_config(config: RunnableConfig | None = None) -> RunnableConfig:
for k, v in config.items():
if k not in CONFIG_KEYS and v is not None:
empty["configurable"][k] = v
for configurable_key in ("model", "checkpoint_ns"):
for key, value in empty.get("configurable", {}).items():
if (
isinstance(
configurable_value := empty.get("configurable", {}).get(
configurable_key
),
str,
)
and configurable_key not in empty["metadata"]
not key.startswith("__")
and isinstance(value, (str, int, float, bool))
and key not in empty["metadata"]
and key != "api_key"
):
empty["metadata"][configurable_key] = configurable_value
empty["metadata"][key] = value
return empty
@@ -533,9 +508,6 @@ def get_callback_manager_for_config(config: RunnableConfig) -> CallbackManager:
inheritable_callbacks=config.get("callbacks"),
inheritable_tags=config.get("tags"),
inheritable_metadata=config.get("metadata"),
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
config
),
)
@@ -554,9 +526,6 @@ def get_async_callback_manager_for_config(
inheritable_callbacks=config.get("callbacks"),
inheritable_tags=config.get("tags"),
inheritable_metadata=config.get("metadata"),
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
config
),
)

View File

@@ -499,7 +499,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
# When invoking the created RunnableSequence, you can pass in the
# value for your ConfigurableField's id which in this case will either be
# `joke` or `poem`.
chain = prompt | ChatOpenAI(model="gpt-5.4-mini")
chain = prompt | ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# The `with_config` method brings in the desired Prompt Runnable in your
# Runnable Sequence.
@@ -525,7 +525,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
"poem": PromptTemplate.from_template("Write a short poem about {topic}")
},
)
chain = prompt | ChatOpenAI(model="gpt-5.4-mini")
chain = prompt | ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
```
"""

View File

@@ -54,8 +54,8 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
from langchain_core.chat_models.openai import ChatOpenAI
from langchain_core.chat_models.anthropic import ChatAnthropic
model = ChatAnthropic(model="claude-sonnet-4-6").with_fallbacks(
[ChatOpenAI(model="gpt-5.4-mini")]
model = ChatAnthropic(model="claude-3-haiku-20240307").with_fallbacks(
[ChatOpenAI(model="gpt-3.5-turbo-0125")]
)
# Will usually use ChatAnthropic, but fallback to ChatOpenAI
# if ChatAnthropic fails.

View File

@@ -414,11 +414,6 @@ def _render_mermaid_using_api(
base_url: str | None = None,
) -> bytes:
"""Renders Mermaid graph using the Mermaid.INK API."""
# Automated scanners: please do not open security advisories about `base_url`
# here. This parameter is only ever explicitly controlled by developers, and it
# is not exposed by default over the network by the framework, so this is not
# an SSRF sink unless an application explicitly wires untrusted input into it.
# It exists to support user-managed Mermaid API deployments.
# Defaults to using the public mermaid.ink server.
base_url = base_url if base_url is not None else "https://mermaid.ink"

View File

@@ -558,16 +558,12 @@ class ConfigurableField(NamedTuple):
id: str
"""The unique identifier of the field."""
name: str | None = None
"""The name of the field. """
description: str | None = None
"""The description of the field. """
annotation: Any | None = None
"""The annotation of the field. """
is_shared: bool = False
"""Whether the field is shared."""
@@ -581,19 +577,14 @@ class ConfigurableFieldSingleOption(NamedTuple):
id: str
"""The unique identifier of the field."""
options: Mapping[str, Any]
"""The options for the field."""
default: str
"""The default value for the field."""
name: str | None = None
"""The name of the field. """
description: str | None = None
"""The description of the field. """
is_shared: bool = False
"""Whether the field is shared."""
@@ -607,19 +598,14 @@ class ConfigurableFieldMultiOption(NamedTuple):
id: str
"""The unique identifier of the field."""
options: Mapping[str, Any]
"""The options for the field."""
default: Sequence[str]
"""The default values for the field."""
name: str | None = None
"""The name of the field. """
description: str | None = None
"""The description of the field. """
is_shared: bool = False
"""Whether the field is shared."""
@@ -638,22 +624,16 @@ class ConfigurableFieldSpec(NamedTuple):
id: str
"""The unique identifier of the field."""
annotation: Any
"""The annotation of the field."""
name: str | None = None
"""The name of the field. """
description: str | None = None
"""The description of the field. """
default: Any = None
"""The default value for the field. """
is_shared: bool = False
"""Whether the field is shared."""
dependencies: list[str] | None = None
"""The dependencies of the field. """

View File

@@ -1,6 +1,6 @@
"""Tools are classes that an Agent uses to interact with the world.
Each tool has a description. Agent uses the description to choose the right tool for the
Each tool has a description. Agent uses the description to choose the righ tool for the
job.
"""

View File

@@ -47,15 +47,6 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC):
if not run.parent_run_id:
self._persist_run(run)
self.run_map.pop(str(run.id))
# If this run's parent was injected from an external tracing context
# (e.g. a langsmith @traceable), decrement its child refcount and
# remove it from run_map once the last child is done.
parent_id = str(run.parent_run_id) if run.parent_run_id else None
if parent_id and parent_id in self._external_run_ids:
self._external_run_ids[parent_id] -= 1
if self._external_run_ids[parent_id] <= 0:
self.run_map.pop(parent_id, None)
del self._external_run_ids[parent_id]
self._on_run_update(run)
def on_chat_model_start(
@@ -577,15 +568,6 @@ class AsyncBaseTracer(_TracerCore, AsyncCallbackHandler, ABC):
if not run.parent_run_id:
await self._persist_run(run)
self.run_map.pop(str(run.id))
# If this run's parent was injected from an external tracing context
# (e.g. a langsmith @traceable), decrement its child refcount and
# remove it from run_map once the last child is done.
parent_id = str(run.parent_run_id) if run.parent_run_id else None
if parent_id and parent_id in self._external_run_ids:
self._external_run_ids[parent_id] -= 1
if self._external_run_ids[parent_id] <= 0:
self.run_map.pop(parent_id, None)
del self._external_run_ids[parent_id]
await self._on_run_update(run)
@override

View File

@@ -51,9 +51,6 @@ class _TracerCore(ABC):
_schema_format: Literal[
"original", "streaming_events", "original+chat"
] = "original",
run_map: dict[str, Run] | None = None,
order_map: dict[UUID, tuple[UUID, str]] | None = None,
_external_run_ids: dict[str, int] | None = None,
**kwargs: Any,
) -> None:
"""Initialize the tracer.
@@ -73,9 +70,6 @@ class _TracerCore(ABC):
streaming events.
- `'original+chat'` is a format that is the same as `'original'` except
it does NOT raise an attribute error `on_chat_model_start`
run_map: Optional shared map of run ID to run.
order_map: Optional shared map of run ID to trace ordering data.
_external_run_ids: Optional shared set of externally injected run IDs.
**kwargs: Additional keyword arguments that will be passed to the
superclass.
"""
@@ -83,22 +77,12 @@ class _TracerCore(ABC):
self._schema_format = _schema_format # For internal use only API will change.
self.run_map = run_map if run_map is not None else {}
self.run_map: dict[str, Run] = {}
"""Map of run ID to run. Cleared on run end."""
self.order_map = order_map if order_map is not None else {}
self.order_map: dict[UUID, tuple[UUID, str]] = {}
"""Map of run ID to (trace_id, dotted_order). Cleared when tracer GCed."""
self._external_run_ids: dict[str, int] = (
_external_run_ids if _external_run_ids is not None else {}
)
"""Refcount of active children per externally-injected run ID.
These runs are added to `run_map` so child runs can find their parent,
but they are not managed by the tracer's callback lifecycle. When
the last child finishes the entry is evicted to avoid memory leaks.
"""
@abstractmethod
def _persist_run(self, run: Run) -> Coroutine[Any, Any, None] | None:
"""Persist a run."""
@@ -129,9 +113,6 @@ class _TracerCore(ABC):
run.dotted_order += "." + current_dotted_order
if parent_run := self.run_map.get(str(run.parent_run_id)):
self._add_child_run(parent_run, run)
parent_key = str(run.parent_run_id)
if parent_key in self._external_run_ids:
self._external_run_ids[parent_key] += 1
else:
if self.log_missing_parent:
logger.debug(

View File

@@ -27,8 +27,6 @@ from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.schemas import Run
if TYPE_CHECKING:
from collections.abc import Mapping
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
@@ -36,22 +34,6 @@ logger = logging.getLogger(__name__)
_LOGGED = set()
_EXECUTOR: ThreadPoolExecutor | None = None
OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS: frozenset[str] = frozenset(
{"ls_agent_type"}
)
"""Allowlist of LangSmith-only tracing metadata keys that bypass the default
"first wins" merge semantics used when propagating tracer metadata to nested
runs.
Keys in this set are ALWAYS overridden by the nearest enclosing tracer config,
so nested callers (e.g. a subagent) can replace a value inherited from an
ancestor.
Keep this list very small: every key here loses the default "first wins"
protection and is always clobbered by the nearest enclosing tracer config.
Only keys that are strictly for LangSmith tracing bookkeeping should be added.
"""
def log_error_once(method: str, exception: Exception) -> None:
"""Log an error once.
@@ -142,8 +124,6 @@ class LangChainTracer(BaseTracer):
project_name: str | None = None,
client: Client | None = None,
tags: list[str] | None = None,
*,
metadata: Mapping[str, str] | None = None,
**kwargs: Any,
) -> None:
"""Initialize the LangChain tracer.
@@ -159,9 +139,6 @@ class LangChainTracer(BaseTracer):
tags: The tags.
Defaults to an empty list.
metadata: Additional metadata to include if it isn't already in the run.
Defaults to None.
**kwargs: Additional keyword arguments.
"""
super().__init__(**kwargs)
@@ -173,49 +150,6 @@ class LangChainTracer(BaseTracer):
self.tags = tags or []
self.latest_run: Run | None = None
self.run_has_token_event_map: dict[str, bool] = {}
self.tracing_metadata: dict[str, str] | None = (
dict(metadata) if metadata is not None else None
)
def copy_with_metadata_defaults(
self,
*,
metadata: Mapping[str, str] | None = None,
tags: list[str] | None = None,
) -> LangChainTracer:
"""Return a new tracer with merged tracer-only defaults."""
base_metadata = self.tracing_metadata
if metadata is None:
merged_metadata = dict(base_metadata) if base_metadata is not None else None
elif base_metadata is None:
merged_metadata = dict(metadata)
else:
merged_metadata = dict(base_metadata)
for key, value in metadata.items():
# For allowlisted LangSmith-only inheritable metadata keys
# (e.g. ``ls_agent_type``), nested callers are allowed to
# OVERRIDE the value inherited from an ancestor. For all
# other keys we keep the existing "first wins" behavior so
# that ancestor-provided tracing metadata is not accidentally
# clobbered by child runs.
if (
key not in merged_metadata
or key in OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS
):
merged_metadata[key] = value
merged_tags = sorted(set(self.tags + tags)) if tags else self.tags
return self.__class__(
example_id=self.example_id,
project_name=self.project_name,
client=self.client,
tags=merged_tags,
metadata=merged_metadata,
run_map=self.run_map,
order_map=self.order_map,
_external_run_ids=self._external_run_ids,
)
def _start_trace(self, run: Run) -> None:
if self.project_name:
@@ -329,7 +263,6 @@ class LangChainTracer(BaseTracer):
try:
run.extra["runtime"] = get_runtime_environment()
run.tags = self._get_tags(run)
_patch_missing_metadata(self, run)
if run.ls_client is not self.client:
run.ls_client = self.client
run.post()
@@ -465,26 +398,3 @@ class LangChainTracer(BaseTracer):
"""Wait for the given futures to complete."""
if self.client is not None:
self.client.flush()
def _patch_missing_metadata(self: LangChainTracer, run: Run) -> None:
if not self.tracing_metadata:
return
metadata = run.metadata
patched = None
for k, v in self.tracing_metadata.items():
# ``OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS`` are a small,
# LangSmith-only allowlist that bypasses the "first wins" merge
# so a nested caller (e.g. a subagent) can override a parent-set value.
if k not in metadata or k in OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS:
# Skip the copy when the value already matches (avoids cloning
# the shared dict in the common "already set" case). Use a
# ``k in metadata`` guard so a legitimate missing key whose
# tracer value happens to be ``None`` is still patched in.
if k in metadata and metadata[k] == v:
continue
if patched is None:
# Copy on first miss to avoid mutating the shared dict.
patched = {**metadata}
run.extra["metadata"] = patched
patched[k] = v

View File

@@ -199,6 +199,8 @@ def _convert_pydantic_to_openai_function(
" 1. Converting them to Pydantic models with JSON-compatible fields\n"
" 2. Using primitive types (str, int, float, bool, list, dict) instead\n"
" 3. Passing the data as serialized JSON strings\n\n"
"For more information, see: "
"https://python.langchain.com/docs/how_to/custom_tools/"
)
raise PydanticInvalidForJsonSchema(msg) from e
return _convert_json_schema_to_openai_function(
@@ -500,7 +502,6 @@ def convert_to_openai_function(
_WellKnownOpenAITools = (
"function",
"file_search",
"computer",
"computer_use_preview",
"code_interpreter",
"mcp",
@@ -571,7 +572,16 @@ def convert_to_openai_tool(
oai_tool["format"] = tool.metadata["format"]
return oai_tool
oai_function = convert_to_openai_function(tool, strict=strict)
return {"type": "function", "function": oai_function}
result: dict[str, Any] = {"type": "function", "function": oai_function}
if (
isinstance(tool, langchain_core.tools.base.BaseTool)
and hasattr(tool, "extras")
and isinstance(tool.extras, dict)
):
for key in ("defer_loading",):
if key in tool.extras:
result[key] = tool.extras[key]
return result
def convert_to_json_schema(

View File

@@ -242,12 +242,7 @@ def _create_subset_model_v2(
for field_name in field_names:
field = model.model_fields[field_name]
description = descriptions_.get(field_name, field.description)
field_kwargs: dict[str, Any] = {"description": description}
if field.default_factory is not None:
field_kwargs["default_factory"] = field.default_factory
else:
field_kwargs["default"] = field.default
field_info = FieldInfoV2(**field_kwargs)
field_info = FieldInfoV2(description=description, default=field.default)
if field.metadata:
field_info.metadata = field.metadata
fields[field_name] = (field.annotation, field_info)

View File

@@ -1,3 +1,3 @@
"""langchain-core version information and utilities."""
VERSION = "1.3.0"
VERSION = "1.2.17"

View File

@@ -21,7 +21,7 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
version = "1.3.0"
version = "1.2.17"
requires-python = ">=3.10.0,<4.0.0"
dependencies = [
"langsmith>=0.3.45,<1.0.0",
@@ -58,12 +58,12 @@ dev = [
"grandalf>=0.8.0,<1.0.0",
]
test = [
"pytest>=9.0.3,<10.0.0",
"pytest>=8.0.0,<10.0.0",
"freezegun>=1.2.2,<2.0.0",
"pytest-mock>=3.10.0,<4.0.0",
"syrupy>=5.0.0,<6.0.0",
"syrupy>=4.0.2,<6.0.0",
"pytest-watcher>=0.3.4,<1.0.0",
"pytest-asyncio>=1.3.0,<2.0.0",
"pytest-asyncio>=0.21.1,<2.0.0",
"grandalf>=0.8.0,<1.0.0",
"responses>=0.25.0,<1.0.0",
"pytest-socket>=0.7.0,<1.0.0",
@@ -77,9 +77,6 @@ test = [
]
test_integration = []
[tool.uv]
constraint-dependencies = ["pygments>=2.20.0"] # CVE-2026-4539
[tool.uv.sources]
langchain-tests = { path = "../standard-tests" }
langchain-text-splitters = { path = "../text-splitters" }
@@ -135,10 +132,8 @@ ignore-var-parameters = true # ignore missing documentation for *args and **kwa
"langchain_core/utils/mustache.py" = [ "PLW0603",]
"langchain_core/sys_info.py" = [ "T201",]
"tests/unit_tests/test_tools.py" = [ "ARG",]
"tests/**" = [ "ARG", "D1", "PLR2004", "S", "SLF",]
"tests/**" = [ "D1", "PLR2004", "S", "SLF",]
"scripts/**" = [ "INP", "S", "T201",]
"langchain_core/_security/_policy.py" = [ "EM101", "EM102", "TRY003", "B008", "TRY300",]
"langchain_core/_security/_transport.py" = [ "EM101", "EM102", "TRY003", "TRY203", "B008",]
[tool.coverage.run]
omit = [ "tests/*",]

View File

@@ -6,9 +6,8 @@ set -eu
errors=0
# make sure not importing from langchain or langchain_experimental
# allow langchain.agents and langchain.tools (v1 middleware)
git --no-pager grep "^from langchain\." . | grep -v ":from langchain\.agents" | grep -v ":from langchain\.tools" && errors=$((errors+1))
git --no-pager grep "^from langchain_experimental\." . && errors=$((errors+1))
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then

View File

@@ -17,6 +17,9 @@ def blockbuster() -> Iterator[BlockBuster]:
bb.functions[func]
.can_block_in("langchain_core/_api/internal.py", "is_caller_internal")
.can_block_in("langchain_core/runnables/base.py", "__repr__")
.can_block_in(
"langchain_core/beta/runnables/context.py", "aconfig_with_context"
)
)
for func in ["os.stat", "io.TextIOWrapper.read"]:

View File

@@ -3,13 +3,10 @@
import uuid
import warnings
from collections.abc import AsyncIterator, Iterator
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Literal
from unittest.mock import patch
import pytest
from pydantic import model_validator
from typing_extensions import Self, override
from typing_extensions import override
from langchain_core.callbacks import (
CallbackManagerForLLMRun,
@@ -19,19 +16,12 @@ from langchain_core.language_models import (
FakeListChatModel,
ParrotFakeChatModel,
)
from langchain_core.language_models._utils import (
_filter_invocation_params_for_tracing,
_normalize_messages,
)
from langchain_core.language_models.chat_models import (
SimpleChatModel,
_generate_response_from_error,
)
from langchain_core.language_models._utils import _normalize_messages
from langchain_core.language_models.chat_models import _generate_response_from_error
from langchain_core.language_models.fake_chat_models import (
FakeListChatModelError,
GenericFakeChatModel,
)
from langchain_core.language_models.model_profile import ModelProfile
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
@@ -45,7 +35,6 @@ from langchain_core.tracers import LogStreamCallbackHandler
from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.context import collect_runs
from langchain_core.tracers.event_stream import _AstreamEventsCallbackHandler
from langchain_core.tracers.langchain import LangChainTracer
from langchain_core.tracers.schemas import Run
from tests.unit_tests.fake.callbacks import (
BaseFakeCallbackHandler,
@@ -330,20 +319,6 @@ class FakeTracer(BaseTracer):
self.traced_run_ids.append(run.id)
class LangChainTracerRunCollector:
def __init__(self) -> None:
self.tracer = LangChainTracer()
self.runs: list[Run] = []
@contextmanager
def tracing_callback(self) -> Iterator[LangChainTracer]:
def collect_tracer_run(_: LangChainTracer, run: Run) -> None:
self.runs.append(run)
with patch.object(LangChainTracer, "_persist_run", new=collect_tracer_run):
yield self.tracer
def test_pass_run_id() -> None:
llm = FakeListChatModel(responses=["a", "b", "c"])
cb = FakeTracer()
@@ -1255,76 +1230,6 @@ def test_model_profiles() -> None:
assert model_with_profile.profile == {"max_input_tokens": 100}
def test_resolve_model_profile_hook_populates_profile() -> None:
"""_resolve_model_profile is called when profile is None."""
class ResolverModel(GenericFakeChatModel):
def _resolve_model_profile(self) -> ModelProfile | None:
return {"max_input_tokens": 500}
model = ResolverModel(messages=iter([]))
assert model.profile == {"max_input_tokens": 500}
def test_resolve_model_profile_hook_skipped_when_explicit() -> None:
"""_resolve_model_profile is NOT called when profile is set explicitly."""
class ResolverModel(GenericFakeChatModel):
def _resolve_model_profile(self) -> ModelProfile | None:
return {"max_input_tokens": 500}
model = ResolverModel(messages=iter([]), profile={"max_input_tokens": 999})
assert model.profile is not None
assert model.profile["max_input_tokens"] == 999
def test_resolve_model_profile_hook_exception_is_caught() -> None:
"""Model is still usable if _resolve_model_profile raises."""
class BrokenProfileModel(GenericFakeChatModel):
def _resolve_model_profile(self) -> ModelProfile | None:
msg = "profile file not found"
raise RuntimeError(msg)
with warnings.catch_warnings(record=True):
warnings.simplefilter("always")
model = BrokenProfileModel(messages=iter([]))
assert model.profile is None
def test_check_profile_keys_runs_despite_partner_override() -> None:
"""Verify _check_profile_keys fires even when _set_model_profile is overridden.
Because _check_profile_keys has a distinct validator name from
_set_model_profile, a partner override of the latter does not suppress
the key-checking validator.
"""
class PartnerModel(GenericFakeChatModel):
"""Simulates a partner that overrides _set_model_profile."""
@model_validator(mode="after")
def _set_model_profile(self) -> Self:
if self.profile is None:
profile: dict[str, Any] = {
"max_input_tokens": 100,
"partner_only_field": True,
}
self.profile = profile # type: ignore[assignment]
return self
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
model = PartnerModel(messages=iter([]))
assert model.profile is not None
assert model.profile.get("partner_only_field") is True
profile_warnings = [x for x in w if "Unrecognized keys" in str(x.message)]
assert len(profile_warnings) == 1
assert "partner_only_field" in str(profile_warnings[0].message)
class MockResponse:
"""Mock response for testing _generate_response_from_error."""
@@ -1413,100 +1318,3 @@ def test_generate_response_from_error_handles_streaming_response_failure() -> No
assert metadata["body"] is None
assert metadata["headers"] == {"content-type": "application/json"}
assert metadata["status_code"] == 400
def test_filter_invocation_params_for_tracing() -> None:
"""Test that large fields are filtered from invocation params for tracing."""
params = {
"temperature": 0.7,
"tools": [{"name": "test_tool"}],
"functions": [{"name": "test_function"}],
"messages": [{"role": "system", "content": "test"}],
"response_format": {"type": "json_object"},
}
filtered = _filter_invocation_params_for_tracing(params)
# Should include temperature
assert "temperature" in filtered
assert filtered["temperature"] == 0.7
# Should exclude these large fields
assert "tools" not in filtered
assert "functions" not in filtered
assert "messages" not in filtered
assert "response_format" not in filtered
class FakeChatModelWithInvocationParams(SimpleChatModel):
"""Fake chat model with invocation params for testing tracing."""
temperature: float = 0.7
@property
@override
def _llm_type(self) -> str:
return "fake-chat-model-with-invocation-params"
@property
@override
def _identifying_params(self) -> dict[str, Any]:
return {
"temperature": self.temperature,
"tools": [{"name": "test_tool"}],
"functions": [{"name": "test_function"}],
"messages": [{"role": "system", "content": "test"}],
"response_format": {"type": "json_object"},
}
@override
def _call(
self,
messages: list[BaseMessage],
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any,
) -> str:
return "test response"
def test_invocation_params_passed_to_tracer_metadata() -> None:
"""Test that invocation params are passed to tracer metadata."""
llm = FakeChatModelWithInvocationParams()
collector = LangChainTracerRunCollector()
with collector.tracing_callback() as tracer:
llm.invoke([HumanMessage(content="Hello")], config={"callbacks": [tracer]})
assert len(collector.runs) == 1
run = collector.runs[0]
key = "LANGSMITH_LANGGRAPH_API_VARIANT"
if key in run.extra["metadata"]:
del run.extra["metadata"][key]
assert run.extra == {
"batch_size": 1,
"invocation_params": {
"_type": "fake-chat-model-with-invocation-params",
"functions": [{"name": "test_function"}],
"messages": [{"content": "test", "role": "system"}],
"response_format": {"type": "json_object"},
"stop": None,
"temperature": 0.7,
"tools": [{"name": "test_tool"}],
},
"metadata": {
"_type": "fake-chat-model-with-invocation-params",
"ls_integration": "langchain_chat_model",
"ls_model_type": "chat",
"ls_provider": "fakechatmodelwithinvocationparams",
"ls_temperature": 0.7,
"revision_id": run.extra["metadata"]["revision_id"],
"stop": None,
"temperature": 0.7,
},
"options": {"stop": None},
"runtime": run.extra["runtime"],
}
assert run.metadata == run.extra["metadata"]

View File

@@ -13,7 +13,6 @@ from langchain_core.language_models import (
BaseLLM,
FakeListLLM,
)
from langchain_core.language_models._utils import _filter_invocation_params_for_tracing
from langchain_core.outputs import Generation, GenerationChunk, LLMResult
from langchain_core.tracers.context import collect_runs
from tests.unit_tests.fake.callbacks import (
@@ -285,94 +284,3 @@ def test_get_ls_params() -> None:
ls_params = llm._get_ls_params(stop=["stop"])
assert ls_params["ls_stop"] == ["stop"]
def test_filter_invocation_params_for_tracing() -> None:
"""Test that large fields are filtered from invocation params for tracing."""
params = {
"temperature": 0.7,
"tools": [{"name": "test_tool"}],
"functions": [{"name": "test_function"}],
"messages": [{"role": "system", "content": "test"}],
"response_format": {"type": "json_object"},
}
filtered = _filter_invocation_params_for_tracing(params)
# Should include temperature
assert "temperature" in filtered
assert filtered["temperature"] == 0.7
# Should exclude these large fields
assert "tools" not in filtered
assert "functions" not in filtered
assert "messages" not in filtered
assert "response_format" not in filtered
class FakeLLMWithInvocationParams(BaseLLM):
"""Fake LLM with invocation params for testing tracing."""
temperature: float = 0.7
@property
@override
def _llm_type(self) -> str:
return "fake-llm-with-invocation-params"
@property
@override
def _identifying_params(self) -> dict[str, Any]:
return {
"temperature": self.temperature,
"tools": [{"name": "test_tool"}],
"functions": [{"name": "test_function"}],
"messages": [{"role": "system", "content": "test"}],
"response_format": {"type": "json_object"},
}
@override
def _generate(
self,
prompts: list[str],
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any,
) -> LLMResult:
generations = [[Generation(text="test response")]]
return LLMResult(generations=generations)
@override
async def _agenerate(
self,
prompts: list[str],
stop: list[str] | None = None,
run_manager: AsyncCallbackManagerForLLMRun | None = None,
**kwargs: Any,
) -> LLMResult:
generations = [[Generation(text="test response")]]
return LLMResult(generations=generations)
async def test_llm_invocation_params_filtered_in_stream() -> None:
"""Test that invocation params are filtered when streaming."""
# Create a custom LLM that supports streaming
class FakeStreamingLLM(FakeLLMWithInvocationParams):
@override
def _stream(
self,
prompt: str,
stop: list[str] | None = None,
run_manager: CallbackManagerForLLMRun | None = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
yield GenerationChunk(text="test ")
streaming_llm = FakeStreamingLLM()
with collect_runs() as cb:
list(streaming_llm.stream("Hello", config={"callbacks": [cb]}))
assert len(cb.traced_runs) == 1
run = cb.traced_runs[0]
# Verify the run was traced
assert run.extra is not None

View File

@@ -1,87 +0,0 @@
"""Tests for model profile types and utilities."""
import warnings
from typing import Any
from unittest.mock import patch
from pydantic import BaseModel, ConfigDict, Field
from langchain_core.language_models.model_profile import (
ModelProfile,
_warn_unknown_profile_keys,
)
class TestModelProfileExtraAllow:
"""Verify extra='allow' on ModelProfile TypedDict."""
def test_accepts_declared_keys(self) -> None:
profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True}
assert profile["max_input_tokens"] == 100
def test_extra_keys_accepted_via_typed_dict(self) -> None:
"""ModelProfile TypedDict allows extra keys at construction."""
profile = ModelProfile(
max_input_tokens=100,
unknown_future_field="value", # type: ignore[typeddict-unknown-key]
)
assert profile["unknown_future_field"] == "value" # type: ignore[typeddict-item]
def test_extra_keys_survive_pydantic_validation(self) -> None:
"""Extra keys pass through even when parent model forbids extras."""
class StrictModel(BaseModel):
model_config = ConfigDict(extra="forbid")
profile: ModelProfile | None = Field(default=None)
m = StrictModel(
profile={
"max_input_tokens": 100,
"unknown_future_field": True,
}
)
assert m.profile is not None
assert m.profile.get("unknown_future_field") is True
class TestWarnUnknownProfileKeys:
"""Tests for _warn_unknown_profile_keys."""
def test_warns_on_extra_keys(self) -> None:
profile: dict[str, Any] = {
"max_input_tokens": 100,
"future_field": True,
"another": "val",
}
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
_warn_unknown_profile_keys(profile) # type: ignore[arg-type]
assert len(w) == 1
assert "another" in str(w[0].message)
assert "future_field" in str(w[0].message)
assert "upgrading langchain-core" in str(w[0].message)
def test_silent_on_declared_keys_only(self) -> None:
profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True}
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
_warn_unknown_profile_keys(profile)
assert len(w) == 0
def test_silent_on_empty_profile(self) -> None:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
_warn_unknown_profile_keys({})
assert len(w) == 0
def test_survives_get_type_hints_failure(self) -> None:
"""Falls back to silent skip on TypeError from get_type_hints."""
profile: dict[str, Any] = {"max_input_tokens": 100, "extra": True}
with patch(
"langchain_core.language_models.model_profile.get_type_hints",
side_effect=TypeError("broken"),
):
_warn_unknown_profile_keys(profile) # type: ignore[arg-type]

View File

@@ -1,4 +1,3 @@
import contextlib
import json
from typing import Any
@@ -7,9 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, SecretStr
from langchain_core.documents import Document
from langchain_core.load import InitValidator, Serializable, dumpd, dumps, load, loads
from langchain_core.load.load import ALL_SERIALIZABLE_MAPPINGS
from langchain_core.load.serializable import _is_field_useful
from langchain_core.load.validators import CLASS_INIT_VALIDATORS, _bedrock_validator
from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.prompts import (
@@ -894,267 +891,3 @@ class TestJinja2SecurityBlocking:
# jinja2 should be blocked by default
with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):
load(serialized_jinja2, allowed_objects=[PromptTemplate])
class TestClassSpecificValidatorsInLoad:
"""Tests that load() properly integrates with class-specific validators."""
def test_validator_registry_keys_in_serializable_mapping(self) -> None:
"""All CLASS_INIT_VALIDATORS keys must exist in ALL_SERIALIZABLE_MAPPINGS."""
all_known_paths = set(ALL_SERIALIZABLE_MAPPINGS.keys()) | set(
ALL_SERIALIZABLE_MAPPINGS.values()
)
for key in CLASS_INIT_VALIDATORS:
assert key in all_known_paths, (
f"{key} in CLASS_INIT_VALIDATORS but not in "
f"ALL_SERIALIZABLE_MAPPINGS keys or values"
)
def test_init_validator_still_called_without_class_validator(self) -> None:
"""Test init_validator fires for classes without a class-specific validator."""
msg = AIMessage(content="test")
serialized = dumpd(msg)
init_validator_called = []
def custom_init_validator(
_class_path: tuple[str, ...], _kwargs: dict[str, Any]
) -> None:
init_validator_called.append(True)
loaded = load(
serialized,
allowed_objects=[AIMessage],
init_validator=custom_init_validator,
)
assert loaded == msg
assert len(init_validator_called) == 1
def test_load_blocks_bedrock_with_endpoint_url(self) -> None:
"""Test that load() blocks Bedrock deserialization with `endpoint_url`."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain", "chat_models", "bedrock", "ChatBedrock"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_bedrock_chat_legacy_alias(self) -> None:
"""Test that load() blocks BedrockChat (legacy alias) with `endpoint_url`."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain", "chat_models", "bedrock", "BedrockChat"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_bedrock_converse_with_base_url(self) -> None:
"""Test that load() blocks ChatBedrockConverse with `base_url`."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain_aws", "chat_models", "ChatBedrockConverse"],
"kwargs": {
"model": "anthropic.claude-v2",
"base_url": "http://malicious-site.com",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_anthropic_bedrock_legacy_alias(self) -> None:
"""Test load() blocks ChatAnthropicBedrock with `endpoint_url`."""
payload = {
"lc": 1,
"type": "constructor",
"id": [
"langchain",
"chat_models",
"anthropic_bedrock",
"ChatAnthropicBedrock",
],
"kwargs": {
"model_id": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_anthropic_bedrock_via_resolved_path(self) -> None:
"""Test load() blocks ChatAnthropicBedrock via resolved import path."""
payload = {
"lc": 1,
"type": "constructor",
"id": [
"langchain_aws",
"chat_models",
"anthropic",
"ChatAnthropicBedrock",
],
"kwargs": {
"model_id": "anthropic.claude-v2",
"base_url": "http://malicious-site.com",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_bedrock_via_resolved_import_path(self) -> None:
"""Test load() blocks Bedrock via resolved import path (bypass defense)."""
payload = {
"lc": 1,
"type": "constructor",
"id": [
"langchain_aws",
"chat_models",
"bedrock_converse",
"ChatBedrockConverse",
],
"kwargs": {
"model": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_both_class_and_general_validators_fire(self) -> None:
"""Test both class-specific and general init_validator fire together."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain", "llms", "bedrock", "Bedrock"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"region_name": "us-west-2",
},
}
init_validator_called: list[bool] = []
def custom_init_validator(
_class_path: tuple[str, ...], _kwargs: dict[str, Any]
) -> None:
init_validator_called.append(True)
# May fail at import time if langchain_aws not installed, that's OK.
# We only care that the init_validator was called before that point.
with contextlib.suppress(ModuleNotFoundError):
load(
payload,
allowed_objects="all",
init_validator=custom_init_validator,
)
assert len(init_validator_called) == 1
def test_load_blocks_bedrock_llm_via_resolved_path(self) -> None:
"""Test load() blocks BedrockLLM via resolved import path."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain_aws", "llms", "bedrock", "BedrockLLM"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_load_blocks_chat_bedrock_via_resolved_path(self) -> None:
"""Test load() blocks ChatBedrock via resolved JS import path."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain_aws", "chat_models", "ChatBedrock"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"base_url": "http://malicious-site.com",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all")
def test_class_validator_fires_with_init_validator_none(self) -> None:
"""Class-specific validators cannot be bypassed via init_validator=None."""
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain", "chat_models", "bedrock", "ChatBedrock"],
"kwargs": {
"model_id": "anthropic.claude-v2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
},
}
with pytest.raises(ValueError, match="SSRF"):
load(payload, allowed_objects="all", init_validator=None)
class TestBedrockValidators:
"""Tests for Bedrock SSRF protection validator."""
def test_bedrock_validator_blocks_endpoint_url(self) -> None:
"""Test that `_bedrock_validator` blocks `endpoint_url` parameter."""
class_path = ("langchain", "llms", "bedrock", "BedrockLLM")
kwargs = {
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"region_name": "us-west-2",
"endpoint_url": "http://169.254.169.254/latest/meta-data",
}
with pytest.raises(ValueError, match=r"endpoint_url.*SSRF"):
_bedrock_validator(class_path, kwargs)
def test_bedrock_validator_blocks_base_url(self) -> None:
"""Test that `_bedrock_validator` blocks `base_url` parameter."""
class_path = ("langchain_aws", "chat_models", "ChatBedrockConverse")
kwargs = {
"model": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"region_name": "us-west-2",
"base_url": "http://malicious-site.com",
}
with pytest.raises(ValueError, match=r"base_url.*SSRF"):
_bedrock_validator(class_path, kwargs)
def test_bedrock_validator_blocks_both_parameters(self) -> None:
"""Test that `_bedrock_validator` blocks when both params are present."""
class_path = ("langchain", "chat_models", "bedrock", "ChatBedrock")
kwargs = {
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"region_name": "us-west-2",
"endpoint_url": "http://attacker.com",
"base_url": "http://another-attacker.com",
}
with pytest.raises(ValueError, match="SSRF") as exc_info:
_bedrock_validator(class_path, kwargs)
error_msg = str(exc_info.value)
assert "endpoint_url" in error_msg
assert "base_url" in error_msg
def test_bedrock_validator_allows_safe_parameters(self) -> None:
"""Test that `_bedrock_validator` allows safe parameters through."""
class_path = ("langchain", "llms", "bedrock", "Bedrock")
kwargs = {
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"region_name": "us-west-2",
"credentials_profile_name": "default",
"streaming": True,
"model_kwargs": {"temperature": 0.7},
}
_bedrock_validator(class_path, kwargs)

View File

@@ -815,7 +815,7 @@ def test_parse_with_different_pydantic_2_v1() -> None:
temperature: int
forecast: str
# Can't get pydantic to work here due to the odd typing of trying to support
# Can't get pydantic to work here due to the odd typing of tryig to support
# both v1 and v2 in the same codebase.
parser = PydanticToolsParser(tools=[Forecast])
message = AIMessage(
@@ -848,7 +848,7 @@ def test_parse_with_different_pydantic_2_proper() -> None:
temperature: int
forecast: str
# Can't get pydantic to work here due to the odd typing of trying to support
# Can't get pydantic to work here due to the odd typing of tryig to support
# both v1 and v2 in the same codebase.
parser = PydanticToolsParser(tools=[Forecast])
message = AIMessage(

View File

@@ -1951,24 +1951,6 @@ def test_fstring_rejects_invalid_identifier_variable_names() -> None:
assert result.messages[0].content == expected # type: ignore[attr-defined]
def test_fstring_rejects_nested_replacement_field_in_image_url() -> None:
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
ChatPromptTemplate.from_messages(
[
(
"human",
[
{
"type": "image_url",
"image_url": {"url": "{img:{img.__class__.__name__}}"},
}
],
)
],
template_format="f-string",
)
def test_mustache_template_attribute_access_vulnerability() -> None:
"""Test that Mustache template injection is blocked.

View File

@@ -1,9 +1,4 @@
import json
import pytest
from langchain_core.load import load, loads
from langchain_core.prompts import PromptTemplate
from langchain_core.load import load
from langchain_core.prompts.dict import DictPromptTemplate
@@ -37,82 +32,3 @@ def test_deserialize_legacy() -> None:
template={"type": "audio", "audio": "{audio_data}"}, template_format="f-string"
)
assert load(ser, allowed_objects=[DictPromptTemplate]) == expected
def test_dict_prompt_template_rejects_attribute_access_to_rich_objects() -> None:
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
DictPromptTemplate(
template={"output": "{message.additional_kwargs[secret]}"},
template_format="f-string",
)
def test_dict_prompt_template_loads_payload_rejects_attribute_access() -> None:
payload = json.dumps(
{
"lc": 1,
"type": "constructor",
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
"kwargs": {
"template": {"output": "{message.additional_kwargs[secret]}"},
"template_format": "f-string",
},
}
)
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
loads(payload)
def test_dict_prompt_template_dumpd_round_trip_rejects_attribute_access() -> None:
payload = {
"lc": 1,
"type": "constructor",
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
"kwargs": {
"template": {"output": "{message.additional_kwargs[secret]}"},
"template_format": "f-string",
},
}
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
load(payload, allowed_objects=[DictPromptTemplate])
def test_dict_prompt_template_deserialization_rejects_attribute_access() -> None:
payload = json.dumps(
{
"lc": 1,
"type": "constructor",
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
"kwargs": {
"template": {"output": "{name.__class__.__name__}"},
"template_format": "f-string",
},
}
)
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
loads(payload)
def test_dict_prompt_template_legacy_deserialization_rejects_attribute_access() -> None:
ser = {
"type": "constructor",
"lc": 1,
"id": ["langchain_core", "prompts", "message", "_DictMessagePromptTemplate"],
"kwargs": {
"template_format": "f-string",
"template": {"output": "{name.__class__.__name__}"},
},
}
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
load(ser, allowed_objects=[DictPromptTemplate])
def test_prompt_template_blocks_attribute_access() -> None:
with pytest.raises(
ValueError, match="Variable names cannot contain attribute access"
):
PromptTemplate.from_template("{name.__class__}", template_format="f-string")

View File

@@ -1,10 +1,7 @@
import json
import pytest
from langchain_core.load import dump, loads
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.image import ImagePromptTemplate
def test_image_prompt_template_deserializable() -> None:
@@ -110,31 +107,3 @@ def test_image_prompt_template_deserializable_old() -> None:
}
),
)
def test_image_prompt_template_rejects_attribute_access_in_template_values() -> None:
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
ImagePromptTemplate(
input_variables=["image"],
template={"url": "https://example.com/{image.__class__.__name__}.png"},
)
def test_image_prompt_template_deserialization_rejects_attribute_access() -> None:
payload = json.dumps(
{
"lc": 1,
"type": "constructor",
"id": ["langchain", "prompts", "image", "ImagePromptTemplate"],
"kwargs": {
"template": {
"url": "https://example.com/{image.__class__.__name__}.png"
},
"input_variables": ["image"],
"template_format": "f-string",
},
}
)
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
loads(payload)

View File

@@ -1,6 +1,5 @@
"""Test loading functionality."""
import json
import os
from collections.abc import Iterator
from contextlib import contextmanager
@@ -8,14 +7,8 @@ from pathlib import Path
import pytest
from langchain_core._api import suppress_langchain_deprecation_warning
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain_core.prompts.loading import (
_load_examples,
_load_template,
load_prompt,
load_prompt_from_config,
)
from langchain_core.prompts.loading import load_prompt
from langchain_core.prompts.prompt import PromptTemplate
EXAMPLE_DIR = (Path(__file__).parent.parent / "examples").absolute()
@@ -34,8 +27,7 @@ def change_directory(dir_path: Path) -> Iterator[None]:
def test_loading_from_yaml() -> None:
"""Test loading from yaml file."""
with suppress_langchain_deprecation_warning():
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.yaml")
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.yaml")
expected_prompt = PromptTemplate(
input_variables=["adjective"],
partial_variables={"content": "dogs"},
@@ -46,8 +38,7 @@ def test_loading_from_yaml() -> None:
def test_loading_from_json() -> None:
"""Test loading from json file."""
with suppress_langchain_deprecation_warning():
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.json")
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.json")
expected_prompt = PromptTemplate(
input_variables=["adjective", "content"],
template="Tell me a {adjective} joke about {content}.",
@@ -58,20 +49,14 @@ def test_loading_from_json() -> None:
def test_loading_jinja_from_json() -> None:
"""Test that loading jinja2 format prompts from JSON raises ValueError."""
prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.json"
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"),
):
with pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"):
load_prompt(prompt_path)
def test_loading_jinja_from_yaml() -> None:
"""Test that loading jinja2 format prompts from YAML raises ValueError."""
prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.yaml"
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"),
):
with pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"):
load_prompt(prompt_path)
@@ -81,9 +66,8 @@ def test_saving_loading_round_trip(tmp_path: Path) -> None:
input_variables=["adjective", "content"],
template="Tell me a {adjective} joke about {content}.",
)
with suppress_langchain_deprecation_warning():
simple_prompt.save(file_path=tmp_path / "prompt.yaml")
loaded_prompt = load_prompt(tmp_path / "prompt.yaml")
simple_prompt.save(file_path=tmp_path / "prompt.yaml")
loaded_prompt = load_prompt(tmp_path / "prompt.yaml")
assert loaded_prompt == simple_prompt
few_shot_prompt = FewShotPromptTemplate(
@@ -99,18 +83,15 @@ def test_saving_loading_round_trip(tmp_path: Path) -> None:
],
suffix="Input: {adjective}\nOutput:",
)
with suppress_langchain_deprecation_warning():
few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml")
loaded_prompt = load_prompt(tmp_path / "few_shot.yaml")
few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml")
loaded_prompt = load_prompt(tmp_path / "few_shot.yaml")
assert loaded_prompt == few_shot_prompt
def test_loading_with_template_as_file() -> None:
"""Test loading when the template is a file."""
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
prompt = load_prompt(
"simple_prompt_with_template_file.json", allow_dangerous_paths=True
)
with change_directory(EXAMPLE_DIR):
prompt = load_prompt("simple_prompt_with_template_file.json")
expected_prompt = PromptTemplate(
input_variables=["adjective", "content"],
template="Tell me a {adjective} joke about {content}.",
@@ -118,233 +99,10 @@ def test_loading_with_template_as_file() -> None:
assert prompt == expected_prompt
def test_load_template_rejects_absolute_path(tmp_path: Path) -> None:
secret = tmp_path / "secret.txt"
secret.write_text("SECRET")
config = {"template_path": str(secret)}
with pytest.raises(ValueError, match="is absolute"):
_load_template("template", config)
def test_load_template_rejects_traversal() -> None:
config = {"template_path": "../../etc/secret.txt"}
with pytest.raises(ValueError, match=r"contains '\.\.' components"):
_load_template("template", config)
def test_load_template_allows_dangerous_paths_when_opted_in(tmp_path: Path) -> None:
secret = tmp_path / "secret.txt"
secret.write_text("SECRET")
config = {"template_path": str(secret)}
result = _load_template("template", config, allow_dangerous_paths=True)
assert result["template"] == "SECRET"
def test_load_examples_rejects_absolute_path(tmp_path: Path) -> None:
examples_file = tmp_path / "examples.json"
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
config = {"examples": str(examples_file)}
with pytest.raises(ValueError, match="is absolute"):
_load_examples(config)
def test_load_examples_rejects_traversal() -> None:
config = {"examples": "../../secrets/data.json"}
with pytest.raises(ValueError, match=r"contains '\.\.' components"):
_load_examples(config)
def test_load_examples_allows_dangerous_paths_when_opted_in(tmp_path: Path) -> None:
examples_file = tmp_path / "examples.json"
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
config = {"examples": str(examples_file)}
result = _load_examples(config, allow_dangerous_paths=True)
assert result["examples"] == [{"input": "a", "output": "b"}]
def test_load_prompt_from_config_rejects_absolute_template_path(
tmp_path: Path,
) -> None:
secret = tmp_path / "secret.txt"
secret.write_text("SECRET")
config = {
"_type": "prompt",
"template_path": str(secret),
"input_variables": [],
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match="is absolute"),
):
load_prompt_from_config(config)
def test_load_prompt_from_config_rejects_traversal_template_path() -> None:
config = {
"_type": "prompt",
"template_path": "../../../tmp/secret.txt",
"input_variables": [],
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match=r"contains '\.\.' components"),
):
load_prompt_from_config(config)
def test_load_prompt_from_config_allows_dangerous_paths(tmp_path: Path) -> None:
secret = tmp_path / "secret.txt"
secret.write_text("SECRET")
config = {
"_type": "prompt",
"template_path": str(secret),
"input_variables": [],
}
with suppress_langchain_deprecation_warning():
prompt = load_prompt_from_config(config, allow_dangerous_paths=True)
assert isinstance(prompt, PromptTemplate)
assert prompt.template == "SECRET"
def test_load_prompt_from_config_few_shot_rejects_traversal_examples() -> None:
config = {
"_type": "few_shot",
"input_variables": ["query"],
"prefix": "Examples:",
"example_prompt": {
"_type": "prompt",
"input_variables": ["input", "output"],
"template": "{input}: {output}",
},
"examples": "../../../../.docker/config.json",
"suffix": "Query: {query}",
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match=r"contains '\.\.' components"),
):
load_prompt_from_config(config)
def test_load_prompt_from_config_few_shot_rejects_absolute_examples(
tmp_path: Path,
) -> None:
examples_file = tmp_path / "examples.json"
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
config = {
"_type": "few_shot",
"input_variables": ["query"],
"prefix": "Examples:",
"example_prompt": {
"_type": "prompt",
"input_variables": ["input", "output"],
"template": "{input}: {output}",
},
"examples": str(examples_file),
"suffix": "Query: {query}",
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match="is absolute"),
):
load_prompt_from_config(config)
def test_load_prompt_from_config_few_shot_rejects_absolute_example_prompt_path(
tmp_path: Path,
) -> None:
prompt_file = tmp_path / "prompt.json"
prompt_file.write_text(
json.dumps(
{
"_type": "prompt",
"template": "{input}: {output}",
"input_variables": ["input", "output"],
}
)
)
config = {
"_type": "few_shot",
"input_variables": ["query"],
"prefix": "Examples:",
"example_prompt_path": str(prompt_file),
"examples": [{"input": "a", "output": "b"}],
"suffix": "Query: {query}",
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match="is absolute"),
):
load_prompt_from_config(config)
def test_symlink_txt_to_py_is_blocked(tmp_path: Path) -> None:
"""Test symlink redirects cannot get around file extension check."""
sensitive = tmp_path / "sensitive_source.py"
sensitive.write_text("INTERNAL_SECRET='ABC-123-XYZ'")
symlink = tmp_path / "exploit_link.txt"
symlink.symlink_to(sensitive)
config = {
"_type": "prompt",
"template_path": "exploit_link.txt",
"input_variables": [],
}
original_dir = Path.cwd()
try:
os.chdir(tmp_path)
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError), # noqa: PT011
):
load_prompt_from_config(config)
finally:
os.chdir(original_dir)
def test_symlink_jinja2_rce_is_blocked(tmp_path: Path) -> None:
"""Check jinja2 templates cannot be used to perform RCE via symlinks."""
payload = tmp_path / "rce_payload.py"
payload.write_text(
"{{ self.__init__.__globals__.__builtins__"
".__import__('os').popen('id').read() }}"
)
symlink = tmp_path / "rce_bypass.txt"
symlink.symlink_to(payload)
config = {
"_type": "prompt",
"template_path": str(symlink),
"template_format": "jinja2",
"input_variables": [],
}
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError), # noqa: PT011
):
load_prompt_from_config(config, allow_dangerous_paths=True)
def test_save_symlink_to_py_is_blocked(tmp_path: Path) -> None:
"""Test that save() resolves symlinks before checking the file extension."""
target = tmp_path / "malicious.py"
symlink = tmp_path / "output.json"
symlink.symlink_to(target)
prompt = PromptTemplate(input_variables=["name"], template="Hello {name}")
with (
suppress_langchain_deprecation_warning(),
pytest.raises(ValueError, match="must be json or yaml"),
):
prompt.save(symlink)
assert not target.exists()
def test_loading_few_shot_prompt_from_yaml() -> None:
"""Test loading few shot prompt from yaml."""
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
prompt = load_prompt("few_shot_prompt.yaml", allow_dangerous_paths=True)
with change_directory(EXAMPLE_DIR):
prompt = load_prompt("few_shot_prompt.yaml")
expected_prompt = FewShotPromptTemplate(
input_variables=["adjective"],
prefix="Write antonyms for the following words.",
@@ -363,8 +121,8 @@ def test_loading_few_shot_prompt_from_yaml() -> None:
def test_loading_few_shot_prompt_from_json() -> None:
"""Test loading few shot prompt from json."""
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
prompt = load_prompt("few_shot_prompt.json", allow_dangerous_paths=True)
with change_directory(EXAMPLE_DIR):
prompt = load_prompt("few_shot_prompt.json")
expected_prompt = FewShotPromptTemplate(
input_variables=["adjective"],
prefix="Write antonyms for the following words.",
@@ -383,10 +141,8 @@ def test_loading_few_shot_prompt_from_json() -> None:
def test_loading_few_shot_prompt_when_examples_in_config() -> None:
"""Test loading few shot prompt when the examples are in the config."""
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
prompt = load_prompt(
"few_shot_prompt_examples_in.json", allow_dangerous_paths=True
)
with change_directory(EXAMPLE_DIR):
prompt = load_prompt("few_shot_prompt_examples_in.json")
expected_prompt = FewShotPromptTemplate(
input_variables=["adjective"],
prefix="Write antonyms for the following words.",
@@ -405,10 +161,8 @@ def test_loading_few_shot_prompt_when_examples_in_config() -> None:
def test_loading_few_shot_prompt_example_prompt() -> None:
"""Test loading few shot when the example prompt is in its own file."""
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
prompt = load_prompt(
"few_shot_prompt_example_prompt.json", allow_dangerous_paths=True
)
with change_directory(EXAMPLE_DIR):
prompt = load_prompt("few_shot_prompt_example_prompt.json")
expected_prompt = FewShotPromptTemplate(
input_variables=["adjective"],
prefix="Write antonyms for the following words.",

View File

@@ -1,12 +1,7 @@
import pytest
from packaging import version
from langchain_core.prompts.string import (
check_valid_template,
get_template_variables,
mustache_schema,
)
from langchain_core.utils.formatting import formatter
from langchain_core.prompts.string import get_template_variables, mustache_schema
from langchain_core.utils.pydantic import PYDANTIC_VERSION
PYDANTIC_VERSION_AT_LEAST_29 = version.parse("2.9") <= PYDANTIC_VERSION
@@ -44,47 +39,3 @@ def test_get_template_variables_mustache_nested() -> None:
expected = ["user"]
actual = get_template_variables(template, template_format)
assert actual == expected
def test_get_template_variables_rejects_nested_replacement_field_in_format_spec() -> (
None
):
template = "{name:{name.__class__.__name__}}"
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
get_template_variables(template, "f-string")
def test_formatter_rejects_nested_replacement_field_in_format_spec() -> None:
template = "{name:{name.__class__.__name__}}"
with pytest.raises(ValueError, match="Invalid format specifier"):
formatter.format(template, name="hello")
def test_check_valid_template_rejects_nested_replacement_field_in_format_spec() -> None:
template = "{name:{name.__class__.__name__}}"
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
check_valid_template(template, "f-string", ["name"])
@pytest.mark.parametrize(
("template", "kwargs", "expected_variables", "expected_output"),
[
("{value:.2f}", {"value": 3.14159}, ["value"], "3.14"),
("{value:>10}", {"value": "cat"}, ["value"], " cat"),
("{value:*^10}", {"value": "cat"}, ["value"], "***cat****"),
("{value:,}", {"value": 1234567}, ["value"], "1,234,567"),
("{value:%}", {"value": 0.125}, ["value"], "12.500000%"),
("{value!r}", {"value": "cat"}, ["value"], "'cat'"),
],
)
def test_f_string_templates_allow_safe_format_specs(
template: str,
kwargs: dict[str, object],
expected_variables: list[str],
expected_output: str,
) -> None:
assert get_template_variables(template, "f-string") == expected_variables
assert formatter.format(template, **kwargs) == expected_output

View File

@@ -16,7 +16,6 @@ from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHan
from langchain_core.runnables import RunnableBinding, RunnablePassthrough
from langchain_core.runnables.config import (
RunnableConfig,
_get_langsmith_inheritable_metadata_from_config,
_set_config_context,
ensure_config,
merge_configs,
@@ -62,7 +61,7 @@ def test_ensure_config() -> None:
assert config["configurable"] is not arg["configurable"]
assert config == {
"tags": ["tag1", "tag2"],
"metadata": {"foo": "bar"},
"metadata": {"foo": "bar", "baz": "qux", "something": "else"},
"callbacks": [arg["callbacks"][0]],
"recursion_limit": 100,
"configurable": {"baz": "qux", "something": "else"},
@@ -72,164 +71,6 @@ def test_ensure_config() -> None:
}
def test_ensure_config_copies_model_to_metadata() -> None:
config = ensure_config(
{
"configurable": {
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"checkpoint_ns": "ns-1",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"model": "gpt-4o",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"some_api_key": "opaque-token",
"custom_setting": {"nested": True},
"none_value": None,
},
"metadata": {"nooverride": 18},
}
)
assert config["metadata"] == {
"nooverride": 18,
"model": "gpt-4o",
"checkpoint_ns": "ns-1",
}
assert config["configurable"] == {
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"checkpoint_ns": "ns-1",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"model": "gpt-4o",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"some_api_key": "opaque-token",
"custom_setting": {"nested": True},
"none_value": None,
}
def test_ensure_config_metadata_is_not_overridden_by_configurable_model() -> None:
config = ensure_config(
{
"configurable": {
"model": "from-configurable",
"run_id": None,
"checkpoint_ns": "from-configurable",
},
"metadata": {
"model": "from-metadata",
"run_id": "from-metadata",
"checkpoint_ns": "from-metadata",
},
}
)
assert config["metadata"] == {
"model": "from-metadata",
"run_id": "from-metadata",
"checkpoint_ns": "from-metadata",
}
assert config["configurable"] == {
"model": "from-configurable",
"run_id": None,
"checkpoint_ns": "from-configurable",
}
def test_ensure_config_copies_top_level_model_to_metadata() -> None:
config = ensure_config(
cast(
"RunnableConfig",
{
"model": "gpt-4o",
"metadata": {"nooverride": 18},
},
)
)
assert config["metadata"] == {"nooverride": 18, "model": "gpt-4o"}
assert config["configurable"] == {"model": "gpt-4o"}
def test_ensure_config_copies_top_level_checkpoint_ns_to_metadata() -> None:
config = ensure_config(
cast(
"RunnableConfig",
{
"checkpoint_ns": "ns-1",
"metadata": {"nooverride": 18},
},
)
)
assert config["metadata"] == {"nooverride": 18, "checkpoint_ns": "ns-1"}
assert config["configurable"] == {"checkpoint_ns": "ns-1"}
def test_get_langsmith_inheritable_metadata_from_config_uses_previous_copy_rules() -> (
None
):
config = ensure_config(
cast(
"RunnableConfig",
{
"something": "else",
"metadata": {
"foo": "bar",
"model": "from-metadata",
"checkpoint_ns": "from-metadata",
},
"configurable": {
"baz": "qux",
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"checkpoint_ns": "from-configurable",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"model": "from-configurable",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"api_key": "should-not-propagate",
"__secret_key": "should-not-propagate",
"temperature": 0.5,
"streaming": True,
"custom_setting": {"nested": True},
"none_value": None,
},
},
)
)
assert _get_langsmith_inheritable_metadata_from_config(config) == {
"something": "else",
"baz": "qux",
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"temperature": 0.5,
"streaming": True,
}
async def test_merge_config_callbacks() -> None:
manager: RunnableConfig = {
"callbacks": CallbackManager(handlers=[StdOutCallbackHandler()])

View File

@@ -1162,7 +1162,7 @@ async def test_with_config_metadata_passthrough(mocker: MockerFixture) -> None:
"callbacks": None,
"recursion_limit": 25,
"configurable": {"hello": "there", "__secret_key": "nahnah"},
"metadata": {"bye": "now"},
"metadata": {"hello": "there", "bye": "now"},
},
)
spy.reset_mock()

View File

@@ -2843,7 +2843,7 @@ async def test_tool_error_event_includes_tool_call_id() -> None:
"""Test that on_tool_error event includes tool_call_id when provided."""
@tool
def failing_tool(x: int) -> str:
def failing_tool(x: int) -> str: # noqa: ARG001
"""A tool that always fails."""
msg = "Tool execution failed"
raise ValueError(msg)
@@ -2883,7 +2883,7 @@ async def test_tool_error_event_tool_call_id_is_none_when_not_provided() -> None
"""Test that on_tool_error event has tool_call_id=None when not provided."""
@tool
def failing_tool_no_id(x: int) -> str:
def failing_tool_no_id(x: int) -> str: # noqa: ARG001
"""A tool that always fails."""
msg = "Tool execution failed"
raise ValueError(msg)

View File

@@ -1,10 +1,7 @@
from __future__ import annotations
import asyncio
import concurrent.futures
import json
import sys
import threading
import uuid
from inspect import isasyncgenfunction
from typing import TYPE_CHECKING, Any, Literal
@@ -15,15 +12,13 @@ from langsmith import Client, RunTree, get_current_run_tree, traceable
from langsmith.run_helpers import tracing_context
from langsmith.utils import get_env_var
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.callbacks.manager import CallbackManager
from langchain_core.runnables.base import RunnableLambda, RunnableParallel
from langchain_core.tracers.langchain import LangChainTracer
if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Callable, Coroutine, Generator, Mapping
from collections.abc import AsyncGenerator, Callable, Coroutine, Generator
from langchain_core.runnables.config import RunnableConfig
from langchain_core.callbacks import BaseCallbackHandler
def _get_posts(client: Client) -> list[dict[str, Any]]:
@@ -48,15 +43,12 @@ def _get_posts(client: Client) -> list[dict[str, Any]]:
def _create_tracer_with_mocked_client(
project_name: str | None = None,
tags: list[str] | None = None,
metadata: Mapping[str, str] | None = None,
) -> LangChainTracer:
mock_session = MagicMock()
mock_client_ = Client(
session=mock_session, api_key="test", auto_batch_tracing=False
)
return LangChainTracer(
client=mock_client_, project_name=project_name, tags=tags, metadata=metadata
)
return LangChainTracer(client=mock_client_, project_name=project_name, tags=tags)
def test_tracing_context() -> None:
@@ -83,38 +75,6 @@ def test_tracing_context() -> None:
assert all(post["session_name"] == project_name for post in posts)
def test_inheritable_metadata_respects_explicit_metadata_with_tracing_context() -> None:
"""Tracer defaults fill missing keys while run metadata keeps precedence."""
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
def my_func(x: int) -> int:
return x
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={
"tenant": "from_tracer",
"shared": "from_tracer",
},
)
with tracing_context(enabled=True, client=tracer.client):
my_func.invoke(
1,
{
"callbacks": callbacks,
"metadata": {"shared": "from_run", "explicit": "from_run"},
},
)
posts = _get_posts(tracer.client)
assert len(posts) == 1
metadata = posts[0].get("extra", {}).get("metadata", {})
assert metadata["tenant"] == "from_tracer"
assert metadata["shared"] == "from_run"
assert metadata["explicit"] == "from_run"
def test_config_traceable_handoff() -> None:
if hasattr(get_env_var, "cache_clear"):
get_env_var.cache_clear() # type: ignore[attr-defined]
@@ -506,10 +466,7 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
):
collected: dict[str, RunTree] = {}
def collect_langsmith_run(run: RunTree) -> None:
collected[str(run.id)] = run
def collect_tracer_run(_: LangChainTracer, run: RunTree) -> None:
def collect_run(run: RunTree) -> None:
collected[str(run.id)] = run
if parent_type == "ls":
@@ -519,8 +476,7 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
return child.invoke("foo")
assert (
parent(langsmith_extra={"on_end": collect_langsmith_run, "run_id": rid})
== "foo"
parent(langsmith_extra={"on_end": collect_run, "run_id": rid}) == "foo"
)
assert collected
@@ -531,10 +487,9 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
return child.invoke("foo")
tracer = LangChainTracer()
with patch.object(LangChainTracer, "_persist_run", new=collect_tracer_run):
assert (
parent.invoke(..., {"run_id": rid, "callbacks": [tracer]}) == "foo" # type: ignore[attr-defined]
)
tracer._persist_run = collect_run # type: ignore[method-assign]
assert parent.invoke(..., {"run_id": rid, "callbacks": [tracer]}) == "foo" # type: ignore[attr-defined]
run = collected.get(str(rid))
assert run is not None
@@ -553,749 +508,3 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
assert "afoo" in kitten_run.tags # type: ignore[operator]
assert grandchild_run is not None
assert kitten_run.dotted_order.startswith(grandchild_run.dotted_order)
def test_traceable_parent_run_map_cleanup() -> None:
"""External RunTree injected into run_map is cleaned up when its child ends.
When a `@traceable` function invokes a LangChain `Runnable`, the
`RunTree` is added to the tracer's `run_map` so child runs can
reference it. Previously the entry was never removed, causing a
memory leak that grew with every call.
Uses an explicit tracer so we can inspect `run_map` directly after
the call — the `_configure` insertion path is identical regardless
of whether the tracer was created internally or passed in.
"""
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
def child(x: str) -> str:
return x
with tracing_context(client=tracer.client, enabled=True):
@traceable
def parent(x: str) -> str:
return child.invoke(x, config={"callbacks": [tracer]})
parent("hello")
assert tracer.run_map == {}, (
f"run_map should be empty but contains: "
f"{[getattr(v, 'name', k) for k, v in tracer.run_map.items()]}"
)
def test_traceable_parent_run_map_cleanup_with_sibling_children() -> None:
"""External parent survives in run_map until ALL its children finish.
When a `@traceable` function invokes a chain with multiple steps
(e.g. prompt | llm), each step is a sibling child of the same
intermediate run. The external parent must stay in `run_map` until
the last child completes, not be removed when the first child ends.
"""
from langchain_core.language_models.fake_chat_models import ( # noqa: PLC0415
FakeListChatModel,
)
from langchain_core.prompts import ChatPromptTemplate # noqa: PLC0415
tracer = _create_tracer_with_mocked_client()
prompt = ChatPromptTemplate.from_messages([("system", "bot"), ("human", "{input}")])
llm = FakeListChatModel(responses=["hi"])
chain = prompt | llm
with tracing_context(client=tracer.client, enabled=True):
@traceable
def parent(x: dict) -> Any:
return chain.invoke(x, config={"callbacks": [tracer]})
result = parent({"input": "hello"})
assert result is not None
assert tracer.run_map == {}, (
f"run_map should be empty but contains: "
f"{[getattr(v, 'name', k) for k, v in tracer.run_map.items()]}"
)
def test_traceable_parent_run_map_no_runttree_accumulation() -> None:
"""RunTree objects reachable from run_map must not grow across calls.
This is the memory-level regression test: a long-lived tracer is
reused across many @traceable → Runnable invocations. Without the
fix, each call leaves a RunTree (plus its child tree) in run_map,
causing unbounded growth. With the fix, run_map is empty after
every call, so the count stays flat.
"""
import gc # noqa: PLC0415
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
def child(x: str) -> str:
return x
counts: list[int] = []
with tracing_context(client=tracer.client, enabled=True):
@traceable
def parent(x: str) -> str:
return child.invoke(x, config={"callbacks": [tracer]})
for _ in range(5):
parent("hello")
gc.collect()
# Count RunTree objects reachable from the tracer's run_map.
run_map_runtrees = sum(
1 + len(v.child_runs) for v in tracer.run_map.values()
)
counts.append(run_map_runtrees)
# With the fix every call cleans up → counts are all 0.
# Without the fix they grow: [1, 2, 3, 4, 5] (or more with children).
assert counts == [0, 0, 0, 0, 0], (
f"RunTree objects in run_map should not accumulate, got counts: {counts}"
)
class TestTracerMetadataThroughInvoke:
"""Tests for tracer metadata merging through invoke calls."""
def test_tracer_metadata_applied_to_all_runs(self) -> None:
"""Tracer metadata appears on every run when no config metadata is set."""
tracer = _create_tracer_with_mocked_client(
metadata={"env": "prod", "service": "api"}
)
@RunnableLambda
def child(x: int) -> int:
return x + 1
@RunnableLambda
def parent(x: int) -> int:
return child.invoke(x)
parent.invoke(1, {"callbacks": [tracer]})
posts = _get_posts(tracer.client)
assert len(posts) == 2
for post in posts:
md = post.get("extra", {}).get("metadata", {})
assert md.get("env") == "prod", f"run {post['name']} missing env"
assert md.get("service") == "api", f"run {post['name']} missing service"
def test_config_metadata_takes_precedence(self) -> None:
"""Config metadata wins over tracer metadata for overlapping keys."""
tracer = _create_tracer_with_mocked_client(
metadata={"env": "prod", "tracer_only": "yes"}
)
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(
1,
{
"callbacks": [tracer],
"metadata": {"env": "staging", "config_only": "yes"},
},
)
posts = _get_posts(tracer.client)
assert len(posts) == 1
md = posts[0].get("extra", {}).get("metadata", {})
# Config wins for overlapping key
assert md["env"] == "staging"
# Both non-overlapping keys are present
assert md["tracer_only"] == "yes"
assert md["config_only"] == "yes"
def test_nested_calls_inherit_config_metadata(self) -> None:
"""Child runs inherit config metadata; tracer metadata fills gaps."""
tracer = _create_tracer_with_mocked_client(
metadata={"tracer_key": "tracer_val"}
)
@RunnableLambda
def child(x: int) -> int:
return x + 1
@RunnableLambda
def parent(x: int) -> int:
return child.invoke(x)
parent.invoke(
1,
{
"callbacks": [tracer],
"metadata": {"config_key": "config_val"},
},
)
posts = _get_posts(tracer.client)
assert len(posts) == 2
name_to_md = {
post["name"]: post.get("extra", {}).get("metadata", {}) for post in posts
}
# Both parent and child should have config metadata (inherited)
# and tracer metadata (patched in)
for name, md in name_to_md.items():
assert md.get("config_key") == "config_val", f"{name} missing config_key"
assert md.get("tracer_key") == "tracer_val", f"{name} missing tracer_key"
def test_tracer_metadata_not_applied_to_sibling_handlers(self) -> None:
"""Tracer metadata is not applied to other callback handlers.
`_patch_missing_metadata` copies the metadata dict before patching,
so the callback manager's shared metadata dict is not mutated.
Other handlers should only see config metadata, not tracer metadata.
"""
tracer = _create_tracer_with_mocked_client(
metadata={"tracer_key": "tracer_val"}
)
received_metadata: list[dict[str, Any]] = []
class MetadataCapture(BaseCallbackHandler):
"""Callback handler that records metadata from chain events."""
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
received_metadata.append(dict(kwargs.get("metadata", {})))
capture = MetadataCapture()
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(
1,
{
"callbacks": [tracer, capture],
"metadata": {"shared_key": "shared_val"},
},
)
assert len(received_metadata) >= 1
for md in received_metadata:
assert md["shared_key"] == "shared_val"
assert "tracer_key" not in md
# But the posted run DOES have tracer metadata
posts = _get_posts(tracer.client)
assert len(posts) >= 1
for post in posts:
post_md = post.get("extra", {}).get("metadata", {})
assert post_md["shared_key"] == "shared_val"
assert post_md["tracer_key"] == "tracer_val"
def test_tracer_metadata_with_no_config_metadata(self) -> None:
"""When no config metadata is set, tracer metadata is the sole source."""
tracer = _create_tracer_with_mocked_client(
metadata={"only_from_tracer": "value"}
)
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(1, {"callbacks": [tracer]})
posts = _get_posts(tracer.client)
assert len(posts) == 1
md = posts[0].get("extra", {}).get("metadata", {})
assert md["only_from_tracer"] == "value"
def test_empty_tracer_metadata_does_not_interfere(self) -> None:
"""Tracer with no metadata does not interfere with config metadata."""
tracer = _create_tracer_with_mocked_client(metadata=None)
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(
1,
{"callbacks": [tracer], "metadata": {"config_key": "config_val"}},
)
posts = _get_posts(tracer.client)
assert len(posts) == 1
md = posts[0].get("extra", {}).get("metadata", {})
assert md["config_key"] == "config_val"
def test_inheritable_metadata_nested_runs_preserve_parent_child_shape() -> None:
"""Concurrent nested runs keep parent-child linkage within each invocation."""
tracer = _create_tracer_with_mocked_client()
barrier = threading.Barrier(2)
@RunnableLambda
def child(x: int) -> int:
barrier.wait()
return x + 1
@RunnableLambda
def parent(x: int) -> int:
return child.invoke(x)
def invoke_for_tenant(tenant: str, value: int) -> int:
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"tenant": tenant},
)
return parent.invoke(value, {"callbacks": callbacks})
threads = [
threading.Thread(target=invoke_for_tenant, args=("alpha", 1)),
threading.Thread(target=invoke_for_tenant, args=("beta", 2)),
]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
posts = _get_posts(tracer.client)
assert len(posts) == 4
parents = [post for post in posts if post["name"] == "parent"]
children = [post for post in posts if post["name"] == "child"]
assert len(parents) == 2
assert len(children) == 2
parent_ids = {parent["id"] for parent in parents}
assert {child["parent_run_id"] for child in children} == parent_ids
assert {
post.get("extra", {}).get("metadata", {}).get("tenant") for post in posts
} == {
"alpha",
"beta",
}
def test_inheritable_metadata_parallel_children_keep_tenant_isolation() -> None:
"""Concurrent roots with parallel child runs keep tenant metadata isolated."""
tracer = _create_tracer_with_mocked_client()
barrier = threading.Barrier(4)
@RunnableLambda
def add_one(x: int) -> int:
barrier.wait()
return x + 1
@RunnableLambda
def add_two(x: int) -> int:
barrier.wait()
return x + 2
parallel = RunnableParallel(first=add_one, second=add_two)
def invoke_for_tenant(tenant: str, value: int) -> dict[str, int]:
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"tenant": tenant},
)
return parallel.invoke(value, {"callbacks": callbacks})
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
list(executor.map(invoke_for_tenant, ["alpha", "beta"], [1, 2]))
posts = _get_posts(tracer.client)
assert len(posts) == 6
assert {
post.get("extra", {}).get("metadata", {}).get("tenant") for post in posts
} == {
"alpha",
"beta",
}
posts_by_trace: dict[str, list[dict[str, Any]]] = {}
for post in posts:
posts_by_trace.setdefault(post["trace_id"], []).append(post)
assert len(posts_by_trace) == 2
assert all(len(trace_posts) == 3 for trace_posts in posts_by_trace.values())
@pytest.mark.skipif(
sys.version_info < (3, 11), reason="Asyncio context vars require Python 3.11+"
)
async def test_langsmith_inheritable_metadata_mixed_sync_async_managers_isolated() -> (
None
):
"""Sync and async manager configure paths can overlap without metadata sharing."""
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
async def async_runnable(x: int) -> int:
await asyncio.sleep(0)
return x + 1
@RunnableLambda
def sync_runnable(x: int) -> int:
return x + 1
async def run_sync() -> int:
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"path": "sync"},
)
return await asyncio.to_thread(
sync_runnable.invoke, 1, {"callbacks": callbacks}
)
async def run_async() -> int:
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"path": "async"},
)
return await async_runnable.ainvoke(1, {"callbacks": callbacks})
await asyncio.gather(run_sync(), run_async())
posts = _get_posts(tracer.client)
assert len(posts) == 2
assert {
post.get("extra", {}).get("metadata", {}).get("path") for post in posts
} == {
"sync",
"async",
}
class TestLangsmithInheritableTracingDefaultsInConfigure:
"""Tests for LangSmith inheritable tracing defaults in configure."""
def test_langsmith_inheritable_metadata_applied_via_configure(self) -> None:
"""langsmith_inheritable_metadata flows to a copied tracer."""
tracer = _create_tracer_with_mocked_client()
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"env": "prod", "service": "api"},
)
lc_tracers = [h for h in cm.handlers if isinstance(h, LangChainTracer)]
assert len(lc_tracers) == 1
assert lc_tracers[0] is not tracer
assert lc_tracers[0].tracing_metadata == {"env": "prod", "service": "api"}
assert tracer.tracing_metadata is None
def test_langsmith_inheritable_metadata_does_not_overwrite_tracer_metadata(
self,
) -> None:
"""Tracer metadata takes precedence over langsmith_inheritable_metadata."""
tracer = _create_tracer_with_mocked_client(metadata={"env": "staging"})
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"env": "prod", "service": "api"},
)
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
assert tracer.tracing_metadata == {"env": "staging"}
assert lc_tracer.tracing_metadata == {"env": "staging", "service": "api"}
def test_tracing_context_metadata_merged_into_langsmith_inheritable_metadata(
self,
) -> None:
"""Tracing-context metadata merges into tracer defaults.
LangSmith metadata keeps precedence on collisions.
"""
tracer = _create_tracer_with_mocked_client()
with tracing_context(
enabled=True,
client=tracer.client,
metadata={"trace_only": "value", "shared": "trace"},
):
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={
"shared": "langsmith",
"tenant": "alpha",
},
)
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
assert lc_tracer.tracing_metadata == {
"trace_only": "value",
"shared": "langsmith",
"tenant": "alpha",
}
def test_langsmith_inheritable_metadata_end_to_end(self) -> None:
"""langsmith_inheritable_metadata in configure propagates to posted runs."""
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
def my_func(x: int) -> int:
return x
# Use langsmith_inheritable_metadata through the config callbacks path
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"env": "prod"},
)
my_func.invoke(1, {"callbacks": cm})
posts = _get_posts(tracer.client)
assert len(posts) == 1
md = posts[0].get("extra", {}).get("metadata", {})
assert md["env"] == "prod"
def test_runnable_config_copies_configurable_values_to_tracing_metadata(
self,
) -> None:
tracer = _create_tracer_with_mocked_client()
@RunnableLambda
def my_func(x: int) -> int:
return x
config: RunnableConfig = {
"callbacks": [tracer],
"metadata": {
"something": "else",
"checkpoint_ns": "from-metadata",
"model": "from-metadata",
},
"configurable": {
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"checkpoint_ns": "from-configurable",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"model": "from-configurable",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"api_key": "should-not-propagate",
"__secret_key": "should-not-propagate",
"temperature": 0.5,
"streaming": True,
"custom_setting": {"nested": True},
"none_value": None,
},
}
my_func.invoke(1, config)
posts = _get_posts(tracer.client)
assert len(posts) == 1
md = posts[0].get("extra", {}).get("metadata", {})
assert {
key: md[key]
for key in (
"something",
"thread_id",
"checkpoint_id",
"task_id",
"run_id",
"assistant_id",
"graph_id",
"user_id",
"cron_id",
"langgraph_auth_user_id",
"temperature",
"streaming",
"model",
"checkpoint_ns",
)
} == {
"something": "else",
"thread_id": "th-123",
"checkpoint_id": "ckpt-1",
"task_id": "task-1",
"run_id": "run-456",
"assistant_id": "asst-789",
"graph_id": "graph-0",
"user_id": "uid-1",
"cron_id": "cron-1",
"langgraph_auth_user_id": "user-1",
"temperature": 0.5,
"streaming": True,
"model": "from-metadata",
"checkpoint_ns": "from-metadata",
}
assert "api_key" not in md
assert "__secret_key" not in md
assert "custom_setting" not in md
assert "none_value" not in md
def test_langsmith_inheritable_metadata_does_not_affect_non_tracer_handlers(
self,
) -> None:
"""langsmith_inheritable_metadata only applies to tracers."""
tracer = _create_tracer_with_mocked_client()
received_metadata: list[dict[str, Any]] = []
class MetadataCapture(BaseCallbackHandler):
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
received_metadata.append(dict(kwargs.get("metadata", {})))
capture = MetadataCapture()
cm = CallbackManager.configure(
inheritable_callbacks=[tracer, capture],
langsmith_inheritable_metadata={"tracer_only": "yes"},
)
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(1, {"callbacks": cm})
# Non-tracer handler should NOT see langsmith_inheritable_metadata
assert len(received_metadata) >= 1
for md in received_metadata:
assert "tracer_only" not in md
# But the tracer's posted runs SHOULD have it
posts = _get_posts(tracer.client)
assert len(posts) >= 1
for post in posts:
post_md = post.get("extra", {}).get("metadata", {})
assert post_md["tracer_only"] == "yes"
def test_no_langsmith_inheritable_metadata_is_noop(self) -> None:
"""Passing langsmith_inheritable_metadata=None does not alter tracer state."""
tracer = _create_tracer_with_mocked_client()
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata=None,
)
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
assert lc_tracer is tracer
assert tracer.tracing_metadata is None
def test_langsmith_inheritable_tags_applied_via_configure(self) -> None:
"""langsmith_inheritable_tags flow to a copied tracer."""
tracer = _create_tracer_with_mocked_client()
tracer.tags = ["existing"]
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_tags=["tenant:alpha", "existing"],
)
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
assert lc_tracer is not tracer
assert lc_tracer.tags == ["existing", "tenant:alpha"]
assert tracer.tags == ["existing"]
def test_inheritable_tags_do_not_affect_non_tracer_handlers(self) -> None:
"""langsmith_inheritable_tags only apply to tracers."""
tracer = _create_tracer_with_mocked_client()
received_tags: list[list[str]] = []
class TagCapture(BaseCallbackHandler):
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
received_tags.append(list(kwargs.get("tags", [])))
capture = TagCapture()
cm = CallbackManager.configure(
inheritable_callbacks=[tracer, capture],
langsmith_inheritable_tags=["tracer-only"],
)
@RunnableLambda
def my_func(x: int) -> int:
return x
my_func.invoke(1, {"callbacks": cm})
assert received_tags
assert all("tracer-only" not in tags for tags in received_tags)
posts = _get_posts(tracer.client)
assert posts
assert all("tracer-only" in post.get("tags", []) for post in posts)
def test_langsmith_inheritable_metadata_copies_handlers_without_mutating_original(
self,
) -> None:
"""Configured manager copies tracers and leaves the original unchanged."""
tracer = _create_tracer_with_mocked_client()
cm = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"env": "prod"},
)
handler_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
inheritable_tracer = next(
h for h in cm.inheritable_handlers if isinstance(h, LangChainTracer)
)
assert handler_tracer is not tracer
assert inheritable_tracer is not tracer
assert tracer.tracing_metadata is None
def test_langsmith_inheritable_metadata_configure_isolated_per_manager(
self,
) -> None:
"""Separate configure calls keep tracer-only defaults isolated."""
tracer = _create_tracer_with_mocked_client()
alpha_manager = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"tenant": "alpha"},
)
beta_manager = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"tenant": "beta"},
)
alpha_tracer = next(
handler
for handler in alpha_manager.handlers
if isinstance(handler, LangChainTracer)
)
beta_tracer = next(
handler
for handler in beta_manager.handlers
if isinstance(handler, LangChainTracer)
)
assert tracer.tracing_metadata is None
assert alpha_tracer is not tracer
assert beta_tracer is not tracer
assert alpha_tracer is not beta_tracer
assert alpha_tracer.tracing_metadata == {"tenant": "alpha"}
assert beta_tracer.tracing_metadata == {"tenant": "beta"}
assert alpha_tracer.run_map is tracer.run_map
assert beta_tracer.run_map is tracer.run_map
assert alpha_tracer.order_map is tracer.order_map
assert beta_tracer.order_map is tracer.order_map
def test_inheritable_metadata_concurrent_invocations_remain_isolated(
self,
) -> None:
"""Parallel invocations through copied tracers keep metadata separated."""
tracer = _create_tracer_with_mocked_client()
barrier = threading.Barrier(2)
@traceable
def traced_leaf(x: int) -> int:
barrier.wait()
return x
@RunnableLambda
def my_func(x: int) -> int:
return traced_leaf(x)
def invoke_for_tenant(tenant: str, value: int) -> int:
callbacks = CallbackManager.configure(
inheritable_callbacks=[tracer],
langsmith_inheritable_metadata={"tenant": tenant},
)
return my_func.invoke(value, {"callbacks": callbacks})
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
list(executor.map(invoke_for_tenant, ["alpha", "beta"], [1, 2]))
posts = _get_posts(tracer.client)
assert len(posts) == 4
assert {post["name"] for post in posts} == {"my_func", "traced_leaf"}
my_func_posts = [post for post in posts if post["name"] == "my_func"]
assert len(my_func_posts) == 2
assert {
post.get("extra", {}).get("metadata", {}).get("tenant")
for post in my_func_posts
} == {"alpha", "beta"}
assert tracer.run_map == {}
assert len(tracer.order_map) == 2

Some files were not shown because too many files have changed in this diff Show More