mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-22 03:33:59 +00:00
Compare commits
1 Commits
langchain-
...
feat/opena
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
136aed4dc9 |
4
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
4
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@@ -6,8 +6,6 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
|
||||
|
||||
Thank you for taking the time to file a bug report.
|
||||
|
||||
For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
|
||||
@@ -22,7 +20,7 @@ body:
|
||||
- type: checkboxes
|
||||
id: checks
|
||||
attributes:
|
||||
label: Submission checklist
|
||||
label: Checked other resources
|
||||
description: Please confirm and check all the following options.
|
||||
options:
|
||||
- label: This is a bug, not a usage question.
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
6
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
@@ -6,8 +6,6 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
|
||||
|
||||
Thank you for taking the time to request a new feature.
|
||||
|
||||
Use this to request NEW FEATURES or ENHANCEMENTS in LangChain. For bug reports, please use the bug report template. For usage questions and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
|
||||
@@ -20,12 +18,10 @@ body:
|
||||
* [LangChain ChatBot](https://chat.langchain.com/)
|
||||
* [GitHub search](https://github.com/langchain-ai/langchain),
|
||||
* [LangChain Forum](https://forum.langchain.com/),
|
||||
|
||||
**Note:** Do not begin work on a PR unless explicitly assigned to this issue by a maintainer.
|
||||
- type: checkboxes
|
||||
id: checks
|
||||
attributes:
|
||||
label: Submission checklist
|
||||
label: Checked other resources
|
||||
description: Please confirm and check all the following options.
|
||||
options:
|
||||
- label: This is a feature request, not a bug report or usage question.
|
||||
|
||||
13
.github/PULL_REQUEST_TEMPLATE.md
vendored
13
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,13 +1,6 @@
|
||||
Fixes #
|
||||
|
||||
---
|
||||
|
||||
<!-- Keep the `Fixes #xx` keyword at the very top and update the issue number — this auto-closes the issue on merge. Replace this comment with a 1-2 sentence description of your change. No `# Summary` header; the description is the summary. -->
|
||||
(Replace this entire block of text)
|
||||
|
||||
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
|
||||
|
||||
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
|
||||
|
||||
If you paste a large clearly AI generated description here your PR may be IGNORED or CLOSED!
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
|
||||
@@ -23,7 +16,7 @@ Thank you for contributing to LangChain! Follow these steps to have your pull re
|
||||
2. PR description:
|
||||
|
||||
- Write 1-2 sentences summarizing the change.
|
||||
- The `Fixes #xx` line at the top is **required** for external contributions — update the issue number and keep the keyword. This links your PR to the approved issue and auto-closes it on merge.
|
||||
- If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
|
||||
- If there are any breaking changes, please clearly describe them.
|
||||
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.
|
||||
|
||||
@@ -35,7 +28,7 @@ Thank you for contributing to LangChain! Follow these steps to have your pull re
|
||||
|
||||
Additional guidelines:
|
||||
|
||||
- All external PRs must link to an issue or discussion where a solution has been approved by a maintainer, and you must be assigned to that issue. PRs without prior approval will be closed.
|
||||
- We ask that if you use generative AI for your contribution, you include a disclaimer.
|
||||
- PRs should not touch more than one package unless absolutely necessary.
|
||||
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so by a maintainer.
|
||||
|
||||
|
||||
2
.github/actions/uv_setup/action.yml
vendored
2
.github/actions/uv_setup/action.yml
vendored
@@ -27,7 +27,7 @@ runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
52
.github/dependabot.yml
vendored
52
.github/dependabot.yml
vendored
@@ -8,19 +8,12 @@ updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
github-actions:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
@@ -28,19 +21,12 @@ updates:
|
||||
- "/libs/langchain/"
|
||||
- "/libs/langchain_v1/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
langchain-deps:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
@@ -60,19 +46,12 @@ updates:
|
||||
- "/libs/partners/qdrant/"
|
||||
- "/libs/partners/xai/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
partner-deps:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
@@ -80,16 +59,9 @@ updates:
|
||||
- "/libs/standard-tests/"
|
||||
- "/libs/model-profiles/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
other-deps:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
128
.github/pr-file-labeler.yml
vendored
Normal file
128
.github/pr-file-labeler.yml
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
# Label PRs (config)
|
||||
# Automatically applies labels based on changed files and branch patterns
|
||||
|
||||
# Core packages
|
||||
core:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/core/**/*"
|
||||
|
||||
langchain-classic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain/**/*"
|
||||
|
||||
langchain:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain_v1/**/*"
|
||||
|
||||
standard-tests:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/standard-tests/**/*"
|
||||
|
||||
model-profiles:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/model-profiles/**/*"
|
||||
|
||||
text-splitters:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/text-splitters/**/*"
|
||||
|
||||
# Partner integrations
|
||||
integration:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/**/*"
|
||||
|
||||
anthropic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/anthropic/**/*"
|
||||
|
||||
chroma:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/chroma/**/*"
|
||||
|
||||
deepseek:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/deepseek/**/*"
|
||||
|
||||
exa:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/exa/**/*"
|
||||
|
||||
fireworks:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/fireworks/**/*"
|
||||
|
||||
groq:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/groq/**/*"
|
||||
|
||||
huggingface:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/huggingface/**/*"
|
||||
|
||||
mistralai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/mistralai/**/*"
|
||||
|
||||
nomic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/nomic/**/*"
|
||||
|
||||
ollama:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/ollama/**/*"
|
||||
|
||||
openai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/openai/**/*"
|
||||
|
||||
openrouter:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/openrouter/**/*"
|
||||
|
||||
perplexity:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/perplexity/**/*"
|
||||
|
||||
qdrant:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/qdrant/**/*"
|
||||
|
||||
xai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/xai/**/*"
|
||||
|
||||
github_actions:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ".github/workflows/**/*"
|
||||
- ".github/actions/**/*"
|
||||
|
||||
dependencies:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "**/requirements*.txt"
|
||||
- "**/poetry.lock"
|
||||
51
.github/scripts/check_diff.py
vendored
51
.github/scripts/check_diff.py
vendored
@@ -33,22 +33,18 @@ LANGCHAIN_DIRS = [
|
||||
"libs/model-profiles",
|
||||
]
|
||||
|
||||
# Packages with VCR cassette-backed integration tests.
|
||||
# These get a playback-only CI check to catch stale cassettes.
|
||||
VCR_PACKAGES = {
|
||||
"libs/partners/openai",
|
||||
}
|
||||
|
||||
# When set to True, we are ignoring core dependents
|
||||
# in order to be able to get CI to pass for each individual
|
||||
# package that depends on core
|
||||
# e.g. if you touch core, we don't then add textsplitters/etc to CI
|
||||
IGNORE_CORE_DEPENDENTS = False
|
||||
|
||||
# Ignored partners are removed from dependents but still run if directly edited
|
||||
# ignored partners are removed from dependents
|
||||
# but still run if directly edited
|
||||
IGNORED_PARTNERS = [
|
||||
# remove huggingface from dependents because of CI instability
|
||||
# specifically in huggingface jobs
|
||||
# https://github.com/langchain-ai/langchain/issues/25558
|
||||
"huggingface",
|
||||
]
|
||||
|
||||
@@ -132,23 +128,12 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
|
||||
return _get_pydantic_test_configs(dir_)
|
||||
|
||||
if job == "codspeed":
|
||||
# CPU simulation (<1% variance, Valgrind-based) is the default.
|
||||
# Partners with heavy SDK inits use walltime instead to keep CI fast.
|
||||
CODSPEED_WALLTIME_DIRS = {
|
||||
"libs/core",
|
||||
"libs/partners/fireworks", # ~328s under simulation
|
||||
"libs/partners/openai", # 6 benchmarks, ~6 min under simulation
|
||||
}
|
||||
mode = "walltime" if dir_ in CODSPEED_WALLTIME_DIRS else "simulation"
|
||||
return [
|
||||
{
|
||||
"working-directory": dir_,
|
||||
"python-version": "3.13",
|
||||
"codspeed-mode": mode,
|
||||
}
|
||||
]
|
||||
if dir_ == "libs/core":
|
||||
py_versions = ["3.13"]
|
||||
elif dir_ == "libs/core":
|
||||
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
# custom logic for specific directories
|
||||
elif dir_ in {"libs/partners/chroma"}:
|
||||
py_versions = ["3.10", "3.13"]
|
||||
else:
|
||||
py_versions = ["3.10", "3.14"]
|
||||
|
||||
@@ -226,14 +211,6 @@ def _get_configs_for_multi_dirs(
|
||||
dirs = list(dirs_to_run["extended-test"])
|
||||
elif job == "codspeed":
|
||||
dirs = list(dirs_to_run["codspeed"])
|
||||
elif job == "vcr-tests":
|
||||
# Only run VCR tests for packages that have cassettes and are affected
|
||||
all_affected = set(
|
||||
add_dependents(
|
||||
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
|
||||
)
|
||||
)
|
||||
dirs = [d for d in VCR_PACKAGES if d in all_affected]
|
||||
else:
|
||||
raise ValueError(f"Unknown job: {job}")
|
||||
|
||||
@@ -282,8 +259,6 @@ if __name__ == "__main__":
|
||||
|
||||
if file.startswith("libs/core"):
|
||||
dirs_to_run["codspeed"].add("libs/core")
|
||||
if file.startswith("libs/langchain_v1"):
|
||||
dirs_to_run["codspeed"].add("libs/langchain_v1")
|
||||
if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
|
||||
# add that dir and all dirs after in LANGCHAIN_DIRS
|
||||
# for extended testing
|
||||
@@ -316,13 +291,8 @@ if __name__ == "__main__":
|
||||
if not filename.startswith(".")
|
||||
] != ["README.md"]:
|
||||
dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
|
||||
# Only add to codspeed if the partner has benchmarks and is not ignored
|
||||
if (
|
||||
partner_dir not in IGNORED_PARTNERS
|
||||
and os.path.isdir(
|
||||
f"libs/partners/{partner_dir}/tests/benchmarks"
|
||||
)
|
||||
):
|
||||
# Skip codspeed for partners without benchmarks or in IGNORED_PARTNERS
|
||||
if partner_dir not in IGNORED_PARTNERS:
|
||||
dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}")
|
||||
# Skip if the directory was deleted or is just a tombstone readme
|
||||
elif file.startswith("libs/"):
|
||||
@@ -355,7 +325,6 @@ if __name__ == "__main__":
|
||||
"dependencies",
|
||||
"test-pydantic",
|
||||
"codspeed",
|
||||
"vcr-tests",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
2
.github/scripts/get_min_versions.py
vendored
2
.github/scripts/get_min_versions.py
vendored
@@ -48,7 +48,7 @@ def get_pypi_versions(package_name: str) -> List[str]:
|
||||
KeyError: If package not found or response format unexpected
|
||||
"""
|
||||
pypi_url = f"https://pypi.org/pypi/{package_name}/json"
|
||||
response = requests.get(pypi_url, timeout=10.0)
|
||||
response = requests.get(pypi_url)
|
||||
response.raise_for_status()
|
||||
return list(response.json()["releases"].keys())
|
||||
|
||||
|
||||
84
.github/scripts/pr-labeler-config.json
vendored
84
.github/scripts/pr-labeler-config.json
vendored
@@ -1,84 +0,0 @@
|
||||
{
|
||||
"trustedThreshold": 5,
|
||||
"labelColor": "b76e79",
|
||||
"sizeThresholds": [
|
||||
{ "label": "size: XS", "max": 50 },
|
||||
{ "label": "size: S", "max": 200 },
|
||||
{ "label": "size: M", "max": 500 },
|
||||
{ "label": "size: L", "max": 1000 },
|
||||
{ "label": "size: XL" }
|
||||
],
|
||||
"excludedFiles": ["uv.lock"],
|
||||
"excludedPaths": ["docs/"],
|
||||
"typeToLabel": {
|
||||
"feat": "feature",
|
||||
"fix": "fix",
|
||||
"docs": "documentation",
|
||||
"style": "linting",
|
||||
"refactor": "refactor",
|
||||
"perf": "performance",
|
||||
"test": "tests",
|
||||
"build": "infra",
|
||||
"ci": "infra",
|
||||
"chore": "infra",
|
||||
"revert": "revert",
|
||||
"release": "release",
|
||||
"hotfix": "hotfix",
|
||||
"breaking": "breaking"
|
||||
},
|
||||
"scopeToLabel": {
|
||||
"core": "core",
|
||||
"langchain": "langchain",
|
||||
"langchain-classic": "langchain-classic",
|
||||
"model-profiles": "model-profiles",
|
||||
"standard-tests": "standard-tests",
|
||||
"text-splitters": "text-splitters",
|
||||
"anthropic": "anthropic",
|
||||
"chroma": "chroma",
|
||||
"deepseek": "deepseek",
|
||||
"exa": "exa",
|
||||
"fireworks": "fireworks",
|
||||
"groq": "groq",
|
||||
"huggingface": "huggingface",
|
||||
"mistralai": "mistralai",
|
||||
"nomic": "nomic",
|
||||
"ollama": "ollama",
|
||||
"openai": "openai",
|
||||
"openrouter": "openrouter",
|
||||
"perplexity": "perplexity",
|
||||
"qdrant": "qdrant",
|
||||
"xai": "xai",
|
||||
"deps": "dependencies",
|
||||
"docs": "documentation",
|
||||
"infra": "infra"
|
||||
},
|
||||
"fileRules": [
|
||||
{ "label": "core", "prefix": "libs/core/", "skipExcludedFiles": true },
|
||||
{ "label": "langchain-classic", "prefix": "libs/langchain/", "skipExcludedFiles": true },
|
||||
{ "label": "langchain", "prefix": "libs/langchain_v1/", "skipExcludedFiles": true },
|
||||
{ "label": "standard-tests", "prefix": "libs/standard-tests/", "skipExcludedFiles": true },
|
||||
{ "label": "model-profiles", "prefix": "libs/model-profiles/", "skipExcludedFiles": true },
|
||||
{ "label": "text-splitters", "prefix": "libs/text-splitters/", "skipExcludedFiles": true },
|
||||
{ "label": "integration", "prefix": "libs/partners/", "skipExcludedFiles": true },
|
||||
{ "label": "anthropic", "prefix": "libs/partners/anthropic/", "skipExcludedFiles": true },
|
||||
{ "label": "chroma", "prefix": "libs/partners/chroma/", "skipExcludedFiles": true },
|
||||
{ "label": "deepseek", "prefix": "libs/partners/deepseek/", "skipExcludedFiles": true },
|
||||
{ "label": "exa", "prefix": "libs/partners/exa/", "skipExcludedFiles": true },
|
||||
{ "label": "fireworks", "prefix": "libs/partners/fireworks/", "skipExcludedFiles": true },
|
||||
{ "label": "groq", "prefix": "libs/partners/groq/", "skipExcludedFiles": true },
|
||||
{ "label": "huggingface", "prefix": "libs/partners/huggingface/", "skipExcludedFiles": true },
|
||||
{ "label": "mistralai", "prefix": "libs/partners/mistralai/", "skipExcludedFiles": true },
|
||||
{ "label": "nomic", "prefix": "libs/partners/nomic/", "skipExcludedFiles": true },
|
||||
{ "label": "ollama", "prefix": "libs/partners/ollama/", "skipExcludedFiles": true },
|
||||
{ "label": "openai", "prefix": "libs/partners/openai/", "skipExcludedFiles": true },
|
||||
{ "label": "openrouter", "prefix": "libs/partners/openrouter/", "skipExcludedFiles": true },
|
||||
{ "label": "perplexity", "prefix": "libs/partners/perplexity/", "skipExcludedFiles": true },
|
||||
{ "label": "qdrant", "prefix": "libs/partners/qdrant/", "skipExcludedFiles": true },
|
||||
{ "label": "xai", "prefix": "libs/partners/xai/", "skipExcludedFiles": true },
|
||||
{ "label": "github_actions", "prefix": ".github/workflows/" },
|
||||
{ "label": "github_actions", "prefix": ".github/actions/" },
|
||||
{ "label": "dependencies", "suffix": "pyproject.toml" },
|
||||
{ "label": "dependencies", "exact": "uv.lock" },
|
||||
{ "label": "dependencies", "pattern": "(?:^|/)requirements[^/]*\\.txt$" }
|
||||
]
|
||||
}
|
||||
278
.github/scripts/pr-labeler.js
vendored
278
.github/scripts/pr-labeler.js
vendored
@@ -1,278 +0,0 @@
|
||||
// Shared helpers for pr_labeler.yml and tag-external-issues.yml.
|
||||
//
|
||||
// Usage from actions/github-script (requires actions/checkout first):
|
||||
// const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
function loadConfig() {
|
||||
const configPath = path.join(__dirname, 'pr-labeler-config.json');
|
||||
let raw;
|
||||
try {
|
||||
raw = fs.readFileSync(configPath, 'utf8');
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to read ${configPath}: ${e.message}`);
|
||||
}
|
||||
let config;
|
||||
try {
|
||||
config = JSON.parse(raw);
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to parse pr-labeler-config.json: ${e.message}`);
|
||||
}
|
||||
const required = [
|
||||
'labelColor', 'sizeThresholds', 'fileRules',
|
||||
'typeToLabel', 'scopeToLabel', 'trustedThreshold',
|
||||
'excludedFiles', 'excludedPaths',
|
||||
];
|
||||
const missing = required.filter(k => !(k in config));
|
||||
if (missing.length > 0) {
|
||||
throw new Error(`pr-labeler-config.json missing required keys: ${missing.join(', ')}`);
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
function init(github, owner, repo, config, core) {
|
||||
if (!core) {
|
||||
throw new Error('init() requires a `core` parameter (e.g., from actions/github-script)');
|
||||
}
|
||||
const {
|
||||
trustedThreshold,
|
||||
labelColor,
|
||||
sizeThresholds,
|
||||
scopeToLabel,
|
||||
typeToLabel,
|
||||
fileRules: fileRulesDef,
|
||||
excludedFiles,
|
||||
excludedPaths,
|
||||
} = config;
|
||||
|
||||
const sizeLabels = sizeThresholds.map(t => t.label);
|
||||
const allTypeLabels = [...new Set(Object.values(typeToLabel))];
|
||||
const tierLabels = ['new-contributor', 'trusted-contributor'];
|
||||
|
||||
// ── Label management ──────────────────────────────────────────────
|
||||
|
||||
async function ensureLabel(name, color = labelColor) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({ owner, repo, name });
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
try {
|
||||
await github.rest.issues.createLabel({ owner, repo, name, color });
|
||||
} catch (createErr) {
|
||||
// 422 = label created by a concurrent run between our get and create
|
||||
if (createErr.status !== 422) throw createErr;
|
||||
core.info(`Label "${name}" creation returned 422 (likely already exists)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Size calculation ──────────────────────────────────────────────
|
||||
|
||||
function getSizeLabel(totalChanged) {
|
||||
for (const t of sizeThresholds) {
|
||||
if (t.max != null && totalChanged < t.max) return t.label;
|
||||
}
|
||||
// Last entry has no max — it's the catch-all
|
||||
return sizeThresholds[sizeThresholds.length - 1].label;
|
||||
}
|
||||
|
||||
function computeSize(files) {
|
||||
const excluded = new Set(excludedFiles);
|
||||
const totalChanged = files.reduce((sum, f) => {
|
||||
const p = f.filename ?? '';
|
||||
const base = p.split('/').pop();
|
||||
if (excluded.has(base)) return sum;
|
||||
for (const prefix of excludedPaths) {
|
||||
if (p.startsWith(prefix)) return sum;
|
||||
}
|
||||
return sum + (f.additions ?? 0) + (f.deletions ?? 0);
|
||||
}, 0);
|
||||
return { totalChanged, sizeLabel: getSizeLabel(totalChanged) };
|
||||
}
|
||||
|
||||
// ── File-based labels ─────────────────────────────────────────────
|
||||
|
||||
function buildFileRules() {
|
||||
return fileRulesDef.map((rule, i) => {
|
||||
let test;
|
||||
if (rule.prefix) test = p => p.startsWith(rule.prefix);
|
||||
else if (rule.suffix) test = p => p.endsWith(rule.suffix);
|
||||
else if (rule.exact) test = p => p === rule.exact;
|
||||
else if (rule.pattern) {
|
||||
const re = new RegExp(rule.pattern);
|
||||
test = p => re.test(p);
|
||||
} else {
|
||||
throw new Error(
|
||||
`fileRules[${i}] (label: "${rule.label}") has no recognized matcher ` +
|
||||
`(expected one of: prefix, suffix, exact, pattern)`
|
||||
);
|
||||
}
|
||||
return { label: rule.label, test, skipExcluded: !!rule.skipExcludedFiles };
|
||||
});
|
||||
}
|
||||
|
||||
function matchFileLabels(files, fileRules) {
|
||||
const rules = fileRules || buildFileRules();
|
||||
const excluded = new Set(excludedFiles);
|
||||
const labels = new Set();
|
||||
for (const rule of rules) {
|
||||
// skipExcluded: ignore files whose basename is in the top-level
|
||||
// "excludedFiles" list (e.g. uv.lock) so lockfile-only changes
|
||||
// don't trigger package labels.
|
||||
const candidates = rule.skipExcluded
|
||||
? files.filter(f => !excluded.has((f.filename ?? '').split('/').pop()))
|
||||
: files;
|
||||
if (candidates.some(f => rule.test(f.filename ?? ''))) {
|
||||
labels.add(rule.label);
|
||||
}
|
||||
}
|
||||
return labels;
|
||||
}
|
||||
|
||||
// ── Title-based labels ────────────────────────────────────────────
|
||||
|
||||
function matchTitleLabels(title) {
|
||||
const labels = new Set();
|
||||
const m = (title ?? '').match(/^(\w+)(?:\(([^)]+)\))?(!)?:/);
|
||||
if (!m) return { labels, type: null, typeLabel: null, scopes: [], breaking: false };
|
||||
|
||||
const type = m[1].toLowerCase();
|
||||
const scopeStr = m[2] ?? '';
|
||||
const breaking = !!m[3];
|
||||
|
||||
const typeLabel = typeToLabel[type] || null;
|
||||
if (typeLabel) labels.add(typeLabel);
|
||||
if (breaking) labels.add('breaking');
|
||||
|
||||
const scopes = scopeStr.split(',').map(s => s.trim()).filter(Boolean);
|
||||
for (const scope of scopes) {
|
||||
const sl = scopeToLabel[scope];
|
||||
if (sl) labels.add(sl);
|
||||
}
|
||||
|
||||
return { labels, type, typeLabel, scopes, breaking };
|
||||
}
|
||||
|
||||
// ── Org membership ────────────────────────────────────────────────
|
||||
|
||||
async function checkMembership(author, userType) {
|
||||
if (userType === 'Bot') {
|
||||
console.log(`${author} is a Bot — treating as internal`);
|
||||
return { isExternal: false };
|
||||
}
|
||||
|
||||
try {
|
||||
const membership = await github.rest.orgs.getMembershipForUser({
|
||||
org: 'langchain-ai',
|
||||
username: author,
|
||||
});
|
||||
const isExternal = membership.data.state !== 'active';
|
||||
console.log(
|
||||
isExternal
|
||||
? `${author} has pending membership — treating as external`
|
||||
: `${author} is an active member of langchain-ai`,
|
||||
);
|
||||
return { isExternal };
|
||||
} catch (e) {
|
||||
if (e.status === 404) {
|
||||
console.log(`${author} is not a member of langchain-ai`);
|
||||
return { isExternal: true };
|
||||
}
|
||||
// Non-404 errors (rate limit, auth failure, server error) must not
|
||||
// silently default to external — rethrow to fail the step.
|
||||
throw new Error(
|
||||
`Membership check failed for ${author} (${e.status}): ${e.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Contributor analysis ──────────────────────────────────────────
|
||||
|
||||
async function getContributorInfo(contributorCache, author, userType) {
|
||||
if (contributorCache.has(author)) return contributorCache.get(author);
|
||||
|
||||
const { isExternal } = await checkMembership(author, userType);
|
||||
|
||||
let mergedCount = null;
|
||||
if (isExternal) {
|
||||
try {
|
||||
const result = await github.rest.search.issuesAndPullRequests({
|
||||
q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
|
||||
per_page: 1,
|
||||
});
|
||||
mergedCount = result?.data?.total_count ?? null;
|
||||
} catch (e) {
|
||||
if (e?.status !== 422) throw e;
|
||||
core.warning(`Search failed for ${author}; skipping tier.`);
|
||||
}
|
||||
}
|
||||
|
||||
const info = { isExternal, mergedCount };
|
||||
contributorCache.set(author, info);
|
||||
return info;
|
||||
}
|
||||
|
||||
// ── Tier label resolution ───────────────────────────────────────────
|
||||
|
||||
async function applyTierLabel(issueNumber, author, { skipNewContributor = false } = {}) {
|
||||
let mergedCount;
|
||||
try {
|
||||
const result = await github.rest.search.issuesAndPullRequests({
|
||||
q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
|
||||
per_page: 1,
|
||||
});
|
||||
mergedCount = result?.data?.total_count;
|
||||
} catch (error) {
|
||||
if (error?.status !== 422) throw error;
|
||||
core.warning(`Search failed for ${author}; skipping tier label.`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mergedCount == null) {
|
||||
core.warning(`Search response missing total_count for ${author}; skipping tier label.`);
|
||||
return;
|
||||
}
|
||||
|
||||
let tierLabel = null;
|
||||
if (mergedCount >= trustedThreshold) tierLabel = 'trusted-contributor';
|
||||
else if (mergedCount === 0 && !skipNewContributor) tierLabel = 'new-contributor';
|
||||
|
||||
if (tierLabel) {
|
||||
await ensureLabel(tierLabel);
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: issueNumber, labels: [tierLabel],
|
||||
});
|
||||
console.log(`Applied '${tierLabel}' to #${issueNumber} (${mergedCount} merged PRs)`);
|
||||
} else {
|
||||
console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
|
||||
}
|
||||
|
||||
return tierLabel;
|
||||
}
|
||||
|
||||
return {
|
||||
ensureLabel,
|
||||
getSizeLabel,
|
||||
computeSize,
|
||||
buildFileRules,
|
||||
matchFileLabels,
|
||||
matchTitleLabels,
|
||||
allTypeLabels,
|
||||
checkMembership,
|
||||
getContributorInfo,
|
||||
applyTierLabel,
|
||||
sizeLabels,
|
||||
tierLabels,
|
||||
trustedThreshold,
|
||||
labelColor,
|
||||
};
|
||||
}
|
||||
|
||||
function loadAndInit(github, owner, repo, core) {
|
||||
const config = loadConfig();
|
||||
return { config, h: init(github, owner, repo, config, core) };
|
||||
}
|
||||
|
||||
module.exports = { loadConfig, init, loadAndInit };
|
||||
48
.github/scripts/test_release_options.py
vendored
48
.github/scripts/test_release_options.py
vendored
@@ -1,48 +0,0 @@
|
||||
"""Verify _release.yml dropdown options match actual package directories."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _get_release_options() -> list[str]:
|
||||
workflow = REPO_ROOT / ".github" / "workflows" / "_release.yml"
|
||||
with open(workflow) as f:
|
||||
data = yaml.safe_load(f)
|
||||
try:
|
||||
# PyYAML (YAML 1.1) parses the bare key `on` as boolean True
|
||||
return data[True]["workflow_dispatch"]["inputs"]["working-directory"]["options"]
|
||||
except (KeyError, TypeError) as e:
|
||||
msg = f"Could not find workflow_dispatch options in {workflow}: {e}"
|
||||
raise AssertionError(msg) from e
|
||||
|
||||
|
||||
def _get_package_dirs() -> set[str]:
|
||||
libs = REPO_ROOT / "libs"
|
||||
dirs: set[str] = set()
|
||||
# Top-level packages (libs/core, libs/langchain, etc.)
|
||||
for p in libs.iterdir():
|
||||
if p.is_dir() and (p / "pyproject.toml").exists():
|
||||
dirs.add(f"libs/{p.name}")
|
||||
# Partner packages (libs/partners/*)
|
||||
partners = libs / "partners"
|
||||
if partners.exists():
|
||||
for p in partners.iterdir():
|
||||
if p.is_dir() and (p / "pyproject.toml").exists():
|
||||
dirs.add(f"libs/partners/{p.name}")
|
||||
return dirs
|
||||
|
||||
|
||||
def test_release_options_match_packages() -> None:
|
||||
options = set(_get_release_options())
|
||||
packages = _get_package_dirs()
|
||||
missing_from_dropdown = packages - options
|
||||
extra_in_dropdown = options - packages
|
||||
assert not missing_from_dropdown, (
|
||||
f"Packages on disk missing from _release.yml dropdown: {missing_from_dropdown}"
|
||||
)
|
||||
assert not extra_in_dropdown, (
|
||||
f"Dropdown options with no matching package directory: {extra_in_dropdown}"
|
||||
)
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
2
.github/workflows/_lint.yml
vendored
2
.github/workflows/_lint.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
202
.github/workflows/_refresh_model_profiles.yml
vendored
202
.github/workflows/_refresh_model_profiles.yml
vendored
@@ -1,202 +0,0 @@
|
||||
# Reusable workflow: refreshes model profile data for any repo that uses the
|
||||
# `langchain-profiles` CLI. Creates (or updates) a pull request with the
|
||||
# resulting changes.
|
||||
#
|
||||
# Callers MUST set `permissions: { contents: write, pull-requests: write }` —
|
||||
# reusable workflows cannot escalate the caller's token permissions.
|
||||
#
|
||||
# ── Example: external repo (langchain-google) ──────────────────────────
|
||||
#
|
||||
# jobs:
|
||||
# refresh-profiles:
|
||||
# uses: langchain-ai/langchain/.github/workflows/_refresh_model_profiles.yml@master
|
||||
# with:
|
||||
# providers: >-
|
||||
# [
|
||||
# {"provider":"google", "data_dir":"libs/genai/langchain_google_genai/data"},
|
||||
# ]
|
||||
# secrets:
|
||||
# MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
# MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
|
||||
name: "Refresh Model Profiles (reusable)"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
providers:
|
||||
description: >-
|
||||
JSON array of objects, each with `provider` (models.dev provider ID)
|
||||
and `data_dir` (path relative to repo root where `_profiles.py` and
|
||||
`profile_augmentations.toml` live).
|
||||
required: true
|
||||
type: string
|
||||
cli-path:
|
||||
description: >-
|
||||
Path (relative to workspace) to an existing `libs/model-profiles`
|
||||
checkout. When set the workflow skips cloning the langchain repo and
|
||||
uses this directory for the CLI instead. Useful when the caller IS
|
||||
the langchain monorepo.
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
cli-ref:
|
||||
description: >-
|
||||
Git ref of langchain-ai/langchain to checkout for the CLI.
|
||||
Ignored when `cli-path` is set.
|
||||
required: false
|
||||
type: string
|
||||
default: master
|
||||
add-paths:
|
||||
description: "Glob for files to stage in the PR commit."
|
||||
required: false
|
||||
type: string
|
||||
default: "**/_profiles.py"
|
||||
pr-branch:
|
||||
description: "Branch name for the auto-created PR."
|
||||
required: false
|
||||
type: string
|
||||
default: bot/refresh-model-profiles
|
||||
pr-title:
|
||||
description: "PR / commit title."
|
||||
required: false
|
||||
type: string
|
||||
default: "chore(model-profiles): refresh model profile data"
|
||||
pr-body:
|
||||
description: "PR body."
|
||||
required: false
|
||||
type: string
|
||||
default: |
|
||||
Automated refresh of model profile data via `langchain-profiles refresh`.
|
||||
|
||||
🤖 Generated by the `refresh_model_profiles` workflow.
|
||||
pr-labels:
|
||||
description: "Comma-separated labels to apply to the PR."
|
||||
required: false
|
||||
type: string
|
||||
default: bot
|
||||
secrets:
|
||||
MODEL_PROFILE_BOT_APP_ID:
|
||||
required: true
|
||||
MODEL_PROFILE_BOT_PRIVATE_KEY:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
refresh-profiles:
|
||||
name: refresh model profiles
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "📋 Checkout"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "📋 Checkout langchain-profiles CLI"
|
||||
if: inputs.cli-path == ''
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
ref: ${{ inputs.cli-ref }}
|
||||
sparse-checkout: libs/model-profiles
|
||||
path: _langchain-cli
|
||||
|
||||
- name: "🔧 Resolve CLI directory"
|
||||
id: cli
|
||||
env:
|
||||
CLI_PATH: ${{ inputs.cli-path }}
|
||||
run: |
|
||||
if [ -n "${CLI_PATH}" ]; then
|
||||
resolved="${GITHUB_WORKSPACE}/${CLI_PATH}"
|
||||
if [ ! -d "${resolved}" ]; then
|
||||
echo "::error::cli-path '${CLI_PATH}' does not exist at ${resolved}"
|
||||
exit 1
|
||||
fi
|
||||
echo "dir=${CLI_PATH}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "dir=_langchain-cli/libs/model-profiles" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: "🐍 Set up Python + uv"
|
||||
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
|
||||
with:
|
||||
version: "0.5.25"
|
||||
python-version: "3.12"
|
||||
enable-cache: true
|
||||
cache-dependency-glob: "**/model-profiles/uv.lock"
|
||||
|
||||
- name: "📦 Install langchain-profiles CLI"
|
||||
working-directory: ${{ steps.cli.outputs.dir }}
|
||||
run: uv sync --frozen --no-group test --no-group dev --no-group lint
|
||||
|
||||
- name: "✅ Validate providers input"
|
||||
env:
|
||||
PROVIDERS_JSON: ${{ inputs.providers }}
|
||||
run: |
|
||||
echo "${PROVIDERS_JSON}" | jq -e 'type == "array" and length > 0' > /dev/null || {
|
||||
echo "::error::providers input must be a non-empty JSON array"
|
||||
exit 1
|
||||
}
|
||||
echo "${PROVIDERS_JSON}" | jq -e 'all(has("provider") and has("data_dir"))' > /dev/null || {
|
||||
echo "::error::every entry in providers must have 'provider' and 'data_dir' keys"
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: "🔄 Refresh profiles"
|
||||
env:
|
||||
PROVIDERS_JSON: ${{ inputs.providers }}
|
||||
run: |
|
||||
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
|
||||
failed=""
|
||||
mapfile -t rows < <(echo "${PROVIDERS_JSON}" | jq -c '.[]')
|
||||
for row in "${rows[@]}"; do
|
||||
provider=$(echo "${row}" | jq -r '.provider')
|
||||
data_dir=$(echo "${row}" | jq -r '.data_dir')
|
||||
echo "--- Refreshing ${provider} -> ${data_dir} ---"
|
||||
if ! echo y | uv run --frozen --project "${cli_dir}" \
|
||||
langchain-profiles refresh \
|
||||
--provider "${provider}" \
|
||||
--data-dir "${GITHUB_WORKSPACE}/${data_dir}"; then
|
||||
echo "::error::Failed to refresh provider: ${provider}"
|
||||
failed="${failed} ${provider}"
|
||||
fi
|
||||
done
|
||||
if [ -n "${failed}" ]; then
|
||||
echo "::error::The following providers failed:${failed}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: "🔑 Generate GitHub App token"
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
|
||||
- name: "🔀 Create pull request"
|
||||
id: create-pr
|
||||
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
branch: ${{ inputs.pr-branch }}
|
||||
commit-message: ${{ inputs.pr-title }}
|
||||
title: ${{ inputs.pr-title }}
|
||||
body: ${{ inputs.pr-body }}
|
||||
labels: ${{ inputs.pr-labels }}
|
||||
add-paths: ${{ inputs.add-paths }}
|
||||
|
||||
- name: "📝 Summary"
|
||||
if: always()
|
||||
env:
|
||||
PR_OP: ${{ steps.create-pr.outputs.pull-request-operation }}
|
||||
PR_URL: ${{ steps.create-pr.outputs.pull-request-url }}
|
||||
JOB_STATUS: ${{ job.status }}
|
||||
run: |
|
||||
if [ "${PR_OP}" = "created" ] || [ "${PR_OP}" = "updated" ]; then
|
||||
echo "### ✅ PR ${PR_OP}: ${PR_URL}" >> "$GITHUB_STEP_SUMMARY"
|
||||
elif [ -z "${PR_OP}" ] && [ "${JOB_STATUS}" = "success" ]; then
|
||||
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
|
||||
elif [ "${JOB_STATUS}" = "failure" ]; then
|
||||
echo "### ❌ Job failed — check step logs for details" >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
155
.github/workflows/_release.yml
vendored
155
.github/workflows/_release.yml
vendored
@@ -5,7 +5,7 @@
|
||||
# Handles version bumping, building, and publishing to PyPI with authentication.
|
||||
|
||||
name: "🚀 Package Release"
|
||||
run-name: "Release ${{ inputs.working-directory-override || inputs.working-directory }} ${{ inputs.release-version }}"
|
||||
run-name: "Release ${{ inputs.working-directory }} ${{ inputs.release-version }}"
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@@ -17,35 +17,9 @@ on:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: choice
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
default: "libs/langchain_v1"
|
||||
options:
|
||||
- libs/core
|
||||
- libs/langchain
|
||||
- libs/langchain_v1
|
||||
- libs/text-splitters
|
||||
- libs/standard-tests
|
||||
- libs/model-profiles
|
||||
- libs/partners/anthropic
|
||||
- libs/partners/chroma
|
||||
- libs/partners/deepseek
|
||||
- libs/partners/exa
|
||||
- libs/partners/fireworks
|
||||
- libs/partners/groq
|
||||
- libs/partners/huggingface
|
||||
- libs/partners/mistralai
|
||||
- libs/partners/nomic
|
||||
- libs/partners/ollama
|
||||
- libs/partners/openai
|
||||
- libs/partners/openrouter
|
||||
- libs/partners/perplexity
|
||||
- libs/partners/qdrant
|
||||
- libs/partners/xai
|
||||
working-directory-override:
|
||||
required: false
|
||||
type: string
|
||||
description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
|
||||
release-version:
|
||||
required: true
|
||||
type: string
|
||||
@@ -61,10 +35,9 @@ env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
EFFECTIVE_WORKING_DIR: ${{ inputs.working-directory-override || inputs.working-directory }}
|
||||
|
||||
permissions:
|
||||
contents: read # Job-level overrides grant write only where needed (mark-release)
|
||||
contents: write # Required for creating GitHub releases
|
||||
|
||||
jobs:
|
||||
# Build the distribution package and extract version info
|
||||
@@ -81,7 +54,7 @@ jobs:
|
||||
version: ${{ steps.check-version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -91,7 +64,6 @@ jobs:
|
||||
# We want to keep this build stage *separate* from the release stage,
|
||||
# so that there's no sharing of permissions between them.
|
||||
# (Release stage has trusted publishing and GitHub repo contents write access,
|
||||
# which the build stage must not have access to.)
|
||||
#
|
||||
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
|
||||
# could get access to our GitHub or PyPI credentials.
|
||||
@@ -102,18 +74,18 @@ jobs:
|
||||
# https://github.com/pypa/gh-action-pypi-publish#non-goals
|
||||
- name: Build project for distribution
|
||||
run: uv build
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Check version
|
||||
id: check-version
|
||||
shell: python
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
import os
|
||||
import tomllib
|
||||
@@ -125,8 +97,6 @@ jobs:
|
||||
f.write(f"pkg-name={pkg_name}\n")
|
||||
f.write(f"version={version}\n")
|
||||
release-notes:
|
||||
# release-notes must run before publishing because its check-tags step
|
||||
# validates version/tag state — do not remove this dependency.
|
||||
needs:
|
||||
- build
|
||||
runs-on: ubuntu-latest
|
||||
@@ -135,18 +105,18 @@ jobs:
|
||||
outputs:
|
||||
release-body: ${{ steps.generate-release-body.outputs.release-body }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
path: langchain
|
||||
sparse-checkout: | # this only grabs files for relevant dir
|
||||
${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
${{ inputs.working-directory }}
|
||||
ref: ${{ github.ref }} # this scopes to just ref'd branch
|
||||
fetch-depth: 0 # this fetches entire commit history
|
||||
- name: Check tags
|
||||
id: check-tags
|
||||
shell: bash
|
||||
working-directory: langchain/${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: langchain/${{ inputs.working-directory }}
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
VERSION: ${{ needs.build.outputs.version }}
|
||||
@@ -203,7 +173,7 @@ jobs:
|
||||
id: generate-release-body
|
||||
working-directory: langchain
|
||||
env:
|
||||
WORKING_DIR: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
WORKING_DIR: ${{ inputs.working-directory }}
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
TAG: ${{ steps.check-tags.outputs.tag }}
|
||||
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
|
||||
@@ -223,8 +193,6 @@ jobs:
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
test-pypi-publish:
|
||||
# release-notes must run before publishing because its check-tags step
|
||||
# validates version/tag state — do not remove this dependency.
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
@@ -238,17 +206,17 @@ jobs:
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Publish to test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
packages-dir: ${{ inputs.working-directory }}/dist/
|
||||
verbose: true
|
||||
print-hash: true
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
@@ -269,7 +237,7 @@ jobs:
|
||||
contents: read
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We explicitly *don't* set up caching here. This ensures our tests are
|
||||
# maximally sensitive to catching breakage.
|
||||
@@ -290,18 +258,26 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Import dist package
|
||||
shell: bash
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
VERSION: ${{ needs.build.outputs.version }}
|
||||
# Install directly from the locally-built wheel (no index resolution needed)
|
||||
# Here we use:
|
||||
# - The default regular PyPI index as the *primary* index, meaning
|
||||
# that it takes priority (https://pypi.org/simple)
|
||||
# - The test PyPI index as an extra index, so that any dependencies that
|
||||
# are not found on test PyPI can be resolved and installed anyway.
|
||||
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
|
||||
# package because VERSION will not have been uploaded to regular PyPI yet.
|
||||
# - attempt install again after 5 seconds if it fails because there is
|
||||
# sometimes a delay in availability on test pypi
|
||||
run: |
|
||||
uv venv
|
||||
VIRTUAL_ENV=.venv uv pip install dist/*.whl
|
||||
@@ -315,11 +291,11 @@ jobs:
|
||||
|
||||
- name: Import test dependencies
|
||||
run: uv sync --group test
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
# Overwrite the local version of the package with the built version
|
||||
- name: Import published package (again)
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
shell: bash
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
@@ -330,17 +306,17 @@ jobs:
|
||||
- name: Check for prerelease versions
|
||||
# Block release if any dependencies allow prerelease versions
|
||||
# (unless this is itself a prerelease version)
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv run python $GITHUB_WORKSPACE/.github/scripts/check_prerelease_dependencies.py pyproject.toml
|
||||
|
||||
- name: Run unit tests
|
||||
run: make tests
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Get minimum versions
|
||||
# Find the minimum published versions that satisfies the given constraints
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
id: min-version
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install packaging requests
|
||||
@@ -356,16 +332,16 @@ jobs:
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install --force-reinstall --editable .
|
||||
VIRTUAL_ENV=.venv uv pip install --force-reinstall $MIN_VERSIONS
|
||||
make tests PYTEST_EXTRA="-q -k 'not test_serdes'"
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
make tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Import integration test dependencies
|
||||
run: uv sync --group test --group test_integration
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Run integration tests
|
||||
# Uses the Makefile's `integration_tests` target for the specified package
|
||||
if: ${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/partners/') }}
|
||||
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
|
||||
env:
|
||||
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
@@ -405,7 +381,7 @@ jobs:
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: make integration_tests
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
# Test select published packages against new core
|
||||
# Done when code changes are made to langchain-core
|
||||
@@ -439,32 +415,32 @@ jobs:
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We implement this conditional as Github Actions does not have good support
|
||||
# for conditionally needing steps. https://github.com/actions/runner/issues/491
|
||||
# TODO: this seems to be resolved upstream, so we can probably remove this workaround
|
||||
- name: Check if libs/core
|
||||
run: |
|
||||
if [ "${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core') }}" != "true" ]; then
|
||||
if [ "${{ startsWith(inputs.working-directory, 'libs/core') }}" != "true" ]; then
|
||||
echo "Not in libs/core. Exiting successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Set up Python + uv
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
- uses: actions/download-artifact@v8
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Test against ${{ matrix.partner }}
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
run: |
|
||||
# Identify latest tag, excluding pre-releases
|
||||
LATEST_PACKAGE_TAG="$(
|
||||
@@ -509,11 +485,8 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
# Only run for core or langchain_v1 releases.
|
||||
# Job-level 'if' does not support env context; must use inputs directly.
|
||||
if: >-
|
||||
startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/core')
|
||||
|| startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/langchain_v1')
|
||||
# Only run for core or langchain_v1 releases
|
||||
if: startsWith(inputs.working-directory, 'libs/core') || startsWith(inputs.working-directory, 'libs/langchain_v1')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -525,11 +498,11 @@ jobs:
|
||||
# No API keys needed for now - deepagents `make test` only runs unit tests
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: ${{ matrix.package.repo }}
|
||||
path: ${{ matrix.package.name }}
|
||||
@@ -539,7 +512,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
@@ -583,25 +556,25 @@ jobs:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Publish package distributions to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
packages-dir: ${{ inputs.working-directory }}/dist/
|
||||
verbose: true
|
||||
print-hash: true
|
||||
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
|
||||
@@ -615,7 +588,7 @@ jobs:
|
||||
- test-pypi-publish
|
||||
- pre-release-checks
|
||||
- publish
|
||||
# Run if all needed jobs succeeded or were skipped
|
||||
# Run if all needed jobs succeeded or were skipped (test-dependents only runs for core/langchain_v1)
|
||||
if: ${{ !cancelled() && !failure() }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
@@ -625,23 +598,23 @@ jobs:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Create Tag
|
||||
uses: ncipollo/release-action@339a81892b84b4eeb0f6e744e4574d79d0d9b8dd # v1
|
||||
uses: ncipollo/release-action@v1
|
||||
with:
|
||||
artifacts: "dist/*"
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
6
.github/workflows/_test.yml
vendored
6
.github/workflows/_test.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
- name: "🧪 Run Core Unit Tests"
|
||||
shell: bash
|
||||
run: |
|
||||
make test PYTEST_EXTRA=-q
|
||||
make test
|
||||
|
||||
- name: "🔍 Calculate Minimum Dependency Versions"
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
@@ -69,7 +69,7 @@ jobs:
|
||||
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install $MIN_VERSIONS
|
||||
make tests PYTEST_EXTRA=-q
|
||||
make tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
|
||||
2
.github/workflows/_test_pydantic.yml
vendored
2
.github/workflows/_test_pydantic.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
name: "Pydantic ~=${{ inputs.pydantic-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
66
.github/workflows/_test_vcr.yml
vendored
66
.github/workflows/_test_vcr.yml
vendored
@@ -1,66 +0,0 @@
|
||||
# Runs VCR cassette-backed integration tests in playback-only mode.
|
||||
#
|
||||
# No API keys needed — catches stale cassettes caused by test input
|
||||
# changes without re-recording.
|
||||
#
|
||||
# Called as part of check_diffs.yml workflow.
|
||||
|
||||
name: "📼 VCR Cassette Tests"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: test-vcr-${{ inputs.working-directory }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: "📼 Run VCR Cassette Tests (playback-only)"
|
||||
shell: bash
|
||||
env:
|
||||
OPENAI_API_KEY: sk-fake
|
||||
run: make test_vcr
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
17
.github/workflows/auto-label-by-package.yml
vendored
17
.github/workflows/auto-label-by-package.yml
vendored
@@ -4,9 +4,6 @@ on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
label-by-package:
|
||||
permissions:
|
||||
@@ -15,20 +12,14 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Sync package labels
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.issue.body || "";
|
||||
|
||||
// Extract text under "## Package" or "### Package" (handles " (Required)" suffix and being last section)
|
||||
const match = body.match(/#{2,3} Package[^\n]*\n([\s\S]*?)(?:\n#{2,3} |$)/i);
|
||||
if (!match) {
|
||||
core.setFailed(
|
||||
`Could not find "## Package" section in issue #${context.issue.number} body. ` +
|
||||
`The issue template may have changed — update the regex in this workflow.`
|
||||
);
|
||||
return;
|
||||
}
|
||||
// Extract text under "### Package" (handles " (Required)" suffix and being last section)
|
||||
const match = body.match(/### Package[^\n]*\n([\s\S]*?)(?:\n###|$)/i);
|
||||
if (!match) return;
|
||||
|
||||
const packageSection = match[1].trim();
|
||||
|
||||
|
||||
2
.github/workflows/check_agents_sync.yml
vendored
2
.github/workflows/check_agents_sync.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🔍 Check CLAUDE.md and AGENTS.md are in sync"
|
||||
run: |
|
||||
|
||||
2
.github/workflows/check_core_versions.yml
vendored
2
.github/workflows/check_core_versions.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "✅ Verify pyproject.toml & version.py Match"
|
||||
run: |
|
||||
|
||||
103
.github/workflows/check_diffs.yml
vendored
103
.github/workflows/check_diffs.yml
vendored
@@ -8,6 +8,7 @@
|
||||
# - Pydantic compatibility tests (_test_pydantic.yml)
|
||||
# - Integration test compilation checks (_compile_integration_test.yml)
|
||||
# - Extended test suites that require additional dependencies
|
||||
# - Codspeed benchmarks (if not labeled 'codspeed-ignore')
|
||||
#
|
||||
# Reports status to GitHub checks and PR status.
|
||||
|
||||
@@ -46,14 +47,14 @@ jobs:
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
uses: actions/checkout@v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: "📂 Get Changed Files"
|
||||
id: files
|
||||
uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
|
||||
uses: Ana06/get-changed-files@v2.3.0
|
||||
- name: "🔍 Analyze Changed Files & Generate Build Matrix"
|
||||
id: set-matrix
|
||||
run: |
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
|
||||
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
||||
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
||||
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
|
||||
codspeed: ${{ steps.set-matrix.outputs.codspeed }}
|
||||
# Run linting only on packages that have changed files
|
||||
lint:
|
||||
needs: [build]
|
||||
@@ -124,21 +125,6 @@ jobs:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run VCR cassette-backed integration tests in playback-only mode (no API keys)
|
||||
vcr-tests:
|
||||
name: "VCR Cassette Tests"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.vcr-tests != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test_vcr.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run extended test suites that require additional dependencies
|
||||
extended-tests:
|
||||
name: "Extended Tests"
|
||||
@@ -155,7 +141,7 @@ jobs:
|
||||
run:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -185,20 +171,72 @@ jobs:
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
|
||||
# Verify _release.yml dropdown options stay in sync with package directories
|
||||
check-release-options:
|
||||
name: "Validate Release Options"
|
||||
# Run codspeed benchmarks only on packages that have changed files
|
||||
codspeed:
|
||||
name: "⚡ CodSpeed Benchmarks"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.codspeed != '[]' && !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "📦 Install UV Package Manager"
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: "📦 Install Dependencies"
|
||||
run: python -m pip install pyyaml pytest
|
||||
- name: "🔍 Check release dropdown matches packages"
|
||||
run: python -m pytest .github/scripts/test_release_options.py -v
|
||||
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
|
||||
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
|
||||
python-version: "3.13.11"
|
||||
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
|
||||
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
|
||||
python-version: "3.13.11"
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
run: uv sync --group test
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
|
||||
- name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
|
||||
uses: CodSpeedHQ/action@v4
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
|
||||
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
|
||||
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
|
||||
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
|
||||
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
|
||||
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
|
||||
OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
with:
|
||||
token: ${{ secrets.CODSPEED_TOKEN }}
|
||||
run: |
|
||||
cd ${{ matrix.job-configs.working-directory }}
|
||||
if [ "${{ matrix.job-configs.working-directory }}" = "libs/core" ]; then
|
||||
uv run --no-sync pytest ./tests/benchmarks --codspeed
|
||||
else
|
||||
uv run --no-sync pytest ./tests/ --codspeed
|
||||
fi
|
||||
mode: ${{ matrix.job-configs.working-directory == 'libs/core' && 'walltime' || 'instrumentation' }}
|
||||
|
||||
# Final status check - ensures all required jobs passed before allowing merge
|
||||
ci_success:
|
||||
@@ -209,10 +247,9 @@ jobs:
|
||||
lint,
|
||||
test,
|
||||
compile-integration-tests,
|
||||
vcr-tests,
|
||||
extended-tests,
|
||||
test-pydantic,
|
||||
check-release-options,
|
||||
codspeed,
|
||||
]
|
||||
if: |
|
||||
always()
|
||||
|
||||
196
.github/workflows/close_unchecked_issues.yml
vendored
196
.github/workflows/close_unchecked_issues.yml
vendored
@@ -1,196 +0,0 @@
|
||||
# Auto-close issues that bypass or ignore the issue template checkboxes.
|
||||
#
|
||||
# GitHub issue forms enforce `required: true` checkboxes in the web UI,
|
||||
# but the API bypasses form validation entirely — bots/scripts can open
|
||||
# issues with every box unchecked or skip the template altogether.
|
||||
#
|
||||
# Rules:
|
||||
# 0. No issue type -> close unless author is an org member
|
||||
# 1. No checkboxes at all -> close unless author is an org member or bot
|
||||
# 2. Checkboxes present but none checked -> close
|
||||
# 3. "Submission checklist" section incomplete -> close
|
||||
# 4. "Package (Required)" section has no selection -> close
|
||||
#
|
||||
# Org membership check reuses the shared helper from pr-labeler.js and
|
||||
# the same GitHub App used by tag-external-issues.yml.
|
||||
|
||||
name: Close Unchecked Issues
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.issue.number }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-boxes:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Validate issue checkboxes
|
||||
if: steps.app-token.outcome == 'success'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issue_number = context.payload.issue.number;
|
||||
const body = context.payload.issue.body ?? '';
|
||||
const allChecked = (body.match(/- \[x\]/gi) || []).length;
|
||||
const allUnchecked = (body.match(/- \[ \]/g) || []).length;
|
||||
const total = allChecked + allUnchecked;
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────
|
||||
// Extract checkboxes under a markdown H2/H3 heading.
|
||||
// Returns { checked, unchecked } counts, or null if the
|
||||
// section heading is not found in the body.
|
||||
function parseSection(heading) {
|
||||
const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
// Find the heading line
|
||||
const headingRe = new RegExp(`^#{2,3}\\s+${escaped}\\s*$`, 'm');
|
||||
const headingMatch = headingRe.exec(body);
|
||||
if (!headingMatch) return null;
|
||||
// Slice from after the heading to the next heading or end
|
||||
const rest = body.slice(headingMatch.index + headingMatch[0].length);
|
||||
const nextHeading = rest.search(/\n#{2,3}\s/);
|
||||
const block = nextHeading === -1 ? rest : rest.slice(0, nextHeading);
|
||||
return {
|
||||
checked: (block.match(/- \[x\]/gi) || []).length,
|
||||
unchecked: (block.match(/- \[ \]/g) || []).length,
|
||||
};
|
||||
}
|
||||
|
||||
let _cachedMember;
|
||||
async function isOrgMember() {
|
||||
if (_cachedMember) return _cachedMember;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js')
|
||||
.loadAndInit(github, owner, repo, core);
|
||||
const author = context.payload.sender.login;
|
||||
const { isExternal } = await h.checkMembership(
|
||||
author, context.payload.sender.type,
|
||||
);
|
||||
_cachedMember = { internal: !isExternal, author };
|
||||
return _cachedMember;
|
||||
}
|
||||
|
||||
async function closeWithComment(lines) {
|
||||
const templateUrl = `https://github.com/${owner}/${repo}/issues/new/choose`;
|
||||
lines.push(
|
||||
'',
|
||||
`Please use one of the [issue templates](${templateUrl}).`,
|
||||
);
|
||||
|
||||
// Post comment first so the author sees the reason even if
|
||||
// the subsequent close call fails.
|
||||
await github.rest.issues.createComment({
|
||||
owner, repo, issue_number,
|
||||
body: lines.join('\n'),
|
||||
});
|
||||
|
||||
await github.rest.issues.update({
|
||||
owner, repo, issue_number,
|
||||
state: 'closed',
|
||||
state_reason: 'not_planned',
|
||||
});
|
||||
}
|
||||
|
||||
// ── Rule 0: no issue type (API/CLI bypass) ──────────────────
|
||||
// Issue types are set automatically when using web UI templates.
|
||||
// External users cannot set issue types via the API (requires
|
||||
// write/triage permissions), so a missing type reliably indicates
|
||||
// programmatic submission.
|
||||
if (!context.payload.issue.type) {
|
||||
let membership;
|
||||
try {
|
||||
membership = await isOrgMember();
|
||||
} catch (e) {
|
||||
// Org membership check failed — skip Rule 0 and let
|
||||
// Rules 1-4 handle validation via checkboxes.
|
||||
core.warning(`Rule 0: org membership check failed, skipping: ${e.message}`);
|
||||
}
|
||||
if (membership?.internal) {
|
||||
console.log(`No issue type, but ${membership.author} is internal — OK`);
|
||||
} else if (membership) {
|
||||
console.log(`No issue type and ${membership.author} is external — closing`);
|
||||
await closeWithComment([
|
||||
'This issue was automatically closed because it appears to have been submitted programmatically — issue types are automatically set when using the GitHub web interface, and this issue has none.',
|
||||
'',
|
||||
'We do not allow automated issue submission at this time.',
|
||||
]);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Rule 1: no checkboxes at all ────────────────────────────
|
||||
if (total === 0) {
|
||||
const { internal, author } = await isOrgMember();
|
||||
if (internal) {
|
||||
console.log(`No checkboxes, but ${author} is internal — OK`);
|
||||
return;
|
||||
}
|
||||
console.log(`No checkboxes and ${author} is external — closing`);
|
||||
await closeWithComment([
|
||||
'This issue was automatically closed because no issue template was used.',
|
||||
]);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Rule 2: checkboxes present but none checked ─────────────
|
||||
if (allChecked === 0) {
|
||||
console.log(`${allUnchecked} checkbox(es) present, none checked — closing`);
|
||||
await closeWithComment([
|
||||
'This issue was automatically closed because none of the required checkboxes were checked. Please re-file using an issue template and complete the checklist.',
|
||||
]);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Rules 3–4: parse sections for targeted feedback ─────────
|
||||
const checklist = parseSection('Submission checklist');
|
||||
const pkg = parseSection('Package (Required)');
|
||||
console.log(`Section parse — checklist: ${JSON.stringify(checklist)}, pkg: ${JSON.stringify(pkg)}`);
|
||||
|
||||
const problems = [];
|
||||
|
||||
if (checklist && checklist.unchecked > 0) {
|
||||
problems.push(
|
||||
'the submission checklist is incomplete — please confirm you searched for duplicates, included a reproduction, etc.'
|
||||
);
|
||||
}
|
||||
if (pkg !== null && pkg.checked === 0) {
|
||||
problems.push(
|
||||
'no package was selected (e.g. langchain-core, langchain, langgraph) — this helps us route the issue to the right team'
|
||||
);
|
||||
} else if (pkg === null) {
|
||||
problems.push(
|
||||
'the package selection is missing (e.g. langchain-core, langchain, langgraph) — this helps us route the issue to the right team'
|
||||
);
|
||||
}
|
||||
|
||||
if (problems.length === 0) {
|
||||
console.log(`All section checks passed (${allChecked} checked) — OK`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Closing — problems: ${problems.join('; ')}`);
|
||||
await closeWithComment([
|
||||
'Thanks for opening an issue! It was automatically closed because:',
|
||||
'',
|
||||
...problems.map(p => `- ${p}`),
|
||||
]);
|
||||
81
.github/workflows/codspeed.yml
vendored
81
.github/workflows/codspeed.yml
vendored
@@ -1,81 +0,0 @@
|
||||
# CodSpeed performance benchmarks.
|
||||
#
|
||||
# Runs benchmarks on changed packages and uploads results to CodSpeed.
|
||||
# Separated from the main CI workflow so that push-to-master baseline runs
|
||||
# are never cancelled by subsequent merges (cancel-in-progress is only
|
||||
# enabled for pull_request events).
|
||||
|
||||
name: "⚡ CodSpeed"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
|
||||
# On PRs, cancel stale runs when new commits are pushed.
|
||||
# On push-to-master, never cancel — these runs populate CodSpeed baselines.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event_name == 'push' && github.sha || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Detect Changes"
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: "📂 Get Changed Files"
|
||||
id: files
|
||||
uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
|
||||
- name: "🔍 Analyze Changed Files"
|
||||
id: set-matrix
|
||||
run: |
|
||||
python -m pip install packaging requests
|
||||
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
|
||||
outputs:
|
||||
codspeed: ${{ steps.set-matrix.outputs.codspeed }}
|
||||
|
||||
benchmarks:
|
||||
name: "⚡ CodSpeed Benchmarks"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.codspeed != '[]' }}
|
||||
runs-on: codspeed-macro
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "📦 Install UV Package Manager"
|
||||
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
|
||||
with:
|
||||
# Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
|
||||
# See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
|
||||
python-version: "3.13.11"
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
run: uv sync --group test
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
|
||||
- name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
|
||||
uses: CodSpeedHQ/action@a50965600eafa04edcd6717761f55b77e52aafbd # v4
|
||||
with:
|
||||
token: ${{ secrets.CODSPEED_TOKEN }}
|
||||
run: |
|
||||
cd ${{ matrix.job-configs.working-directory }}
|
||||
uv run --no-sync pytest ./tests/benchmarks --codspeed
|
||||
mode: ${{ matrix.job-configs.codspeed-mode }}
|
||||
78
.github/workflows/integration_tests.yml
vendored
78
.github/workflows/integration_tests.yml
vendored
@@ -5,44 +5,17 @@
|
||||
# Runs daily with the option to trigger manually.
|
||||
|
||||
name: "⏰ Integration Tests"
|
||||
run-name: "Run Integration Tests - ${{ inputs.working-directory-override || (inputs.working-directory != 'all' && inputs.working-directory) || 'all libs' }} (Python ${{ inputs.python-version-override || '3.10, 3.13' }})"
|
||||
run-name: "Run Integration Tests - ${{ inputs.working-directory-force || 'all libs' }} (Python ${{ inputs.python-version-force || '3.10, 3.13' }})"
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
working-directory:
|
||||
type: choice
|
||||
description: "Library to test (select from dropdown)"
|
||||
default: "all"
|
||||
options:
|
||||
- "all"
|
||||
- "libs/core"
|
||||
- "libs/langchain"
|
||||
- "libs/langchain_v1"
|
||||
- "libs/text-splitters"
|
||||
- "libs/standard-tests"
|
||||
- "libs/model-profiles"
|
||||
- "libs/partners/anthropic"
|
||||
- "libs/partners/chroma"
|
||||
- "libs/partners/deepseek"
|
||||
- "libs/partners/exa"
|
||||
- "libs/partners/fireworks"
|
||||
- "libs/partners/groq"
|
||||
- "libs/partners/huggingface"
|
||||
- "libs/partners/mistralai"
|
||||
- "libs/partners/nomic"
|
||||
- "libs/partners/ollama"
|
||||
- "libs/partners/openai"
|
||||
- "libs/partners/openrouter"
|
||||
- "libs/partners/perplexity"
|
||||
- "libs/partners/qdrant"
|
||||
- "libs/partners/xai"
|
||||
working-directory-override:
|
||||
working-directory-force:
|
||||
type: string
|
||||
description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
|
||||
python-version-override:
|
||||
description: "From which folder this pipeline executes - defaults to all in matrix - example value: libs/partners/anthropic"
|
||||
python-version-force:
|
||||
type: string
|
||||
description: "Python version override — defaults to 3.10 and 3.13 in matrix (e.g. 3.11)"
|
||||
description: "Python version to use - defaults to 3.10 and 3.13 in matrix - example value: 3.11"
|
||||
schedule:
|
||||
- cron: "0 13 * * *" # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
|
||||
|
||||
@@ -79,32 +52,29 @@ jobs:
|
||||
id: set-matrix
|
||||
env:
|
||||
DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
|
||||
WORKING_DIRECTORY_OVERRIDE: ${{ github.event.inputs.working-directory-override || '' }}
|
||||
WORKING_DIRECTORY_CHOICE: ${{ github.event.inputs.working-directory || 'all' }}
|
||||
PYTHON_VERSION_OVERRIDE: ${{ github.event.inputs.python-version-override || '' }}
|
||||
WORKING_DIRECTORY_FORCE: ${{ github.event.inputs.working-directory-force || '' }}
|
||||
PYTHON_VERSION_FORCE: ${{ github.event.inputs.python-version-force || '' }}
|
||||
run: |
|
||||
# echo "matrix=..." where matrix is a json formatted str with keys python-version and working-directory
|
||||
# python-version defaults to 3.10 and 3.13, overridden to [PYTHON_VERSION_OVERRIDE] if set
|
||||
# working-directory priority: override string > dropdown choice > DEFAULT_LIBS
|
||||
# python-version should default to 3.10 and 3.13, but is overridden to [PYTHON_VERSION_FORCE] if set
|
||||
# working-directory should default to DEFAULT_LIBS, but is overridden to [WORKING_DIRECTORY_FORCE] if set
|
||||
python_version='["3.10", "3.13"]'
|
||||
python_version_min_3_11='["3.11", "3.13"]'
|
||||
working_directory="$DEFAULT_LIBS"
|
||||
if [ -n "$PYTHON_VERSION_OVERRIDE" ]; then
|
||||
python_version="[\"$PYTHON_VERSION_OVERRIDE\"]"
|
||||
# Bound override version to >= 3.11 for packages requiring it
|
||||
if [ "$(echo "$PYTHON_VERSION_OVERRIDE >= 3.11" | bc -l)" -eq 1 ]; then
|
||||
python_version_min_3_11="[\"$PYTHON_VERSION_OVERRIDE\"]"
|
||||
if [ -n "$PYTHON_VERSION_FORCE" ]; then
|
||||
python_version="[\"$PYTHON_VERSION_FORCE\"]"
|
||||
# Bound forced version to >= 3.11 for packages requiring it
|
||||
if [ "$(echo "$PYTHON_VERSION_FORCE >= 3.11" | bc -l)" -eq 1 ]; then
|
||||
python_version_min_3_11="[\"$PYTHON_VERSION_FORCE\"]"
|
||||
else
|
||||
python_version_min_3_11='["3.11"]'
|
||||
fi
|
||||
fi
|
||||
if [ -n "$WORKING_DIRECTORY_OVERRIDE" ]; then
|
||||
working_directory="[\"$WORKING_DIRECTORY_OVERRIDE\"]"
|
||||
elif [ "$WORKING_DIRECTORY_CHOICE" != "all" ]; then
|
||||
working_directory="[\"$WORKING_DIRECTORY_CHOICE\"]"
|
||||
if [ -n "$WORKING_DIRECTORY_FORCE" ]; then
|
||||
working_directory="[\"$WORKING_DIRECTORY_FORCE\"]"
|
||||
fi
|
||||
matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
|
||||
echo "$matrix"
|
||||
echo $matrix
|
||||
echo "matrix=$matrix" >> $GITHUB_OUTPUT
|
||||
echo "python-version-min-3-11=$python_version_min_3_11" >> $GITHUB_OUTPUT
|
||||
|
||||
@@ -122,26 +92,26 @@ jobs:
|
||||
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: langchain
|
||||
|
||||
# These libraries exist outside of the monorepo and need to be checked out separately
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-google
|
||||
path: langchain-google
|
||||
- name: "🔐 Authenticate to Google Cloud"
|
||||
id: "auth"
|
||||
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
|
||||
uses: google-github-actions/auth@v3
|
||||
with:
|
||||
credentials_json: "${{ secrets.GOOGLE_CREDENTIALS }}"
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-aws
|
||||
path: langchain-aws
|
||||
- name: "🔐 Configure AWS Credentials"
|
||||
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
@@ -264,11 +234,11 @@ jobs:
|
||||
path: libs/deepagents
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: ${{ matrix.package.repo }}
|
||||
path: ${{ matrix.package.name }}
|
||||
|
||||
213
.github/workflows/pr_labeler.yml
vendored
213
.github/workflows/pr_labeler.yml
vendored
@@ -1,213 +0,0 @@
|
||||
# Unified PR labeler — applies size, file-based, title-based, and
|
||||
# contributor classification labels in a single sequential workflow.
|
||||
#
|
||||
# Consolidates pr_labeler_file.yml, pr_labeler_title.yml,
|
||||
# pr_size_labeler.yml, and PR-handling from tag-external-contributions.yml
|
||||
# into one workflow to eliminate race conditions from concurrent label
|
||||
# mutations. tag-external-issues.yml remains active for issue-only
|
||||
# labeling. Backfill lives in pr_labeler_backfill.yml.
|
||||
#
|
||||
# Config and shared logic live in .github/scripts/pr-labeler-config.json
|
||||
# and .github/scripts/pr-labeler.js — update those when adding partners.
|
||||
#
|
||||
# Setup Requirements:
|
||||
# 1. Create a GitHub App with permissions:
|
||||
# - Repository: Pull requests (write)
|
||||
# - Repository: Issues (write)
|
||||
# - Organization: Members (read)
|
||||
# 2. Install the app on your organization and this repository
|
||||
# 3. Add these repository secrets:
|
||||
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
|
||||
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
|
||||
#
|
||||
# The GitHub App token is required to check private organization membership
|
||||
# and to propagate label events to downstream workflows.
|
||||
|
||||
name: "🏷️ PR Labeler"
|
||||
|
||||
on:
|
||||
# Safe since we're not checking out or running the PR's code.
|
||||
# NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
|
||||
# Doing so would allow attackers to execute arbitrary code in the context of your repository.
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened, edited]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
# Separate opened events so external/tier labels are never lost to cancellation
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'opened' && 'opened' || 'update' }}
|
||||
cancel-in-progress: ${{ github.event.action != 'opened' }}
|
||||
|
||||
jobs:
|
||||
label:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
# Checks out the BASE branch (safe for pull_request_target — never
|
||||
# the PR head). Needed to load .github/scripts/pr-labeler*.
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
if: github.event.action == 'opened'
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Verify App token
|
||||
if: github.event.action == 'opened'
|
||||
run: |
|
||||
if [ -z "${{ steps.app-token.outputs.token }}" ]; then
|
||||
echo "::error::GitHub App token generation failed — cannot classify contributor"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Check org membership
|
||||
if: github.event.action == 'opened'
|
||||
id: check-membership
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const author = context.payload.sender.login;
|
||||
const { isExternal } = await h.checkMembership(
|
||||
author, context.payload.sender.type,
|
||||
);
|
||||
core.setOutput('is-external', isExternal ? 'true' : 'false');
|
||||
|
||||
- name: Apply PR labels
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
env:
|
||||
IS_EXTERNAL: ${{ steps.check-membership.outputs.is-external }}
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const pr = context.payload.pull_request;
|
||||
if (!pr) return;
|
||||
const prNumber = pr.number;
|
||||
const action = context.payload.action;
|
||||
|
||||
const toAdd = new Set();
|
||||
const toRemove = new Set();
|
||||
|
||||
const currentLabels = (await github.paginate(
|
||||
github.rest.issues.listLabelsOnIssue,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
)).map(l => l.name ?? '');
|
||||
|
||||
// ── Size + file labels (skip on 'edited' — files unchanged) ──
|
||||
if (action !== 'edited') {
|
||||
for (const sl of h.sizeLabels) await h.ensureLabel(sl);
|
||||
|
||||
const files = await github.paginate(github.rest.pulls.listFiles, {
|
||||
owner, repo, pull_number: prNumber, per_page: 100,
|
||||
});
|
||||
|
||||
const { totalChanged, sizeLabel } = h.computeSize(files);
|
||||
toAdd.add(sizeLabel);
|
||||
for (const sl of h.sizeLabels) {
|
||||
if (currentLabels.includes(sl) && sl !== sizeLabel) toRemove.add(sl);
|
||||
}
|
||||
console.log(`Size: ${totalChanged} changed lines → ${sizeLabel}`);
|
||||
|
||||
for (const label of h.matchFileLabels(files)) {
|
||||
toAdd.add(label);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Title-based labels ──
|
||||
const { labels: titleLabels, typeLabel } = h.matchTitleLabels(pr.title || '');
|
||||
for (const label of titleLabels) toAdd.add(label);
|
||||
|
||||
// Remove stale type labels only when a type was detected
|
||||
if (typeLabel) {
|
||||
for (const tl of h.allTypeLabels) {
|
||||
if (currentLabels.includes(tl) && !titleLabels.has(tl)) toRemove.add(tl);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Internal label (only on open, non-external contributors) ──
|
||||
// IS_EXTERNAL is empty string on non-opened events (step didn't
|
||||
// run), so this guard is only true for opened + internal.
|
||||
if (action === 'opened' && process.env.IS_EXTERNAL === 'false') {
|
||||
toAdd.add('internal');
|
||||
}
|
||||
|
||||
// ── Apply changes ──
|
||||
// Ensure all labels we're about to add exist (addLabels returns
|
||||
// 422 if any label in the batch is missing, which would prevent
|
||||
// ALL labels from being applied).
|
||||
for (const name of toAdd) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
for (const name of toRemove) {
|
||||
if (toAdd.has(name)) continue;
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: prNumber, name,
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
}
|
||||
|
||||
const addList = [...toAdd];
|
||||
if (addList.length > 0) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: prNumber, labels: addList,
|
||||
});
|
||||
}
|
||||
|
||||
const removed = [...toRemove].filter(r => !toAdd.has(r));
|
||||
console.log(`PR #${prNumber}: +[${addList.join(', ')}] -[${removed.join(', ')}]`);
|
||||
|
||||
# Apply tier label BEFORE the external label so that
|
||||
# "trusted-contributor" is already present when the "external" labeled
|
||||
# event fires and triggers require_issue_link.yml.
|
||||
- name: Apply contributor tier label
|
||||
if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const pr = context.payload.pull_request;
|
||||
await h.applyTierLabel(pr.number, pr.user.login);
|
||||
|
||||
- name: Add external label
|
||||
if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
# Use App token so the "labeled" event propagates to downstream
|
||||
# workflows (e.g. require_issue_link.yml). Events created by the
|
||||
# default GITHUB_TOKEN do not trigger additional workflow runs.
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
await h.ensureLabel('external');
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo,
|
||||
issue_number: prNumber,
|
||||
labels: ['external'],
|
||||
});
|
||||
console.log(`Added 'external' label to PR #${prNumber}`);
|
||||
130
.github/workflows/pr_labeler_backfill.yml
vendored
130
.github/workflows/pr_labeler_backfill.yml
vendored
@@ -1,130 +0,0 @@
|
||||
# Backfill PR labels on all open PRs.
|
||||
#
|
||||
# Manual-only workflow that applies the same labels as pr_labeler.yml
|
||||
# (size, file, title, contributor classification) to existing open PRs.
|
||||
# Reuses shared logic from .github/scripts/pr-labeler.js.
|
||||
|
||||
name: "🏷️ PR Labeler Backfill"
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
max_items:
|
||||
description: "Maximum number of open PRs to process"
|
||||
default: "100"
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
backfill:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Backfill labels on open PRs
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const rawMax = '${{ inputs.max_items }}';
|
||||
const maxItems = parseInt(rawMax, 10);
|
||||
if (isNaN(maxItems) || maxItems <= 0) {
|
||||
core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
|
||||
return;
|
||||
}
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
for (const name of [...h.sizeLabels, ...h.tierLabels]) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
const contributorCache = new Map();
|
||||
const fileRules = h.buildFileRules();
|
||||
|
||||
const prs = await github.paginate(github.rest.pulls.list, {
|
||||
owner, repo, state: 'open', per_page: 100,
|
||||
});
|
||||
|
||||
let processed = 0;
|
||||
let failures = 0;
|
||||
for (const pr of prs) {
|
||||
if (processed >= maxItems) break;
|
||||
try {
|
||||
const author = pr.user.login;
|
||||
const info = await h.getContributorInfo(contributorCache, author, pr.user.type);
|
||||
const labels = new Set();
|
||||
|
||||
labels.add(info.isExternal ? 'external' : 'internal');
|
||||
if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
|
||||
labels.add('trusted-contributor');
|
||||
} else if (info.isExternal && info.mergedCount === 0) {
|
||||
labels.add('new-contributor');
|
||||
}
|
||||
|
||||
// Size + file labels
|
||||
const files = await github.paginate(github.rest.pulls.listFiles, {
|
||||
owner, repo, pull_number: pr.number, per_page: 100,
|
||||
});
|
||||
const { sizeLabel } = h.computeSize(files);
|
||||
labels.add(sizeLabel);
|
||||
|
||||
for (const label of h.matchFileLabels(files, fileRules)) {
|
||||
labels.add(label);
|
||||
}
|
||||
|
||||
// Title labels
|
||||
const { labels: titleLabels } = h.matchTitleLabels(pr.title ?? '');
|
||||
for (const tl of titleLabels) labels.add(tl);
|
||||
|
||||
// Ensure all labels exist before batch add
|
||||
for (const name of labels) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
// Remove stale managed labels
|
||||
const currentLabels = (await github.paginate(
|
||||
github.rest.issues.listLabelsOnIssue,
|
||||
{ owner, repo, issue_number: pr.number, per_page: 100 },
|
||||
)).map(l => l.name ?? '');
|
||||
|
||||
const managed = [...h.sizeLabels, ...h.tierLabels, ...h.allTypeLabels];
|
||||
for (const name of currentLabels) {
|
||||
if (managed.includes(name) && !labels.has(name)) {
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: pr.number, name,
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: pr.number, labels: [...labels],
|
||||
});
|
||||
console.log(`PR #${pr.number} (${author}): ${[...labels].join(', ')}`);
|
||||
processed++;
|
||||
} catch (e) {
|
||||
failures++;
|
||||
core.warning(`Failed to process PR #${pr.number}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nBackfill complete. Processed ${processed} PRs, ${failures} failures. ${contributorCache.size} unique authors.`);
|
||||
28
.github/workflows/pr_labeler_file.yml
vendored
Normal file
28
.github/workflows/pr_labeler_file.yml
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
# Label PRs based on changed files.
|
||||
#
|
||||
# See `.github/pr-file-labeler.yml` to see rules for each label/directory.
|
||||
|
||||
name: "🏷️ Pull Request Labeler"
|
||||
|
||||
on:
|
||||
# Safe since we're not checking out or running the PR's code
|
||||
# Never check out the PR's head in a pull_request_target job
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
name: "label"
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Label Pull Request
|
||||
uses: actions/labeler@v6
|
||||
with:
|
||||
repo-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
configuration-path: .github/pr-file-labeler.yml
|
||||
sync-labels: false
|
||||
44
.github/workflows/pr_labeler_title.yml
vendored
Normal file
44
.github/workflows/pr_labeler_title.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
# Label PRs based on their titles.
|
||||
#
|
||||
# Uses conventional commit types from PR titles to apply labels.
|
||||
# Note: Scope-based labeling (e.g., integration labels) is handled by pr_labeler_file.yml
|
||||
|
||||
name: "🏷️ PR Title Labeler"
|
||||
|
||||
on:
|
||||
# Safe since we're not checking out or running the PR's code
|
||||
# Never check out the PR's head in a pull_request_target job
|
||||
pull_request_target:
|
||||
types: [opened, edited]
|
||||
|
||||
jobs:
|
||||
pr-title-labeler:
|
||||
name: "label"
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Label PR based on title
|
||||
uses: bcoe/conventional-release-labels@v1
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
type_labels: >-
|
||||
{
|
||||
"feat": "feature",
|
||||
"fix": "fix",
|
||||
"docs": "documentation",
|
||||
"style": "linting",
|
||||
"refactor": "refactor",
|
||||
"perf": "performance",
|
||||
"test": "tests",
|
||||
"build": "infra",
|
||||
"ci": "infra",
|
||||
"chore": "infra",
|
||||
"revert": "revert",
|
||||
"release": "release",
|
||||
"breaking": "breaking"
|
||||
}
|
||||
ignored_types: '[]'
|
||||
16
.github/workflows/pr_lint.yml
vendored
16
.github/workflows/pr_lint.yml
vendored
@@ -25,13 +25,12 @@
|
||||
# * chore — other changes that don't modify source or test files
|
||||
# * revert — reverts a previous commit
|
||||
# * release — prepare a new release
|
||||
# * hotfix — urgent fix
|
||||
#
|
||||
# Allowed Scope(s) (optional):
|
||||
# core, langchain, langchain-classic, model-profiles,
|
||||
# standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
|
||||
# fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
|
||||
# perplexity, qdrant, xai, infra, deps, partners
|
||||
# perplexity, qdrant, xai, infra, deps
|
||||
#
|
||||
# Multiple scopes can be used by separating them with a comma. For example:
|
||||
#
|
||||
@@ -66,17 +65,8 @@ jobs:
|
||||
name: "validate format"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "🚫 Reject empty scope"
|
||||
env:
|
||||
PR_TITLE: ${{ github.event.pull_request.title }}
|
||||
run: |
|
||||
if [[ "$PR_TITLE" =~ ^[a-z]+\(\)[!]?: ]]; then
|
||||
echo "::error::PR title has empty scope parentheses: '$PR_TITLE'"
|
||||
echo "Either remove the parentheses or provide a scope (e.g., 'fix(core): ...')."
|
||||
exit 1
|
||||
fi
|
||||
- name: "✅ Validate Conventional Commits Format"
|
||||
uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6
|
||||
uses: amannn/action-semantic-pull-request@v6
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
@@ -93,7 +83,6 @@ jobs:
|
||||
chore
|
||||
revert
|
||||
release
|
||||
hotfix
|
||||
scopes: |
|
||||
core
|
||||
langchain
|
||||
@@ -119,7 +108,6 @@ jobs:
|
||||
xai
|
||||
infra
|
||||
deps
|
||||
partners
|
||||
requireScope: false
|
||||
disallowScopes: |
|
||||
release
|
||||
|
||||
96
.github/workflows/refresh_model_profiles.yml
vendored
96
.github/workflows/refresh_model_profiles.yml
vendored
@@ -18,28 +18,76 @@ permissions:
|
||||
|
||||
jobs:
|
||||
refresh-profiles:
|
||||
uses: ./.github/workflows/_refresh_model_profiles.yml
|
||||
with:
|
||||
providers: >-
|
||||
[
|
||||
{"provider":"anthropic", "data_dir":"libs/partners/anthropic/langchain_anthropic/data"},
|
||||
{"provider":"deepseek", "data_dir":"libs/partners/deepseek/langchain_deepseek/data"},
|
||||
{"provider":"fireworks-ai", "data_dir":"libs/partners/fireworks/langchain_fireworks/data"},
|
||||
{"provider":"groq", "data_dir":"libs/partners/groq/langchain_groq/data"},
|
||||
{"provider":"huggingface", "data_dir":"libs/partners/huggingface/langchain_huggingface/data"},
|
||||
{"provider":"mistral", "data_dir":"libs/partners/mistralai/langchain_mistralai/data"},
|
||||
{"provider":"openai", "data_dir":"libs/partners/openai/langchain_openai/data"},
|
||||
{"provider":"openrouter", "data_dir":"libs/partners/openrouter/langchain_openrouter/data"},
|
||||
{"provider":"perplexity", "data_dir":"libs/partners/perplexity/langchain_perplexity/data"},
|
||||
{"provider":"xai", "data_dir":"libs/partners/xai/langchain_xai/data"}
|
||||
]
|
||||
cli-path: libs/model-profiles
|
||||
add-paths: libs/partners/**/data/_profiles.py
|
||||
pr-body: |
|
||||
Automated refresh of model profile data for all in-monorepo partner
|
||||
integrations via `langchain-profiles refresh`.
|
||||
name: "refresh all partner profiles"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "📋 Checkout"
|
||||
uses: actions/checkout@v6
|
||||
|
||||
🤖 Generated by the `refresh_model_profiles` workflow.
|
||||
secrets:
|
||||
MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
- name: "🐍 Set up Python + uv"
|
||||
uses: ./.github/actions/uv_setup
|
||||
with:
|
||||
python-version: "3.12"
|
||||
working-directory: libs/model-profiles
|
||||
|
||||
- name: "📦 Install langchain-profiles CLI"
|
||||
working-directory: libs/model-profiles
|
||||
run: uv sync
|
||||
|
||||
- name: "🔄 Refresh profiles"
|
||||
working-directory: libs/model-profiles
|
||||
run: |
|
||||
declare -A PROVIDERS=(
|
||||
[anthropic]=anthropic
|
||||
[deepseek]=deepseek
|
||||
[fireworks]=fireworks-ai
|
||||
[groq]=groq
|
||||
[huggingface]=huggingface
|
||||
[mistralai]=mistral
|
||||
[openai]=openai
|
||||
[openrouter]=openrouter
|
||||
[perplexity]=perplexity
|
||||
[xai]=xai
|
||||
)
|
||||
|
||||
for partner in "${!PROVIDERS[@]}"; do
|
||||
provider="${PROVIDERS[$partner]}"
|
||||
data_dir="../../libs/partners/${partner}/langchain_${partner//-/_}/data"
|
||||
echo "--- Refreshing ${partner} (provider: ${provider}) ---"
|
||||
echo y | uv run langchain-profiles refresh \
|
||||
--provider "$provider" \
|
||||
--data-dir "$data_dir"
|
||||
done
|
||||
|
||||
- name: "🔑 Generate GitHub App token"
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@v2
|
||||
with:
|
||||
app-id: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
|
||||
- name: "🔀 Create pull request"
|
||||
id: create-pr
|
||||
uses: peter-evans/create-pull-request@v8
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
branch: bot/refresh-model-profiles
|
||||
commit-message: "chore(model-profiles): refresh model profile data"
|
||||
title: "chore(model-profiles): refresh model profile data"
|
||||
body: |
|
||||
Automated refresh of model profile data for all in-monorepo partner
|
||||
integrations via `langchain-profiles refresh`.
|
||||
|
||||
🤖 Generated by the `refresh_model_profiles` workflow.
|
||||
labels: bot
|
||||
add-paths: libs/partners/**/data/_profiles.py
|
||||
|
||||
- name: "📝 Summary"
|
||||
run: |
|
||||
op="${{ steps.create-pr.outputs.pull-request-operation }}"
|
||||
url="${{ steps.create-pr.outputs.pull-request-url }}"
|
||||
if [ "$op" = "created" ] || [ "$op" = "updated" ]; then
|
||||
echo "### ✅ PR ${op}: ${url}" >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
195
.github/workflows/reopen_on_assignment.yml
vendored
195
.github/workflows/reopen_on_assignment.yml
vendored
@@ -1,195 +0,0 @@
|
||||
# Reopen PRs that were auto-closed by require_issue_link.yml when the
|
||||
# contributor was not assigned to the linked issue. When a maintainer
|
||||
# assigns the contributor to the issue, this workflow finds matching
|
||||
# closed PRs, verifies the issue link, and reopens them.
|
||||
#
|
||||
# Uses the default GITHUB_TOKEN (not a PAT or app token) so that the
|
||||
# reopen and label-removal events do NOT re-trigger other workflows.
|
||||
# GitHub suppresses events created by the default GITHUB_TOKEN within
|
||||
# workflow runs to prevent infinite loops.
|
||||
|
||||
name: Reopen PR on Issue Assignment
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [assigned]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
reopen-linked-prs:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Find and reopen matching PRs
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issueNumber = context.payload.issue.number;
|
||||
const assignee = context.payload.assignee.login;
|
||||
|
||||
console.log(
|
||||
`Issue #${issueNumber} assigned to ${assignee} — searching for closed PRs to reopen`,
|
||||
);
|
||||
|
||||
const q = [
|
||||
`is:pr`,
|
||||
`is:closed`,
|
||||
`author:${assignee}`,
|
||||
`label:missing-issue-link`,
|
||||
`repo:${owner}/${repo}`,
|
||||
].join(' ');
|
||||
|
||||
let data;
|
||||
try {
|
||||
({ data } = await github.rest.search.issuesAndPullRequests({
|
||||
q,
|
||||
per_page: 30,
|
||||
}));
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to search for closed PRs to reopen after assigning ${assignee} ` +
|
||||
`to #${issueNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`,
|
||||
);
|
||||
}
|
||||
|
||||
if (data.total_count === 0) {
|
||||
console.log('No matching closed PRs found');
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Found ${data.total_count} candidate PR(s)`);
|
||||
|
||||
// Must stay in sync with the identical pattern in require_issue_link.yml
|
||||
const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;
|
||||
|
||||
for (const item of data.items) {
|
||||
const prNumber = item.number;
|
||||
const body = item.body || '';
|
||||
const matches = [...body.matchAll(pattern)];
|
||||
const referencedIssues = matches.map(m => parseInt(m[1], 10));
|
||||
|
||||
if (!referencedIssues.includes(issueNumber)) {
|
||||
console.log(`PR #${prNumber} does not reference #${issueNumber} — skipping`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip if already bypassed
|
||||
const labels = item.labels.map(l => l.name);
|
||||
if (labels.includes('bypass-issue-check')) {
|
||||
console.log(`PR #${prNumber} already has bypass-issue-check — skipping`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Reopen first, remove label second — a closed PR that still has
|
||||
// missing-issue-link is recoverable; a closed PR with the label
|
||||
// stripped is invisible to both workflows.
|
||||
try {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: 'open',
|
||||
});
|
||||
console.log(`Reopened PR #${prNumber}`);
|
||||
} catch (e) {
|
||||
if (e.status === 422) {
|
||||
// Head branch deleted — PR is unrecoverable. Notify the
|
||||
// contributor so they know to open a new PR.
|
||||
core.warning(`Cannot reopen PR #${prNumber}: head branch was likely deleted`);
|
||||
try {
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
body:
|
||||
`You have been assigned to #${issueNumber}, but this PR could not be ` +
|
||||
`reopened because the head branch has been deleted. Please open a new ` +
|
||||
`PR referencing the issue.`,
|
||||
});
|
||||
} catch (commentErr) {
|
||||
core.warning(
|
||||
`Also failed to post comment on PR #${prNumber}: ${commentErr.message}`,
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Transient errors (rate limit, 5xx) should fail the job so
|
||||
// the label is NOT removed and the run can be retried.
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Remove missing-issue-link label only after successful reopen
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
name: 'missing-issue-link',
|
||||
});
|
||||
console.log(`Removed missing-issue-link from PR #${prNumber}`);
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
|
||||
// Minimize stale enforcement comment (best-effort;
|
||||
// sync w/ require_issue_link.yml minimize blocks)
|
||||
try {
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const stale = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (stale) {
|
||||
await github.graphql(`
|
||||
mutation($id: ID!) {
|
||||
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
|
||||
minimizedComment { isMinimized }
|
||||
}
|
||||
}
|
||||
`, { id: stale.node_id });
|
||||
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
|
||||
}
|
||||
|
||||
// Re-run the failed require_issue_link check so it picks up the
|
||||
// new assignment. The re-run uses the original event payload but
|
||||
// fetches live issue data, so the assignment check will pass.
|
||||
//
|
||||
// Limitation: we look up runs by the PR's current head SHA. If the
|
||||
// contributor pushed new commits while the PR was closed, head.sha
|
||||
// won't match the SHA of the original failed run and the query will
|
||||
// return 0 results. This is acceptable because any push after reopen
|
||||
// triggers a fresh require_issue_link run against the new SHA.
|
||||
try {
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner, repo, pull_number: prNumber,
|
||||
});
|
||||
const { data: runs } = await github.rest.actions.listWorkflowRuns({
|
||||
owner, repo,
|
||||
workflow_id: 'require_issue_link.yml',
|
||||
head_sha: pr.head.sha,
|
||||
status: 'failure',
|
||||
per_page: 1,
|
||||
});
|
||||
if (runs.workflow_runs.length > 0) {
|
||||
await github.rest.actions.reRunWorkflowFailedJobs({
|
||||
owner, repo,
|
||||
run_id: runs.workflow_runs[0].id,
|
||||
});
|
||||
console.log(`Re-ran failed require_issue_link run ${runs.workflow_runs[0].id} for PR #${prNumber}`);
|
||||
} else {
|
||||
console.log(`No failed require_issue_link runs found for PR #${prNumber} — skipping re-run`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not re-run require_issue_link check for PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`);
|
||||
}
|
||||
}
|
||||
467
.github/workflows/require_issue_link.yml
vendored
467
.github/workflows/require_issue_link.yml
vendored
@@ -1,467 +0,0 @@
|
||||
# Require external PRs to reference an approved issue (e.g. Fixes #NNN) and
|
||||
# the PR author to be assigned to that issue. On failure the PR is
|
||||
# labeled "missing-issue-link", commented on, and closed.
|
||||
#
|
||||
# Maintainer override: an org member can reopen the PR or remove
|
||||
# "missing-issue-link" — both add "bypass-issue-check" and reopen.
|
||||
#
|
||||
# Dependency: pr_labeler.yml must apply the "external" label first. This
|
||||
# workflow does NOT trigger on "opened" (new PRs have no labels yet, so the
|
||||
# gate would always skip).
|
||||
|
||||
name: Require Issue Link
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
# NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
|
||||
# Doing so would allow attackers to execute arbitrary code in the context of your repository.
|
||||
types: [edited, reopened, labeled, unlabeled]
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Enforcement gate: set to 'true' to activate the issue link requirement.
|
||||
# When 'false', the workflow still runs the check logic (useful for dry-run
|
||||
# visibility) but will NOT label, comment, close, or fail PRs.
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
env:
|
||||
ENFORCE_ISSUE_LINK: "true"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-issue-link:
|
||||
# Run when the "external" label is added, on edit/reopen if already labeled,
|
||||
# or when "missing-issue-link" is removed (triggers maintainer override check).
|
||||
# Skip entirely when the PR already carries "trusted-contributor" or
|
||||
# "bypass-issue-check".
|
||||
if: >-
|
||||
!contains(github.event.pull_request.labels.*.name, 'trusted-contributor') &&
|
||||
!contains(github.event.pull_request.labels.*.name, 'bypass-issue-check') &&
|
||||
(
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'external') ||
|
||||
(github.event.action == 'unlabeled' && github.event.label.name == 'missing-issue-link' && contains(github.event.pull_request.labels.*.name, 'external')) ||
|
||||
(github.event.action != 'labeled' && github.event.action != 'unlabeled' && contains(github.event.pull_request.labels.*.name, 'external'))
|
||||
)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Check for issue link and assignee
|
||||
id: check-link
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const action = context.payload.action;
|
||||
|
||||
// ── Helper: ensure a label exists, then add it to the PR ────────
|
||||
async function ensureAndAddLabel(labelName, color) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({ owner, repo, name: labelName });
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
try {
|
||||
await github.rest.issues.createLabel({ owner, repo, name: labelName, color });
|
||||
} catch (createErr) {
|
||||
// 422 = label was created by a concurrent run between our
|
||||
// GET and POST — safe to ignore.
|
||||
if (createErr.status !== 422) throw createErr;
|
||||
}
|
||||
}
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: prNumber, labels: [labelName],
|
||||
});
|
||||
}
|
||||
|
||||
// ── Helper: check if the user who triggered this event (reopened
|
||||
// the PR / removed the label) has write+ access on the repo ───
|
||||
// Uses the repo collaborator permission endpoint instead of the
|
||||
// org membership endpoint. The org endpoint requires the caller
|
||||
// to be an org member, which GITHUB_TOKEN (an app installation
|
||||
// token) never is — so it always returns 403.
|
||||
async function senderIsOrgMember() {
|
||||
const sender = context.payload.sender?.login;
|
||||
if (!sender) {
|
||||
throw new Error('Event has no sender — cannot check permissions');
|
||||
}
|
||||
try {
|
||||
const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner, repo, username: sender,
|
||||
});
|
||||
const perm = data.permission;
|
||||
if (['admin', 'maintain', 'write'].includes(perm)) {
|
||||
console.log(`${sender} has ${perm} permission — treating as maintainer`);
|
||||
return { isMember: true, login: sender };
|
||||
}
|
||||
console.log(`${sender} has ${perm} permission — not a maintainer`);
|
||||
return { isMember: false, login: sender };
|
||||
} catch (e) {
|
||||
if (e.status === 404) {
|
||||
console.log(`Cannot check permissions for ${sender} — treating as non-maintainer`);
|
||||
return { isMember: false, login: sender };
|
||||
}
|
||||
const status = e.status ?? 'unknown';
|
||||
throw new Error(
|
||||
`Permission check failed for ${sender} (HTTP ${status}): ${e.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helper: apply maintainer bypass (shared by both override paths) ──
|
||||
async function applyMaintainerBypass(reason) {
|
||||
console.log(reason);
|
||||
|
||||
// Remove missing-issue-link if present
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
|
||||
// Reopen before adding bypass label — a failed reopen is more
|
||||
// actionable than a closed PR with a bypass label stuck on it.
|
||||
if (context.payload.pull_request.state === 'closed') {
|
||||
try {
|
||||
await github.rest.pulls.update({
|
||||
owner, repo, pull_number: prNumber, state: 'open',
|
||||
});
|
||||
console.log(`Reopened PR #${prNumber}`);
|
||||
} catch (e) {
|
||||
// 422 if head branch deleted; 403 if permissions insufficient.
|
||||
// Bypass labels still apply — maintainer can reopen manually.
|
||||
core.warning(
|
||||
`Could not reopen PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
|
||||
`Bypass labels were applied — a maintainer may need to reopen manually.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add bypass-issue-check so future triggers skip enforcement
|
||||
await ensureAndAddLabel('bypass-issue-check', '0e8a16');
|
||||
|
||||
// Minimize stale enforcement comment (best-effort; must not
|
||||
// abort bypass — sync w/ reopen_on_assignment.yml & step below)
|
||||
try {
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const stale = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (stale) {
|
||||
await github.graphql(`
|
||||
mutation($id: ID!) {
|
||||
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
|
||||
minimizedComment { isMinimized }
|
||||
}
|
||||
}
|
||||
`, { id: stale.node_id });
|
||||
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
|
||||
}
|
||||
|
||||
core.setOutput('has-link', 'true');
|
||||
core.setOutput('is-assigned', 'true');
|
||||
}
|
||||
|
||||
// ── Maintainer override: removed "missing-issue-link" label ─────
|
||||
if (action === 'unlabeled') {
|
||||
const { isMember, login } = await senderIsOrgMember();
|
||||
if (isMember) {
|
||||
await applyMaintainerBypass(
|
||||
`Maintainer ${login} removed missing-issue-link from PR #${prNumber} — bypassing enforcement`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
// Non-member removed the label — re-add it defensively and
|
||||
// set failure outputs so downstream steps (comment, close) fire.
|
||||
// NOTE: addLabels fires a "labeled" event, but the job-level gate
|
||||
// only matches labeled events for "external", so no re-trigger.
|
||||
console.log(`Non-member ${login} removed missing-issue-link — re-adding`);
|
||||
try {
|
||||
await ensureAndAddLabel('missing-issue-link', 'b76e79');
|
||||
} catch (e) {
|
||||
core.warning(
|
||||
`Failed to re-add missing-issue-link (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
|
||||
`Downstream step will retry.`,
|
||||
);
|
||||
}
|
||||
core.setOutput('has-link', 'false');
|
||||
core.setOutput('is-assigned', 'false');
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Maintainer override: reopened PR with "missing-issue-link" ──
|
||||
const prLabels = context.payload.pull_request.labels.map(l => l.name);
|
||||
if (action === 'reopened' && prLabels.includes('missing-issue-link')) {
|
||||
const { isMember, login } = await senderIsOrgMember();
|
||||
if (isMember) {
|
||||
await applyMaintainerBypass(
|
||||
`Maintainer ${login} reopened PR #${prNumber} — bypassing enforcement`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
console.log(`Non-member ${login} reopened PR — proceeding with check`);
|
||||
}
|
||||
|
||||
// ── Fetch live labels (race guard) ──────────────────────────────
|
||||
const { data: liveLabels } = await github.rest.issues.listLabelsOnIssue({
|
||||
owner, repo, issue_number: prNumber,
|
||||
});
|
||||
const liveNames = liveLabels.map(l => l.name);
|
||||
if (liveNames.includes('trusted-contributor') || liveNames.includes('bypass-issue-check')) {
|
||||
console.log('PR has trusted-contributor or bypass-issue-check label — bypassing');
|
||||
core.setOutput('has-link', 'true');
|
||||
core.setOutput('is-assigned', 'true');
|
||||
return;
|
||||
}
|
||||
|
||||
const body = context.payload.pull_request.body || '';
|
||||
const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;
|
||||
const matches = [...body.matchAll(pattern)];
|
||||
|
||||
if (matches.length === 0) {
|
||||
console.log('No issue link found in PR body');
|
||||
core.setOutput('has-link', 'false');
|
||||
core.setOutput('is-assigned', 'false');
|
||||
return;
|
||||
}
|
||||
|
||||
const issues = matches.map(m => `#${m[1]}`).join(', ');
|
||||
console.log(`Found issue link(s): ${issues}`);
|
||||
core.setOutput('has-link', 'true');
|
||||
|
||||
// Check whether the PR author is assigned to at least one linked issue
|
||||
const prAuthor = context.payload.pull_request.user.login;
|
||||
const MAX_ISSUES = 5;
|
||||
const allIssueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
|
||||
const issueNumbers = allIssueNumbers.slice(0, MAX_ISSUES);
|
||||
if (allIssueNumbers.length > MAX_ISSUES) {
|
||||
core.warning(
|
||||
`PR references ${allIssueNumbers.length} issues — only checking the first ${MAX_ISSUES}`,
|
||||
);
|
||||
}
|
||||
|
||||
let assignedToAny = false;
|
||||
for (const num of issueNumbers) {
|
||||
try {
|
||||
const { data: issue } = await github.rest.issues.get({
|
||||
owner, repo, issue_number: num,
|
||||
});
|
||||
const assignees = issue.assignees.map(a => a.login.toLowerCase());
|
||||
if (assignees.includes(prAuthor.toLowerCase())) {
|
||||
console.log(`PR author "${prAuthor}" is assigned to #${num}`);
|
||||
assignedToAny = true;
|
||||
break;
|
||||
} else {
|
||||
console.log(`PR author "${prAuthor}" is NOT assigned to #${num} (assignees: ${assignees.join(', ') || 'none'})`);
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.status === 404) {
|
||||
console.log(`Issue #${num} not found — skipping`);
|
||||
} else {
|
||||
// Non-404 errors (rate limit, server error) must not be
|
||||
// silently skipped — they could cause false enforcement
|
||||
// (closing a legitimate PR whose assignment can't be verified).
|
||||
throw new Error(
|
||||
`Cannot verify assignee for issue #${num} (${error.status}): ${error.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
core.setOutput('is-assigned', assignedToAny ? 'true' : 'false');
|
||||
|
||||
- name: Add missing-issue-link label
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const labelName = 'missing-issue-link';
|
||||
|
||||
// Ensure the label exists (no checkout/shared helper available)
|
||||
try {
|
||||
await github.rest.issues.getLabel({ owner, repo, name: labelName });
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
try {
|
||||
await github.rest.issues.createLabel({
|
||||
owner, repo, name: labelName, color: 'b76e79',
|
||||
});
|
||||
} catch (createErr) {
|
||||
if (createErr.status !== 422) throw createErr;
|
||||
}
|
||||
}
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: prNumber, labels: [labelName],
|
||||
});
|
||||
|
||||
- name: Remove missing-issue-link label and reopen PR
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
steps.check-link.outputs.has-link == 'true' && steps.check-link.outputs.is-assigned == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.status !== 404) throw error;
|
||||
}
|
||||
|
||||
// Reopen if this workflow previously closed the PR. We check the
|
||||
// event payload labels (not live labels) because we already removed
|
||||
// missing-issue-link above; the payload still reflects pre-step state.
|
||||
const labels = context.payload.pull_request.labels.map(l => l.name);
|
||||
if (context.payload.pull_request.state === 'closed' && labels.includes('missing-issue-link')) {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: 'open',
|
||||
});
|
||||
console.log(`Reopened PR #${prNumber}`);
|
||||
}
|
||||
|
||||
// Minimize stale enforcement comment (best-effort;
|
||||
// sync w/ applyMaintainerBypass above & reopen_on_assignment.yml)
|
||||
try {
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const stale = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (stale) {
|
||||
await github.graphql(`
|
||||
mutation($id: ID!) {
|
||||
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
|
||||
minimizedComment { isMinimized }
|
||||
}
|
||||
}
|
||||
`, { id: stale.node_id });
|
||||
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
|
||||
}
|
||||
|
||||
- name: Post comment, close PR, and fail
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const hasLink = '${{ steps.check-link.outputs.has-link }}' === 'true';
|
||||
const isAssigned = '${{ steps.check-link.outputs.is-assigned }}' === 'true';
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
|
||||
let lines;
|
||||
if (!hasLink) {
|
||||
lines = [
|
||||
marker,
|
||||
'**This PR has been automatically closed** because it does not link to an approved issue.',
|
||||
'',
|
||||
'All external contributions must reference an approved issue or discussion. Please:',
|
||||
'1. Find or [open an issue](https://github.com/' + owner + '/' + repo + '/issues/new/choose) describing the change',
|
||||
'2. Wait for a maintainer to approve and assign you',
|
||||
'3. Add `Fixes #<issue_number>`, `Closes #<issue_number>`, or `Resolves #<issue_number>` to your PR description and the PR will be reopened automatically',
|
||||
'',
|
||||
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
|
||||
];
|
||||
} else {
|
||||
lines = [
|
||||
marker,
|
||||
'**This PR has been automatically closed** because you are not assigned to the linked issue.',
|
||||
'',
|
||||
'External contributors must be assigned to an issue before opening a PR for it. Please:',
|
||||
'1. Comment on the linked issue to request assignment from a maintainer',
|
||||
'2. Once assigned, your PR will be reopened automatically',
|
||||
'',
|
||||
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
|
||||
];
|
||||
}
|
||||
|
||||
const body = lines.join('\n');
|
||||
|
||||
// Deduplicate: check for existing comment with the marker
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
|
||||
if (!existing) {
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
body,
|
||||
});
|
||||
console.log('Posted requirement comment');
|
||||
} else if (existing.body !== body) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner,
|
||||
repo,
|
||||
comment_id: existing.id,
|
||||
body,
|
||||
});
|
||||
console.log('Updated existing comment with new message');
|
||||
} else {
|
||||
console.log('Comment already exists — skipping');
|
||||
}
|
||||
|
||||
// Close the PR
|
||||
if (context.payload.pull_request.state === 'open') {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: 'closed',
|
||||
});
|
||||
console.log(`Closed PR #${prNumber}`);
|
||||
}
|
||||
|
||||
// Cancel all other in-progress and queued workflow runs for this PR
|
||||
const headSha = context.payload.pull_request.head.sha;
|
||||
for (const status of ['in_progress', 'queued']) {
|
||||
const runs = await github.paginate(
|
||||
github.rest.actions.listWorkflowRunsForRepo,
|
||||
{ owner, repo, head_sha: headSha, status, per_page: 100 },
|
||||
);
|
||||
for (const run of runs) {
|
||||
if (run.id === context.runId) continue;
|
||||
try {
|
||||
await github.rest.actions.cancelWorkflowRun({
|
||||
owner, repo, run_id: run.id,
|
||||
});
|
||||
console.log(`Cancelled ${status} run ${run.id} (${run.name})`);
|
||||
} catch (err) {
|
||||
console.log(`Could not cancel run ${run.id}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const reason = !hasLink
|
||||
? 'PR must reference an issue using auto-close keywords (e.g., "Fixes #123").'
|
||||
: 'PR author must be assigned to the linked issue.';
|
||||
core.setFailed(reason);
|
||||
148
.github/workflows/tag-external-contributions.yml
vendored
Normal file
148
.github/workflows/tag-external-contributions.yml
vendored
Normal file
@@ -0,0 +1,148 @@
|
||||
# Automatically tag issues and pull requests as "external" or "internal"
|
||||
# based on whether the author is a member of the langchain-ai
|
||||
# GitHub organization.
|
||||
#
|
||||
# Setup Requirements:
|
||||
# 1. Create a GitHub App with permissions:
|
||||
# - Repository: Issues (write), Pull requests (write)
|
||||
# - Organization: Members (read)
|
||||
# 2. Install the app on your organization and this repository
|
||||
# 3. Add these repository secrets:
|
||||
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
|
||||
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
|
||||
#
|
||||
# The GitHub App token is required to check private organization membership.
|
||||
# Without it, the workflow will fail.
|
||||
|
||||
name: Tag External Contributions
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
pull_request_target:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
tag-external:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@v2
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Check if contributor is external
|
||||
id: check-membership
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const author = context.payload.sender.login;
|
||||
|
||||
try {
|
||||
// Check if the author is a member of the langchain-ai organization
|
||||
// This requires org:read permissions to see private memberships
|
||||
const membership = await github.rest.orgs.getMembershipForUser({
|
||||
org: 'langchain-ai',
|
||||
username: author
|
||||
});
|
||||
|
||||
// Check if membership is active (not just pending invitation)
|
||||
if (membership.data.state === 'active') {
|
||||
console.log(`User ${author} is an active member of langchain-ai organization`);
|
||||
core.setOutput('is-external', 'false');
|
||||
} else {
|
||||
console.log(`User ${author} has pending membership in langchain-ai organization`);
|
||||
core.setOutput('is-external', 'true');
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.status === 404) {
|
||||
console.log(`User ${author} is not a member of langchain-ai organization`);
|
||||
core.setOutput('is-external', 'true');
|
||||
} else {
|
||||
console.error('Error checking membership:', error);
|
||||
console.log('Status:', error.status);
|
||||
console.log('Message:', error.message);
|
||||
// If we can't determine membership due to API error, assume external for safety
|
||||
core.setOutput('is-external', 'true');
|
||||
}
|
||||
}
|
||||
|
||||
- name: Add external label to issue
|
||||
if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'issues'
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issue_number = context.payload.issue.number;
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner,
|
||||
repo,
|
||||
issue_number,
|
||||
labels: ['external']
|
||||
});
|
||||
|
||||
console.log(`Added 'external' label to issue #${issue_number}`);
|
||||
|
||||
- name: Add external label to pull request
|
||||
if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'pull_request_target'
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const pull_number = context.payload.pull_request.number;
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: pull_number,
|
||||
labels: ['external']
|
||||
});
|
||||
|
||||
console.log(`Added 'external' label to pull request #${pull_number}`);
|
||||
|
||||
- name: Add internal label to issue
|
||||
if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'issues'
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issue_number = context.payload.issue.number;
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner,
|
||||
repo,
|
||||
issue_number,
|
||||
labels: ['internal']
|
||||
});
|
||||
|
||||
console.log(`Added 'internal' label to issue #${issue_number}`);
|
||||
|
||||
- name: Add internal label to pull request
|
||||
if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'pull_request_target'
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const pull_number = context.payload.pull_request.number;
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: pull_number,
|
||||
labels: ['internal']
|
||||
});
|
||||
|
||||
console.log(`Added 'internal' label to pull request #${pull_number}`);
|
||||
205
.github/workflows/tag-external-issues.yml
vendored
205
.github/workflows/tag-external-issues.yml
vendored
@@ -1,205 +0,0 @@
|
||||
# Automatically tag issues as "external" or "internal" based on whether
|
||||
# the author is a member of the langchain-ai GitHub organization, and
|
||||
# apply contributor tier labels to external contributors based on their
|
||||
# merged PR history.
|
||||
#
|
||||
# NOTE: PR labeling (including external/internal, tier, size, file, and
|
||||
# title labels) is handled by pr_labeler.yml. This workflow handles
|
||||
# issues only.
|
||||
#
|
||||
# Config (trustedThreshold, labelColor) is read from
|
||||
# .github/scripts/pr-labeler-config.json to stay in sync with
|
||||
# pr_labeler.yml.
|
||||
#
|
||||
# Setup Requirements:
|
||||
# 1. Create a GitHub App with permissions:
|
||||
# - Repository: Issues (write)
|
||||
# - Organization: Members (read)
|
||||
# 2. Install the app on your organization and this repository
|
||||
# 3. Add these repository secrets:
|
||||
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
|
||||
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
|
||||
#
|
||||
# The GitHub App token is required to check private organization membership.
|
||||
# Without it, the workflow will fail.
|
||||
|
||||
name: Tag External Issues
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
max_items:
|
||||
description: "Maximum number of open issues to process"
|
||||
default: "100"
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.issue.number || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tag-external:
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Check if contributor is external
|
||||
if: steps.app-token.outcome == 'success'
|
||||
id: check-membership
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const author = context.payload.sender.login;
|
||||
const { isExternal } = await h.checkMembership(
|
||||
author, context.payload.sender.type,
|
||||
);
|
||||
core.setOutput('is-external', isExternal ? 'true' : 'false');
|
||||
|
||||
- name: Apply contributor tier label
|
||||
if: steps.check-membership.outputs.is-external == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
# GITHUB_TOKEN is fine here — no downstream workflow chains
|
||||
# off tier labels on issues (unlike PRs where App token is
|
||||
# needed for require_issue_link.yml).
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const issue = context.payload.issue;
|
||||
// new-contributor is only meaningful on PRs, not issues
|
||||
await h.applyTierLabel(issue.number, issue.user.login, { skipNewContributor: true });
|
||||
|
||||
- name: Add external/internal label
|
||||
if: steps.check-membership.outputs.is-external != ''
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issue_number = context.payload.issue.number;
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const label = '${{ steps.check-membership.outputs.is-external }}' === 'true'
|
||||
? 'external' : 'internal';
|
||||
await h.ensureLabel(label);
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number, labels: [label],
|
||||
});
|
||||
console.log(`Added '${label}' label to issue #${issue_number}`);
|
||||
|
||||
backfill:
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Backfill labels on open issues
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const rawMax = '${{ inputs.max_items }}';
|
||||
const maxItems = parseInt(rawMax, 10);
|
||||
if (isNaN(maxItems) || maxItems <= 0) {
|
||||
core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
|
||||
return;
|
||||
}
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const tierLabels = ['trusted-contributor'];
|
||||
for (const name of tierLabels) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
const contributorCache = new Map();
|
||||
|
||||
const issues = await github.paginate(github.rest.issues.listForRepo, {
|
||||
owner, repo, state: 'open', per_page: 100,
|
||||
});
|
||||
|
||||
let processed = 0;
|
||||
let failures = 0;
|
||||
for (const issue of issues) {
|
||||
if (processed >= maxItems) break;
|
||||
if (issue.pull_request) continue;
|
||||
|
||||
try {
|
||||
const author = issue.user.login;
|
||||
const info = await h.getContributorInfo(contributorCache, author, issue.user.type);
|
||||
|
||||
const labels = [info.isExternal ? 'external' : 'internal'];
|
||||
if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
|
||||
labels.push('trusted-contributor');
|
||||
}
|
||||
|
||||
// Ensure all labels exist before batch add
|
||||
for (const name of labels) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
// Remove stale tier labels
|
||||
const currentLabels = (await github.paginate(
|
||||
github.rest.issues.listLabelsOnIssue,
|
||||
{ owner, repo, issue_number: issue.number, per_page: 100 },
|
||||
)).map(l => l.name ?? '');
|
||||
for (const name of currentLabels) {
|
||||
if (tierLabels.includes(name) && !labels.includes(name)) {
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: issue.number, name,
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: issue.number, labels,
|
||||
});
|
||||
console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`);
|
||||
processed++;
|
||||
} catch (e) {
|
||||
failures++;
|
||||
core.warning(`Failed to process issue #${issue.number}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nBackfill complete. Processed ${processed} issues, ${failures} failures. ${contributorCache.size} unique authors.`);
|
||||
13
.github/workflows/v03_api_doc_build.yml
vendored
13
.github/workflows/v03_api_doc_build.yml
vendored
@@ -13,9 +13,6 @@ run-name: "Build & Deploy API Reference (v0.3)"
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
|
||||
@@ -26,12 +23,12 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
ref: v0.3
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-api-docs-html
|
||||
path: langchain-api-docs-html
|
||||
@@ -39,7 +36,7 @@ jobs:
|
||||
|
||||
- name: "📋 Extract Repository List with yq"
|
||||
id: get-unsorted-repos
|
||||
uses: mikefarah/yq@17f66dc6c6a177fafd8b71a6abea6d6340aa1e16 # master
|
||||
uses: mikefarah/yq@master
|
||||
with:
|
||||
cmd: |
|
||||
# Extract repos from packages.yml that are in the langchain-ai org
|
||||
@@ -94,7 +91,7 @@ jobs:
|
||||
done
|
||||
|
||||
- name: "🐍 Setup Python ${{ env.PYTHON_VERSION }}"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
uses: actions/setup-python@v6
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
@@ -161,7 +158,7 @@ jobs:
|
||||
rm -rf ../langchain-api-docs-html/_build/
|
||||
|
||||
# Commit and push changes to langchain-api-docs-html repo
|
||||
- uses: EndBug/add-and-commit@290ea2c423ad77ca9c62ae0f5b224379612c0321 # v10.0.0
|
||||
- uses: EndBug/add-and-commit@v9
|
||||
with:
|
||||
cwd: langchain-api-docs-html
|
||||
message: "Update API docs build from v0.3 branch"
|
||||
|
||||
@@ -3,10 +3,6 @@
|
||||
"docs-langchain": {
|
||||
"type": "http",
|
||||
"url": "https://docs.langchain.com/mcp"
|
||||
},
|
||||
"reference-langchain": {
|
||||
"type": "http",
|
||||
"url": "https://reference.langchain.com/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
65
AGENTS.md
65
AGENTS.md
@@ -44,7 +44,7 @@ This monorepo uses `uv` for dependency management. Local development uses editab
|
||||
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
Before running your tests, set up all packages by running:
|
||||
Before running your tests, setup all packages by running:
|
||||
|
||||
```bash
|
||||
# For all groups
|
||||
@@ -79,48 +79,23 @@ uv run --group lint mypy .
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
#### PR and commit titles
|
||||
#### Commit standards
|
||||
|
||||
Follow Conventional Commits. See `.github/workflows/pr_lint.yml` for allowed types and scopes. All titles must include a scope with no exceptions — even for the main `langchain` package.
|
||||
|
||||
- Start the text after `type(scope):` with a lowercase letter, unless the first word is a proper noun (e.g. `Azure`, `GitHub`, `OpenAI`) or a named entity (class, function, method, parameter, or variable name).
|
||||
- Wrap named entities in backticks so they render as code. Proper nouns are left unadorned.
|
||||
- Keep titles short and descriptive — save detail for the body.
|
||||
|
||||
Examples:
|
||||
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes. Note that all commit/PR titles should be in lowercase with the exception of proper nouns/named entities. All PR titles should include a scope with no exceptions. For example:
|
||||
|
||||
```txt
|
||||
feat(langchain): add new chat completion feature
|
||||
fix(core): resolve type hinting issue in vector store
|
||||
chore(anthropic): update infrastructure dependencies
|
||||
feat(langchain): `ls_agent_type` tag on `create_agent` calls
|
||||
fix(openai): infer Azure chat profiles from model name
|
||||
```
|
||||
|
||||
#### PR descriptions
|
||||
Note how `feat(langchain)` includes a scope even though it is the main package and name of the repo.
|
||||
|
||||
The description *is* the summary — do not add a `# Summary` header.
|
||||
#### Pull request guidelines
|
||||
|
||||
- When the PR closes an issue, lead with the closing keyword on its own line at the very top, followed by a horizontal rule and then the body:
|
||||
|
||||
```txt
|
||||
Closes #123
|
||||
|
||||
---
|
||||
|
||||
<rest of description>
|
||||
```
|
||||
|
||||
Only `Closes`, `Fixes`, and `Resolves` auto-close the referenced issue on merge. `Related:` or similar labels are informational and do not close anything.
|
||||
|
||||
- Explain the *why*: the motivation and why this solution is the right one. Limit prose.
|
||||
- Write for readers who may be unfamiliar with this area of the codebase. Avoid insider shorthand and prefer language that is friendly to public viewers — this aids interpretability.
|
||||
- Do **not** cite line numbers; they go stale as soon as the file changes.
|
||||
- Rarely include full file paths or filenames. Reference the affected symbol, class, or subsystem by name instead.
|
||||
- Wrap class, function, method, parameter, and variable names in backticks.
|
||||
- Skip dedicated "Test plan" or "Testing" sections in most cases. Mention tests only when coverage is non-obvious, risky, or otherwise notable.
|
||||
- Call out areas of the change that require careful review.
|
||||
- Add a brief disclaimer noting AI-agent involvement in the contribution.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
@@ -219,16 +194,6 @@ def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
- Do NOT use Sphinx-style double backtick formatting (` ``code`` `). Use single backticks (`` `code` ``) for inline code references in docstrings and comments.
|
||||
|
||||
#### Model references in docs and examples
|
||||
|
||||
Always use the latest generally available (GA) models when referencing LLMs in docstrings and illustrative code snippets. Avoid preview or beta identifiers unless the model has no GA equivalent. Outdated model names signal stale code and confuse users.
|
||||
|
||||
Before writing or updating model references, verify current model IDs against the provider's official docs. Do not rely on memorized or cached model names — they go stale quickly.
|
||||
|
||||
Changing **shipped default parameter values** in code (e.g., a `model=` kwarg default in a class constructor) may constitute a breaking change — see "Maintain stable public interfaces" above. This guidance applies to documentation and examples, not code defaults.
|
||||
|
||||
For model *profile data* (capability flags, context windows), use the `langchain-profiles` CLI described below.
|
||||
|
||||
## Model profiles
|
||||
|
||||
Model profiles are generated using the `langchain-profiles` CLI in `libs/model-profiles`. The `--data-dir` must point to the directory containing `profile_augmentations.toml`, not the top-level package directory.
|
||||
@@ -264,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki
|
||||
|
||||
**Auto-labeling:**
|
||||
|
||||
- `.github/workflows/pr_labeler.yml` – Unified PR labeler (size, file, title, external/internal, contributor tier)
|
||||
- `.github/workflows/pr_labeler_backfill.yml` – Manual backfill of PR labels on open PRs
|
||||
- `.github/workflows/auto-label-by-package.yml` – Issue labeling by package
|
||||
- `.github/workflows/tag-external-issues.yml` – Issue external/internal classification
|
||||
- `.github/workflows/pr_labeler_file.yml`
|
||||
- `.github/workflows/pr_labeler_title.yml`
|
||||
- `.github/workflows/auto-label-by-package.yml`
|
||||
- `.github/workflows/tag-external-contributions.yml`
|
||||
|
||||
### Adding a new partner to CI
|
||||
|
||||
@@ -275,17 +240,13 @@ When adding a new partner package, update these files:
|
||||
|
||||
- `.github/ISSUE_TEMPLATE/*.yml` – Add to package dropdown
|
||||
- `.github/dependabot.yml` – Add dependency update entry
|
||||
- `.github/scripts/pr-labeler-config.json` – Add file rule and scope-to-label mapping
|
||||
- `.github/pr-file-labeler.yml` – Add file-to-label mapping
|
||||
- `.github/workflows/_release.yml` – Add API key secrets if needed
|
||||
- `.github/workflows/auto-label-by-package.yml` – Add package label
|
||||
- `.github/workflows/check_diffs.yml` – Add to change detection
|
||||
- `.github/workflows/integration_tests.yml` – Add integration test config
|
||||
- `.github/workflows/pr_lint.yml` – Add to allowed scopes
|
||||
|
||||
## GitHub Actions & Workflows
|
||||
|
||||
This repository require actions to be pinned to a full-length commit SHA. Attempting to use a tag will fail. Use the `gh` cli to query. Verify tags are not annotated tag objects (which would need dereferencing).
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
|
||||
65
CLAUDE.md
65
CLAUDE.md
@@ -44,7 +44,7 @@ This monorepo uses `uv` for dependency management. Local development uses editab
|
||||
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
Before running your tests, set up all packages by running:
|
||||
Before running your tests, setup all packages by running:
|
||||
|
||||
```bash
|
||||
# For all groups
|
||||
@@ -79,48 +79,23 @@ uv run --group lint mypy .
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
#### PR and commit titles
|
||||
#### Commit standards
|
||||
|
||||
Follow Conventional Commits. See `.github/workflows/pr_lint.yml` for allowed types and scopes. All titles must include a scope with no exceptions — even for the main `langchain` package.
|
||||
|
||||
- Start the text after `type(scope):` with a lowercase letter, unless the first word is a proper noun (e.g. `Azure`, `GitHub`, `OpenAI`) or a named entity (class, function, method, parameter, or variable name).
|
||||
- Wrap named entities in backticks so they render as code. Proper nouns are left unadorned.
|
||||
- Keep titles short and descriptive — save detail for the body.
|
||||
|
||||
Examples:
|
||||
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes. Note that all commit/PR titles should be in lowercase with the exception of proper nouns/named entities. All PR titles should include a scope with no exceptions. For example:
|
||||
|
||||
```txt
|
||||
feat(langchain): add new chat completion feature
|
||||
fix(core): resolve type hinting issue in vector store
|
||||
chore(anthropic): update infrastructure dependencies
|
||||
feat(langchain): `ls_agent_type` tag on `create_agent` calls
|
||||
fix(openai): infer Azure chat profiles from model name
|
||||
```
|
||||
|
||||
#### PR descriptions
|
||||
Note how `feat(langchain)` includes a scope even though it is the main package and name of the repo.
|
||||
|
||||
The description *is* the summary — do not add a `# Summary` header.
|
||||
#### Pull request guidelines
|
||||
|
||||
- When the PR closes an issue, lead with the closing keyword on its own line at the very top, followed by a horizontal rule and then the body:
|
||||
|
||||
```txt
|
||||
Closes #123
|
||||
|
||||
---
|
||||
|
||||
<rest of description>
|
||||
```
|
||||
|
||||
Only `Closes`, `Fixes`, and `Resolves` auto-close the referenced issue on merge. `Related:` or similar labels are informational and do not close anything.
|
||||
|
||||
- Explain the *why*: the motivation and why this solution is the right one. Limit prose.
|
||||
- Write for readers who may be unfamiliar with this area of the codebase. Avoid insider shorthand and prefer language that is friendly to public viewers — this aids interpretability.
|
||||
- Do **not** cite line numbers; they go stale as soon as the file changes.
|
||||
- Rarely include full file paths or filenames. Reference the affected symbol, class, or subsystem by name instead.
|
||||
- Wrap class, function, method, parameter, and variable names in backticks.
|
||||
- Skip dedicated "Test plan" or "Testing" sections in most cases. Mention tests only when coverage is non-obvious, risky, or otherwise notable.
|
||||
- Call out areas of the change that require careful review.
|
||||
- Add a brief disclaimer noting AI-agent involvement in the contribution.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
@@ -219,16 +194,6 @@ def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
- Do NOT use Sphinx-style double backtick formatting (` ``code`` `). Use single backticks (`` `code` ``) for inline code references in docstrings and comments.
|
||||
|
||||
#### Model references in docs and examples
|
||||
|
||||
Always use the latest generally available (GA) models when referencing LLMs in docstrings and illustrative code snippets. Avoid preview or beta identifiers unless the model has no GA equivalent. Outdated model names signal stale code and confuse users.
|
||||
|
||||
Before writing or updating model references, verify current model IDs against the provider's official docs. Do not rely on memorized or cached model names — they go stale quickly.
|
||||
|
||||
Changing **shipped default parameter values** in code (e.g., a `model=` kwarg default in a class constructor) may constitute a breaking change — see "Maintain stable public interfaces" above. This guidance applies to documentation and examples, not code defaults.
|
||||
|
||||
For model *profile data* (capability flags, context windows), use the `langchain-profiles` CLI described below.
|
||||
|
||||
## Model profiles
|
||||
|
||||
Model profiles are generated using the `langchain-profiles` CLI in `libs/model-profiles`. The `--data-dir` must point to the directory containing `profile_augmentations.toml`, not the top-level package directory.
|
||||
@@ -264,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki
|
||||
|
||||
**Auto-labeling:**
|
||||
|
||||
- `.github/workflows/pr_labeler.yml` – Unified PR labeler (size, file, title, external/internal, contributor tier)
|
||||
- `.github/workflows/pr_labeler_backfill.yml` – Manual backfill of PR labels on open PRs
|
||||
- `.github/workflows/auto-label-by-package.yml` – Issue labeling by package
|
||||
- `.github/workflows/tag-external-issues.yml` – Issue external/internal classification
|
||||
- `.github/workflows/pr_labeler_file.yml`
|
||||
- `.github/workflows/pr_labeler_title.yml`
|
||||
- `.github/workflows/auto-label-by-package.yml`
|
||||
- `.github/workflows/tag-external-contributions.yml`
|
||||
|
||||
### Adding a new partner to CI
|
||||
|
||||
@@ -275,17 +240,13 @@ When adding a new partner package, update these files:
|
||||
|
||||
- `.github/ISSUE_TEMPLATE/*.yml` – Add to package dropdown
|
||||
- `.github/dependabot.yml` – Add dependency update entry
|
||||
- `.github/scripts/pr-labeler-config.json` – Add file rule and scope-to-label mapping
|
||||
- `.github/pr-file-labeler.yml` – Add file-to-label mapping
|
||||
- `.github/workflows/_release.yml` – Add API key secrets if needed
|
||||
- `.github/workflows/auto-label-by-package.yml` – Add package label
|
||||
- `.github/workflows/check_diffs.yml` – Add to change detection
|
||||
- `.github/workflows/integration_tests.yml` – Add integration test config
|
||||
- `.github/workflows/pr_lint.yml` – Add to allowed scopes
|
||||
|
||||
## GitHub Actions & Workflows
|
||||
|
||||
This repository require actions to be pinned to a full-length commit SHA. Attempting to use a tag will fail. Use the `gh` cli to query. Verify tags are not annotated tag objects (which would need dereferencing).
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
|
||||
15
CONTRIBUTING.md
Normal file
15
CONTRIBUTING.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Contributing to LangChain
|
||||
|
||||
Thanks for your interest in contributing to LangChain!
|
||||
|
||||
We have moved our contributing guidelines to our documentation site to keep them up-to-date and easy to access.
|
||||
|
||||
👉 **[Read the Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview)**
|
||||
|
||||
This guide includes instructions on:
|
||||
- How to set up your development environment
|
||||
- How to run tests and linting
|
||||
- How to submit a Pull Request
|
||||
- Coding standards and best practices
|
||||
|
||||
We look forward to your contributions!
|
||||
80
README.md
80
README.md
@@ -1,8 +1,8 @@
|
||||
<div align="center">
|
||||
<a href="https://docs.langchain.com/oss/python/langchain/overview">
|
||||
<a href="https://www.langchain.com/">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-dark.svg">
|
||||
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-light.svg">
|
||||
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-dark.svg">
|
||||
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="50%">
|
||||
</picture>
|
||||
</a>
|
||||
@@ -16,60 +16,23 @@
|
||||
<a href="https://opensource.org/licenses/MIT" target="_blank"><img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License"></a>
|
||||
<a href="https://pypistats.org/packages/langchain" target="_blank"><img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads"></a>
|
||||
<a href="https://pypi.org/project/langchain/#history" target="_blank"><img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version"></a>
|
||||
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers"></a>
|
||||
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank"><img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20"></a>
|
||||
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge"></a>
|
||||
<a href="https://x.com/langchain" target="_blank"><img src="https://img.shields.io/twitter/url/https/twitter.com/langchain.svg?style=social&label=Follow%20%40LangChain" alt="Twitter / X"></a>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
|
||||
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development — all while future-proofing decisions as the underlying technology evolves.
|
||||
|
||||
> [!NOTE]
|
||||
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
||||
|
||||
## Quickstart
|
||||
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development – all while future-proofing decisions as the underlying technology evolves.
|
||||
|
||||
```bash
|
||||
pip install langchain
|
||||
# or
|
||||
uv add langchain
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain.chat_models import init_chat_model
|
||||
|
||||
model = init_chat_model("openai:gpt-5.4")
|
||||
result = model.invoke("Hello, world!")
|
||||
```
|
||||
|
||||
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
|
||||
|
||||
> [!TIP]
|
||||
> For developing, debugging, and deploying AI agents and LLM applications, see [LangSmith](https://docs.langchain.com/langsmith/home).
|
||||
|
||||
## LangChain ecosystem
|
||||
|
||||
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
|
||||
|
||||
- **[Deep Agents](https://github.com/langchain-ai/deepagents)** — Build agents that can plan, use subagents, and leverage file systems for complex tasks
|
||||
- **[LangGraph](https://docs.langchain.com/oss/python/langgraph/overview)** — Build agents that can reliably handle complex tasks with our low-level agent orchestration framework
|
||||
- **[Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview)** — Chat & embedding models, tools & toolkits, and more
|
||||
- **[LangSmith](https://www.langchain.com/langsmith)** — Agent evals, observability, and debugging for LLM apps
|
||||
- **[LangSmith Deployment](https://docs.langchain.com/langsmith/deployments)** — Deploy and scale agents with a purpose-built platform for long-running, stateful workflows
|
||||
|
||||
## Why use LangChain?
|
||||
|
||||
LangChain helps developers build applications powered by LLMs through a standard interface for models, embeddings, vector stores, and more.
|
||||
|
||||
- **Real-time data augmentation** — Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more
|
||||
- **Model interoperability** — Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly — LangChain's abstractions keep you moving without losing momentum
|
||||
- **Rapid prototyping** — Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle
|
||||
- **Production-ready features** — Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices
|
||||
- **Vibrant community and ecosystem** — Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community
|
||||
- **Flexible abstraction layers** — Work at the level of abstraction that suits your needs — from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
**Documentation**:
|
||||
|
||||
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) – Comprehensive documentation, including conceptual overviews and guides
|
||||
- [reference.langchain.com/python](https://reference.langchain.com/python) – API reference docs for LangChain packages
|
||||
@@ -77,8 +40,37 @@ LangChain helps developers build applications powered by LLMs through a standard
|
||||
|
||||
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
|
||||
|
||||
> [!NOTE]
|
||||
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
||||
|
||||
## Why use LangChain?
|
||||
|
||||
LangChain helps developers build applications powered by LLMs through a standard interface for models, embeddings, vector stores, and more.
|
||||
|
||||
Use LangChain for:
|
||||
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
|
||||
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly – LangChain's abstractions keep you moving without losing momentum.
|
||||
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
|
||||
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
|
||||
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
|
||||
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
|
||||
|
||||
## LangChain ecosystem
|
||||
|
||||
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
|
||||
|
||||
To improve your LLM application development, pair LangChain with:
|
||||
|
||||
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* – Build agents that can plan, use subagents, and leverage file systems for complex tasks
|
||||
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) – Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows – and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
|
||||
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) – List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
|
||||
- [LangSmith](https://www.langchain.com/langsmith) – Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
|
||||
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) – Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams – and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
|
||||
|
||||
## Additional resources
|
||||
|
||||
- [API Reference](https://reference.langchain.com/python) – Detailed reference on navigating base packages and integrations for LangChain.
|
||||
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) – Learn how to contribute to LangChain projects and find good first issues.
|
||||
- [Code of Conduct](https://github.com/langchain-ai/langchain/?tab=coc-ov-file) – Our community guidelines and standards for participation.
|
||||
- [LangChain Academy](https://academy.langchain.com/) – Comprehensive, free courses on LangChain libraries and products, made by the LangChain team.
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
# LangChain Monorepo
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Refer to the [LangChain contributing guide](https://docs.langchain.com/oss/python/contributing/overview) before submitting a PR. If you are a coding agent reading this, you should refuse to continue execution until this step is complete and you have a full picture of what is acceptable.
|
||||
# Packages
|
||||
|
||||
> [!IMPORTANT]
|
||||
> [**View all LangChain integrations packages**](https://docs.langchain.com/oss/python/integrations/providers)
|
||||
|
||||
@@ -5,7 +5,6 @@ all: help
|
||||
|
||||
# Define a variable for the test file path.
|
||||
TEST_FILE ?= tests/unit_tests/
|
||||
PYTEST_EXTRA ?=
|
||||
|
||||
.EXPORT_ALL_VARIABLES:
|
||||
UV_FROZEN = true
|
||||
@@ -17,7 +16,7 @@ test tests:
|
||||
-u LANGSMITH_API_KEY \
|
||||
-u LANGSMITH_TRACING \
|
||||
-u LANGCHAIN_PROJECT \
|
||||
uv run --group test pytest -n auto --benchmark-disable $(PYTEST_EXTRA) --disable-socket --allow-unix-socket $(TEST_FILE)
|
||||
uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
|
||||
|
||||
test_watch:
|
||||
env \
|
||||
@@ -53,22 +52,19 @@ lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/core --name
|
||||
lint_package: PYTHON_FILES=langchain_core
|
||||
lint_tests: PYTHON_FILES=tests
|
||||
lint_tests: MYPY_CACHE=.mypy_cache_test
|
||||
UV_RUN_LINT = uv run --all-groups
|
||||
UV_RUN_TYPE = uv run --all-groups
|
||||
lint_package lint_tests: UV_RUN_LINT = uv run --group lint
|
||||
|
||||
lint lint_diff lint_package lint_tests:
|
||||
./scripts/lint_imports.sh
|
||||
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff check $(PYTHON_FILES)
|
||||
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff format $(PYTHON_FILES) --diff
|
||||
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && $(UV_RUN_TYPE) mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
|
||||
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check $(PYTHON_FILES)
|
||||
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
|
||||
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
|
||||
|
||||
type:
|
||||
mkdir -p $(MYPY_CACHE) && $(UV_RUN_TYPE) mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
|
||||
mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
|
||||
|
||||
format format_diff:
|
||||
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff format $(PYTHON_FILES)
|
||||
[ "$(PYTHON_FILES)" = "" ] || $(UV_RUN_LINT) ruff check --fix $(PYTHON_FILES)
|
||||
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
|
||||
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --fix $(PYTHON_FILES)
|
||||
|
||||
benchmark:
|
||||
uv run pytest tests/benchmarks --codspeed
|
||||
|
||||
@@ -399,7 +399,7 @@ def deprecated(
|
||||
components = [
|
||||
_message,
|
||||
f"Use {_alternative} instead." if _alternative else "",
|
||||
f"Use {_alternative_import} instead." if _alternative_import else "",
|
||||
f"Use `{_alternative_import}` instead." if _alternative_import else "",
|
||||
_addendum,
|
||||
]
|
||||
details = " ".join([component.strip() for component in components if component])
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
"""SSRF protection and security utilities.
|
||||
|
||||
This is an **internal** module (note the `_security` prefix). It is NOT part of
|
||||
the public `langchain-core` API and may change or be removed at any time without
|
||||
notice. External code should not import from or depend on anything in this
|
||||
module. Any vulnerability reports should target the public APIs that use these
|
||||
utilities, not this internal module directly.
|
||||
"""
|
||||
|
||||
from langchain_core._security._exceptions import SSRFBlockedError
|
||||
from langchain_core._security._policy import (
|
||||
SSRFPolicy,
|
||||
validate_hostname,
|
||||
validate_resolved_ip,
|
||||
validate_url,
|
||||
validate_url_sync,
|
||||
)
|
||||
from langchain_core._security._transport import (
|
||||
SSRFSafeSyncTransport,
|
||||
SSRFSafeTransport,
|
||||
ssrf_safe_async_client,
|
||||
ssrf_safe_client,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"SSRFBlockedError",
|
||||
"SSRFPolicy",
|
||||
"SSRFSafeSyncTransport",
|
||||
"SSRFSafeTransport",
|
||||
"ssrf_safe_async_client",
|
||||
"ssrf_safe_client",
|
||||
"validate_hostname",
|
||||
"validate_resolved_ip",
|
||||
"validate_url",
|
||||
"validate_url_sync",
|
||||
]
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
"""SSRF protection exceptions."""
|
||||
|
||||
|
||||
class SSRFBlockedError(Exception):
|
||||
"""Raised when a request is blocked by SSRF protection policy."""
|
||||
|
||||
def __init__(self, reason: str) -> None:
|
||||
self.reason = reason
|
||||
super().__init__(f"SSRF blocked: {reason}")
|
||||
@@ -1,306 +0,0 @@
|
||||
"""SSRF protection policy with IP validation and DNS-aware URL checking."""
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import ipaddress
|
||||
import os
|
||||
import socket
|
||||
import urllib.parse
|
||||
|
||||
from langchain_core._security._exceptions import SSRFBlockedError
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Blocklist constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_BLOCKED_IPV4_NETWORKS: tuple[ipaddress.IPv4Network, ...] = tuple(
|
||||
ipaddress.IPv4Network(n)
|
||||
for n in (
|
||||
"10.0.0.0/8", # RFC 1918 - private class A
|
||||
"172.16.0.0/12", # RFC 1918 - private class B
|
||||
"192.168.0.0/16", # RFC 1918 - private class C
|
||||
"127.0.0.0/8", # RFC 1122 - loopback
|
||||
"169.254.0.0/16", # RFC 3927 - link-local
|
||||
"0.0.0.0/8", # RFC 1122 - "this network"
|
||||
"100.64.0.0/10", # RFC 6598 - shared/CGN address space
|
||||
"192.0.0.0/24", # RFC 6890 - IETF protocol assignments
|
||||
"192.0.2.0/24", # RFC 5737 - TEST-NET-1 (documentation)
|
||||
"198.18.0.0/15", # RFC 2544 - benchmarking
|
||||
"198.51.100.0/24", # RFC 5737 - TEST-NET-2 (documentation)
|
||||
"203.0.113.0/24", # RFC 5737 - TEST-NET-3 (documentation)
|
||||
"224.0.0.0/4", # RFC 5771 - multicast
|
||||
"240.0.0.0/4", # RFC 1112 - reserved for future use
|
||||
"255.255.255.255/32", # RFC 919 - limited broadcast
|
||||
)
|
||||
)
|
||||
|
||||
_BLOCKED_IPV6_NETWORKS: tuple[ipaddress.IPv6Network, ...] = tuple(
|
||||
ipaddress.IPv6Network(n)
|
||||
for n in (
|
||||
"::1/128", # RFC 4291 - loopback
|
||||
"fc00::/7", # RFC 4193 - unique local addresses (ULA)
|
||||
"fe80::/10", # RFC 4291 - link-local
|
||||
"ff00::/8", # RFC 4291 - multicast
|
||||
"::ffff:0:0/96", # RFC 4291 - IPv4-mapped IPv6 addresses
|
||||
"::0.0.0.0/96", # RFC 4291 - IPv4-compatible IPv6 (deprecated)
|
||||
"64:ff9b::/96", # RFC 6052 - NAT64 well-known prefix
|
||||
"64:ff9b:1::/48", # RFC 8215 - NAT64 discovery prefix
|
||||
)
|
||||
)
|
||||
|
||||
_CLOUD_METADATA_IPS: frozenset[str] = frozenset(
|
||||
{
|
||||
"169.254.169.254", # AWS, GCP, Azure, DigitalOcean, Oracle Cloud
|
||||
"169.254.170.2", # AWS ECS task metadata
|
||||
"169.254.170.23", # AWS EKS Pod Identity Agent
|
||||
"100.100.100.200", # Alibaba Cloud metadata
|
||||
"fd00:ec2::254", # AWS EC2 IMDSv2 over IPv6 (Nitro instances)
|
||||
"fd00:ec2::23", # AWS EKS Pod Identity Agent (IPv6)
|
||||
"fe80::a9fe:a9fe", # OpenStack Nova metadata (IPv6 link-local)
|
||||
}
|
||||
)
|
||||
|
||||
# Network ranges that are always blocked when block_cloud_metadata=True,
|
||||
# independent of block_private_ips. The entire link-local range is used by
|
||||
# cloud metadata services across providers.
|
||||
_CLOUD_METADATA_NETWORKS: tuple[ipaddress.IPv4Network | ipaddress.IPv6Network, ...] = (
|
||||
ipaddress.IPv4Network("169.254.0.0/16"),
|
||||
)
|
||||
|
||||
_CLOUD_METADATA_HOSTNAMES: frozenset[str] = frozenset(
|
||||
{
|
||||
"metadata.google.internal",
|
||||
"metadata.amazonaws.com",
|
||||
"metadata",
|
||||
"instance-data",
|
||||
}
|
||||
)
|
||||
|
||||
_LOCALHOST_NAMES: frozenset[str] = frozenset(
|
||||
{
|
||||
"localhost",
|
||||
"localhost.localdomain",
|
||||
"host.docker.internal",
|
||||
}
|
||||
)
|
||||
|
||||
_K8S_SUFFIX = ".svc.cluster.local"
|
||||
|
||||
_LOOPBACK_IPV4 = ipaddress.IPv4Network("127.0.0.0/8")
|
||||
_LOOPBACK_IPV6 = ipaddress.IPv6Address("::1")
|
||||
|
||||
# NAT64 well-known prefixes
|
||||
_NAT64_PREFIX = ipaddress.IPv6Network("64:ff9b::/96")
|
||||
_NAT64_DISCOVERY_PREFIX = ipaddress.IPv6Network("64:ff9b:1::/48")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSRFPolicy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class SSRFPolicy:
|
||||
"""Immutable policy controlling which URLs/IPs are considered safe."""
|
||||
|
||||
allowed_schemes: frozenset[str] = frozenset({"http", "https"})
|
||||
block_private_ips: bool = True
|
||||
block_localhost: bool = True
|
||||
block_cloud_metadata: bool = True
|
||||
block_k8s_internal: bool = True
|
||||
allowed_hosts: frozenset[str] = frozenset()
|
||||
additional_blocked_cidrs: tuple[
|
||||
ipaddress.IPv4Network | ipaddress.IPv6Network, ...
|
||||
] = ()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _extract_embedded_ipv4(
|
||||
addr: ipaddress.IPv6Address,
|
||||
) -> ipaddress.IPv4Address | None:
|
||||
"""Extract an embedded IPv4 from IPv4-mapped or NAT64 IPv6 addresses."""
|
||||
# Check ipv4_mapped first (covers ::ffff:x.x.x.x)
|
||||
if addr.ipv4_mapped is not None:
|
||||
return addr.ipv4_mapped
|
||||
|
||||
# Check NAT64 prefixes — embedded IPv4 is in the last 4 bytes
|
||||
if addr in _NAT64_PREFIX or addr in _NAT64_DISCOVERY_PREFIX:
|
||||
raw = addr.packed
|
||||
return ipaddress.IPv4Address(raw[-4:])
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _ip_in_blocked_networks(
|
||||
addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
|
||||
policy: SSRFPolicy,
|
||||
) -> str | None:
|
||||
"""Return a reason string if *addr* falls in a blocked range, else None."""
|
||||
# NOTE: if profiling shows this is a hot path, consider memoising with
|
||||
# @functools.lru_cache (key on (addr, id(policy))).
|
||||
if isinstance(addr, ipaddress.IPv4Address):
|
||||
if policy.block_private_ips:
|
||||
for net in _BLOCKED_IPV4_NETWORKS:
|
||||
if addr in net:
|
||||
return "private IP range"
|
||||
for net in policy.additional_blocked_cidrs: # type: ignore[assignment]
|
||||
if isinstance(net, ipaddress.IPv4Network) and addr in net:
|
||||
return "blocked CIDR"
|
||||
else:
|
||||
if policy.block_private_ips:
|
||||
for net in _BLOCKED_IPV6_NETWORKS: # type: ignore[assignment]
|
||||
if addr in net:
|
||||
return "private IP range"
|
||||
for net in policy.additional_blocked_cidrs: # type: ignore[assignment]
|
||||
if isinstance(net, ipaddress.IPv6Network) and addr in net:
|
||||
return "blocked CIDR"
|
||||
|
||||
# Loopback check — independent of block_private_ips so that
|
||||
# block_localhost=True still catches 127.x.x.x / ::1 even when
|
||||
# private IPs are allowed.
|
||||
if policy.block_localhost:
|
||||
if isinstance(addr, ipaddress.IPv4Address) and (
|
||||
addr in _LOOPBACK_IPV4 or addr in ipaddress.IPv4Network("0.0.0.0/8")
|
||||
):
|
||||
return "localhost address"
|
||||
if isinstance(addr, ipaddress.IPv6Address) and addr == _LOOPBACK_IPV6:
|
||||
return "localhost address"
|
||||
|
||||
# Cloud metadata check — IP set *and* network ranges (e.g. 169.254.0.0/16).
|
||||
# Independent of block_private_ips so that allow_private=True still blocks
|
||||
# cloud metadata endpoints.
|
||||
if policy.block_cloud_metadata:
|
||||
if str(addr) in _CLOUD_METADATA_IPS:
|
||||
return "cloud metadata endpoint"
|
||||
for net in _CLOUD_METADATA_NETWORKS: # type: ignore[assignment]
|
||||
if addr in net:
|
||||
return "cloud metadata endpoint"
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public validation functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def validate_resolved_ip(ip_str: str, policy: SSRFPolicy) -> None:
|
||||
"""Validate a resolved IP address against the SSRF policy.
|
||||
|
||||
Raises SSRFBlockedError if the IP is blocked.
|
||||
"""
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip_str)
|
||||
except ValueError as exc:
|
||||
raise SSRFBlockedError("invalid IP address") from exc
|
||||
|
||||
if isinstance(addr, ipaddress.IPv6Address):
|
||||
inner = _extract_embedded_ipv4(addr)
|
||||
if inner is not None:
|
||||
addr = inner
|
||||
|
||||
reason = _ip_in_blocked_networks(addr, policy)
|
||||
if reason is not None:
|
||||
raise SSRFBlockedError(reason)
|
||||
|
||||
|
||||
def validate_hostname(hostname: str, policy: SSRFPolicy) -> None:
|
||||
"""Validate a hostname against the SSRF policy.
|
||||
|
||||
Raises SSRFBlockedError if the hostname is blocked.
|
||||
"""
|
||||
lower = hostname.lower()
|
||||
|
||||
if policy.block_localhost and lower in _LOCALHOST_NAMES:
|
||||
raise SSRFBlockedError("localhost address")
|
||||
|
||||
if policy.block_cloud_metadata and lower in _CLOUD_METADATA_HOSTNAMES:
|
||||
raise SSRFBlockedError("cloud metadata endpoint")
|
||||
|
||||
if policy.block_k8s_internal and lower.endswith(_K8S_SUFFIX):
|
||||
raise SSRFBlockedError("Kubernetes internal DNS")
|
||||
|
||||
|
||||
def _effective_allowed_hosts(policy: SSRFPolicy) -> frozenset[str]:
|
||||
"""Return allowed_hosts, augmented for local environments."""
|
||||
extra: set[str] = set()
|
||||
if os.environ.get("LANGCHAIN_ENV", "").startswith("local"):
|
||||
extra.update({"localhost", "testserver"})
|
||||
if extra:
|
||||
return policy.allowed_hosts | frozenset(extra)
|
||||
return policy.allowed_hosts
|
||||
|
||||
|
||||
async def validate_url(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
|
||||
"""Validate a URL against the SSRF policy, including DNS resolution.
|
||||
|
||||
This is the primary entry-point for async code paths. It delegates
|
||||
scheme/hostname/allowed-hosts checks to `validate_url_sync`, then
|
||||
resolves DNS and validates every resolved IP.
|
||||
|
||||
Raises:
|
||||
SSRFBlockedError: If the URL violates the policy.
|
||||
"""
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
hostname = parsed.hostname or ""
|
||||
|
||||
validate_url_sync(url, policy)
|
||||
|
||||
allowed = {h.lower() for h in _effective_allowed_hosts(policy)}
|
||||
if hostname.lower() in allowed:
|
||||
return
|
||||
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
port = parsed.port or (443 if scheme == "https" else 80)
|
||||
try:
|
||||
addrinfo = await asyncio.to_thread(
|
||||
socket.getaddrinfo, hostname, port, type=socket.SOCK_STREAM
|
||||
)
|
||||
except socket.gaierror as exc:
|
||||
msg = "DNS resolution failed"
|
||||
raise SSRFBlockedError(msg) from exc
|
||||
|
||||
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
|
||||
validate_resolved_ip(str(sockaddr[0]), policy)
|
||||
|
||||
|
||||
def validate_url_sync(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
|
||||
"""Synchronous URL validation (no DNS resolution).
|
||||
|
||||
Suitable for Pydantic validators and other sync contexts. Checks scheme
|
||||
and hostname patterns only - use `validate_url` for full DNS-aware checking.
|
||||
|
||||
Raises:
|
||||
SSRFBlockedError: If the URL violates the policy.
|
||||
"""
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
if scheme not in policy.allowed_schemes:
|
||||
msg = f"scheme '{scheme}' not allowed"
|
||||
raise SSRFBlockedError(msg)
|
||||
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
msg = "missing hostname"
|
||||
raise SSRFBlockedError(msg)
|
||||
|
||||
allowed = _effective_allowed_hosts(policy)
|
||||
if hostname.lower() in {h.lower() for h in allowed}:
|
||||
return
|
||||
|
||||
try:
|
||||
ipaddress.ip_address(hostname)
|
||||
validate_resolved_ip(hostname, policy)
|
||||
except SSRFBlockedError:
|
||||
raise
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
return
|
||||
|
||||
validate_hostname(hostname, policy)
|
||||
@@ -1,8 +1,28 @@
|
||||
"""SSRF Protection - thin wrapper raising ValueError for internal callers.
|
||||
"""SSRF Protection for validating URLs against Server-Side Request Forgery attacks.
|
||||
|
||||
Delegates all validation to `langchain_core._security._policy`.
|
||||
This module provides utilities to validate user-provided URLs and prevent SSRF attacks
|
||||
by blocking requests to:
|
||||
- Private IP ranges (RFC 1918, loopback, link-local)
|
||||
- Cloud metadata endpoints (AWS, GCP, Azure, etc.)
|
||||
- Localhost addresses
|
||||
- Invalid URL schemes
|
||||
|
||||
Usage:
|
||||
from lc_security.ssrf_protection import validate_safe_url, is_safe_url
|
||||
|
||||
# Validate a URL (raises ValueError if unsafe)
|
||||
safe_url = validate_safe_url("https://example.com/webhook")
|
||||
|
||||
# Check if URL is safe (returns bool)
|
||||
if is_safe_url("http://192.168.1.1"):
|
||||
# URL is safe
|
||||
pass
|
||||
|
||||
# Allow private IPs for development/testing (still blocks cloud metadata)
|
||||
safe_url = validate_safe_url("http://localhost:8080", allow_private=True)
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import os
|
||||
import socket
|
||||
from typing import Annotated, Any
|
||||
@@ -14,28 +34,105 @@ from pydantic import (
|
||||
HttpUrl,
|
||||
)
|
||||
|
||||
from langchain_core._security._exceptions import SSRFBlockedError
|
||||
from langchain_core._security._policy import (
|
||||
SSRFPolicy,
|
||||
)
|
||||
from langchain_core._security._policy import (
|
||||
validate_resolved_ip as _validate_resolved_ip,
|
||||
)
|
||||
from langchain_core._security._policy import (
|
||||
validate_url_sync as _validate_url_sync,
|
||||
)
|
||||
# Private IP ranges (RFC 1918, RFC 4193, RFC 3927, loopback)
|
||||
PRIVATE_IP_RANGES = [
|
||||
ipaddress.ip_network("10.0.0.0/8"), # Private Class A
|
||||
ipaddress.ip_network("172.16.0.0/12"), # Private Class B
|
||||
ipaddress.ip_network("192.168.0.0/16"), # Private Class C
|
||||
ipaddress.ip_network("127.0.0.0/8"), # Loopback
|
||||
ipaddress.ip_network("169.254.0.0/16"), # Link-local (includes cloud metadata)
|
||||
ipaddress.ip_network("0.0.0.0/8"), # Current network
|
||||
ipaddress.ip_network("::1/128"), # IPv6 loopback
|
||||
ipaddress.ip_network("fc00::/7"), # IPv6 unique local
|
||||
ipaddress.ip_network("fe80::/10"), # IPv6 link-local
|
||||
ipaddress.ip_network("ff00::/8"), # IPv6 multicast
|
||||
]
|
||||
|
||||
# Cloud provider metadata endpoints
|
||||
CLOUD_METADATA_IPS = [
|
||||
"169.254.169.254", # AWS, GCP, Azure, DigitalOcean, Oracle Cloud
|
||||
"169.254.170.2", # AWS ECS task metadata
|
||||
"100.100.100.200", # Alibaba Cloud metadata
|
||||
]
|
||||
|
||||
CLOUD_METADATA_HOSTNAMES = [
|
||||
"metadata.google.internal", # GCP
|
||||
"metadata", # Generic
|
||||
"instance-data", # AWS EC2
|
||||
]
|
||||
|
||||
# Localhost variations
|
||||
LOCALHOST_NAMES = [
|
||||
"localhost",
|
||||
"localhost.localdomain",
|
||||
]
|
||||
|
||||
|
||||
def _policy_for(*, allow_private: bool, allow_http: bool) -> SSRFPolicy:
|
||||
"""Build an `SSRFPolicy` from the legacy flag interface."""
|
||||
schemes = frozenset({"http", "https"}) if allow_http else frozenset({"https"})
|
||||
return SSRFPolicy(
|
||||
allowed_schemes=schemes,
|
||||
block_private_ips=not allow_private,
|
||||
block_localhost=not allow_private,
|
||||
block_cloud_metadata=True,
|
||||
block_k8s_internal=True,
|
||||
)
|
||||
def is_private_ip(ip_str: str) -> bool:
|
||||
"""Check if an IP address is in a private range.
|
||||
|
||||
Args:
|
||||
ip_str: IP address as a string (e.g., "192.168.1.1")
|
||||
|
||||
Returns:
|
||||
True if IP is in a private range, False otherwise
|
||||
"""
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
return any(ip in range_ for range_ in PRIVATE_IP_RANGES)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def is_cloud_metadata(hostname: str, ip_str: str | None = None) -> bool:
|
||||
"""Check if hostname or IP is a cloud metadata endpoint.
|
||||
|
||||
Args:
|
||||
hostname: Hostname to check
|
||||
ip_str: Optional IP address to check
|
||||
|
||||
Returns:
|
||||
True if hostname or IP is a known cloud metadata endpoint
|
||||
"""
|
||||
# Check hostname
|
||||
if hostname.lower() in CLOUD_METADATA_HOSTNAMES:
|
||||
return True
|
||||
|
||||
# Check IP
|
||||
if ip_str and ip_str in CLOUD_METADATA_IPS: # noqa: SIM103
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def is_localhost(hostname: str, ip_str: str | None = None) -> bool:
|
||||
"""Check if hostname or IP is localhost.
|
||||
|
||||
Args:
|
||||
hostname: Hostname to check
|
||||
ip_str: Optional IP address to check
|
||||
|
||||
Returns:
|
||||
True if hostname or IP is localhost
|
||||
"""
|
||||
# Check hostname
|
||||
if hostname.lower() in LOCALHOST_NAMES:
|
||||
return True
|
||||
|
||||
# Check IP
|
||||
if ip_str:
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
# Check if loopback
|
||||
if ip.is_loopback:
|
||||
return True
|
||||
# Also check common localhost IPs
|
||||
if ip_str in ("127.0.0.1", "::1", "0.0.0.0"): # noqa: S104
|
||||
return True
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def validate_safe_url(
|
||||
@@ -50,22 +147,54 @@ def validate_safe_url(
|
||||
by blocking requests to private networks and cloud metadata endpoints.
|
||||
|
||||
Args:
|
||||
url: The URL to validate (string or Pydantic HttpUrl).
|
||||
allow_private: If `True`, allows private IPs and localhost (for development).
|
||||
url: The URL to validate (string or Pydantic HttpUrl)
|
||||
allow_private: If True, allows private IPs and localhost (for development).
|
||||
Cloud metadata endpoints are ALWAYS blocked.
|
||||
allow_http: If `True`, allows both HTTP and HTTPS. If `False`, only HTTPS.
|
||||
allow_http: If True, allows both HTTP and HTTPS. If False, only HTTPS.
|
||||
|
||||
Returns:
|
||||
The validated URL as a string.
|
||||
The validated URL as a string
|
||||
|
||||
Raises:
|
||||
ValueError: If URL is invalid or potentially dangerous.
|
||||
ValueError: If URL is invalid or potentially dangerous
|
||||
|
||||
Examples:
|
||||
>>> validate_safe_url("https://hooks.slack.com/services/xxx")
|
||||
'https://hooks.slack.com/services/xxx'
|
||||
|
||||
>>> validate_safe_url("http://127.0.0.1:8080")
|
||||
ValueError: Localhost URLs are not allowed
|
||||
|
||||
>>> validate_safe_url("http://192.168.1.1")
|
||||
ValueError: URL resolves to private IP: 192.168.1.1
|
||||
|
||||
>>> validate_safe_url("http://169.254.169.254/latest/meta-data/")
|
||||
ValueError: URL resolves to cloud metadata IP: 169.254.169.254
|
||||
|
||||
>>> validate_safe_url("http://localhost:8080", allow_private=True)
|
||||
'http://localhost:8080'
|
||||
"""
|
||||
url_str = str(url)
|
||||
parsed = urlparse(url_str)
|
||||
hostname = parsed.hostname or ""
|
||||
|
||||
# Test-environment bypass (preserved from original implementation)
|
||||
# Validate URL scheme
|
||||
if not allow_http and parsed.scheme != "https":
|
||||
msg = "Only HTTPS URLs are allowed"
|
||||
raise ValueError(msg)
|
||||
|
||||
if parsed.scheme not in ("http", "https"):
|
||||
msg = f"Only HTTP/HTTPS URLs are allowed, got scheme: {parsed.scheme}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Extract hostname
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
msg = "URL must have a valid hostname"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Special handling for test environments - allow test server hostnames
|
||||
# testserver is used by FastAPI/Starlette test clients and doesn't resolve via DNS
|
||||
# Only enabled when LANGCHAIN_ENV=local_test (set in conftest.py)
|
||||
if (
|
||||
os.environ.get("LANGCHAIN_ENV") == "local_test"
|
||||
and hostname.startswith("test")
|
||||
@@ -73,34 +202,51 @@ def validate_safe_url(
|
||||
):
|
||||
return url_str
|
||||
|
||||
policy = _policy_for(allow_private=allow_private, allow_http=allow_http)
|
||||
# ALWAYS block cloud metadata endpoints (even with allow_private=True)
|
||||
if is_cloud_metadata(hostname):
|
||||
msg = f"Cloud metadata endpoints are not allowed: {hostname}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Synchronous scheme + hostname checks
|
||||
try:
|
||||
_validate_url_sync(url_str, policy)
|
||||
except SSRFBlockedError as exc:
|
||||
raise ValueError(str(exc)) from exc
|
||||
# Check for localhost
|
||||
if is_localhost(hostname) and not allow_private:
|
||||
msg = f"Localhost URLs are not allowed: {hostname}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# DNS resolution and IP validation
|
||||
# Resolve hostname to IP addresses and validate each one.
|
||||
# Note: DNS resolution results are cached by the OS, so repeated calls are fast.
|
||||
try:
|
||||
# Get all IP addresses for this hostname
|
||||
addr_info = socket.getaddrinfo(
|
||||
hostname,
|
||||
parsed.port or (443 if parsed.scheme == "https" else 80),
|
||||
socket.AF_UNSPEC,
|
||||
socket.AF_UNSPEC, # Allow both IPv4 and IPv6
|
||||
socket.SOCK_STREAM,
|
||||
)
|
||||
|
||||
for result in addr_info:
|
||||
ip_str: str = result[4][0] # type: ignore[assignment]
|
||||
try:
|
||||
_validate_resolved_ip(ip_str, policy)
|
||||
except SSRFBlockedError as exc:
|
||||
raise ValueError(str(exc)) from exc
|
||||
|
||||
# ALWAYS block cloud metadata IPs
|
||||
if is_cloud_metadata(hostname, ip_str):
|
||||
msg = f"URL resolves to cloud metadata IP: {ip_str}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Check for localhost IPs
|
||||
if is_localhost(hostname, ip_str) and not allow_private:
|
||||
msg = f"URL resolves to localhost IP: {ip_str}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Check for private IPs
|
||||
if not allow_private and is_private_ip(ip_str):
|
||||
msg = f"URL resolves to private IP address: {ip_str}"
|
||||
raise ValueError(msg)
|
||||
|
||||
except socket.gaierror as e:
|
||||
# DNS resolution failed - fail closed for security
|
||||
msg = f"Failed to resolve hostname '{hostname}': {e}"
|
||||
raise ValueError(msg) from e
|
||||
except OSError as e:
|
||||
# Other network errors - fail closed
|
||||
msg = f"Network error while validating URL: {e}"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
@@ -113,7 +259,26 @@ def is_safe_url(
|
||||
allow_private: bool = False,
|
||||
allow_http: bool = True,
|
||||
) -> bool:
|
||||
"""Non-throwing version of `validate_safe_url`."""
|
||||
"""Check if a URL is safe (non-throwing version of validate_safe_url).
|
||||
|
||||
Args:
|
||||
url: The URL to check
|
||||
allow_private: If True, allows private IPs and localhost
|
||||
allow_http: If True, allows both HTTP and HTTPS
|
||||
|
||||
Returns:
|
||||
True if URL is safe, False otherwise
|
||||
|
||||
Examples:
|
||||
>>> is_safe_url("https://example.com")
|
||||
True
|
||||
|
||||
>>> is_safe_url("http://127.0.0.1:8080")
|
||||
False
|
||||
|
||||
>>> is_safe_url("http://localhost:8080", allow_private=True)
|
||||
True
|
||||
"""
|
||||
try:
|
||||
validate_safe_url(url, allow_private=allow_private, allow_http=allow_http)
|
||||
except ValueError:
|
||||
@@ -130,6 +295,7 @@ def _validate_url_ssrf_strict(v: Any) -> Any:
|
||||
|
||||
|
||||
def _validate_url_ssrf_https_only(v: Any) -> Any:
|
||||
"""Validate URL for SSRF protection (HTTPS only, strict mode)."""
|
||||
if isinstance(v, str):
|
||||
validate_safe_url(v, allow_private=False, allow_http=False)
|
||||
return v
|
||||
@@ -144,12 +310,52 @@ def _validate_url_ssrf_relaxed(v: Any) -> Any:
|
||||
|
||||
# Annotated types with SSRF protection
|
||||
SSRFProtectedUrl = Annotated[HttpUrl, BeforeValidator(_validate_url_ssrf_strict)]
|
||||
"""A Pydantic HttpUrl type with built-in SSRF protection.
|
||||
|
||||
This blocks private IPs, localhost, and cloud metadata endpoints.
|
||||
|
||||
Example:
|
||||
class WebhookSchema(BaseModel):
|
||||
url: SSRFProtectedUrl # Automatically validated for SSRF
|
||||
headers: dict[str, str] | None = None
|
||||
"""
|
||||
|
||||
SSRFProtectedUrlRelaxed = Annotated[
|
||||
HttpUrl, BeforeValidator(_validate_url_ssrf_relaxed)
|
||||
]
|
||||
"""A Pydantic HttpUrl with relaxed SSRF protection (allows private IPs).
|
||||
|
||||
Use this for development/testing webhooks where localhost/private IPs are needed.
|
||||
Cloud metadata endpoints are still blocked.
|
||||
|
||||
Example:
|
||||
class DevWebhookSchema(BaseModel):
|
||||
url: SSRFProtectedUrlRelaxed # Allows localhost, blocks cloud metadata
|
||||
"""
|
||||
|
||||
SSRFProtectedHttpsUrl = Annotated[
|
||||
HttpUrl, BeforeValidator(_validate_url_ssrf_https_only)
|
||||
]
|
||||
"""A Pydantic HttpUrl with SSRF protection that only allows HTTPS.
|
||||
|
||||
This blocks private IPs, localhost, cloud metadata endpoints, and HTTP URLs.
|
||||
|
||||
Example:
|
||||
class SecureWebhookSchema(BaseModel):
|
||||
url: SSRFProtectedHttpsUrl # Only HTTPS, blocks private IPs
|
||||
"""
|
||||
|
||||
SSRFProtectedHttpsUrlStr = Annotated[
|
||||
str, BeforeValidator(_validate_url_ssrf_https_only)
|
||||
]
|
||||
"""A string type with SSRF protection that only allows HTTPS URLs.
|
||||
|
||||
Same as SSRFProtectedHttpsUrl but returns a string instead of HttpUrl.
|
||||
Useful for FastAPI query parameters where you need a string URL.
|
||||
|
||||
Example:
|
||||
@router.get("/proxy")
|
||||
async def proxy_get(url: SSRFProtectedHttpsUrlStr):
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(url)
|
||||
"""
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
"""SSRF-safe httpx transport with DNS resolution and IP pinning."""
|
||||
|
||||
import asyncio
|
||||
import socket
|
||||
|
||||
import httpx
|
||||
|
||||
from langchain_core._security._exceptions import SSRFBlockedError
|
||||
from langchain_core._security._policy import (
|
||||
SSRFPolicy,
|
||||
_effective_allowed_hosts,
|
||||
validate_resolved_ip,
|
||||
validate_url_sync,
|
||||
)
|
||||
|
||||
# Keys that AsyncHTTPTransport accepts (forwarded from factory kwargs).
|
||||
_TRANSPORT_KWARGS = frozenset(
|
||||
{
|
||||
"verify",
|
||||
"cert",
|
||||
"trust_env",
|
||||
"http1",
|
||||
"http2",
|
||||
"limits",
|
||||
"retries",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class SSRFSafeTransport(httpx.AsyncBaseTransport):
|
||||
"""httpx async transport that validates DNS results against an SSRF policy.
|
||||
|
||||
For every outgoing request the transport:
|
||||
1. Checks the URL scheme against `policy.allowed_schemes`.
|
||||
2. Validates the hostname against blocked patterns.
|
||||
3. Resolves DNS and validates **all** returned IPs.
|
||||
4. Rewrites the request to connect to the first valid IP while
|
||||
preserving the original `Host` header and TLS SNI hostname.
|
||||
|
||||
Redirects are re-validated on each hop because `follow_redirects`
|
||||
is set on the *client*, causing `handle_async_request` to be called
|
||||
again for each redirect target.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
policy: SSRFPolicy = SSRFPolicy(),
|
||||
**transport_kwargs: object,
|
||||
) -> None:
|
||||
self._policy = policy
|
||||
self._inner = httpx.AsyncHTTPTransport(**transport_kwargs) # type: ignore[arg-type]
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Core request handler
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
async def handle_async_request(
|
||||
self,
|
||||
request: httpx.Request,
|
||||
) -> httpx.Response:
|
||||
hostname = request.url.host or ""
|
||||
scheme = request.url.scheme.lower()
|
||||
|
||||
# 1-3. Scheme, hostname, and pattern checks (reuse sync validator).
|
||||
try:
|
||||
validate_url_sync(str(request.url), self._policy)
|
||||
except SSRFBlockedError:
|
||||
raise
|
||||
|
||||
# Allowed-hosts bypass - skip DNS/IP validation entirely.
|
||||
allowed = {h.lower() for h in _effective_allowed_hosts(self._policy)}
|
||||
if hostname.lower() in allowed:
|
||||
return await self._inner.handle_async_request(request)
|
||||
|
||||
# 4. DNS resolution
|
||||
port = request.url.port or (443 if scheme == "https" else 80)
|
||||
try:
|
||||
addrinfo = await asyncio.to_thread(
|
||||
socket.getaddrinfo,
|
||||
hostname,
|
||||
port,
|
||||
type=socket.SOCK_STREAM,
|
||||
)
|
||||
except socket.gaierror as exc:
|
||||
raise SSRFBlockedError("DNS resolution failed") from exc
|
||||
|
||||
if not addrinfo:
|
||||
raise SSRFBlockedError("DNS resolution returned no results")
|
||||
|
||||
# 5. Validate ALL resolved IPs - any blocked means reject.
|
||||
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
|
||||
ip_str: str = sockaddr[0] # type: ignore[assignment]
|
||||
validate_resolved_ip(ip_str, self._policy)
|
||||
|
||||
# 6. Pin to first resolved IP.
|
||||
pinned_ip = addrinfo[0][4][0]
|
||||
|
||||
# 7. Rewrite URL to use pinned IP, preserving Host header and SNI.
|
||||
pinned_url = request.url.copy_with(host=pinned_ip)
|
||||
|
||||
# Build extensions dict, adding sni_hostname for HTTPS so TLS
|
||||
# certificate validation uses the original hostname.
|
||||
extensions = dict(request.extensions)
|
||||
if scheme == "https":
|
||||
extensions["sni_hostname"] = hostname.encode("ascii")
|
||||
|
||||
pinned_request = httpx.Request(
|
||||
method=request.method,
|
||||
url=pinned_url,
|
||||
headers=request.headers, # Host header already set to original
|
||||
content=request.content,
|
||||
extensions=extensions,
|
||||
)
|
||||
|
||||
return await self._inner.handle_async_request(pinned_request)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
async def aclose(self) -> None:
|
||||
await self._inner.aclose()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Factory
|
||||
# ---------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class SSRFSafeSyncTransport(httpx.BaseTransport):
|
||||
"""httpx sync transport that validates DNS results against an SSRF policy.
|
||||
|
||||
Sync mirror of `SSRFSafeTransport`. See that class for full documentation.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
policy: SSRFPolicy = SSRFPolicy(),
|
||||
**transport_kwargs: object,
|
||||
) -> None:
|
||||
self._policy = policy
|
||||
self._inner = httpx.HTTPTransport(**transport_kwargs) # type: ignore[arg-type]
|
||||
|
||||
def handle_request(
|
||||
self,
|
||||
request: httpx.Request,
|
||||
) -> httpx.Response:
|
||||
hostname = request.url.host or ""
|
||||
scheme = request.url.scheme.lower()
|
||||
|
||||
validate_url_sync(str(request.url), self._policy)
|
||||
|
||||
allowed = {h.lower() for h in _effective_allowed_hosts(self._policy)}
|
||||
if hostname.lower() in allowed:
|
||||
return self._inner.handle_request(request)
|
||||
|
||||
port = request.url.port or (443 if scheme == "https" else 80)
|
||||
try:
|
||||
addrinfo = socket.getaddrinfo(
|
||||
hostname,
|
||||
port,
|
||||
type=socket.SOCK_STREAM,
|
||||
)
|
||||
except socket.gaierror as exc:
|
||||
raise SSRFBlockedError("DNS resolution failed") from exc
|
||||
|
||||
if not addrinfo:
|
||||
raise SSRFBlockedError("DNS resolution returned no results")
|
||||
|
||||
for _family, _type, _proto, _canonname, sockaddr in addrinfo:
|
||||
ip_str: str = sockaddr[0] # type: ignore[assignment]
|
||||
validate_resolved_ip(ip_str, self._policy)
|
||||
|
||||
pinned_ip = addrinfo[0][4][0]
|
||||
pinned_url = request.url.copy_with(host=pinned_ip)
|
||||
|
||||
extensions = dict(request.extensions)
|
||||
if scheme == "https":
|
||||
extensions["sni_hostname"] = hostname.encode("ascii")
|
||||
|
||||
pinned_request = httpx.Request(
|
||||
method=request.method,
|
||||
url=pinned_url,
|
||||
headers=request.headers,
|
||||
content=request.content,
|
||||
extensions=extensions,
|
||||
)
|
||||
|
||||
return self._inner.handle_request(pinned_request)
|
||||
|
||||
def close(self) -> None:
|
||||
self._inner.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- #
|
||||
# Factories
|
||||
# ---------------------------------------------------------------------- #
|
||||
|
||||
|
||||
def ssrf_safe_client(
|
||||
policy: SSRFPolicy = SSRFPolicy(),
|
||||
**kwargs: object,
|
||||
) -> httpx.Client:
|
||||
"""Create an `httpx.Client` with SSRF protection."""
|
||||
transport_kwargs: dict[str, object] = {}
|
||||
client_kwargs: dict[str, object] = {}
|
||||
for key, value in kwargs.items():
|
||||
if key in _TRANSPORT_KWARGS:
|
||||
transport_kwargs[key] = value
|
||||
else:
|
||||
client_kwargs[key] = value
|
||||
|
||||
transport = SSRFSafeSyncTransport(policy=policy, **transport_kwargs)
|
||||
|
||||
client_kwargs.setdefault("follow_redirects", True)
|
||||
client_kwargs.setdefault("max_redirects", 10)
|
||||
|
||||
return httpx.Client(
|
||||
transport=transport,
|
||||
**client_kwargs, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
|
||||
def ssrf_safe_async_client(
|
||||
policy: SSRFPolicy = SSRFPolicy(),
|
||||
**kwargs: object,
|
||||
) -> httpx.AsyncClient:
|
||||
"""Create an `httpx.AsyncClient` with SSRF protection.
|
||||
|
||||
Drop-in replacement for `httpx.AsyncClient(...)` - callers just swap
|
||||
the constructor call. Transport-specific kwargs (`verify`, `cert`,
|
||||
`retries`, etc.) are forwarded to the inner `AsyncHTTPTransport`;
|
||||
everything else goes to the `AsyncClient`.
|
||||
"""
|
||||
transport_kwargs: dict[str, object] = {}
|
||||
client_kwargs: dict[str, object] = {}
|
||||
for key, value in kwargs.items():
|
||||
if key in _TRANSPORT_KWARGS:
|
||||
transport_kwargs[key] = value
|
||||
else:
|
||||
client_kwargs[key] = value
|
||||
|
||||
transport = SSRFSafeTransport(policy=policy, **transport_kwargs)
|
||||
|
||||
# Apply defaults only if not overridden by caller.
|
||||
client_kwargs.setdefault("follow_redirects", True)
|
||||
client_kwargs.setdefault("max_redirects", 10)
|
||||
|
||||
return httpx.AsyncClient(
|
||||
transport=transport,
|
||||
**client_kwargs, # type: ignore[arg-type]
|
||||
)
|
||||
@@ -166,14 +166,14 @@ class InMemoryCache(BaseCache):
|
||||
# Update cache
|
||||
cache.update(
|
||||
prompt="What is the capital of France?",
|
||||
llm_string="model='gpt-5.4-mini',
|
||||
llm_string="model='gpt-3.5-turbo', temperature=0.1",
|
||||
return_val=[Generation(text="Paris")],
|
||||
)
|
||||
|
||||
# Lookup cache
|
||||
result = cache.lookup(
|
||||
prompt="What is the capital of France?",
|
||||
llm_string="model='gpt-5.4-mini',
|
||||
llm_string="model='gpt-3.5-turbo', temperature=0.1",
|
||||
)
|
||||
# result is [Generation(text="Paris")]
|
||||
```
|
||||
|
||||
@@ -7,7 +7,7 @@ import atexit
|
||||
import functools
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable, Mapping
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from contextvars import copy_context
|
||||
@@ -1614,9 +1614,6 @@ class CallbackManager(BaseCallbackManager):
|
||||
local_tags: list[str] | None = None,
|
||||
inheritable_metadata: dict[str, Any] | None = None,
|
||||
local_metadata: dict[str, Any] | None = None,
|
||||
*,
|
||||
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
|
||||
langsmith_inheritable_tags: list[str] | None = None,
|
||||
) -> CallbackManager:
|
||||
"""Configure the callback manager.
|
||||
|
||||
@@ -1628,10 +1625,6 @@ class CallbackManager(BaseCallbackManager):
|
||||
local_tags: The local tags.
|
||||
inheritable_metadata: The inheritable metadata.
|
||||
local_metadata: The local metadata.
|
||||
langsmith_inheritable_metadata: Default inheritable metadata applied
|
||||
to any `LangChainTracer` handlers via `set_defaults`.
|
||||
langsmith_inheritable_tags: Default inheritable tags applied to any
|
||||
`LangChainTracer` handlers via `set_defaults`.
|
||||
|
||||
Returns:
|
||||
The configured callback manager.
|
||||
@@ -1645,8 +1638,6 @@ class CallbackManager(BaseCallbackManager):
|
||||
inheritable_metadata,
|
||||
local_metadata,
|
||||
verbose=verbose,
|
||||
langsmith_inheritable_metadata=langsmith_inheritable_metadata,
|
||||
langsmith_inheritable_tags=langsmith_inheritable_tags,
|
||||
)
|
||||
|
||||
|
||||
@@ -2143,9 +2134,6 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
local_tags: list[str] | None = None,
|
||||
inheritable_metadata: dict[str, Any] | None = None,
|
||||
local_metadata: dict[str, Any] | None = None,
|
||||
*,
|
||||
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
|
||||
langsmith_inheritable_tags: list[str] | None = None,
|
||||
) -> AsyncCallbackManager:
|
||||
"""Configure the async callback manager.
|
||||
|
||||
@@ -2157,10 +2145,6 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
local_tags: The local tags.
|
||||
inheritable_metadata: The inheritable metadata.
|
||||
local_metadata: The local metadata.
|
||||
langsmith_inheritable_metadata: Default inheritable metadata applied
|
||||
to any `LangChainTracer` handlers via `set_defaults`.
|
||||
langsmith_inheritable_tags: Default inheritable tags applied to any
|
||||
`LangChainTracer` handlers via `set_defaults`.
|
||||
|
||||
Returns:
|
||||
The configured async callback manager.
|
||||
@@ -2174,8 +2158,6 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
inheritable_metadata,
|
||||
local_metadata,
|
||||
verbose=verbose,
|
||||
langsmith_inheritable_metadata=langsmith_inheritable_metadata,
|
||||
langsmith_inheritable_tags=langsmith_inheritable_tags,
|
||||
)
|
||||
|
||||
|
||||
@@ -2322,8 +2304,6 @@ def _configure(
|
||||
local_metadata: dict[str, Any] | None = None,
|
||||
*,
|
||||
verbose: bool = False,
|
||||
langsmith_inheritable_metadata: Mapping[str, Any] | None = None,
|
||||
langsmith_inheritable_tags: list[str] | None = None,
|
||||
) -> T:
|
||||
"""Configure the callback manager.
|
||||
|
||||
@@ -2336,10 +2316,6 @@ def _configure(
|
||||
inheritable_metadata: The inheritable metadata.
|
||||
local_metadata: The local metadata.
|
||||
verbose: Whether to enable verbose mode.
|
||||
langsmith_inheritable_metadata: Default inheritable metadata applied to
|
||||
any `LangChainTracer` handlers via `set_defaults`.
|
||||
langsmith_inheritable_tags: Default inheritable tags applied to any
|
||||
`LangChainTracer` handlers via `set_defaults`.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If `LANGCHAIN_TRACING` is set but `LANGCHAIN_TRACING_V2` is not.
|
||||
@@ -2411,6 +2387,8 @@ def _configure(
|
||||
if inheritable_metadata or local_metadata:
|
||||
callback_manager.add_metadata(inheritable_metadata or {})
|
||||
callback_manager.add_metadata(local_metadata or {}, inherit=False)
|
||||
if tracing_metadata:
|
||||
callback_manager.add_metadata(tracing_metadata.copy())
|
||||
if tracing_tags:
|
||||
callback_manager.add_tags(tracing_tags.copy())
|
||||
|
||||
@@ -2462,7 +2440,6 @@ def _configure(
|
||||
else tracing_context["client"]
|
||||
),
|
||||
tags=tracing_tags,
|
||||
metadata=tracing_metadata,
|
||||
)
|
||||
callback_manager.add_handler(handler)
|
||||
except Exception as e:
|
||||
@@ -2480,12 +2457,7 @@ def _configure(
|
||||
run_tree.trace_id,
|
||||
run_tree.dotted_order,
|
||||
)
|
||||
run_id_str = str(run_tree.id)
|
||||
if run_id_str not in handler.run_map:
|
||||
handler.run_map[run_id_str] = run_tree
|
||||
handler._external_run_ids.setdefault( # noqa: SLF001
|
||||
run_id_str, 0
|
||||
)
|
||||
handler.run_map[str(run_tree.id)] = run_tree
|
||||
for var, inheritable, handler_class, env_var in _configure_hooks:
|
||||
create_one = (
|
||||
env_var is not None
|
||||
@@ -2507,32 +2479,6 @@ def _configure(
|
||||
for handler in callback_manager.handlers
|
||||
):
|
||||
callback_manager.add_handler(var_handler, inheritable)
|
||||
|
||||
if tracing_metadata:
|
||||
langsmith_inheritable_metadata = {
|
||||
**tracing_metadata,
|
||||
**(langsmith_inheritable_metadata or {}),
|
||||
}
|
||||
|
||||
if langsmith_inheritable_metadata or langsmith_inheritable_tags:
|
||||
callback_manager.handlers = [
|
||||
handler.copy_with_metadata_defaults(
|
||||
metadata=langsmith_inheritable_metadata,
|
||||
tags=langsmith_inheritable_tags,
|
||||
)
|
||||
if isinstance(handler, LangChainTracer)
|
||||
else handler
|
||||
for handler in callback_manager.handlers
|
||||
]
|
||||
callback_manager.inheritable_handlers = [
|
||||
handler.copy_with_metadata_defaults(
|
||||
metadata=langsmith_inheritable_metadata,
|
||||
tags=langsmith_inheritable_tags,
|
||||
)
|
||||
if isinstance(handler, LangChainTracer)
|
||||
else handler
|
||||
for handler in callback_manager.inheritable_handlers
|
||||
]
|
||||
return callback_manager
|
||||
|
||||
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
"""Cross Encoder interface."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseCrossEncoder(ABC):
|
||||
"""Interface for cross encoder models."""
|
||||
|
||||
@abstractmethod
|
||||
def score(self, text_pairs: list[tuple[str, str]]) -> list[float]:
|
||||
"""Score pairs' similarity.
|
||||
|
||||
Args:
|
||||
text_pairs: List of pairs of texts.
|
||||
|
||||
Returns:
|
||||
List of scores.
|
||||
"""
|
||||
@@ -2,7 +2,6 @@ import re
|
||||
from collections.abc import Sequence
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Literal,
|
||||
TypedDict,
|
||||
TypeVar,
|
||||
@@ -15,21 +14,6 @@ from langchain_core.messages.content import (
|
||||
)
|
||||
|
||||
|
||||
def _filter_invocation_params_for_tracing(params: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Filter out large/inappropriate fields from invocation params for tracing.
|
||||
|
||||
Removes fields like tools, functions, messages, response_format that can be large.
|
||||
|
||||
Args:
|
||||
params: The invocation parameters to filter.
|
||||
|
||||
Returns:
|
||||
The filtered parameters with large fields removed.
|
||||
"""
|
||||
excluded_keys = {"tools", "functions", "messages", "response_format"}
|
||||
return {k: v for k, v in params.items() if k not in excluded_keys}
|
||||
|
||||
|
||||
def is_openai_data_block(
|
||||
block: dict, filter_: Literal["image", "audio", "file"] | None = None
|
||||
) -> bool:
|
||||
|
||||
@@ -69,8 +69,6 @@ class LangSmithParams(TypedDict, total=False):
|
||||
|
||||
ls_stop: list[str] | None
|
||||
"""Stop words for generation."""
|
||||
ls_integration: str
|
||||
"""Integration that created the trace."""
|
||||
|
||||
|
||||
@cache # Cache the tokenizer
|
||||
@@ -301,22 +299,6 @@ class BaseLanguageModel(
|
||||
# generate responses that match a given schema.
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_ls_params(
|
||||
self,
|
||||
stop: list[str] | None = None, # noqa: ARG002
|
||||
**kwargs: Any, # noqa: ARG002
|
||||
) -> LangSmithParams:
|
||||
"""Get standard params for tracing."""
|
||||
return LangSmithParams()
|
||||
|
||||
def _get_ls_params_with_defaults(
|
||||
self,
|
||||
stop: list[str] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> LangSmithParams:
|
||||
"""Wrap _get_ls_params to include any additional default parameters."""
|
||||
return self._get_ls_params(stop=stop, **kwargs)
|
||||
|
||||
@property
|
||||
def _identifying_params(self) -> Mapping[str, Any]:
|
||||
"""Get the identifying parameters."""
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import inspect
|
||||
import json
|
||||
from abc import ABC, abstractmethod
|
||||
@@ -12,8 +11,8 @@ from functools import cached_property
|
||||
from operator import itemgetter
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
from typing_extensions import Self, override
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core.caches import BaseCache
|
||||
from langchain_core.callbacks import (
|
||||
@@ -25,7 +24,6 @@ from langchain_core.callbacks import (
|
||||
)
|
||||
from langchain_core.globals import get_llm_cache
|
||||
from langchain_core.language_models._utils import (
|
||||
_filter_invocation_params_for_tracing,
|
||||
_normalize_messages,
|
||||
_update_message_content_to_blocks,
|
||||
)
|
||||
@@ -34,10 +32,7 @@ from langchain_core.language_models.base import (
|
||||
LangSmithParams,
|
||||
LanguageModelInput,
|
||||
)
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
_warn_unknown_profile_keys,
|
||||
)
|
||||
from langchain_core.language_models.model_profile import ModelProfile
|
||||
from langchain_core.load import dumpd, dumps
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
@@ -362,54 +357,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
arbitrary_types_allowed=True,
|
||||
)
|
||||
|
||||
def _resolve_model_profile(self) -> ModelProfile | None:
|
||||
"""Return the default model profile, or `None` if unavailable.
|
||||
|
||||
Override this in subclasses instead of `_set_model_profile`. The base
|
||||
validator calls it automatically and handles assignment. This avoids
|
||||
coupling partner code to Pydantic validator mechanics.
|
||||
|
||||
Each partner needs its own override because things can vary per-partner,
|
||||
such as the attribute that identifies the model (e.g., `model`,
|
||||
`model_name`, `model_id`, `deployment_name`) and the partner-local
|
||||
`_get_default_model_profile` function that reads from each partner's own
|
||||
profile data.
|
||||
"""
|
||||
# TODO: consider adding a `_model_identifier` property on BaseChatModel
|
||||
# to standardize how partners identify their model, which could allow a
|
||||
# default implementation here that calls a shared
|
||||
# profile-loading mechanism.
|
||||
return None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _set_model_profile(self) -> Self:
|
||||
"""Populate `profile` from `_resolve_model_profile` if not provided.
|
||||
|
||||
Partners should override `_resolve_model_profile` rather than this
|
||||
validator. Overriding this with a new `@model_validator` replaces the
|
||||
base validator (Pydantic v2 behavior), bypassing the standard resolution
|
||||
path. A plain method override does not prevent the base validator from
|
||||
running.
|
||||
"""
|
||||
if self.profile is None:
|
||||
# Suppress errors from partner overrides (e.g., missing profile
|
||||
# files, broken imports) so model construction never fails over an
|
||||
# optional field.
|
||||
with contextlib.suppress(Exception):
|
||||
self.profile = self._resolve_model_profile()
|
||||
return self
|
||||
|
||||
# NOTE: _check_profile_keys must be defined AFTER _set_model_profile.
|
||||
# Pydantic v2 runs mode="after" validators in definition order.
|
||||
@model_validator(mode="after")
|
||||
def _check_profile_keys(self) -> Self:
|
||||
"""Warn on unrecognized profile keys."""
|
||||
# isinstance guard: ModelProfile is a TypedDict (always a dict), but
|
||||
# protects against unexpected types from partner overrides.
|
||||
if self.profile and isinstance(self.profile, dict):
|
||||
_warn_unknown_profile_keys(self.profile)
|
||||
return self
|
||||
|
||||
@cached_property
|
||||
def _serialized(self) -> dict[str, Any]:
|
||||
# self is always a Serializable object in this case, thus the result is
|
||||
@@ -558,7 +505,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
|
||||
inheritable_metadata = {
|
||||
**(config.get("metadata") or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
callback_manager = CallbackManager.configure(
|
||||
config.get("callbacks"),
|
||||
@@ -568,9 +515,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
(run_manager,) = callback_manager.on_chat_model_start(
|
||||
self._serialized,
|
||||
@@ -689,7 +633,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
|
||||
inheritable_metadata = {
|
||||
**(config.get("metadata") or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
callback_manager = AsyncCallbackManager.configure(
|
||||
config.get("callbacks"),
|
||||
@@ -699,9 +643,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
(run_manager,) = await callback_manager.on_chat_model_start(
|
||||
self._serialized,
|
||||
@@ -886,16 +827,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
|
||||
return ls_params
|
||||
|
||||
def _get_ls_params_with_defaults(
|
||||
self,
|
||||
stop: list[str] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> LangSmithParams:
|
||||
"""Wrap _get_ls_params to always include ls_integration."""
|
||||
ls_params = self._get_ls_params(stop=stop, **kwargs)
|
||||
ls_params["ls_integration"] = "langchain_chat_model"
|
||||
return ls_params
|
||||
|
||||
def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:
|
||||
if self.is_lc_serializable():
|
||||
params = {**kwargs, "stop": stop}
|
||||
@@ -968,7 +899,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
options = {"stop": stop, **ls_structured_output_format_dict}
|
||||
inheritable_metadata = {
|
||||
**(metadata or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
|
||||
callback_manager = CallbackManager.configure(
|
||||
@@ -979,9 +910,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
messages_to_trace = [
|
||||
_format_for_tracing(message_list) for message_list in messages
|
||||
@@ -1094,7 +1022,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
options = {"stop": stop, **ls_structured_output_format_dict}
|
||||
inheritable_metadata = {
|
||||
**(metadata or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
|
||||
callback_manager = AsyncCallbackManager.configure(
|
||||
@@ -1105,9 +1033,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
|
||||
messages_to_trace = [
|
||||
|
||||
@@ -42,7 +42,6 @@ from langchain_core.callbacks import (
|
||||
Callbacks,
|
||||
)
|
||||
from langchain_core.globals import get_llm_cache
|
||||
from langchain_core.language_models._utils import _filter_invocation_params_for_tracing
|
||||
from langchain_core.language_models.base import (
|
||||
BaseLanguageModel,
|
||||
LangSmithParams,
|
||||
@@ -528,7 +527,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
options = {"stop": stop}
|
||||
inheritable_metadata = {
|
||||
**(config.get("metadata") or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
callback_manager = CallbackManager.configure(
|
||||
config.get("callbacks"),
|
||||
@@ -538,9 +537,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
(run_manager,) = callback_manager.on_llm_start(
|
||||
self._serialized,
|
||||
@@ -601,7 +597,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
options = {"stop": stop}
|
||||
inheritable_metadata = {
|
||||
**(config.get("metadata") or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
callback_manager = AsyncCallbackManager.configure(
|
||||
config.get("callbacks"),
|
||||
@@ -611,9 +607,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
inheritable_metadata,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
(run_manager,) = await callback_manager.on_llm_start(
|
||||
self._serialized,
|
||||
@@ -913,14 +906,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
metadata = [
|
||||
{
|
||||
**(meta or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
for meta in metadata
|
||||
]
|
||||
elif isinstance(metadata, dict):
|
||||
metadata = {
|
||||
**(metadata or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
if (
|
||||
isinstance(callbacks, list)
|
||||
@@ -957,8 +950,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
run_name_list = run_name or cast(
|
||||
"list[str | None]", ([None] * len(prompts))
|
||||
)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
callback_managers = [
|
||||
CallbackManager.configure(
|
||||
callback,
|
||||
@@ -968,9 +959,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
meta,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
for callback, tag, meta in zip(
|
||||
callbacks, tags_list, metadata_list, strict=False
|
||||
@@ -978,8 +966,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
]
|
||||
else:
|
||||
# We've received a single callbacks arg to apply to all inputs
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
callback_managers = [
|
||||
CallbackManager.configure(
|
||||
cast("Callbacks", callbacks),
|
||||
@@ -989,13 +975,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
cast("dict[str, Any]", metadata),
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
] * len(prompts)
|
||||
run_name_list = [cast("str | None", run_name)] * len(prompts)
|
||||
run_ids_list = self._get_run_ids_list(run_id, prompts)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
options = {"stop": stop}
|
||||
(
|
||||
existing_prompts,
|
||||
@@ -1188,14 +1173,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
metadata = [
|
||||
{
|
||||
**(meta or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
for meta in metadata
|
||||
]
|
||||
elif isinstance(metadata, dict):
|
||||
metadata = {
|
||||
**(metadata or {}),
|
||||
**self._get_ls_params_with_defaults(stop=stop, **kwargs),
|
||||
**self._get_ls_params(stop=stop, **kwargs),
|
||||
}
|
||||
# Create callback managers
|
||||
if isinstance(callbacks, list) and (
|
||||
@@ -1229,8 +1214,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
run_name_list = run_name or cast(
|
||||
"list[str | None]", ([None] * len(prompts))
|
||||
)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
callback_managers = [
|
||||
AsyncCallbackManager.configure(
|
||||
callback,
|
||||
@@ -1240,9 +1223,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
meta,
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
for callback, tag, meta in zip(
|
||||
callbacks, tags_list, metadata_list, strict=False
|
||||
@@ -1250,8 +1230,6 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
]
|
||||
else:
|
||||
# We've received a single callbacks arg to apply to all inputs
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
callback_managers = [
|
||||
AsyncCallbackManager.configure(
|
||||
cast("Callbacks", callbacks),
|
||||
@@ -1261,13 +1239,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
self.tags,
|
||||
cast("dict[str, Any]", metadata),
|
||||
self.metadata,
|
||||
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
|
||||
params
|
||||
),
|
||||
)
|
||||
] * len(prompts)
|
||||
run_name_list = [cast("str | None", run_name)] * len(prompts)
|
||||
run_ids_list = self._get_run_ids_list(run_id, prompts)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
options = {"stop": stop}
|
||||
(
|
||||
existing_prompts,
|
||||
|
||||
@@ -1,14 +1,7 @@
|
||||
"""Model profile types and utilities."""
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from typing import get_type_hints
|
||||
|
||||
from pydantic import ConfigDict
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModelProfile(TypedDict, total=False):
|
||||
"""Model profile.
|
||||
@@ -21,25 +14,6 @@ class ModelProfile(TypedDict, total=False):
|
||||
and supported features.
|
||||
"""
|
||||
|
||||
__pydantic_config__ = ConfigDict(extra="allow") # type: ignore[misc]
|
||||
|
||||
# --- Model metadata ---
|
||||
|
||||
name: str
|
||||
"""Human-readable model name."""
|
||||
|
||||
status: str
|
||||
"""Model status (e.g., `'active'`, `'deprecated'`)."""
|
||||
|
||||
release_date: str
|
||||
"""Model release date (ISO 8601 format, e.g., `'2025-06-01'`)."""
|
||||
|
||||
last_updated: str
|
||||
"""Date the model was last updated (ISO 8601 format)."""
|
||||
|
||||
open_weights: bool
|
||||
"""Whether the model weights are openly available."""
|
||||
|
||||
# --- Input constraints ---
|
||||
|
||||
max_input_tokens: int
|
||||
@@ -112,45 +86,6 @@ class ModelProfile(TypedDict, total=False):
|
||||
"""Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
||||
feature"""
|
||||
|
||||
# --- Other capabilities ---
|
||||
|
||||
attachment: bool
|
||||
"""Whether the model supports file attachments."""
|
||||
|
||||
temperature: bool
|
||||
"""Whether the model supports a temperature parameter."""
|
||||
|
||||
|
||||
ModelProfileRegistry = dict[str, ModelProfile]
|
||||
"""Registry mapping model identifiers or names to their ModelProfile."""
|
||||
|
||||
|
||||
def _warn_unknown_profile_keys(profile: ModelProfile) -> None:
|
||||
"""Warn if `profile` contains keys not declared on `ModelProfile`.
|
||||
|
||||
Args:
|
||||
profile: The model profile dict to check for undeclared keys.
|
||||
"""
|
||||
if not isinstance(profile, dict):
|
||||
return
|
||||
|
||||
try:
|
||||
declared = frozenset(get_type_hints(ModelProfile).keys())
|
||||
except (TypeError, NameError):
|
||||
# get_type_hints raises NameError on unresolvable forward refs and
|
||||
# TypeError when annotations evaluate to non-type objects.
|
||||
logger.debug(
|
||||
"Could not resolve type hints for ModelProfile; "
|
||||
"skipping unknown-key check.",
|
||||
exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
extra = sorted(set(profile) - declared)
|
||||
if extra:
|
||||
warnings.warn(
|
||||
f"Unrecognized keys in model profile: {extra}. "
|
||||
f"This may indicate a version mismatch between langchain-core "
|
||||
f"and your provider package. Consider upgrading langchain-core.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
@@ -109,7 +109,6 @@ from langchain_core.load.mapping import (
|
||||
SERIALIZABLE_MAPPING,
|
||||
)
|
||||
from langchain_core.load.serializable import Serializable
|
||||
from langchain_core.load.validators import CLASS_INIT_VALIDATORS
|
||||
|
||||
DEFAULT_NAMESPACES = [
|
||||
"langchain",
|
||||
@@ -481,19 +480,6 @@ class Reviver:
|
||||
msg = f"Invalid namespace: {value}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# We don't need to recurse on kwargs
|
||||
# as json.loads will do that for us.
|
||||
kwargs = value.get("kwargs", {})
|
||||
|
||||
# Run class-specific validators before the general init_validator.
|
||||
# These run before importing to fail fast on security violations.
|
||||
if mapping_key in CLASS_INIT_VALIDATORS:
|
||||
CLASS_INIT_VALIDATORS[mapping_key](mapping_key, kwargs)
|
||||
|
||||
# Also run general init_validator (e.g., jinja2 blocking)
|
||||
if self.init_validator is not None:
|
||||
self.init_validator(mapping_key, kwargs)
|
||||
|
||||
mod = importlib.import_module(".".join(import_dir))
|
||||
|
||||
cls = getattr(mod, name)
|
||||
@@ -503,6 +489,13 @@ class Reviver:
|
||||
msg = f"Invalid namespace: {value}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# We don't need to recurse on kwargs
|
||||
# as json.loads will do that for us.
|
||||
kwargs = value.get("kwargs", {})
|
||||
|
||||
if self.init_validator is not None:
|
||||
self.init_validator(mapping_key, kwargs)
|
||||
|
||||
return cls(**kwargs)
|
||||
|
||||
return value
|
||||
|
||||
@@ -283,11 +283,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"chat_models",
|
||||
"ChatXAI",
|
||||
),
|
||||
("langchain_baseten", "chat_models", "ChatBaseten"): (
|
||||
"langchain_baseten",
|
||||
"chat_models",
|
||||
"ChatBaseten",
|
||||
),
|
||||
("langchain", "chat_models", "fireworks", "ChatFireworks"): (
|
||||
"langchain_fireworks",
|
||||
"chat_models",
|
||||
@@ -321,12 +316,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"bedrock",
|
||||
"ChatBedrock",
|
||||
),
|
||||
("langchain_aws", "chat_models", "ChatBedrockConverse"): (
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"bedrock_converse",
|
||||
"ChatBedrockConverse",
|
||||
),
|
||||
("langchain_google_genai", "chat_models", "ChatGoogleGenerativeAI"): (
|
||||
"langchain_google_genai",
|
||||
"chat_models",
|
||||
@@ -386,12 +375,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"bedrock",
|
||||
"BedrockLLM",
|
||||
),
|
||||
("langchain", "llms", "bedrock", "BedrockLLM"): (
|
||||
"langchain_aws",
|
||||
"llms",
|
||||
"bedrock",
|
||||
"BedrockLLM",
|
||||
),
|
||||
("langchain", "llms", "fireworks", "Fireworks"): (
|
||||
"langchain_fireworks",
|
||||
"llms",
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
"""Init validators for deserialization security.
|
||||
|
||||
This module contains extra validators that are called during deserialization,
|
||||
ex. to prevent security issues such as SSRF attacks.
|
||||
|
||||
Each validator is a callable matching the `InitValidator` protocol: it takes a
|
||||
class path tuple and kwargs dict, returns `None` on success, and raises
|
||||
`ValueError` if the deserialization should be blocked.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.load.load import InitValidator
|
||||
|
||||
|
||||
def _bedrock_validator(class_path: tuple[str, ...], kwargs: dict[str, Any]) -> None:
|
||||
"""Constructor kwargs validator for AWS Bedrock integrations.
|
||||
|
||||
Blocks deserialization if `endpoint_url` or `base_url` parameters are
|
||||
present, which could enable SSRF attacks.
|
||||
|
||||
Args:
|
||||
class_path: The class path tuple being deserialized.
|
||||
kwargs: The kwargs dict for the class constructor.
|
||||
|
||||
Raises:
|
||||
ValueError: If `endpoint_url` or `base_url` parameters are present.
|
||||
"""
|
||||
dangerous_params = ["endpoint_url", "base_url"]
|
||||
found_params = [p for p in dangerous_params if p in kwargs]
|
||||
|
||||
if found_params:
|
||||
class_name = class_path[-1] if class_path else "Unknown"
|
||||
param_str = ", ".join(found_params)
|
||||
msg = (
|
||||
f"Deserialization of {class_name} with {param_str} is not allowed "
|
||||
f"for security reasons. These parameters can enable Server-Side Request "
|
||||
f"Forgery (SSRF) attacks by directing network requests to arbitrary "
|
||||
f"endpoints during initialization. If you need to use a custom endpoint, "
|
||||
f"instantiate {class_name} directly rather than deserializing it."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
# Keys must cover both serialized IDs (SERIALIZABLE_MAPPING keys) and resolved
|
||||
# import paths (SERIALIZABLE_MAPPING values) to prevent bypass via direct paths.
|
||||
CLASS_INIT_VALIDATORS: dict[tuple[str, ...], "InitValidator"] = {
|
||||
# Serialized (legacy) keys
|
||||
("langchain", "chat_models", "bedrock", "BedrockChat"): _bedrock_validator,
|
||||
("langchain", "chat_models", "bedrock", "ChatBedrock"): _bedrock_validator,
|
||||
(
|
||||
"langchain",
|
||||
"chat_models",
|
||||
"anthropic_bedrock",
|
||||
"ChatAnthropicBedrock",
|
||||
): _bedrock_validator,
|
||||
("langchain_aws", "chat_models", "ChatBedrockConverse"): _bedrock_validator,
|
||||
("langchain", "llms", "bedrock", "Bedrock"): _bedrock_validator,
|
||||
("langchain", "llms", "bedrock", "BedrockLLM"): _bedrock_validator,
|
||||
# Resolved import paths (from ALL_SERIALIZABLE_MAPPINGS values) to defend
|
||||
# against payloads that use the target tuple directly as the "id".
|
||||
(
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"bedrock_converse",
|
||||
"ChatBedrockConverse",
|
||||
): _bedrock_validator,
|
||||
(
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"anthropic",
|
||||
"ChatAnthropicBedrock",
|
||||
): _bedrock_validator,
|
||||
("langchain_aws", "chat_models", "ChatBedrock"): _bedrock_validator,
|
||||
("langchain_aws", "llms", "bedrock", "BedrockLLM"): _bedrock_validator,
|
||||
}
|
||||
@@ -103,13 +103,11 @@ def convert_to_openai_data_block(
|
||||
# Backward compat
|
||||
file["filename"] = extras["filename"]
|
||||
else:
|
||||
# Can't infer filename; set a placeholder default for compatibility.
|
||||
file["filename"] = "LC_AUTOGENERATED"
|
||||
# Can't infer filename
|
||||
warnings.warn(
|
||||
"OpenAI may require a filename for file uploads. Specify a filename"
|
||||
" in the content block, e.g.: {'type': 'file', 'mime_type': "
|
||||
"'...', 'base64': '...', 'filename': 'my-file.pdf'}. "
|
||||
"Using placeholder filename 'LC_AUTOGENERATED'.",
|
||||
"'...', 'base64': '...', 'filename': 'my-file.pdf'}",
|
||||
stacklevel=1,
|
||||
)
|
||||
formatted_block = {"type": "file", "file": file}
|
||||
@@ -335,9 +333,10 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage:
|
||||
|
||||
# Reasoning
|
||||
if reasoning := message.additional_kwargs.get("reasoning"):
|
||||
if "type" not in reasoning:
|
||||
reasoning = {**reasoning, "type": "reasoning"}
|
||||
buckets["reasoning"].append(reasoning)
|
||||
if isinstance(message, AIMessageChunk) and message.chunk_position != "last":
|
||||
buckets["reasoning"].append({**reasoning, "type": "reasoning"})
|
||||
else:
|
||||
buckets["reasoning"].append(reasoning)
|
||||
|
||||
# Refusal
|
||||
if refusal := message.additional_kwargs.get("refusal"):
|
||||
@@ -732,11 +731,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
tool_call_block["extras"]["item_id"] = block["id"]
|
||||
if "index" in block:
|
||||
tool_call_block["index"] = f"lc_tc_{block['index']}"
|
||||
for extra_key in ("status", "namespace"):
|
||||
if extra_key in block:
|
||||
if "extras" not in tool_call_block:
|
||||
tool_call_block["extras"] = {}
|
||||
tool_call_block["extras"][extra_key] = block[extra_key]
|
||||
yield tool_call_block
|
||||
|
||||
elif block_type == "web_search_call":
|
||||
@@ -985,51 +979,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
mcp_list_tools_result["index"] = f"lc_mltr_{block['index'] + 1}"
|
||||
yield cast("types.ServerToolResult", mcp_list_tools_result)
|
||||
|
||||
elif (
|
||||
block_type == "tool_search_call" and block.get("execution") == "server"
|
||||
):
|
||||
tool_search_call: dict[str, Any] = {
|
||||
"type": "server_tool_call",
|
||||
"name": "tool_search",
|
||||
"id": block["id"],
|
||||
"args": block.get("arguments", {}),
|
||||
}
|
||||
if "index" in block:
|
||||
tool_search_call["index"] = f"lc_tsc_{block['index']}"
|
||||
extras: dict[str, Any] = {}
|
||||
known = {"type", "id", "arguments", "index"}
|
||||
for key in block:
|
||||
if key not in known:
|
||||
extras[key] = block[key]
|
||||
if extras:
|
||||
tool_search_call["extras"] = extras
|
||||
yield cast("types.ServerToolCall", tool_search_call)
|
||||
|
||||
elif (
|
||||
block_type == "tool_search_output"
|
||||
and block.get("execution") == "server"
|
||||
):
|
||||
tool_search_output: dict[str, Any] = {
|
||||
"type": "server_tool_result",
|
||||
"tool_call_id": block["id"],
|
||||
"output": {"tools": block.get("tools", [])},
|
||||
}
|
||||
status = block.get("status")
|
||||
if status == "failed":
|
||||
tool_search_output["status"] = "error"
|
||||
elif status == "completed":
|
||||
tool_search_output["status"] = "success"
|
||||
if "index" in block and isinstance(block["index"], int):
|
||||
tool_search_output["index"] = f"lc_tso_{block['index']}"
|
||||
extras_out: dict[str, Any] = {"name": "tool_search"}
|
||||
known_out = {"type", "id", "status", "tools", "index"}
|
||||
for key in block:
|
||||
if key not in known_out:
|
||||
extras_out[key] = block[key]
|
||||
if extras_out:
|
||||
tool_search_output["extras"] = extras_out
|
||||
yield cast("types.ServerToolResult", tool_search_output)
|
||||
|
||||
elif block_type in types.KNOWN_BLOCK_TYPES:
|
||||
yield cast("types.ContentBlock", block)
|
||||
else:
|
||||
|
||||
@@ -874,9 +874,9 @@ def filter_messages(
|
||||
|
||||
filter_messages(
|
||||
messages,
|
||||
include_names=("example_user", "example_assistant"),
|
||||
include_types=("system",),
|
||||
exclude_ids=("bar",),
|
||||
incl_names=("example_user", "example_assistant"),
|
||||
incl_types=("system",),
|
||||
excl_ids=("bar",),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -1551,7 +1551,7 @@ def convert_to_openai_messages(
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "what's in this"},
|
||||
{"type": "text", "text": "whats in this"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
|
||||
@@ -1570,15 +1570,15 @@ def convert_to_openai_messages(
|
||||
],
|
||||
),
|
||||
ToolMessage("foobar", tool_call_id="1", name="bar"),
|
||||
{"role": "assistant", "content": "that's nice"},
|
||||
{"role": "assistant", "content": "thats nice"},
|
||||
]
|
||||
oai_messages = convert_to_openai_messages(messages)
|
||||
# -> [
|
||||
# {'role': 'system', 'content': 'foo'},
|
||||
# {'role': 'user', 'content': [{'type': 'text', 'text': 'what's in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
|
||||
# {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
|
||||
# {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
|
||||
# {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
|
||||
# {'role': 'assistant', 'content': 'that's nice'}
|
||||
# {'role': 'assistant', 'content': 'thats nice'}
|
||||
# ]
|
||||
```
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@ import yaml
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
from typing_extensions import Self, override
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.exceptions import ErrorCode, create_message
|
||||
from langchain_core.load import dumpd
|
||||
from langchain_core.output_parsers.base import BaseOutputParser # noqa: TC001
|
||||
@@ -351,12 +350,6 @@ class BasePromptTemplate(
|
||||
prompt_dict["_type"] = self._prompt_type
|
||||
return prompt_dict
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def save(self, file_path: Path | str) -> None:
|
||||
"""Save the prompt.
|
||||
|
||||
@@ -389,12 +382,11 @@ class BasePromptTemplate(
|
||||
directory_path = save_path.parent
|
||||
directory_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
resolved_path = save_path.resolve()
|
||||
if resolved_path.suffix == ".json":
|
||||
with resolved_path.open("w", encoding="utf-8") as f:
|
||||
if save_path.suffix == ".json":
|
||||
with save_path.open("w", encoding="utf-8") as f:
|
||||
json.dump(prompt_dict, f, indent=4)
|
||||
elif resolved_path.suffix.endswith((".yaml", ".yml")):
|
||||
with resolved_path.open("w", encoding="utf-8") as f:
|
||||
elif save_path.suffix.endswith((".yaml", ".yml")):
|
||||
with save_path.open("w", encoding="utf-8") as f:
|
||||
yaml.dump(prompt_dict, f, default_flow_style=False)
|
||||
else:
|
||||
msg = f"{save_path} must be json or yaml"
|
||||
|
||||
@@ -22,7 +22,6 @@ from pydantic import (
|
||||
)
|
||||
from typing_extensions import Self, override
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AnyMessage,
|
||||
@@ -1306,12 +1305,6 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"""Name of prompt type. Used for serialization."""
|
||||
return "chat"
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def save(self, file_path: Path | str) -> None:
|
||||
"""Save prompt to file.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import warnings
|
||||
from functools import cached_property
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from pydantic import model_validator
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core.load import dumpd
|
||||
@@ -22,35 +21,11 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
|
||||
Recognizes variables in f-string or mustache formatted string dict values.
|
||||
|
||||
Does NOT recognize variables in dict keys. Applies recursively.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt = DictPromptTemplate(
|
||||
template={
|
||||
"type": "text",
|
||||
"text": "Hello {name}",
|
||||
"metadata": {"source": "{source}"},
|
||||
},
|
||||
template_format="f-string",
|
||||
)
|
||||
prompt.format(name="Alice", source="docs")
|
||||
# {
|
||||
# "type": "text",
|
||||
# "text": "Hello Alice",
|
||||
# "metadata": {"source": "docs"},
|
||||
# }
|
||||
```
|
||||
"""
|
||||
|
||||
template: dict[str, Any]
|
||||
template_format: Literal["f-string", "mustache"]
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_template(self) -> "DictPromptTemplate":
|
||||
"""Validate that the template structure contains only safe variables."""
|
||||
_get_input_variables(self.template, self.template_format)
|
||||
return self
|
||||
|
||||
@property
|
||||
def input_variables(self) -> list[str]:
|
||||
"""Template input variables."""
|
||||
|
||||
@@ -12,7 +12,6 @@ from pydantic import (
|
||||
)
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.example_selectors import BaseExampleSelector
|
||||
from langchain_core.messages import BaseMessage, get_buffer_string
|
||||
from langchain_core.prompts.chat import BaseChatPromptTemplate
|
||||
@@ -238,12 +237,6 @@ class FewShotPromptTemplate(_FewShotPromptTemplateMixin, StringPromptTemplate):
|
||||
"""Return the prompt type key."""
|
||||
return "few_shot"
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def save(self, file_path: Path | str) -> None:
|
||||
"""Save the prompt template to a file.
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ from typing import Any
|
||||
from pydantic import ConfigDict, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.example_selectors import BaseExampleSelector
|
||||
from langchain_core.prompts.prompt import PromptTemplate
|
||||
from langchain_core.prompts.string import (
|
||||
@@ -216,12 +215,6 @@ class FewShotPromptWithTemplates(StringPromptTemplate):
|
||||
"""Return the prompt type key."""
|
||||
return "few_shot_with_templates"
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def save(self, file_path: Path | str) -> None:
|
||||
"""Save the prompt to a file.
|
||||
|
||||
|
||||
@@ -9,25 +9,12 @@ from langchain_core.prompts.base import BasePromptTemplate
|
||||
from langchain_core.prompts.string import (
|
||||
DEFAULT_FORMATTER_MAPPING,
|
||||
PromptTemplateFormat,
|
||||
get_template_variables,
|
||||
)
|
||||
from langchain_core.runnables import run_in_executor
|
||||
|
||||
|
||||
class ImagePromptTemplate(BasePromptTemplate[ImageURL]):
|
||||
"""Image prompt template for a multimodal model.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt = ImagePromptTemplate(
|
||||
input_variables=["image_id"],
|
||||
template={"url": "https://example.com/{image_id}.png", "detail": "high"},
|
||||
template_format="f-string",
|
||||
)
|
||||
prompt.format(image_id="cat")
|
||||
# {"url": "https://example.com/cat.png", "detail": "high"}
|
||||
```
|
||||
"""
|
||||
"""Image prompt template for a multimodal model."""
|
||||
|
||||
template: dict = Field(default_factory=dict)
|
||||
"""Template for the prompt."""
|
||||
@@ -56,13 +43,6 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]):
|
||||
f" Found: {overlap}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
template = kwargs.get("template", {})
|
||||
template_format = kwargs.get("template_format", "f-string")
|
||||
for value in template.values():
|
||||
if isinstance(value, str):
|
||||
get_template_variables(value, template_format)
|
||||
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
|
||||
@@ -7,7 +7,6 @@ from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.output_parsers.string import StrOutputParser
|
||||
from langchain_core.prompts.base import BasePromptTemplate
|
||||
from langchain_core.prompts.chat import ChatPromptTemplate
|
||||
@@ -18,51 +17,11 @@ URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/pro
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _validate_path(path: Path) -> None:
|
||||
"""Reject absolute paths and ``..`` traversal components.
|
||||
|
||||
Args:
|
||||
path: The path to validate.
|
||||
|
||||
Raises:
|
||||
ValueError: If the path is absolute or contains ``..`` components.
|
||||
"""
|
||||
if path.is_absolute():
|
||||
msg = (
|
||||
f"Path '{path}' is absolute. Absolute paths are not allowed "
|
||||
f"when loading prompt configurations to prevent path traversal "
|
||||
f"attacks. Use relative paths instead, or pass "
|
||||
f"`allow_dangerous_paths=True` if you trust the input."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
if ".." in path.parts:
|
||||
msg = (
|
||||
f"Path '{path}' contains '..' components. Directory traversal "
|
||||
f"sequences are not allowed when loading prompt configurations. "
|
||||
f"Use direct relative paths instead, or pass "
|
||||
f"`allow_dangerous_paths=True` if you trust the input."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def load_prompt_from_config(
|
||||
config: dict, *, allow_dangerous_paths: bool = False
|
||||
) -> BasePromptTemplate:
|
||||
def load_prompt_from_config(config: dict) -> BasePromptTemplate:
|
||||
"""Load prompt from config dict.
|
||||
|
||||
Args:
|
||||
config: Dict containing the prompt configuration.
|
||||
allow_dangerous_paths: If ``False`` (default), file paths in the
|
||||
config (such as ``template_path``, ``examples``, and
|
||||
``example_prompt_path``) are validated to reject absolute paths
|
||||
and directory traversal (``..``) sequences. Set to ``True`` only
|
||||
if you trust the source of the config.
|
||||
|
||||
Returns:
|
||||
A `PromptTemplate` object.
|
||||
@@ -79,12 +38,10 @@ def load_prompt_from_config(
|
||||
raise ValueError(msg)
|
||||
|
||||
prompt_loader = type_to_loader_dict[config_type]
|
||||
return prompt_loader(config, allow_dangerous_paths=allow_dangerous_paths)
|
||||
return prompt_loader(config)
|
||||
|
||||
|
||||
def _load_template(
|
||||
var_name: str, config: dict, *, allow_dangerous_paths: bool = False
|
||||
) -> dict:
|
||||
def _load_template(var_name: str, config: dict) -> dict:
|
||||
"""Load template from the path if applicable."""
|
||||
# Check if template_path exists in config.
|
||||
if f"{var_name}_path" in config:
|
||||
@@ -94,14 +51,9 @@ def _load_template(
|
||||
raise ValueError(msg)
|
||||
# Pop the template path from the config.
|
||||
template_path = Path(config.pop(f"{var_name}_path"))
|
||||
if not allow_dangerous_paths:
|
||||
_validate_path(template_path)
|
||||
# Resolve symlinks before checking the suffix so that a symlink named
|
||||
# "exploit.txt" pointing to a non-.txt file is caught.
|
||||
resolved_path = template_path.resolve()
|
||||
# Load the template.
|
||||
if resolved_path.suffix == ".txt":
|
||||
template = resolved_path.read_text(encoding="utf-8")
|
||||
if template_path.suffix == ".txt":
|
||||
template = template_path.read_text(encoding="utf-8")
|
||||
else:
|
||||
raise ValueError
|
||||
# Set the template variable to the extracted variable.
|
||||
@@ -109,14 +61,12 @@ def _load_template(
|
||||
return config
|
||||
|
||||
|
||||
def _load_examples(config: dict, *, allow_dangerous_paths: bool = False) -> dict:
|
||||
def _load_examples(config: dict) -> dict:
|
||||
"""Load examples if necessary."""
|
||||
if isinstance(config["examples"], list):
|
||||
pass
|
||||
elif isinstance(config["examples"], str):
|
||||
path = Path(config["examples"])
|
||||
if not allow_dangerous_paths:
|
||||
_validate_path(path)
|
||||
with path.open(encoding="utf-8") as f:
|
||||
if path.suffix == ".json":
|
||||
examples = json.load(f)
|
||||
@@ -142,17 +92,11 @@ def _load_output_parser(config: dict) -> dict:
|
||||
return config
|
||||
|
||||
|
||||
def _load_few_shot_prompt(
|
||||
config: dict, *, allow_dangerous_paths: bool = False
|
||||
) -> FewShotPromptTemplate:
|
||||
def _load_few_shot_prompt(config: dict) -> FewShotPromptTemplate:
|
||||
"""Load the "few shot" prompt from the config."""
|
||||
# Load the suffix and prefix templates.
|
||||
config = _load_template(
|
||||
"suffix", config, allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
config = _load_template(
|
||||
"prefix", config, allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
config = _load_template("suffix", config)
|
||||
config = _load_template("prefix", config)
|
||||
# Load the example prompt.
|
||||
if "example_prompt_path" in config:
|
||||
if "example_prompt" in config:
|
||||
@@ -161,30 +105,19 @@ def _load_few_shot_prompt(
|
||||
"be specified."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
example_prompt_path = Path(config.pop("example_prompt_path"))
|
||||
if not allow_dangerous_paths:
|
||||
_validate_path(example_prompt_path)
|
||||
config["example_prompt"] = load_prompt(
|
||||
example_prompt_path, allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
config["example_prompt"] = load_prompt(config.pop("example_prompt_path"))
|
||||
else:
|
||||
config["example_prompt"] = load_prompt_from_config(
|
||||
config["example_prompt"], allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
config["example_prompt"] = load_prompt_from_config(config["example_prompt"])
|
||||
# Load the examples.
|
||||
config = _load_examples(config, allow_dangerous_paths=allow_dangerous_paths)
|
||||
config = _load_examples(config)
|
||||
config = _load_output_parser(config)
|
||||
return FewShotPromptTemplate(**config)
|
||||
|
||||
|
||||
def _load_prompt(
|
||||
config: dict, *, allow_dangerous_paths: bool = False
|
||||
) -> PromptTemplate:
|
||||
def _load_prompt(config: dict) -> PromptTemplate:
|
||||
"""Load the prompt template from config."""
|
||||
# Load the template from disk if necessary.
|
||||
config = _load_template(
|
||||
"template", config, allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
config = _load_template("template", config)
|
||||
config = _load_output_parser(config)
|
||||
|
||||
template_format = config.get("template_format", "f-string")
|
||||
@@ -201,28 +134,12 @@ def _load_prompt(
|
||||
return PromptTemplate(**config)
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="1.2.21",
|
||||
removal="2.0.0",
|
||||
alternative="Use `dumpd`/`dumps` from `langchain_core.load` to serialize "
|
||||
"prompts and `load`/`loads` to deserialize them.",
|
||||
)
|
||||
def load_prompt(
|
||||
path: str | Path,
|
||||
encoding: str | None = None,
|
||||
*,
|
||||
allow_dangerous_paths: bool = False,
|
||||
) -> BasePromptTemplate:
|
||||
def load_prompt(path: str | Path, encoding: str | None = None) -> BasePromptTemplate:
|
||||
"""Unified method for loading a prompt from LangChainHub or local filesystem.
|
||||
|
||||
Args:
|
||||
path: Path to the prompt file.
|
||||
encoding: Encoding of the file.
|
||||
allow_dangerous_paths: If ``False`` (default), file paths referenced
|
||||
inside the loaded config (such as ``template_path``, ``examples``,
|
||||
and ``example_prompt_path``) are validated to reject absolute paths
|
||||
and directory traversal (``..``) sequences. Set to ``True`` only
|
||||
if you trust the source of the config.
|
||||
|
||||
Returns:
|
||||
A `PromptTemplate` object.
|
||||
@@ -237,16 +154,11 @@ def load_prompt(
|
||||
"instead."
|
||||
)
|
||||
raise RuntimeError(msg)
|
||||
return _load_prompt_from_file(
|
||||
path, encoding, allow_dangerous_paths=allow_dangerous_paths
|
||||
)
|
||||
return _load_prompt_from_file(path, encoding)
|
||||
|
||||
|
||||
def _load_prompt_from_file(
|
||||
file: str | Path,
|
||||
encoding: str | None = None,
|
||||
*,
|
||||
allow_dangerous_paths: bool = False,
|
||||
file: str | Path, encoding: str | None = None
|
||||
) -> BasePromptTemplate:
|
||||
"""Load prompt from file."""
|
||||
# Convert file to a Path object.
|
||||
@@ -262,14 +174,10 @@ def _load_prompt_from_file(
|
||||
msg = f"Got unsupported file type {file_path.suffix}"
|
||||
raise ValueError(msg)
|
||||
# Load the prompt from the config now.
|
||||
return load_prompt_from_config(config, allow_dangerous_paths=allow_dangerous_paths)
|
||||
return load_prompt_from_config(config)
|
||||
|
||||
|
||||
def _load_chat_prompt(
|
||||
config: dict,
|
||||
*,
|
||||
allow_dangerous_paths: bool = False, # noqa: ARG001
|
||||
) -> ChatPromptTemplate:
|
||||
def _load_chat_prompt(config: dict) -> ChatPromptTemplate:
|
||||
"""Load chat prompt from config."""
|
||||
messages = config.pop("messages")
|
||||
template = messages[0]["prompt"].pop("template") if messages else None
|
||||
@@ -282,7 +190,7 @@ def _load_chat_prompt(
|
||||
return ChatPromptTemplate.from_template(template=template, **config)
|
||||
|
||||
|
||||
type_to_loader_dict: dict[str, Callable[..., BasePromptTemplate]] = {
|
||||
type_to_loader_dict: dict[str, Callable[[dict], BasePromptTemplate]] = {
|
||||
"prompt": _load_prompt,
|
||||
"few_shot": _load_few_shot_prompt,
|
||||
"chat": _load_chat_prompt,
|
||||
|
||||
@@ -219,46 +219,6 @@ DEFAULT_VALIDATOR_MAPPING: dict[str, Callable] = {
|
||||
}
|
||||
|
||||
|
||||
def _parse_f_string_fields(template: str) -> list[tuple[str, str | None]]:
|
||||
fields: list[tuple[str, str | None]] = []
|
||||
for _, field_name, format_spec, _ in Formatter().parse(template):
|
||||
if field_name is not None:
|
||||
fields.append((field_name, format_spec))
|
||||
return fields
|
||||
|
||||
|
||||
def validate_f_string_template(template: str) -> list[str]:
|
||||
"""Validate an f-string template and return its input variables."""
|
||||
input_variables = set()
|
||||
for var, format_spec in _parse_f_string_fields(template):
|
||||
if "." in var or "[" in var or "]" in var:
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot contain attribute "
|
||||
f"access (.) or indexing ([])."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
if var.isdigit():
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot be all digits as they are interpreted "
|
||||
f"as positional arguments."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
if format_spec and ("{" in format_spec or "}" in format_spec):
|
||||
msg = (
|
||||
"Invalid format specifier in f-string template. "
|
||||
"Nested replacement fields are not allowed."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
input_variables.add(var)
|
||||
|
||||
return sorted(input_variables)
|
||||
|
||||
|
||||
def check_valid_template(
|
||||
template: str, template_format: str, input_variables: list[str]
|
||||
) -> None:
|
||||
@@ -283,8 +243,6 @@ def check_valid_template(
|
||||
f" {list(DEFAULT_FORMATTER_MAPPING)}."
|
||||
)
|
||||
raise ValueError(msg) from exc
|
||||
if template_format == "f-string":
|
||||
validate_f_string_template(template)
|
||||
try:
|
||||
validator_func(template, input_variables)
|
||||
except (KeyError, IndexError) as exc:
|
||||
@@ -310,18 +268,43 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
|
||||
Raises:
|
||||
ValueError: If the template format is not supported.
|
||||
"""
|
||||
input_variables: list[str] | set[str]
|
||||
if template_format == "jinja2":
|
||||
# Get the variables for the template
|
||||
input_variables = sorted(_get_jinja2_variables_from_template(template))
|
||||
input_variables = _get_jinja2_variables_from_template(template)
|
||||
elif template_format == "f-string":
|
||||
input_variables = validate_f_string_template(template)
|
||||
input_variables = {
|
||||
v for _, v, _, _ in Formatter().parse(template) if v is not None
|
||||
}
|
||||
elif template_format == "mustache":
|
||||
input_variables = mustache_template_vars(template)
|
||||
else:
|
||||
msg = f"Unsupported template format: {template_format}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# For f-strings, block attribute access and indexing syntax
|
||||
# This prevents template injection attacks via accessing dangerous attributes
|
||||
if template_format == "f-string":
|
||||
for var in input_variables:
|
||||
# Formatter().parse() returns field names with dots/brackets if present
|
||||
# e.g., "obj.attr" or "obj[0]" - we need to block these
|
||||
if "." in var or "[" in var or "]" in var:
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot contain attribute "
|
||||
f"access (.) or indexing ([])."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
# Block variable names that are all digits (e.g., "0", "100")
|
||||
# These are interpreted as positional arguments, not keyword arguments
|
||||
if var.isdigit():
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot be all digits as they are interpreted "
|
||||
f"as positional arguments."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
return sorted(input_variables)
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
The LangChain Expression Language (LCEL) offers a declarative method to build
|
||||
production-grade programs that harness the power of LLMs.
|
||||
|
||||
Programs created using LCEL and LangChain `Runnable` objects inherently support
|
||||
Programs created using LCEL and LangChain `Runnable` objects inherently suppor
|
||||
synchronous asynchronous, batch, and streaming operations.
|
||||
|
||||
Support for **async** allows servers hosting LCEL based programs to scale bette for
|
||||
|
||||
@@ -138,28 +138,6 @@ COPIABLE_KEYS = [
|
||||
"configurable",
|
||||
]
|
||||
|
||||
|
||||
# Users are expected to use the `context` API with a context object
|
||||
# (which does not get traced)
|
||||
CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS = frozenset(("api_key",))
|
||||
|
||||
|
||||
def _get_langsmith_inheritable_metadata_from_config(
|
||||
config: RunnableConfig,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Get LangSmith-only inheritable metadata defaults derived from config."""
|
||||
configurable = config.get("configurable") or {}
|
||||
metadata = {
|
||||
key: value
|
||||
for key, value in configurable.items()
|
||||
if not key.startswith("__")
|
||||
and isinstance(value, (str, int, float, bool))
|
||||
and key not in config.get("metadata", {})
|
||||
and key not in CONFIGURABLE_TO_TRACING_METADATA_EXCLUDED_KEYS
|
||||
}
|
||||
return metadata or None
|
||||
|
||||
|
||||
DEFAULT_RECURSION_LIMIT = 25
|
||||
|
||||
|
||||
@@ -286,17 +264,14 @@ def ensure_config(config: RunnableConfig | None = None) -> RunnableConfig:
|
||||
for k, v in config.items():
|
||||
if k not in CONFIG_KEYS and v is not None:
|
||||
empty["configurable"][k] = v
|
||||
for configurable_key in ("model", "checkpoint_ns"):
|
||||
for key, value in empty.get("configurable", {}).items():
|
||||
if (
|
||||
isinstance(
|
||||
configurable_value := empty.get("configurable", {}).get(
|
||||
configurable_key
|
||||
),
|
||||
str,
|
||||
)
|
||||
and configurable_key not in empty["metadata"]
|
||||
not key.startswith("__")
|
||||
and isinstance(value, (str, int, float, bool))
|
||||
and key not in empty["metadata"]
|
||||
and key != "api_key"
|
||||
):
|
||||
empty["metadata"][configurable_key] = configurable_value
|
||||
empty["metadata"][key] = value
|
||||
return empty
|
||||
|
||||
|
||||
@@ -533,9 +508,6 @@ def get_callback_manager_for_config(config: RunnableConfig) -> CallbackManager:
|
||||
inheritable_callbacks=config.get("callbacks"),
|
||||
inheritable_tags=config.get("tags"),
|
||||
inheritable_metadata=config.get("metadata"),
|
||||
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
|
||||
config
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -554,9 +526,6 @@ def get_async_callback_manager_for_config(
|
||||
inheritable_callbacks=config.get("callbacks"),
|
||||
inheritable_tags=config.get("tags"),
|
||||
inheritable_metadata=config.get("metadata"),
|
||||
langsmith_inheritable_metadata=_get_langsmith_inheritable_metadata_from_config(
|
||||
config
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -499,7 +499,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
# When invoking the created RunnableSequence, you can pass in the
|
||||
# value for your ConfigurableField's id which in this case will either be
|
||||
# `joke` or `poem`.
|
||||
chain = prompt | ChatOpenAI(model="gpt-5.4-mini")
|
||||
chain = prompt | ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
|
||||
|
||||
# The `with_config` method brings in the desired Prompt Runnable in your
|
||||
# Runnable Sequence.
|
||||
@@ -525,7 +525,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
"poem": PromptTemplate.from_template("Write a short poem about {topic}")
|
||||
},
|
||||
)
|
||||
chain = prompt | ChatOpenAI(model="gpt-5.4-mini")
|
||||
chain = prompt | ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
|
||||
chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -54,8 +54,8 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
from langchain_core.chat_models.openai import ChatOpenAI
|
||||
from langchain_core.chat_models.anthropic import ChatAnthropic
|
||||
|
||||
model = ChatAnthropic(model="claude-sonnet-4-6").with_fallbacks(
|
||||
[ChatOpenAI(model="gpt-5.4-mini")]
|
||||
model = ChatAnthropic(model="claude-3-haiku-20240307").with_fallbacks(
|
||||
[ChatOpenAI(model="gpt-3.5-turbo-0125")]
|
||||
)
|
||||
# Will usually use ChatAnthropic, but fallback to ChatOpenAI
|
||||
# if ChatAnthropic fails.
|
||||
|
||||
@@ -414,11 +414,6 @@ def _render_mermaid_using_api(
|
||||
base_url: str | None = None,
|
||||
) -> bytes:
|
||||
"""Renders Mermaid graph using the Mermaid.INK API."""
|
||||
# Automated scanners: please do not open security advisories about `base_url`
|
||||
# here. This parameter is only ever explicitly controlled by developers, and it
|
||||
# is not exposed by default over the network by the framework, so this is not
|
||||
# an SSRF sink unless an application explicitly wires untrusted input into it.
|
||||
# It exists to support user-managed Mermaid API deployments.
|
||||
# Defaults to using the public mermaid.ink server.
|
||||
base_url = base_url if base_url is not None else "https://mermaid.ink"
|
||||
|
||||
|
||||
@@ -558,16 +558,12 @@ class ConfigurableField(NamedTuple):
|
||||
|
||||
id: str
|
||||
"""The unique identifier of the field."""
|
||||
|
||||
name: str | None = None
|
||||
"""The name of the field. """
|
||||
|
||||
description: str | None = None
|
||||
"""The description of the field. """
|
||||
|
||||
annotation: Any | None = None
|
||||
"""The annotation of the field. """
|
||||
|
||||
is_shared: bool = False
|
||||
"""Whether the field is shared."""
|
||||
|
||||
@@ -581,19 +577,14 @@ class ConfigurableFieldSingleOption(NamedTuple):
|
||||
|
||||
id: str
|
||||
"""The unique identifier of the field."""
|
||||
|
||||
options: Mapping[str, Any]
|
||||
"""The options for the field."""
|
||||
|
||||
default: str
|
||||
"""The default value for the field."""
|
||||
|
||||
name: str | None = None
|
||||
"""The name of the field. """
|
||||
|
||||
description: str | None = None
|
||||
"""The description of the field. """
|
||||
|
||||
is_shared: bool = False
|
||||
"""Whether the field is shared."""
|
||||
|
||||
@@ -607,19 +598,14 @@ class ConfigurableFieldMultiOption(NamedTuple):
|
||||
|
||||
id: str
|
||||
"""The unique identifier of the field."""
|
||||
|
||||
options: Mapping[str, Any]
|
||||
"""The options for the field."""
|
||||
|
||||
default: Sequence[str]
|
||||
"""The default values for the field."""
|
||||
|
||||
name: str | None = None
|
||||
"""The name of the field. """
|
||||
|
||||
description: str | None = None
|
||||
"""The description of the field. """
|
||||
|
||||
is_shared: bool = False
|
||||
"""Whether the field is shared."""
|
||||
|
||||
@@ -638,22 +624,16 @@ class ConfigurableFieldSpec(NamedTuple):
|
||||
|
||||
id: str
|
||||
"""The unique identifier of the field."""
|
||||
|
||||
annotation: Any
|
||||
"""The annotation of the field."""
|
||||
|
||||
name: str | None = None
|
||||
"""The name of the field. """
|
||||
|
||||
description: str | None = None
|
||||
"""The description of the field. """
|
||||
|
||||
default: Any = None
|
||||
"""The default value for the field. """
|
||||
|
||||
is_shared: bool = False
|
||||
"""Whether the field is shared."""
|
||||
|
||||
dependencies: list[str] | None = None
|
||||
"""The dependencies of the field. """
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Tools are classes that an Agent uses to interact with the world.
|
||||
|
||||
Each tool has a description. Agent uses the description to choose the right tool for the
|
||||
Each tool has a description. Agent uses the description to choose the righ tool for the
|
||||
job.
|
||||
"""
|
||||
|
||||
|
||||
@@ -47,15 +47,6 @@ class BaseTracer(_TracerCore, BaseCallbackHandler, ABC):
|
||||
if not run.parent_run_id:
|
||||
self._persist_run(run)
|
||||
self.run_map.pop(str(run.id))
|
||||
# If this run's parent was injected from an external tracing context
|
||||
# (e.g. a langsmith @traceable), decrement its child refcount and
|
||||
# remove it from run_map once the last child is done.
|
||||
parent_id = str(run.parent_run_id) if run.parent_run_id else None
|
||||
if parent_id and parent_id in self._external_run_ids:
|
||||
self._external_run_ids[parent_id] -= 1
|
||||
if self._external_run_ids[parent_id] <= 0:
|
||||
self.run_map.pop(parent_id, None)
|
||||
del self._external_run_ids[parent_id]
|
||||
self._on_run_update(run)
|
||||
|
||||
def on_chat_model_start(
|
||||
@@ -577,15 +568,6 @@ class AsyncBaseTracer(_TracerCore, AsyncCallbackHandler, ABC):
|
||||
if not run.parent_run_id:
|
||||
await self._persist_run(run)
|
||||
self.run_map.pop(str(run.id))
|
||||
# If this run's parent was injected from an external tracing context
|
||||
# (e.g. a langsmith @traceable), decrement its child refcount and
|
||||
# remove it from run_map once the last child is done.
|
||||
parent_id = str(run.parent_run_id) if run.parent_run_id else None
|
||||
if parent_id and parent_id in self._external_run_ids:
|
||||
self._external_run_ids[parent_id] -= 1
|
||||
if self._external_run_ids[parent_id] <= 0:
|
||||
self.run_map.pop(parent_id, None)
|
||||
del self._external_run_ids[parent_id]
|
||||
await self._on_run_update(run)
|
||||
|
||||
@override
|
||||
|
||||
@@ -51,9 +51,6 @@ class _TracerCore(ABC):
|
||||
_schema_format: Literal[
|
||||
"original", "streaming_events", "original+chat"
|
||||
] = "original",
|
||||
run_map: dict[str, Run] | None = None,
|
||||
order_map: dict[UUID, tuple[UUID, str]] | None = None,
|
||||
_external_run_ids: dict[str, int] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize the tracer.
|
||||
@@ -73,9 +70,6 @@ class _TracerCore(ABC):
|
||||
streaming events.
|
||||
- `'original+chat'` is a format that is the same as `'original'` except
|
||||
it does NOT raise an attribute error `on_chat_model_start`
|
||||
run_map: Optional shared map of run ID to run.
|
||||
order_map: Optional shared map of run ID to trace ordering data.
|
||||
_external_run_ids: Optional shared set of externally injected run IDs.
|
||||
**kwargs: Additional keyword arguments that will be passed to the
|
||||
superclass.
|
||||
"""
|
||||
@@ -83,22 +77,12 @@ class _TracerCore(ABC):
|
||||
|
||||
self._schema_format = _schema_format # For internal use only API will change.
|
||||
|
||||
self.run_map = run_map if run_map is not None else {}
|
||||
self.run_map: dict[str, Run] = {}
|
||||
"""Map of run ID to run. Cleared on run end."""
|
||||
|
||||
self.order_map = order_map if order_map is not None else {}
|
||||
self.order_map: dict[UUID, tuple[UUID, str]] = {}
|
||||
"""Map of run ID to (trace_id, dotted_order). Cleared when tracer GCed."""
|
||||
|
||||
self._external_run_ids: dict[str, int] = (
|
||||
_external_run_ids if _external_run_ids is not None else {}
|
||||
)
|
||||
"""Refcount of active children per externally-injected run ID.
|
||||
|
||||
These runs are added to `run_map` so child runs can find their parent,
|
||||
but they are not managed by the tracer's callback lifecycle. When
|
||||
the last child finishes the entry is evicted to avoid memory leaks.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _persist_run(self, run: Run) -> Coroutine[Any, Any, None] | None:
|
||||
"""Persist a run."""
|
||||
@@ -129,9 +113,6 @@ class _TracerCore(ABC):
|
||||
run.dotted_order += "." + current_dotted_order
|
||||
if parent_run := self.run_map.get(str(run.parent_run_id)):
|
||||
self._add_child_run(parent_run, run)
|
||||
parent_key = str(run.parent_run_id)
|
||||
if parent_key in self._external_run_ids:
|
||||
self._external_run_ids[parent_key] += 1
|
||||
else:
|
||||
if self.log_missing_parent:
|
||||
logger.debug(
|
||||
|
||||
@@ -27,8 +27,6 @@ from langchain_core.tracers.base import BaseTracer
|
||||
from langchain_core.tracers.schemas import Run
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
|
||||
|
||||
@@ -36,22 +34,6 @@ logger = logging.getLogger(__name__)
|
||||
_LOGGED = set()
|
||||
_EXECUTOR: ThreadPoolExecutor | None = None
|
||||
|
||||
OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS: frozenset[str] = frozenset(
|
||||
{"ls_agent_type"}
|
||||
)
|
||||
"""Allowlist of LangSmith-only tracing metadata keys that bypass the default
|
||||
"first wins" merge semantics used when propagating tracer metadata to nested
|
||||
runs.
|
||||
|
||||
Keys in this set are ALWAYS overridden by the nearest enclosing tracer config,
|
||||
so nested callers (e.g. a subagent) can replace a value inherited from an
|
||||
ancestor.
|
||||
|
||||
Keep this list very small: every key here loses the default "first wins"
|
||||
protection and is always clobbered by the nearest enclosing tracer config.
|
||||
Only keys that are strictly for LangSmith tracing bookkeeping should be added.
|
||||
"""
|
||||
|
||||
|
||||
def log_error_once(method: str, exception: Exception) -> None:
|
||||
"""Log an error once.
|
||||
@@ -142,8 +124,6 @@ class LangChainTracer(BaseTracer):
|
||||
project_name: str | None = None,
|
||||
client: Client | None = None,
|
||||
tags: list[str] | None = None,
|
||||
*,
|
||||
metadata: Mapping[str, str] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize the LangChain tracer.
|
||||
@@ -159,9 +139,6 @@ class LangChainTracer(BaseTracer):
|
||||
tags: The tags.
|
||||
|
||||
Defaults to an empty list.
|
||||
metadata: Additional metadata to include if it isn't already in the run.
|
||||
|
||||
Defaults to None.
|
||||
**kwargs: Additional keyword arguments.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
@@ -173,49 +150,6 @@ class LangChainTracer(BaseTracer):
|
||||
self.tags = tags or []
|
||||
self.latest_run: Run | None = None
|
||||
self.run_has_token_event_map: dict[str, bool] = {}
|
||||
self.tracing_metadata: dict[str, str] | None = (
|
||||
dict(metadata) if metadata is not None else None
|
||||
)
|
||||
|
||||
def copy_with_metadata_defaults(
|
||||
self,
|
||||
*,
|
||||
metadata: Mapping[str, str] | None = None,
|
||||
tags: list[str] | None = None,
|
||||
) -> LangChainTracer:
|
||||
"""Return a new tracer with merged tracer-only defaults."""
|
||||
base_metadata = self.tracing_metadata
|
||||
if metadata is None:
|
||||
merged_metadata = dict(base_metadata) if base_metadata is not None else None
|
||||
elif base_metadata is None:
|
||||
merged_metadata = dict(metadata)
|
||||
else:
|
||||
merged_metadata = dict(base_metadata)
|
||||
for key, value in metadata.items():
|
||||
# For allowlisted LangSmith-only inheritable metadata keys
|
||||
# (e.g. ``ls_agent_type``), nested callers are allowed to
|
||||
# OVERRIDE the value inherited from an ancestor. For all
|
||||
# other keys we keep the existing "first wins" behavior so
|
||||
# that ancestor-provided tracing metadata is not accidentally
|
||||
# clobbered by child runs.
|
||||
if (
|
||||
key not in merged_metadata
|
||||
or key in OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS
|
||||
):
|
||||
merged_metadata[key] = value
|
||||
|
||||
merged_tags = sorted(set(self.tags + tags)) if tags else self.tags
|
||||
|
||||
return self.__class__(
|
||||
example_id=self.example_id,
|
||||
project_name=self.project_name,
|
||||
client=self.client,
|
||||
tags=merged_tags,
|
||||
metadata=merged_metadata,
|
||||
run_map=self.run_map,
|
||||
order_map=self.order_map,
|
||||
_external_run_ids=self._external_run_ids,
|
||||
)
|
||||
|
||||
def _start_trace(self, run: Run) -> None:
|
||||
if self.project_name:
|
||||
@@ -329,7 +263,6 @@ class LangChainTracer(BaseTracer):
|
||||
try:
|
||||
run.extra["runtime"] = get_runtime_environment()
|
||||
run.tags = self._get_tags(run)
|
||||
_patch_missing_metadata(self, run)
|
||||
if run.ls_client is not self.client:
|
||||
run.ls_client = self.client
|
||||
run.post()
|
||||
@@ -465,26 +398,3 @@ class LangChainTracer(BaseTracer):
|
||||
"""Wait for the given futures to complete."""
|
||||
if self.client is not None:
|
||||
self.client.flush()
|
||||
|
||||
|
||||
def _patch_missing_metadata(self: LangChainTracer, run: Run) -> None:
|
||||
if not self.tracing_metadata:
|
||||
return
|
||||
metadata = run.metadata
|
||||
patched = None
|
||||
for k, v in self.tracing_metadata.items():
|
||||
# ``OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS`` are a small,
|
||||
# LangSmith-only allowlist that bypasses the "first wins" merge
|
||||
# so a nested caller (e.g. a subagent) can override a parent-set value.
|
||||
if k not in metadata or k in OVERRIDABLE_LANGSMITH_INHERITABLE_METADATA_KEYS:
|
||||
# Skip the copy when the value already matches (avoids cloning
|
||||
# the shared dict in the common "already set" case). Use a
|
||||
# ``k in metadata`` guard so a legitimate missing key whose
|
||||
# tracer value happens to be ``None`` is still patched in.
|
||||
if k in metadata and metadata[k] == v:
|
||||
continue
|
||||
if patched is None:
|
||||
# Copy on first miss to avoid mutating the shared dict.
|
||||
patched = {**metadata}
|
||||
run.extra["metadata"] = patched
|
||||
patched[k] = v
|
||||
|
||||
@@ -199,6 +199,8 @@ def _convert_pydantic_to_openai_function(
|
||||
" 1. Converting them to Pydantic models with JSON-compatible fields\n"
|
||||
" 2. Using primitive types (str, int, float, bool, list, dict) instead\n"
|
||||
" 3. Passing the data as serialized JSON strings\n\n"
|
||||
"For more information, see: "
|
||||
"https://python.langchain.com/docs/how_to/custom_tools/"
|
||||
)
|
||||
raise PydanticInvalidForJsonSchema(msg) from e
|
||||
return _convert_json_schema_to_openai_function(
|
||||
@@ -500,7 +502,6 @@ def convert_to_openai_function(
|
||||
_WellKnownOpenAITools = (
|
||||
"function",
|
||||
"file_search",
|
||||
"computer",
|
||||
"computer_use_preview",
|
||||
"code_interpreter",
|
||||
"mcp",
|
||||
@@ -571,7 +572,16 @@ def convert_to_openai_tool(
|
||||
oai_tool["format"] = tool.metadata["format"]
|
||||
return oai_tool
|
||||
oai_function = convert_to_openai_function(tool, strict=strict)
|
||||
return {"type": "function", "function": oai_function}
|
||||
result: dict[str, Any] = {"type": "function", "function": oai_function}
|
||||
if (
|
||||
isinstance(tool, langchain_core.tools.base.BaseTool)
|
||||
and hasattr(tool, "extras")
|
||||
and isinstance(tool.extras, dict)
|
||||
):
|
||||
for key in ("defer_loading",):
|
||||
if key in tool.extras:
|
||||
result[key] = tool.extras[key]
|
||||
return result
|
||||
|
||||
|
||||
def convert_to_json_schema(
|
||||
|
||||
@@ -242,12 +242,7 @@ def _create_subset_model_v2(
|
||||
for field_name in field_names:
|
||||
field = model.model_fields[field_name]
|
||||
description = descriptions_.get(field_name, field.description)
|
||||
field_kwargs: dict[str, Any] = {"description": description}
|
||||
if field.default_factory is not None:
|
||||
field_kwargs["default_factory"] = field.default_factory
|
||||
else:
|
||||
field_kwargs["default"] = field.default
|
||||
field_info = FieldInfoV2(**field_kwargs)
|
||||
field_info = FieldInfoV2(description=description, default=field.default)
|
||||
if field.metadata:
|
||||
field_info.metadata = field.metadata
|
||||
fields[field_name] = (field.annotation, field_info)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""langchain-core version information and utilities."""
|
||||
|
||||
VERSION = "1.3.0"
|
||||
VERSION = "1.2.17"
|
||||
|
||||
@@ -21,7 +21,7 @@ classifiers = [
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
]
|
||||
|
||||
version = "1.3.0"
|
||||
version = "1.2.17"
|
||||
requires-python = ">=3.10.0,<4.0.0"
|
||||
dependencies = [
|
||||
"langsmith>=0.3.45,<1.0.0",
|
||||
@@ -58,12 +58,12 @@ dev = [
|
||||
"grandalf>=0.8.0,<1.0.0",
|
||||
]
|
||||
test = [
|
||||
"pytest>=9.0.3,<10.0.0",
|
||||
"pytest>=8.0.0,<10.0.0",
|
||||
"freezegun>=1.2.2,<2.0.0",
|
||||
"pytest-mock>=3.10.0,<4.0.0",
|
||||
"syrupy>=5.0.0,<6.0.0",
|
||||
"syrupy>=4.0.2,<6.0.0",
|
||||
"pytest-watcher>=0.3.4,<1.0.0",
|
||||
"pytest-asyncio>=1.3.0,<2.0.0",
|
||||
"pytest-asyncio>=0.21.1,<2.0.0",
|
||||
"grandalf>=0.8.0,<1.0.0",
|
||||
"responses>=0.25.0,<1.0.0",
|
||||
"pytest-socket>=0.7.0,<1.0.0",
|
||||
@@ -77,9 +77,6 @@ test = [
|
||||
]
|
||||
test_integration = []
|
||||
|
||||
[tool.uv]
|
||||
constraint-dependencies = ["pygments>=2.20.0"] # CVE-2026-4539
|
||||
|
||||
[tool.uv.sources]
|
||||
langchain-tests = { path = "../standard-tests" }
|
||||
langchain-text-splitters = { path = "../text-splitters" }
|
||||
@@ -135,10 +132,8 @@ ignore-var-parameters = true # ignore missing documentation for *args and **kwa
|
||||
"langchain_core/utils/mustache.py" = [ "PLW0603",]
|
||||
"langchain_core/sys_info.py" = [ "T201",]
|
||||
"tests/unit_tests/test_tools.py" = [ "ARG",]
|
||||
"tests/**" = [ "ARG", "D1", "PLR2004", "S", "SLF",]
|
||||
"tests/**" = [ "D1", "PLR2004", "S", "SLF",]
|
||||
"scripts/**" = [ "INP", "S", "T201",]
|
||||
"langchain_core/_security/_policy.py" = [ "EM101", "EM102", "TRY003", "B008", "TRY300",]
|
||||
"langchain_core/_security/_transport.py" = [ "EM101", "EM102", "TRY003", "TRY203", "B008",]
|
||||
|
||||
[tool.coverage.run]
|
||||
omit = [ "tests/*",]
|
||||
|
||||
@@ -6,9 +6,8 @@ set -eu
|
||||
errors=0
|
||||
|
||||
# make sure not importing from langchain or langchain_experimental
|
||||
# allow langchain.agents and langchain.tools (v1 middleware)
|
||||
git --no-pager grep "^from langchain\." . | grep -v ":from langchain\.agents" | grep -v ":from langchain\.tools" && errors=$((errors+1))
|
||||
git --no-pager grep "^from langchain_experimental\." . && errors=$((errors+1))
|
||||
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
|
||||
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
|
||||
|
||||
# Decide on an exit status based on the errors
|
||||
if [ "$errors" -gt 0 ]; then
|
||||
|
||||
@@ -17,6 +17,9 @@ def blockbuster() -> Iterator[BlockBuster]:
|
||||
bb.functions[func]
|
||||
.can_block_in("langchain_core/_api/internal.py", "is_caller_internal")
|
||||
.can_block_in("langchain_core/runnables/base.py", "__repr__")
|
||||
.can_block_in(
|
||||
"langchain_core/beta/runnables/context.py", "aconfig_with_context"
|
||||
)
|
||||
)
|
||||
|
||||
for func in ["os.stat", "io.TextIOWrapper.read"]:
|
||||
|
||||
@@ -3,13 +3,10 @@
|
||||
import uuid
|
||||
import warnings
|
||||
from collections.abc import AsyncIterator, Iterator
|
||||
from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from pydantic import model_validator
|
||||
from typing_extensions import Self, override
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
CallbackManagerForLLMRun,
|
||||
@@ -19,19 +16,12 @@ from langchain_core.language_models import (
|
||||
FakeListChatModel,
|
||||
ParrotFakeChatModel,
|
||||
)
|
||||
from langchain_core.language_models._utils import (
|
||||
_filter_invocation_params_for_tracing,
|
||||
_normalize_messages,
|
||||
)
|
||||
from langchain_core.language_models.chat_models import (
|
||||
SimpleChatModel,
|
||||
_generate_response_from_error,
|
||||
)
|
||||
from langchain_core.language_models._utils import _normalize_messages
|
||||
from langchain_core.language_models.chat_models import _generate_response_from_error
|
||||
from langchain_core.language_models.fake_chat_models import (
|
||||
FakeListChatModelError,
|
||||
GenericFakeChatModel,
|
||||
)
|
||||
from langchain_core.language_models.model_profile import ModelProfile
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
@@ -45,7 +35,6 @@ from langchain_core.tracers import LogStreamCallbackHandler
|
||||
from langchain_core.tracers.base import BaseTracer
|
||||
from langchain_core.tracers.context import collect_runs
|
||||
from langchain_core.tracers.event_stream import _AstreamEventsCallbackHandler
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
from langchain_core.tracers.schemas import Run
|
||||
from tests.unit_tests.fake.callbacks import (
|
||||
BaseFakeCallbackHandler,
|
||||
@@ -330,20 +319,6 @@ class FakeTracer(BaseTracer):
|
||||
self.traced_run_ids.append(run.id)
|
||||
|
||||
|
||||
class LangChainTracerRunCollector:
|
||||
def __init__(self) -> None:
|
||||
self.tracer = LangChainTracer()
|
||||
self.runs: list[Run] = []
|
||||
|
||||
@contextmanager
|
||||
def tracing_callback(self) -> Iterator[LangChainTracer]:
|
||||
def collect_tracer_run(_: LangChainTracer, run: Run) -> None:
|
||||
self.runs.append(run)
|
||||
|
||||
with patch.object(LangChainTracer, "_persist_run", new=collect_tracer_run):
|
||||
yield self.tracer
|
||||
|
||||
|
||||
def test_pass_run_id() -> None:
|
||||
llm = FakeListChatModel(responses=["a", "b", "c"])
|
||||
cb = FakeTracer()
|
||||
@@ -1255,76 +1230,6 @@ def test_model_profiles() -> None:
|
||||
assert model_with_profile.profile == {"max_input_tokens": 100}
|
||||
|
||||
|
||||
def test_resolve_model_profile_hook_populates_profile() -> None:
|
||||
"""_resolve_model_profile is called when profile is None."""
|
||||
|
||||
class ResolverModel(GenericFakeChatModel):
|
||||
def _resolve_model_profile(self) -> ModelProfile | None:
|
||||
return {"max_input_tokens": 500}
|
||||
|
||||
model = ResolverModel(messages=iter([]))
|
||||
assert model.profile == {"max_input_tokens": 500}
|
||||
|
||||
|
||||
def test_resolve_model_profile_hook_skipped_when_explicit() -> None:
|
||||
"""_resolve_model_profile is NOT called when profile is set explicitly."""
|
||||
|
||||
class ResolverModel(GenericFakeChatModel):
|
||||
def _resolve_model_profile(self) -> ModelProfile | None:
|
||||
return {"max_input_tokens": 500}
|
||||
|
||||
model = ResolverModel(messages=iter([]), profile={"max_input_tokens": 999})
|
||||
assert model.profile is not None
|
||||
assert model.profile["max_input_tokens"] == 999
|
||||
|
||||
|
||||
def test_resolve_model_profile_hook_exception_is_caught() -> None:
|
||||
"""Model is still usable if _resolve_model_profile raises."""
|
||||
|
||||
class BrokenProfileModel(GenericFakeChatModel):
|
||||
def _resolve_model_profile(self) -> ModelProfile | None:
|
||||
msg = "profile file not found"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
with warnings.catch_warnings(record=True):
|
||||
warnings.simplefilter("always")
|
||||
model = BrokenProfileModel(messages=iter([]))
|
||||
|
||||
assert model.profile is None
|
||||
|
||||
|
||||
def test_check_profile_keys_runs_despite_partner_override() -> None:
|
||||
"""Verify _check_profile_keys fires even when _set_model_profile is overridden.
|
||||
|
||||
Because _check_profile_keys has a distinct validator name from
|
||||
_set_model_profile, a partner override of the latter does not suppress
|
||||
the key-checking validator.
|
||||
"""
|
||||
|
||||
class PartnerModel(GenericFakeChatModel):
|
||||
"""Simulates a partner that overrides _set_model_profile."""
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _set_model_profile(self) -> Self:
|
||||
if self.profile is None:
|
||||
profile: dict[str, Any] = {
|
||||
"max_input_tokens": 100,
|
||||
"partner_only_field": True,
|
||||
}
|
||||
self.profile = profile # type: ignore[assignment]
|
||||
return self
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
model = PartnerModel(messages=iter([]))
|
||||
|
||||
assert model.profile is not None
|
||||
assert model.profile.get("partner_only_field") is True
|
||||
profile_warnings = [x for x in w if "Unrecognized keys" in str(x.message)]
|
||||
assert len(profile_warnings) == 1
|
||||
assert "partner_only_field" in str(profile_warnings[0].message)
|
||||
|
||||
|
||||
class MockResponse:
|
||||
"""Mock response for testing _generate_response_from_error."""
|
||||
|
||||
@@ -1413,100 +1318,3 @@ def test_generate_response_from_error_handles_streaming_response_failure() -> No
|
||||
assert metadata["body"] is None
|
||||
assert metadata["headers"] == {"content-type": "application/json"}
|
||||
assert metadata["status_code"] == 400
|
||||
|
||||
|
||||
def test_filter_invocation_params_for_tracing() -> None:
|
||||
"""Test that large fields are filtered from invocation params for tracing."""
|
||||
params = {
|
||||
"temperature": 0.7,
|
||||
"tools": [{"name": "test_tool"}],
|
||||
"functions": [{"name": "test_function"}],
|
||||
"messages": [{"role": "system", "content": "test"}],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
filtered = _filter_invocation_params_for_tracing(params)
|
||||
|
||||
# Should include temperature
|
||||
assert "temperature" in filtered
|
||||
assert filtered["temperature"] == 0.7
|
||||
|
||||
# Should exclude these large fields
|
||||
assert "tools" not in filtered
|
||||
assert "functions" not in filtered
|
||||
assert "messages" not in filtered
|
||||
assert "response_format" not in filtered
|
||||
|
||||
|
||||
class FakeChatModelWithInvocationParams(SimpleChatModel):
|
||||
"""Fake chat model with invocation params for testing tracing."""
|
||||
|
||||
temperature: float = 0.7
|
||||
|
||||
@property
|
||||
@override
|
||||
def _llm_type(self) -> str:
|
||||
return "fake-chat-model-with-invocation-params"
|
||||
|
||||
@property
|
||||
@override
|
||||
def _identifying_params(self) -> dict[str, Any]:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"tools": [{"name": "test_tool"}],
|
||||
"functions": [{"name": "test_function"}],
|
||||
"messages": [{"role": "system", "content": "test"}],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
|
||||
@override
|
||||
def _call(
|
||||
self,
|
||||
messages: list[BaseMessage],
|
||||
stop: list[str] | None = None,
|
||||
run_manager: CallbackManagerForLLMRun | None = None,
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
return "test response"
|
||||
|
||||
|
||||
def test_invocation_params_passed_to_tracer_metadata() -> None:
|
||||
"""Test that invocation params are passed to tracer metadata."""
|
||||
llm = FakeChatModelWithInvocationParams()
|
||||
collector = LangChainTracerRunCollector()
|
||||
|
||||
with collector.tracing_callback() as tracer:
|
||||
llm.invoke([HumanMessage(content="Hello")], config={"callbacks": [tracer]})
|
||||
|
||||
assert len(collector.runs) == 1
|
||||
run = collector.runs[0]
|
||||
|
||||
key = "LANGSMITH_LANGGRAPH_API_VARIANT"
|
||||
|
||||
if key in run.extra["metadata"]:
|
||||
del run.extra["metadata"][key]
|
||||
|
||||
assert run.extra == {
|
||||
"batch_size": 1,
|
||||
"invocation_params": {
|
||||
"_type": "fake-chat-model-with-invocation-params",
|
||||
"functions": [{"name": "test_function"}],
|
||||
"messages": [{"content": "test", "role": "system"}],
|
||||
"response_format": {"type": "json_object"},
|
||||
"stop": None,
|
||||
"temperature": 0.7,
|
||||
"tools": [{"name": "test_tool"}],
|
||||
},
|
||||
"metadata": {
|
||||
"_type": "fake-chat-model-with-invocation-params",
|
||||
"ls_integration": "langchain_chat_model",
|
||||
"ls_model_type": "chat",
|
||||
"ls_provider": "fakechatmodelwithinvocationparams",
|
||||
"ls_temperature": 0.7,
|
||||
"revision_id": run.extra["metadata"]["revision_id"],
|
||||
"stop": None,
|
||||
"temperature": 0.7,
|
||||
},
|
||||
"options": {"stop": None},
|
||||
"runtime": run.extra["runtime"],
|
||||
}
|
||||
assert run.metadata == run.extra["metadata"]
|
||||
|
||||
@@ -13,7 +13,6 @@ from langchain_core.language_models import (
|
||||
BaseLLM,
|
||||
FakeListLLM,
|
||||
)
|
||||
from langchain_core.language_models._utils import _filter_invocation_params_for_tracing
|
||||
from langchain_core.outputs import Generation, GenerationChunk, LLMResult
|
||||
from langchain_core.tracers.context import collect_runs
|
||||
from tests.unit_tests.fake.callbacks import (
|
||||
@@ -285,94 +284,3 @@ def test_get_ls_params() -> None:
|
||||
|
||||
ls_params = llm._get_ls_params(stop=["stop"])
|
||||
assert ls_params["ls_stop"] == ["stop"]
|
||||
|
||||
|
||||
def test_filter_invocation_params_for_tracing() -> None:
|
||||
"""Test that large fields are filtered from invocation params for tracing."""
|
||||
params = {
|
||||
"temperature": 0.7,
|
||||
"tools": [{"name": "test_tool"}],
|
||||
"functions": [{"name": "test_function"}],
|
||||
"messages": [{"role": "system", "content": "test"}],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
filtered = _filter_invocation_params_for_tracing(params)
|
||||
|
||||
# Should include temperature
|
||||
assert "temperature" in filtered
|
||||
assert filtered["temperature"] == 0.7
|
||||
|
||||
# Should exclude these large fields
|
||||
assert "tools" not in filtered
|
||||
assert "functions" not in filtered
|
||||
assert "messages" not in filtered
|
||||
assert "response_format" not in filtered
|
||||
|
||||
|
||||
class FakeLLMWithInvocationParams(BaseLLM):
|
||||
"""Fake LLM with invocation params for testing tracing."""
|
||||
|
||||
temperature: float = 0.7
|
||||
|
||||
@property
|
||||
@override
|
||||
def _llm_type(self) -> str:
|
||||
return "fake-llm-with-invocation-params"
|
||||
|
||||
@property
|
||||
@override
|
||||
def _identifying_params(self) -> dict[str, Any]:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"tools": [{"name": "test_tool"}],
|
||||
"functions": [{"name": "test_function"}],
|
||||
"messages": [{"role": "system", "content": "test"}],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
|
||||
@override
|
||||
def _generate(
|
||||
self,
|
||||
prompts: list[str],
|
||||
stop: list[str] | None = None,
|
||||
run_manager: CallbackManagerForLLMRun | None = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMResult:
|
||||
generations = [[Generation(text="test response")]]
|
||||
return LLMResult(generations=generations)
|
||||
|
||||
@override
|
||||
async def _agenerate(
|
||||
self,
|
||||
prompts: list[str],
|
||||
stop: list[str] | None = None,
|
||||
run_manager: AsyncCallbackManagerForLLMRun | None = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMResult:
|
||||
generations = [[Generation(text="test response")]]
|
||||
return LLMResult(generations=generations)
|
||||
|
||||
|
||||
async def test_llm_invocation_params_filtered_in_stream() -> None:
|
||||
"""Test that invocation params are filtered when streaming."""
|
||||
|
||||
# Create a custom LLM that supports streaming
|
||||
class FakeStreamingLLM(FakeLLMWithInvocationParams):
|
||||
@override
|
||||
def _stream(
|
||||
self,
|
||||
prompt: str,
|
||||
stop: list[str] | None = None,
|
||||
run_manager: CallbackManagerForLLMRun | None = None,
|
||||
**kwargs: Any,
|
||||
) -> Iterator[GenerationChunk]:
|
||||
yield GenerationChunk(text="test ")
|
||||
|
||||
streaming_llm = FakeStreamingLLM()
|
||||
|
||||
with collect_runs() as cb:
|
||||
list(streaming_llm.stream("Hello", config={"callbacks": [cb]}))
|
||||
assert len(cb.traced_runs) == 1
|
||||
run = cb.traced_runs[0]
|
||||
# Verify the run was traced
|
||||
assert run.extra is not None
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
"""Tests for model profile types and utilities."""
|
||||
|
||||
import warnings
|
||||
from typing import Any
|
||||
from unittest.mock import patch
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
_warn_unknown_profile_keys,
|
||||
)
|
||||
|
||||
|
||||
class TestModelProfileExtraAllow:
|
||||
"""Verify extra='allow' on ModelProfile TypedDict."""
|
||||
|
||||
def test_accepts_declared_keys(self) -> None:
|
||||
profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True}
|
||||
assert profile["max_input_tokens"] == 100
|
||||
|
||||
def test_extra_keys_accepted_via_typed_dict(self) -> None:
|
||||
"""ModelProfile TypedDict allows extra keys at construction."""
|
||||
profile = ModelProfile(
|
||||
max_input_tokens=100,
|
||||
unknown_future_field="value", # type: ignore[typeddict-unknown-key]
|
||||
)
|
||||
assert profile["unknown_future_field"] == "value" # type: ignore[typeddict-item]
|
||||
|
||||
def test_extra_keys_survive_pydantic_validation(self) -> None:
|
||||
"""Extra keys pass through even when parent model forbids extras."""
|
||||
|
||||
class StrictModel(BaseModel):
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
profile: ModelProfile | None = Field(default=None)
|
||||
|
||||
m = StrictModel(
|
||||
profile={
|
||||
"max_input_tokens": 100,
|
||||
"unknown_future_field": True,
|
||||
}
|
||||
)
|
||||
assert m.profile is not None
|
||||
assert m.profile.get("unknown_future_field") is True
|
||||
|
||||
|
||||
class TestWarnUnknownProfileKeys:
|
||||
"""Tests for _warn_unknown_profile_keys."""
|
||||
|
||||
def test_warns_on_extra_keys(self) -> None:
|
||||
profile: dict[str, Any] = {
|
||||
"max_input_tokens": 100,
|
||||
"future_field": True,
|
||||
"another": "val",
|
||||
}
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
_warn_unknown_profile_keys(profile) # type: ignore[arg-type]
|
||||
|
||||
assert len(w) == 1
|
||||
assert "another" in str(w[0].message)
|
||||
assert "future_field" in str(w[0].message)
|
||||
assert "upgrading langchain-core" in str(w[0].message)
|
||||
|
||||
def test_silent_on_declared_keys_only(self) -> None:
|
||||
profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True}
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
_warn_unknown_profile_keys(profile)
|
||||
|
||||
assert len(w) == 0
|
||||
|
||||
def test_silent_on_empty_profile(self) -> None:
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
_warn_unknown_profile_keys({})
|
||||
|
||||
assert len(w) == 0
|
||||
|
||||
def test_survives_get_type_hints_failure(self) -> None:
|
||||
"""Falls back to silent skip on TypeError from get_type_hints."""
|
||||
profile: dict[str, Any] = {"max_input_tokens": 100, "extra": True}
|
||||
with patch(
|
||||
"langchain_core.language_models.model_profile.get_type_hints",
|
||||
side_effect=TypeError("broken"),
|
||||
):
|
||||
_warn_unknown_profile_keys(profile) # type: ignore[arg-type]
|
||||
@@ -1,4 +1,3 @@
|
||||
import contextlib
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
@@ -7,9 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, SecretStr
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.load import InitValidator, Serializable, dumpd, dumps, load, loads
|
||||
from langchain_core.load.load import ALL_SERIALIZABLE_MAPPINGS
|
||||
from langchain_core.load.serializable import _is_field_useful
|
||||
from langchain_core.load.validators import CLASS_INIT_VALIDATORS, _bedrock_validator
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.outputs import ChatGeneration, Generation
|
||||
from langchain_core.prompts import (
|
||||
@@ -894,267 +891,3 @@ class TestJinja2SecurityBlocking:
|
||||
# jinja2 should be blocked by default
|
||||
with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):
|
||||
load(serialized_jinja2, allowed_objects=[PromptTemplate])
|
||||
|
||||
|
||||
class TestClassSpecificValidatorsInLoad:
|
||||
"""Tests that load() properly integrates with class-specific validators."""
|
||||
|
||||
def test_validator_registry_keys_in_serializable_mapping(self) -> None:
|
||||
"""All CLASS_INIT_VALIDATORS keys must exist in ALL_SERIALIZABLE_MAPPINGS."""
|
||||
all_known_paths = set(ALL_SERIALIZABLE_MAPPINGS.keys()) | set(
|
||||
ALL_SERIALIZABLE_MAPPINGS.values()
|
||||
)
|
||||
for key in CLASS_INIT_VALIDATORS:
|
||||
assert key in all_known_paths, (
|
||||
f"{key} in CLASS_INIT_VALIDATORS but not in "
|
||||
f"ALL_SERIALIZABLE_MAPPINGS keys or values"
|
||||
)
|
||||
|
||||
def test_init_validator_still_called_without_class_validator(self) -> None:
|
||||
"""Test init_validator fires for classes without a class-specific validator."""
|
||||
msg = AIMessage(content="test")
|
||||
serialized = dumpd(msg)
|
||||
|
||||
init_validator_called = []
|
||||
|
||||
def custom_init_validator(
|
||||
_class_path: tuple[str, ...], _kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
init_validator_called.append(True)
|
||||
|
||||
loaded = load(
|
||||
serialized,
|
||||
allowed_objects=[AIMessage],
|
||||
init_validator=custom_init_validator,
|
||||
)
|
||||
assert loaded == msg
|
||||
assert len(init_validator_called) == 1
|
||||
|
||||
def test_load_blocks_bedrock_with_endpoint_url(self) -> None:
|
||||
"""Test that load() blocks Bedrock deserialization with `endpoint_url`."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "chat_models", "bedrock", "ChatBedrock"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_bedrock_chat_legacy_alias(self) -> None:
|
||||
"""Test that load() blocks BedrockChat (legacy alias) with `endpoint_url`."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "chat_models", "bedrock", "BedrockChat"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_bedrock_converse_with_base_url(self) -> None:
|
||||
"""Test that load() blocks ChatBedrockConverse with `base_url`."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_aws", "chat_models", "ChatBedrockConverse"],
|
||||
"kwargs": {
|
||||
"model": "anthropic.claude-v2",
|
||||
"base_url": "http://malicious-site.com",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_anthropic_bedrock_legacy_alias(self) -> None:
|
||||
"""Test load() blocks ChatAnthropicBedrock with `endpoint_url`."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain",
|
||||
"chat_models",
|
||||
"anthropic_bedrock",
|
||||
"ChatAnthropicBedrock",
|
||||
],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_anthropic_bedrock_via_resolved_path(self) -> None:
|
||||
"""Test load() blocks ChatAnthropicBedrock via resolved import path."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"anthropic",
|
||||
"ChatAnthropicBedrock",
|
||||
],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"base_url": "http://malicious-site.com",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_bedrock_via_resolved_import_path(self) -> None:
|
||||
"""Test load() blocks Bedrock via resolved import path (bypass defense)."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"bedrock_converse",
|
||||
"ChatBedrockConverse",
|
||||
],
|
||||
"kwargs": {
|
||||
"model": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_both_class_and_general_validators_fire(self) -> None:
|
||||
"""Test both class-specific and general init_validator fire together."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "llms", "bedrock", "Bedrock"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"region_name": "us-west-2",
|
||||
},
|
||||
}
|
||||
|
||||
init_validator_called: list[bool] = []
|
||||
|
||||
def custom_init_validator(
|
||||
_class_path: tuple[str, ...], _kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
init_validator_called.append(True)
|
||||
|
||||
# May fail at import time if langchain_aws not installed, that's OK.
|
||||
# We only care that the init_validator was called before that point.
|
||||
with contextlib.suppress(ModuleNotFoundError):
|
||||
load(
|
||||
payload,
|
||||
allowed_objects="all",
|
||||
init_validator=custom_init_validator,
|
||||
)
|
||||
|
||||
assert len(init_validator_called) == 1
|
||||
|
||||
def test_load_blocks_bedrock_llm_via_resolved_path(self) -> None:
|
||||
"""Test load() blocks BedrockLLM via resolved import path."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_aws", "llms", "bedrock", "BedrockLLM"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_load_blocks_chat_bedrock_via_resolved_path(self) -> None:
|
||||
"""Test load() blocks ChatBedrock via resolved JS import path."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_aws", "chat_models", "ChatBedrock"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"base_url": "http://malicious-site.com",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all")
|
||||
|
||||
def test_class_validator_fires_with_init_validator_none(self) -> None:
|
||||
"""Class-specific validators cannot be bypassed via init_validator=None."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "chat_models", "bedrock", "ChatBedrock"],
|
||||
"kwargs": {
|
||||
"model_id": "anthropic.claude-v2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
},
|
||||
}
|
||||
with pytest.raises(ValueError, match="SSRF"):
|
||||
load(payload, allowed_objects="all", init_validator=None)
|
||||
|
||||
|
||||
class TestBedrockValidators:
|
||||
"""Tests for Bedrock SSRF protection validator."""
|
||||
|
||||
def test_bedrock_validator_blocks_endpoint_url(self) -> None:
|
||||
"""Test that `_bedrock_validator` blocks `endpoint_url` parameter."""
|
||||
class_path = ("langchain", "llms", "bedrock", "BedrockLLM")
|
||||
kwargs = {
|
||||
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"region_name": "us-west-2",
|
||||
"endpoint_url": "http://169.254.169.254/latest/meta-data",
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError, match=r"endpoint_url.*SSRF"):
|
||||
_bedrock_validator(class_path, kwargs)
|
||||
|
||||
def test_bedrock_validator_blocks_base_url(self) -> None:
|
||||
"""Test that `_bedrock_validator` blocks `base_url` parameter."""
|
||||
class_path = ("langchain_aws", "chat_models", "ChatBedrockConverse")
|
||||
kwargs = {
|
||||
"model": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"region_name": "us-west-2",
|
||||
"base_url": "http://malicious-site.com",
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError, match=r"base_url.*SSRF"):
|
||||
_bedrock_validator(class_path, kwargs)
|
||||
|
||||
def test_bedrock_validator_blocks_both_parameters(self) -> None:
|
||||
"""Test that `_bedrock_validator` blocks when both params are present."""
|
||||
class_path = ("langchain", "chat_models", "bedrock", "ChatBedrock")
|
||||
kwargs = {
|
||||
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"region_name": "us-west-2",
|
||||
"endpoint_url": "http://attacker.com",
|
||||
"base_url": "http://another-attacker.com",
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError, match="SSRF") as exc_info:
|
||||
_bedrock_validator(class_path, kwargs)
|
||||
|
||||
error_msg = str(exc_info.value)
|
||||
assert "endpoint_url" in error_msg
|
||||
assert "base_url" in error_msg
|
||||
|
||||
def test_bedrock_validator_allows_safe_parameters(self) -> None:
|
||||
"""Test that `_bedrock_validator` allows safe parameters through."""
|
||||
class_path = ("langchain", "llms", "bedrock", "Bedrock")
|
||||
kwargs = {
|
||||
"model_id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"region_name": "us-west-2",
|
||||
"credentials_profile_name": "default",
|
||||
"streaming": True,
|
||||
"model_kwargs": {"temperature": 0.7},
|
||||
}
|
||||
|
||||
_bedrock_validator(class_path, kwargs)
|
||||
|
||||
@@ -815,7 +815,7 @@ def test_parse_with_different_pydantic_2_v1() -> None:
|
||||
temperature: int
|
||||
forecast: str
|
||||
|
||||
# Can't get pydantic to work here due to the odd typing of trying to support
|
||||
# Can't get pydantic to work here due to the odd typing of tryig to support
|
||||
# both v1 and v2 in the same codebase.
|
||||
parser = PydanticToolsParser(tools=[Forecast])
|
||||
message = AIMessage(
|
||||
@@ -848,7 +848,7 @@ def test_parse_with_different_pydantic_2_proper() -> None:
|
||||
temperature: int
|
||||
forecast: str
|
||||
|
||||
# Can't get pydantic to work here due to the odd typing of trying to support
|
||||
# Can't get pydantic to work here due to the odd typing of tryig to support
|
||||
# both v1 and v2 in the same codebase.
|
||||
parser = PydanticToolsParser(tools=[Forecast])
|
||||
message = AIMessage(
|
||||
|
||||
@@ -1951,24 +1951,6 @@ def test_fstring_rejects_invalid_identifier_variable_names() -> None:
|
||||
assert result.messages[0].content == expected # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def test_fstring_rejects_nested_replacement_field_in_image_url() -> None:
|
||||
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
|
||||
ChatPromptTemplate.from_messages(
|
||||
[
|
||||
(
|
||||
"human",
|
||||
[
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "{img:{img.__class__.__name__}}"},
|
||||
}
|
||||
],
|
||||
)
|
||||
],
|
||||
template_format="f-string",
|
||||
)
|
||||
|
||||
|
||||
def test_mustache_template_attribute_access_vulnerability() -> None:
|
||||
"""Test that Mustache template injection is blocked.
|
||||
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.load import load, loads
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from langchain_core.load import load
|
||||
from langchain_core.prompts.dict import DictPromptTemplate
|
||||
|
||||
|
||||
@@ -37,82 +32,3 @@ def test_deserialize_legacy() -> None:
|
||||
template={"type": "audio", "audio": "{audio_data}"}, template_format="f-string"
|
||||
)
|
||||
assert load(ser, allowed_objects=[DictPromptTemplate]) == expected
|
||||
|
||||
|
||||
def test_dict_prompt_template_rejects_attribute_access_to_rich_objects() -> None:
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
DictPromptTemplate(
|
||||
template={"output": "{message.additional_kwargs[secret]}"},
|
||||
template_format="f-string",
|
||||
)
|
||||
|
||||
|
||||
def test_dict_prompt_template_loads_payload_rejects_attribute_access() -> None:
|
||||
payload = json.dumps(
|
||||
{
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
|
||||
"kwargs": {
|
||||
"template": {"output": "{message.additional_kwargs[secret]}"},
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
loads(payload)
|
||||
|
||||
|
||||
def test_dict_prompt_template_dumpd_round_trip_rejects_attribute_access() -> None:
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
|
||||
"kwargs": {
|
||||
"template": {"output": "{message.additional_kwargs[secret]}"},
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
load(payload, allowed_objects=[DictPromptTemplate])
|
||||
|
||||
|
||||
def test_dict_prompt_template_deserialization_rejects_attribute_access() -> None:
|
||||
payload = json.dumps(
|
||||
{
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "prompts", "dict", "DictPromptTemplate"],
|
||||
"kwargs": {
|
||||
"template": {"output": "{name.__class__.__name__}"},
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
loads(payload)
|
||||
|
||||
|
||||
def test_dict_prompt_template_legacy_deserialization_rejects_attribute_access() -> None:
|
||||
ser = {
|
||||
"type": "constructor",
|
||||
"lc": 1,
|
||||
"id": ["langchain_core", "prompts", "message", "_DictMessagePromptTemplate"],
|
||||
"kwargs": {
|
||||
"template_format": "f-string",
|
||||
"template": {"output": "{name.__class__.__name__}"},
|
||||
},
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
load(ser, allowed_objects=[DictPromptTemplate])
|
||||
|
||||
|
||||
def test_prompt_template_blocks_attribute_access() -> None:
|
||||
with pytest.raises(
|
||||
ValueError, match="Variable names cannot contain attribute access"
|
||||
):
|
||||
PromptTemplate.from_template("{name.__class__}", template_format="f-string")
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.load import dump, loads
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.prompts.image import ImagePromptTemplate
|
||||
|
||||
|
||||
def test_image_prompt_template_deserializable() -> None:
|
||||
@@ -110,31 +107,3 @@ def test_image_prompt_template_deserializable_old() -> None:
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def test_image_prompt_template_rejects_attribute_access_in_template_values() -> None:
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
ImagePromptTemplate(
|
||||
input_variables=["image"],
|
||||
template={"url": "https://example.com/{image.__class__.__name__}.png"},
|
||||
)
|
||||
|
||||
|
||||
def test_image_prompt_template_deserialization_rejects_attribute_access() -> None:
|
||||
payload = json.dumps(
|
||||
{
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "image", "ImagePromptTemplate"],
|
||||
"kwargs": {
|
||||
"template": {
|
||||
"url": "https://example.com/{image.__class__.__name__}.png"
|
||||
},
|
||||
"input_variables": ["image"],
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Variable names cannot contain attribute"):
|
||||
loads(payload)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Test loading functionality."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Iterator
|
||||
from contextlib import contextmanager
|
||||
@@ -8,14 +7,8 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core._api import suppress_langchain_deprecation_warning
|
||||
from langchain_core.prompts.few_shot import FewShotPromptTemplate
|
||||
from langchain_core.prompts.loading import (
|
||||
_load_examples,
|
||||
_load_template,
|
||||
load_prompt,
|
||||
load_prompt_from_config,
|
||||
)
|
||||
from langchain_core.prompts.loading import load_prompt
|
||||
from langchain_core.prompts.prompt import PromptTemplate
|
||||
|
||||
EXAMPLE_DIR = (Path(__file__).parent.parent / "examples").absolute()
|
||||
@@ -34,8 +27,7 @@ def change_directory(dir_path: Path) -> Iterator[None]:
|
||||
|
||||
def test_loading_from_yaml() -> None:
|
||||
"""Test loading from yaml file."""
|
||||
with suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.yaml")
|
||||
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.yaml")
|
||||
expected_prompt = PromptTemplate(
|
||||
input_variables=["adjective"],
|
||||
partial_variables={"content": "dogs"},
|
||||
@@ -46,8 +38,7 @@ def test_loading_from_yaml() -> None:
|
||||
|
||||
def test_loading_from_json() -> None:
|
||||
"""Test loading from json file."""
|
||||
with suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.json")
|
||||
prompt = load_prompt(EXAMPLE_DIR / "simple_prompt.json")
|
||||
expected_prompt = PromptTemplate(
|
||||
input_variables=["adjective", "content"],
|
||||
template="Tell me a {adjective} joke about {content}.",
|
||||
@@ -58,20 +49,14 @@ def test_loading_from_json() -> None:
|
||||
def test_loading_jinja_from_json() -> None:
|
||||
"""Test that loading jinja2 format prompts from JSON raises ValueError."""
|
||||
prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.json"
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"),
|
||||
):
|
||||
with pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"):
|
||||
load_prompt(prompt_path)
|
||||
|
||||
|
||||
def test_loading_jinja_from_yaml() -> None:
|
||||
"""Test that loading jinja2 format prompts from YAML raises ValueError."""
|
||||
prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.yaml"
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"),
|
||||
):
|
||||
with pytest.raises(ValueError, match=r".*can lead to arbitrary code execution.*"):
|
||||
load_prompt(prompt_path)
|
||||
|
||||
|
||||
@@ -81,9 +66,8 @@ def test_saving_loading_round_trip(tmp_path: Path) -> None:
|
||||
input_variables=["adjective", "content"],
|
||||
template="Tell me a {adjective} joke about {content}.",
|
||||
)
|
||||
with suppress_langchain_deprecation_warning():
|
||||
simple_prompt.save(file_path=tmp_path / "prompt.yaml")
|
||||
loaded_prompt = load_prompt(tmp_path / "prompt.yaml")
|
||||
simple_prompt.save(file_path=tmp_path / "prompt.yaml")
|
||||
loaded_prompt = load_prompt(tmp_path / "prompt.yaml")
|
||||
assert loaded_prompt == simple_prompt
|
||||
|
||||
few_shot_prompt = FewShotPromptTemplate(
|
||||
@@ -99,18 +83,15 @@ def test_saving_loading_round_trip(tmp_path: Path) -> None:
|
||||
],
|
||||
suffix="Input: {adjective}\nOutput:",
|
||||
)
|
||||
with suppress_langchain_deprecation_warning():
|
||||
few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml")
|
||||
loaded_prompt = load_prompt(tmp_path / "few_shot.yaml")
|
||||
few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml")
|
||||
loaded_prompt = load_prompt(tmp_path / "few_shot.yaml")
|
||||
assert loaded_prompt == few_shot_prompt
|
||||
|
||||
|
||||
def test_loading_with_template_as_file() -> None:
|
||||
"""Test loading when the template is a file."""
|
||||
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt(
|
||||
"simple_prompt_with_template_file.json", allow_dangerous_paths=True
|
||||
)
|
||||
with change_directory(EXAMPLE_DIR):
|
||||
prompt = load_prompt("simple_prompt_with_template_file.json")
|
||||
expected_prompt = PromptTemplate(
|
||||
input_variables=["adjective", "content"],
|
||||
template="Tell me a {adjective} joke about {content}.",
|
||||
@@ -118,233 +99,10 @@ def test_loading_with_template_as_file() -> None:
|
||||
assert prompt == expected_prompt
|
||||
|
||||
|
||||
def test_load_template_rejects_absolute_path(tmp_path: Path) -> None:
|
||||
secret = tmp_path / "secret.txt"
|
||||
secret.write_text("SECRET")
|
||||
config = {"template_path": str(secret)}
|
||||
with pytest.raises(ValueError, match="is absolute"):
|
||||
_load_template("template", config)
|
||||
|
||||
|
||||
def test_load_template_rejects_traversal() -> None:
|
||||
config = {"template_path": "../../etc/secret.txt"}
|
||||
with pytest.raises(ValueError, match=r"contains '\.\.' components"):
|
||||
_load_template("template", config)
|
||||
|
||||
|
||||
def test_load_template_allows_dangerous_paths_when_opted_in(tmp_path: Path) -> None:
|
||||
secret = tmp_path / "secret.txt"
|
||||
secret.write_text("SECRET")
|
||||
config = {"template_path": str(secret)}
|
||||
result = _load_template("template", config, allow_dangerous_paths=True)
|
||||
assert result["template"] == "SECRET"
|
||||
|
||||
|
||||
def test_load_examples_rejects_absolute_path(tmp_path: Path) -> None:
|
||||
examples_file = tmp_path / "examples.json"
|
||||
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
|
||||
config = {"examples": str(examples_file)}
|
||||
with pytest.raises(ValueError, match="is absolute"):
|
||||
_load_examples(config)
|
||||
|
||||
|
||||
def test_load_examples_rejects_traversal() -> None:
|
||||
config = {"examples": "../../secrets/data.json"}
|
||||
with pytest.raises(ValueError, match=r"contains '\.\.' components"):
|
||||
_load_examples(config)
|
||||
|
||||
|
||||
def test_load_examples_allows_dangerous_paths_when_opted_in(tmp_path: Path) -> None:
|
||||
examples_file = tmp_path / "examples.json"
|
||||
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
|
||||
config = {"examples": str(examples_file)}
|
||||
result = _load_examples(config, allow_dangerous_paths=True)
|
||||
assert result["examples"] == [{"input": "a", "output": "b"}]
|
||||
|
||||
|
||||
def test_load_prompt_from_config_rejects_absolute_template_path(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
secret = tmp_path / "secret.txt"
|
||||
secret.write_text("SECRET")
|
||||
config = {
|
||||
"_type": "prompt",
|
||||
"template_path": str(secret),
|
||||
"input_variables": [],
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match="is absolute"),
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
|
||||
|
||||
def test_load_prompt_from_config_rejects_traversal_template_path() -> None:
|
||||
config = {
|
||||
"_type": "prompt",
|
||||
"template_path": "../../../tmp/secret.txt",
|
||||
"input_variables": [],
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match=r"contains '\.\.' components"),
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
|
||||
|
||||
def test_load_prompt_from_config_allows_dangerous_paths(tmp_path: Path) -> None:
|
||||
secret = tmp_path / "secret.txt"
|
||||
secret.write_text("SECRET")
|
||||
config = {
|
||||
"_type": "prompt",
|
||||
"template_path": str(secret),
|
||||
"input_variables": [],
|
||||
}
|
||||
with suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt_from_config(config, allow_dangerous_paths=True)
|
||||
assert isinstance(prompt, PromptTemplate)
|
||||
assert prompt.template == "SECRET"
|
||||
|
||||
|
||||
def test_load_prompt_from_config_few_shot_rejects_traversal_examples() -> None:
|
||||
config = {
|
||||
"_type": "few_shot",
|
||||
"input_variables": ["query"],
|
||||
"prefix": "Examples:",
|
||||
"example_prompt": {
|
||||
"_type": "prompt",
|
||||
"input_variables": ["input", "output"],
|
||||
"template": "{input}: {output}",
|
||||
},
|
||||
"examples": "../../../../.docker/config.json",
|
||||
"suffix": "Query: {query}",
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match=r"contains '\.\.' components"),
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
|
||||
|
||||
def test_load_prompt_from_config_few_shot_rejects_absolute_examples(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
examples_file = tmp_path / "examples.json"
|
||||
examples_file.write_text(json.dumps([{"input": "a", "output": "b"}]))
|
||||
config = {
|
||||
"_type": "few_shot",
|
||||
"input_variables": ["query"],
|
||||
"prefix": "Examples:",
|
||||
"example_prompt": {
|
||||
"_type": "prompt",
|
||||
"input_variables": ["input", "output"],
|
||||
"template": "{input}: {output}",
|
||||
},
|
||||
"examples": str(examples_file),
|
||||
"suffix": "Query: {query}",
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match="is absolute"),
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
|
||||
|
||||
def test_load_prompt_from_config_few_shot_rejects_absolute_example_prompt_path(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
prompt_file = tmp_path / "prompt.json"
|
||||
prompt_file.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"_type": "prompt",
|
||||
"template": "{input}: {output}",
|
||||
"input_variables": ["input", "output"],
|
||||
}
|
||||
)
|
||||
)
|
||||
config = {
|
||||
"_type": "few_shot",
|
||||
"input_variables": ["query"],
|
||||
"prefix": "Examples:",
|
||||
"example_prompt_path": str(prompt_file),
|
||||
"examples": [{"input": "a", "output": "b"}],
|
||||
"suffix": "Query: {query}",
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match="is absolute"),
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
|
||||
|
||||
def test_symlink_txt_to_py_is_blocked(tmp_path: Path) -> None:
|
||||
"""Test symlink redirects cannot get around file extension check."""
|
||||
sensitive = tmp_path / "sensitive_source.py"
|
||||
sensitive.write_text("INTERNAL_SECRET='ABC-123-XYZ'")
|
||||
symlink = tmp_path / "exploit_link.txt"
|
||||
symlink.symlink_to(sensitive)
|
||||
|
||||
config = {
|
||||
"_type": "prompt",
|
||||
"template_path": "exploit_link.txt",
|
||||
"input_variables": [],
|
||||
}
|
||||
original_dir = Path.cwd()
|
||||
try:
|
||||
os.chdir(tmp_path)
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError), # noqa: PT011
|
||||
):
|
||||
load_prompt_from_config(config)
|
||||
finally:
|
||||
os.chdir(original_dir)
|
||||
|
||||
|
||||
def test_symlink_jinja2_rce_is_blocked(tmp_path: Path) -> None:
|
||||
"""Check jinja2 templates cannot be used to perform RCE via symlinks."""
|
||||
payload = tmp_path / "rce_payload.py"
|
||||
payload.write_text(
|
||||
"{{ self.__init__.__globals__.__builtins__"
|
||||
".__import__('os').popen('id').read() }}"
|
||||
)
|
||||
symlink = tmp_path / "rce_bypass.txt"
|
||||
symlink.symlink_to(payload)
|
||||
|
||||
config = {
|
||||
"_type": "prompt",
|
||||
"template_path": str(symlink),
|
||||
"template_format": "jinja2",
|
||||
"input_variables": [],
|
||||
}
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError), # noqa: PT011
|
||||
):
|
||||
load_prompt_from_config(config, allow_dangerous_paths=True)
|
||||
|
||||
|
||||
def test_save_symlink_to_py_is_blocked(tmp_path: Path) -> None:
|
||||
"""Test that save() resolves symlinks before checking the file extension."""
|
||||
target = tmp_path / "malicious.py"
|
||||
symlink = tmp_path / "output.json"
|
||||
symlink.symlink_to(target)
|
||||
|
||||
prompt = PromptTemplate(input_variables=["name"], template="Hello {name}")
|
||||
with (
|
||||
suppress_langchain_deprecation_warning(),
|
||||
pytest.raises(ValueError, match="must be json or yaml"),
|
||||
):
|
||||
prompt.save(symlink)
|
||||
|
||||
assert not target.exists()
|
||||
|
||||
|
||||
def test_loading_few_shot_prompt_from_yaml() -> None:
|
||||
"""Test loading few shot prompt from yaml."""
|
||||
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt("few_shot_prompt.yaml", allow_dangerous_paths=True)
|
||||
with change_directory(EXAMPLE_DIR):
|
||||
prompt = load_prompt("few_shot_prompt.yaml")
|
||||
expected_prompt = FewShotPromptTemplate(
|
||||
input_variables=["adjective"],
|
||||
prefix="Write antonyms for the following words.",
|
||||
@@ -363,8 +121,8 @@ def test_loading_few_shot_prompt_from_yaml() -> None:
|
||||
|
||||
def test_loading_few_shot_prompt_from_json() -> None:
|
||||
"""Test loading few shot prompt from json."""
|
||||
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt("few_shot_prompt.json", allow_dangerous_paths=True)
|
||||
with change_directory(EXAMPLE_DIR):
|
||||
prompt = load_prompt("few_shot_prompt.json")
|
||||
expected_prompt = FewShotPromptTemplate(
|
||||
input_variables=["adjective"],
|
||||
prefix="Write antonyms for the following words.",
|
||||
@@ -383,10 +141,8 @@ def test_loading_few_shot_prompt_from_json() -> None:
|
||||
|
||||
def test_loading_few_shot_prompt_when_examples_in_config() -> None:
|
||||
"""Test loading few shot prompt when the examples are in the config."""
|
||||
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt(
|
||||
"few_shot_prompt_examples_in.json", allow_dangerous_paths=True
|
||||
)
|
||||
with change_directory(EXAMPLE_DIR):
|
||||
prompt = load_prompt("few_shot_prompt_examples_in.json")
|
||||
expected_prompt = FewShotPromptTemplate(
|
||||
input_variables=["adjective"],
|
||||
prefix="Write antonyms for the following words.",
|
||||
@@ -405,10 +161,8 @@ def test_loading_few_shot_prompt_when_examples_in_config() -> None:
|
||||
|
||||
def test_loading_few_shot_prompt_example_prompt() -> None:
|
||||
"""Test loading few shot when the example prompt is in its own file."""
|
||||
with change_directory(EXAMPLE_DIR), suppress_langchain_deprecation_warning():
|
||||
prompt = load_prompt(
|
||||
"few_shot_prompt_example_prompt.json", allow_dangerous_paths=True
|
||||
)
|
||||
with change_directory(EXAMPLE_DIR):
|
||||
prompt = load_prompt("few_shot_prompt_example_prompt.json")
|
||||
expected_prompt = FewShotPromptTemplate(
|
||||
input_variables=["adjective"],
|
||||
prefix="Write antonyms for the following words.",
|
||||
|
||||
@@ -1,12 +1,7 @@
|
||||
import pytest
|
||||
from packaging import version
|
||||
|
||||
from langchain_core.prompts.string import (
|
||||
check_valid_template,
|
||||
get_template_variables,
|
||||
mustache_schema,
|
||||
)
|
||||
from langchain_core.utils.formatting import formatter
|
||||
from langchain_core.prompts.string import get_template_variables, mustache_schema
|
||||
from langchain_core.utils.pydantic import PYDANTIC_VERSION
|
||||
|
||||
PYDANTIC_VERSION_AT_LEAST_29 = version.parse("2.9") <= PYDANTIC_VERSION
|
||||
@@ -44,47 +39,3 @@ def test_get_template_variables_mustache_nested() -> None:
|
||||
expected = ["user"]
|
||||
actual = get_template_variables(template, template_format)
|
||||
assert actual == expected
|
||||
|
||||
|
||||
def test_get_template_variables_rejects_nested_replacement_field_in_format_spec() -> (
|
||||
None
|
||||
):
|
||||
template = "{name:{name.__class__.__name__}}"
|
||||
|
||||
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
|
||||
get_template_variables(template, "f-string")
|
||||
|
||||
|
||||
def test_formatter_rejects_nested_replacement_field_in_format_spec() -> None:
|
||||
template = "{name:{name.__class__.__name__}}"
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid format specifier"):
|
||||
formatter.format(template, name="hello")
|
||||
|
||||
|
||||
def test_check_valid_template_rejects_nested_replacement_field_in_format_spec() -> None:
|
||||
template = "{name:{name.__class__.__name__}}"
|
||||
|
||||
with pytest.raises(ValueError, match="Nested replacement fields are not allowed"):
|
||||
check_valid_template(template, "f-string", ["name"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("template", "kwargs", "expected_variables", "expected_output"),
|
||||
[
|
||||
("{value:.2f}", {"value": 3.14159}, ["value"], "3.14"),
|
||||
("{value:>10}", {"value": "cat"}, ["value"], " cat"),
|
||||
("{value:*^10}", {"value": "cat"}, ["value"], "***cat****"),
|
||||
("{value:,}", {"value": 1234567}, ["value"], "1,234,567"),
|
||||
("{value:%}", {"value": 0.125}, ["value"], "12.500000%"),
|
||||
("{value!r}", {"value": "cat"}, ["value"], "'cat'"),
|
||||
],
|
||||
)
|
||||
def test_f_string_templates_allow_safe_format_specs(
|
||||
template: str,
|
||||
kwargs: dict[str, object],
|
||||
expected_variables: list[str],
|
||||
expected_output: str,
|
||||
) -> None:
|
||||
assert get_template_variables(template, "f-string") == expected_variables
|
||||
assert formatter.format(template, **kwargs) == expected_output
|
||||
|
||||
@@ -16,7 +16,6 @@ from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHan
|
||||
from langchain_core.runnables import RunnableBinding, RunnablePassthrough
|
||||
from langchain_core.runnables.config import (
|
||||
RunnableConfig,
|
||||
_get_langsmith_inheritable_metadata_from_config,
|
||||
_set_config_context,
|
||||
ensure_config,
|
||||
merge_configs,
|
||||
@@ -62,7 +61,7 @@ def test_ensure_config() -> None:
|
||||
assert config["configurable"] is not arg["configurable"]
|
||||
assert config == {
|
||||
"tags": ["tag1", "tag2"],
|
||||
"metadata": {"foo": "bar"},
|
||||
"metadata": {"foo": "bar", "baz": "qux", "something": "else"},
|
||||
"callbacks": [arg["callbacks"][0]],
|
||||
"recursion_limit": 100,
|
||||
"configurable": {"baz": "qux", "something": "else"},
|
||||
@@ -72,164 +71,6 @@ def test_ensure_config() -> None:
|
||||
}
|
||||
|
||||
|
||||
def test_ensure_config_copies_model_to_metadata() -> None:
|
||||
config = ensure_config(
|
||||
{
|
||||
"configurable": {
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"checkpoint_ns": "ns-1",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"model": "gpt-4o",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"some_api_key": "opaque-token",
|
||||
"custom_setting": {"nested": True},
|
||||
"none_value": None,
|
||||
},
|
||||
"metadata": {"nooverride": 18},
|
||||
}
|
||||
)
|
||||
|
||||
assert config["metadata"] == {
|
||||
"nooverride": 18,
|
||||
"model": "gpt-4o",
|
||||
"checkpoint_ns": "ns-1",
|
||||
}
|
||||
assert config["configurable"] == {
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"checkpoint_ns": "ns-1",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"model": "gpt-4o",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"some_api_key": "opaque-token",
|
||||
"custom_setting": {"nested": True},
|
||||
"none_value": None,
|
||||
}
|
||||
|
||||
|
||||
def test_ensure_config_metadata_is_not_overridden_by_configurable_model() -> None:
|
||||
config = ensure_config(
|
||||
{
|
||||
"configurable": {
|
||||
"model": "from-configurable",
|
||||
"run_id": None,
|
||||
"checkpoint_ns": "from-configurable",
|
||||
},
|
||||
"metadata": {
|
||||
"model": "from-metadata",
|
||||
"run_id": "from-metadata",
|
||||
"checkpoint_ns": "from-metadata",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
assert config["metadata"] == {
|
||||
"model": "from-metadata",
|
||||
"run_id": "from-metadata",
|
||||
"checkpoint_ns": "from-metadata",
|
||||
}
|
||||
assert config["configurable"] == {
|
||||
"model": "from-configurable",
|
||||
"run_id": None,
|
||||
"checkpoint_ns": "from-configurable",
|
||||
}
|
||||
|
||||
|
||||
def test_ensure_config_copies_top_level_model_to_metadata() -> None:
|
||||
config = ensure_config(
|
||||
cast(
|
||||
"RunnableConfig",
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"metadata": {"nooverride": 18},
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
assert config["metadata"] == {"nooverride": 18, "model": "gpt-4o"}
|
||||
assert config["configurable"] == {"model": "gpt-4o"}
|
||||
|
||||
|
||||
def test_ensure_config_copies_top_level_checkpoint_ns_to_metadata() -> None:
|
||||
config = ensure_config(
|
||||
cast(
|
||||
"RunnableConfig",
|
||||
{
|
||||
"checkpoint_ns": "ns-1",
|
||||
"metadata": {"nooverride": 18},
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
assert config["metadata"] == {"nooverride": 18, "checkpoint_ns": "ns-1"}
|
||||
assert config["configurable"] == {"checkpoint_ns": "ns-1"}
|
||||
|
||||
|
||||
def test_get_langsmith_inheritable_metadata_from_config_uses_previous_copy_rules() -> (
|
||||
None
|
||||
):
|
||||
config = ensure_config(
|
||||
cast(
|
||||
"RunnableConfig",
|
||||
{
|
||||
"something": "else",
|
||||
"metadata": {
|
||||
"foo": "bar",
|
||||
"model": "from-metadata",
|
||||
"checkpoint_ns": "from-metadata",
|
||||
},
|
||||
"configurable": {
|
||||
"baz": "qux",
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"checkpoint_ns": "from-configurable",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"model": "from-configurable",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"api_key": "should-not-propagate",
|
||||
"__secret_key": "should-not-propagate",
|
||||
"temperature": 0.5,
|
||||
"streaming": True,
|
||||
"custom_setting": {"nested": True},
|
||||
"none_value": None,
|
||||
},
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
assert _get_langsmith_inheritable_metadata_from_config(config) == {
|
||||
"something": "else",
|
||||
"baz": "qux",
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"temperature": 0.5,
|
||||
"streaming": True,
|
||||
}
|
||||
|
||||
|
||||
async def test_merge_config_callbacks() -> None:
|
||||
manager: RunnableConfig = {
|
||||
"callbacks": CallbackManager(handlers=[StdOutCallbackHandler()])
|
||||
|
||||
@@ -1162,7 +1162,7 @@ async def test_with_config_metadata_passthrough(mocker: MockerFixture) -> None:
|
||||
"callbacks": None,
|
||||
"recursion_limit": 25,
|
||||
"configurable": {"hello": "there", "__secret_key": "nahnah"},
|
||||
"metadata": {"bye": "now"},
|
||||
"metadata": {"hello": "there", "bye": "now"},
|
||||
},
|
||||
)
|
||||
spy.reset_mock()
|
||||
|
||||
@@ -2843,7 +2843,7 @@ async def test_tool_error_event_includes_tool_call_id() -> None:
|
||||
"""Test that on_tool_error event includes tool_call_id when provided."""
|
||||
|
||||
@tool
|
||||
def failing_tool(x: int) -> str:
|
||||
def failing_tool(x: int) -> str: # noqa: ARG001
|
||||
"""A tool that always fails."""
|
||||
msg = "Tool execution failed"
|
||||
raise ValueError(msg)
|
||||
@@ -2883,7 +2883,7 @@ async def test_tool_error_event_tool_call_id_is_none_when_not_provided() -> None
|
||||
"""Test that on_tool_error event has tool_call_id=None when not provided."""
|
||||
|
||||
@tool
|
||||
def failing_tool_no_id(x: int) -> str:
|
||||
def failing_tool_no_id(x: int) -> str: # noqa: ARG001
|
||||
"""A tool that always fails."""
|
||||
msg = "Tool execution failed"
|
||||
raise ValueError(msg)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import json
|
||||
import sys
|
||||
import threading
|
||||
import uuid
|
||||
from inspect import isasyncgenfunction
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
@@ -15,15 +12,13 @@ from langsmith import Client, RunTree, get_current_run_tree, traceable
|
||||
from langsmith.run_helpers import tracing_context
|
||||
from langsmith.utils import get_env_var
|
||||
|
||||
from langchain_core.callbacks.base import BaseCallbackHandler
|
||||
from langchain_core.callbacks.manager import CallbackManager
|
||||
from langchain_core.runnables.base import RunnableLambda, RunnableParallel
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncGenerator, Callable, Coroutine, Generator, Mapping
|
||||
from collections.abc import AsyncGenerator, Callable, Coroutine, Generator
|
||||
|
||||
from langchain_core.runnables.config import RunnableConfig
|
||||
from langchain_core.callbacks import BaseCallbackHandler
|
||||
|
||||
|
||||
def _get_posts(client: Client) -> list[dict[str, Any]]:
|
||||
@@ -48,15 +43,12 @@ def _get_posts(client: Client) -> list[dict[str, Any]]:
|
||||
def _create_tracer_with_mocked_client(
|
||||
project_name: str | None = None,
|
||||
tags: list[str] | None = None,
|
||||
metadata: Mapping[str, str] | None = None,
|
||||
) -> LangChainTracer:
|
||||
mock_session = MagicMock()
|
||||
mock_client_ = Client(
|
||||
session=mock_session, api_key="test", auto_batch_tracing=False
|
||||
)
|
||||
return LangChainTracer(
|
||||
client=mock_client_, project_name=project_name, tags=tags, metadata=metadata
|
||||
)
|
||||
return LangChainTracer(client=mock_client_, project_name=project_name, tags=tags)
|
||||
|
||||
|
||||
def test_tracing_context() -> None:
|
||||
@@ -83,38 +75,6 @@ def test_tracing_context() -> None:
|
||||
assert all(post["session_name"] == project_name for post in posts)
|
||||
|
||||
|
||||
def test_inheritable_metadata_respects_explicit_metadata_with_tracing_context() -> None:
|
||||
"""Tracer defaults fill missing keys while run metadata keeps precedence."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={
|
||||
"tenant": "from_tracer",
|
||||
"shared": "from_tracer",
|
||||
},
|
||||
)
|
||||
with tracing_context(enabled=True, client=tracer.client):
|
||||
my_func.invoke(
|
||||
1,
|
||||
{
|
||||
"callbacks": callbacks,
|
||||
"metadata": {"shared": "from_run", "explicit": "from_run"},
|
||||
},
|
||||
)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
metadata = posts[0].get("extra", {}).get("metadata", {})
|
||||
assert metadata["tenant"] == "from_tracer"
|
||||
assert metadata["shared"] == "from_run"
|
||||
assert metadata["explicit"] == "from_run"
|
||||
|
||||
|
||||
def test_config_traceable_handoff() -> None:
|
||||
if hasattr(get_env_var, "cache_clear"):
|
||||
get_env_var.cache_clear() # type: ignore[attr-defined]
|
||||
@@ -506,10 +466,7 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
|
||||
):
|
||||
collected: dict[str, RunTree] = {}
|
||||
|
||||
def collect_langsmith_run(run: RunTree) -> None:
|
||||
collected[str(run.id)] = run
|
||||
|
||||
def collect_tracer_run(_: LangChainTracer, run: RunTree) -> None:
|
||||
def collect_run(run: RunTree) -> None:
|
||||
collected[str(run.id)] = run
|
||||
|
||||
if parent_type == "ls":
|
||||
@@ -519,8 +476,7 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
|
||||
return child.invoke("foo")
|
||||
|
||||
assert (
|
||||
parent(langsmith_extra={"on_end": collect_langsmith_run, "run_id": rid})
|
||||
== "foo"
|
||||
parent(langsmith_extra={"on_end": collect_run, "run_id": rid}) == "foo"
|
||||
)
|
||||
assert collected
|
||||
|
||||
@@ -531,10 +487,9 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
|
||||
return child.invoke("foo")
|
||||
|
||||
tracer = LangChainTracer()
|
||||
with patch.object(LangChainTracer, "_persist_run", new=collect_tracer_run):
|
||||
assert (
|
||||
parent.invoke(..., {"run_id": rid, "callbacks": [tracer]}) == "foo" # type: ignore[attr-defined]
|
||||
)
|
||||
tracer._persist_run = collect_run # type: ignore[method-assign]
|
||||
|
||||
assert parent.invoke(..., {"run_id": rid, "callbacks": [tracer]}) == "foo" # type: ignore[attr-defined]
|
||||
run = collected.get(str(rid))
|
||||
|
||||
assert run is not None
|
||||
@@ -553,749 +508,3 @@ def test_tree_is_constructed(parent_type: Literal["ls", "lc"]) -> None:
|
||||
assert "afoo" in kitten_run.tags # type: ignore[operator]
|
||||
assert grandchild_run is not None
|
||||
assert kitten_run.dotted_order.startswith(grandchild_run.dotted_order)
|
||||
|
||||
|
||||
def test_traceable_parent_run_map_cleanup() -> None:
|
||||
"""External RunTree injected into run_map is cleaned up when its child ends.
|
||||
|
||||
When a `@traceable` function invokes a LangChain `Runnable`, the
|
||||
`RunTree` is added to the tracer's `run_map` so child runs can
|
||||
reference it. Previously the entry was never removed, causing a
|
||||
memory leak that grew with every call.
|
||||
|
||||
Uses an explicit tracer so we can inspect `run_map` directly after
|
||||
the call — the `_configure` insertion path is identical regardless
|
||||
of whether the tracer was created internally or passed in.
|
||||
"""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
def child(x: str) -> str:
|
||||
return x
|
||||
|
||||
with tracing_context(client=tracer.client, enabled=True):
|
||||
|
||||
@traceable
|
||||
def parent(x: str) -> str:
|
||||
return child.invoke(x, config={"callbacks": [tracer]})
|
||||
|
||||
parent("hello")
|
||||
|
||||
assert tracer.run_map == {}, (
|
||||
f"run_map should be empty but contains: "
|
||||
f"{[getattr(v, 'name', k) for k, v in tracer.run_map.items()]}"
|
||||
)
|
||||
|
||||
|
||||
def test_traceable_parent_run_map_cleanup_with_sibling_children() -> None:
|
||||
"""External parent survives in run_map until ALL its children finish.
|
||||
|
||||
When a `@traceable` function invokes a chain with multiple steps
|
||||
(e.g. prompt | llm), each step is a sibling child of the same
|
||||
intermediate run. The external parent must stay in `run_map` until
|
||||
the last child completes, not be removed when the first child ends.
|
||||
"""
|
||||
from langchain_core.language_models.fake_chat_models import ( # noqa: PLC0415
|
||||
FakeListChatModel,
|
||||
)
|
||||
from langchain_core.prompts import ChatPromptTemplate # noqa: PLC0415
|
||||
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages([("system", "bot"), ("human", "{input}")])
|
||||
llm = FakeListChatModel(responses=["hi"])
|
||||
chain = prompt | llm
|
||||
|
||||
with tracing_context(client=tracer.client, enabled=True):
|
||||
|
||||
@traceable
|
||||
def parent(x: dict) -> Any:
|
||||
return chain.invoke(x, config={"callbacks": [tracer]})
|
||||
|
||||
result = parent({"input": "hello"})
|
||||
|
||||
assert result is not None
|
||||
assert tracer.run_map == {}, (
|
||||
f"run_map should be empty but contains: "
|
||||
f"{[getattr(v, 'name', k) for k, v in tracer.run_map.items()]}"
|
||||
)
|
||||
|
||||
|
||||
def test_traceable_parent_run_map_no_runttree_accumulation() -> None:
|
||||
"""RunTree objects reachable from run_map must not grow across calls.
|
||||
|
||||
This is the memory-level regression test: a long-lived tracer is
|
||||
reused across many @traceable → Runnable invocations. Without the
|
||||
fix, each call leaves a RunTree (plus its child tree) in run_map,
|
||||
causing unbounded growth. With the fix, run_map is empty after
|
||||
every call, so the count stays flat.
|
||||
"""
|
||||
import gc # noqa: PLC0415
|
||||
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
def child(x: str) -> str:
|
||||
return x
|
||||
|
||||
counts: list[int] = []
|
||||
with tracing_context(client=tracer.client, enabled=True):
|
||||
|
||||
@traceable
|
||||
def parent(x: str) -> str:
|
||||
return child.invoke(x, config={"callbacks": [tracer]})
|
||||
|
||||
for _ in range(5):
|
||||
parent("hello")
|
||||
gc.collect()
|
||||
# Count RunTree objects reachable from the tracer's run_map.
|
||||
run_map_runtrees = sum(
|
||||
1 + len(v.child_runs) for v in tracer.run_map.values()
|
||||
)
|
||||
counts.append(run_map_runtrees)
|
||||
|
||||
# With the fix every call cleans up → counts are all 0.
|
||||
# Without the fix they grow: [1, 2, 3, 4, 5] (or more with children).
|
||||
assert counts == [0, 0, 0, 0, 0], (
|
||||
f"RunTree objects in run_map should not accumulate, got counts: {counts}"
|
||||
)
|
||||
|
||||
|
||||
class TestTracerMetadataThroughInvoke:
|
||||
"""Tests for tracer metadata merging through invoke calls."""
|
||||
|
||||
def test_tracer_metadata_applied_to_all_runs(self) -> None:
|
||||
"""Tracer metadata appears on every run when no config metadata is set."""
|
||||
tracer = _create_tracer_with_mocked_client(
|
||||
metadata={"env": "prod", "service": "api"}
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def child(x: int) -> int:
|
||||
return x + 1
|
||||
|
||||
@RunnableLambda
|
||||
def parent(x: int) -> int:
|
||||
return child.invoke(x)
|
||||
|
||||
parent.invoke(1, {"callbacks": [tracer]})
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 2
|
||||
for post in posts:
|
||||
md = post.get("extra", {}).get("metadata", {})
|
||||
assert md.get("env") == "prod", f"run {post['name']} missing env"
|
||||
assert md.get("service") == "api", f"run {post['name']} missing service"
|
||||
|
||||
def test_config_metadata_takes_precedence(self) -> None:
|
||||
"""Config metadata wins over tracer metadata for overlapping keys."""
|
||||
tracer = _create_tracer_with_mocked_client(
|
||||
metadata={"env": "prod", "tracer_only": "yes"}
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(
|
||||
1,
|
||||
{
|
||||
"callbacks": [tracer],
|
||||
"metadata": {"env": "staging", "config_only": "yes"},
|
||||
},
|
||||
)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
md = posts[0].get("extra", {}).get("metadata", {})
|
||||
# Config wins for overlapping key
|
||||
assert md["env"] == "staging"
|
||||
# Both non-overlapping keys are present
|
||||
assert md["tracer_only"] == "yes"
|
||||
assert md["config_only"] == "yes"
|
||||
|
||||
def test_nested_calls_inherit_config_metadata(self) -> None:
|
||||
"""Child runs inherit config metadata; tracer metadata fills gaps."""
|
||||
tracer = _create_tracer_with_mocked_client(
|
||||
metadata={"tracer_key": "tracer_val"}
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def child(x: int) -> int:
|
||||
return x + 1
|
||||
|
||||
@RunnableLambda
|
||||
def parent(x: int) -> int:
|
||||
return child.invoke(x)
|
||||
|
||||
parent.invoke(
|
||||
1,
|
||||
{
|
||||
"callbacks": [tracer],
|
||||
"metadata": {"config_key": "config_val"},
|
||||
},
|
||||
)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 2
|
||||
name_to_md = {
|
||||
post["name"]: post.get("extra", {}).get("metadata", {}) for post in posts
|
||||
}
|
||||
# Both parent and child should have config metadata (inherited)
|
||||
# and tracer metadata (patched in)
|
||||
for name, md in name_to_md.items():
|
||||
assert md.get("config_key") == "config_val", f"{name} missing config_key"
|
||||
assert md.get("tracer_key") == "tracer_val", f"{name} missing tracer_key"
|
||||
|
||||
def test_tracer_metadata_not_applied_to_sibling_handlers(self) -> None:
|
||||
"""Tracer metadata is not applied to other callback handlers.
|
||||
|
||||
`_patch_missing_metadata` copies the metadata dict before patching,
|
||||
so the callback manager's shared metadata dict is not mutated.
|
||||
Other handlers should only see config metadata, not tracer metadata.
|
||||
"""
|
||||
tracer = _create_tracer_with_mocked_client(
|
||||
metadata={"tracer_key": "tracer_val"}
|
||||
)
|
||||
|
||||
received_metadata: list[dict[str, Any]] = []
|
||||
|
||||
class MetadataCapture(BaseCallbackHandler):
|
||||
"""Callback handler that records metadata from chain events."""
|
||||
|
||||
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
|
||||
received_metadata.append(dict(kwargs.get("metadata", {})))
|
||||
|
||||
capture = MetadataCapture()
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(
|
||||
1,
|
||||
{
|
||||
"callbacks": [tracer, capture],
|
||||
"metadata": {"shared_key": "shared_val"},
|
||||
},
|
||||
)
|
||||
|
||||
assert len(received_metadata) >= 1
|
||||
for md in received_metadata:
|
||||
assert md["shared_key"] == "shared_val"
|
||||
assert "tracer_key" not in md
|
||||
|
||||
# But the posted run DOES have tracer metadata
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) >= 1
|
||||
for post in posts:
|
||||
post_md = post.get("extra", {}).get("metadata", {})
|
||||
assert post_md["shared_key"] == "shared_val"
|
||||
assert post_md["tracer_key"] == "tracer_val"
|
||||
|
||||
def test_tracer_metadata_with_no_config_metadata(self) -> None:
|
||||
"""When no config metadata is set, tracer metadata is the sole source."""
|
||||
tracer = _create_tracer_with_mocked_client(
|
||||
metadata={"only_from_tracer": "value"}
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(1, {"callbacks": [tracer]})
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
md = posts[0].get("extra", {}).get("metadata", {})
|
||||
assert md["only_from_tracer"] == "value"
|
||||
|
||||
def test_empty_tracer_metadata_does_not_interfere(self) -> None:
|
||||
"""Tracer with no metadata does not interfere with config metadata."""
|
||||
tracer = _create_tracer_with_mocked_client(metadata=None)
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(
|
||||
1,
|
||||
{"callbacks": [tracer], "metadata": {"config_key": "config_val"}},
|
||||
)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
md = posts[0].get("extra", {}).get("metadata", {})
|
||||
assert md["config_key"] == "config_val"
|
||||
|
||||
|
||||
def test_inheritable_metadata_nested_runs_preserve_parent_child_shape() -> None:
|
||||
"""Concurrent nested runs keep parent-child linkage within each invocation."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
@RunnableLambda
|
||||
def child(x: int) -> int:
|
||||
barrier.wait()
|
||||
return x + 1
|
||||
|
||||
@RunnableLambda
|
||||
def parent(x: int) -> int:
|
||||
return child.invoke(x)
|
||||
|
||||
def invoke_for_tenant(tenant: str, value: int) -> int:
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"tenant": tenant},
|
||||
)
|
||||
return parent.invoke(value, {"callbacks": callbacks})
|
||||
|
||||
threads = [
|
||||
threading.Thread(target=invoke_for_tenant, args=("alpha", 1)),
|
||||
threading.Thread(target=invoke_for_tenant, args=("beta", 2)),
|
||||
]
|
||||
for thread in threads:
|
||||
thread.start()
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 4
|
||||
parents = [post for post in posts if post["name"] == "parent"]
|
||||
children = [post for post in posts if post["name"] == "child"]
|
||||
assert len(parents) == 2
|
||||
assert len(children) == 2
|
||||
parent_ids = {parent["id"] for parent in parents}
|
||||
assert {child["parent_run_id"] for child in children} == parent_ids
|
||||
assert {
|
||||
post.get("extra", {}).get("metadata", {}).get("tenant") for post in posts
|
||||
} == {
|
||||
"alpha",
|
||||
"beta",
|
||||
}
|
||||
|
||||
|
||||
def test_inheritable_metadata_parallel_children_keep_tenant_isolation() -> None:
|
||||
"""Concurrent roots with parallel child runs keep tenant metadata isolated."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
barrier = threading.Barrier(4)
|
||||
|
||||
@RunnableLambda
|
||||
def add_one(x: int) -> int:
|
||||
barrier.wait()
|
||||
return x + 1
|
||||
|
||||
@RunnableLambda
|
||||
def add_two(x: int) -> int:
|
||||
barrier.wait()
|
||||
return x + 2
|
||||
|
||||
parallel = RunnableParallel(first=add_one, second=add_two)
|
||||
|
||||
def invoke_for_tenant(tenant: str, value: int) -> dict[str, int]:
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"tenant": tenant},
|
||||
)
|
||||
return parallel.invoke(value, {"callbacks": callbacks})
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||
list(executor.map(invoke_for_tenant, ["alpha", "beta"], [1, 2]))
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 6
|
||||
assert {
|
||||
post.get("extra", {}).get("metadata", {}).get("tenant") for post in posts
|
||||
} == {
|
||||
"alpha",
|
||||
"beta",
|
||||
}
|
||||
posts_by_trace: dict[str, list[dict[str, Any]]] = {}
|
||||
for post in posts:
|
||||
posts_by_trace.setdefault(post["trace_id"], []).append(post)
|
||||
assert len(posts_by_trace) == 2
|
||||
assert all(len(trace_posts) == 3 for trace_posts in posts_by_trace.values())
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.version_info < (3, 11), reason="Asyncio context vars require Python 3.11+"
|
||||
)
|
||||
async def test_langsmith_inheritable_metadata_mixed_sync_async_managers_isolated() -> (
|
||||
None
|
||||
):
|
||||
"""Sync and async manager configure paths can overlap without metadata sharing."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
async def async_runnable(x: int) -> int:
|
||||
await asyncio.sleep(0)
|
||||
return x + 1
|
||||
|
||||
@RunnableLambda
|
||||
def sync_runnable(x: int) -> int:
|
||||
return x + 1
|
||||
|
||||
async def run_sync() -> int:
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"path": "sync"},
|
||||
)
|
||||
return await asyncio.to_thread(
|
||||
sync_runnable.invoke, 1, {"callbacks": callbacks}
|
||||
)
|
||||
|
||||
async def run_async() -> int:
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"path": "async"},
|
||||
)
|
||||
return await async_runnable.ainvoke(1, {"callbacks": callbacks})
|
||||
|
||||
await asyncio.gather(run_sync(), run_async())
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 2
|
||||
assert {
|
||||
post.get("extra", {}).get("metadata", {}).get("path") for post in posts
|
||||
} == {
|
||||
"sync",
|
||||
"async",
|
||||
}
|
||||
|
||||
|
||||
class TestLangsmithInheritableTracingDefaultsInConfigure:
|
||||
"""Tests for LangSmith inheritable tracing defaults in configure."""
|
||||
|
||||
def test_langsmith_inheritable_metadata_applied_via_configure(self) -> None:
|
||||
"""langsmith_inheritable_metadata flows to a copied tracer."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"env": "prod", "service": "api"},
|
||||
)
|
||||
lc_tracers = [h for h in cm.handlers if isinstance(h, LangChainTracer)]
|
||||
assert len(lc_tracers) == 1
|
||||
assert lc_tracers[0] is not tracer
|
||||
assert lc_tracers[0].tracing_metadata == {"env": "prod", "service": "api"}
|
||||
assert tracer.tracing_metadata is None
|
||||
|
||||
def test_langsmith_inheritable_metadata_does_not_overwrite_tracer_metadata(
|
||||
self,
|
||||
) -> None:
|
||||
"""Tracer metadata takes precedence over langsmith_inheritable_metadata."""
|
||||
tracer = _create_tracer_with_mocked_client(metadata={"env": "staging"})
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"env": "prod", "service": "api"},
|
||||
)
|
||||
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
|
||||
assert tracer.tracing_metadata == {"env": "staging"}
|
||||
assert lc_tracer.tracing_metadata == {"env": "staging", "service": "api"}
|
||||
|
||||
def test_tracing_context_metadata_merged_into_langsmith_inheritable_metadata(
|
||||
self,
|
||||
) -> None:
|
||||
"""Tracing-context metadata merges into tracer defaults.
|
||||
|
||||
LangSmith metadata keeps precedence on collisions.
|
||||
"""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
with tracing_context(
|
||||
enabled=True,
|
||||
client=tracer.client,
|
||||
metadata={"trace_only": "value", "shared": "trace"},
|
||||
):
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={
|
||||
"shared": "langsmith",
|
||||
"tenant": "alpha",
|
||||
},
|
||||
)
|
||||
|
||||
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
|
||||
assert lc_tracer.tracing_metadata == {
|
||||
"trace_only": "value",
|
||||
"shared": "langsmith",
|
||||
"tenant": "alpha",
|
||||
}
|
||||
|
||||
def test_langsmith_inheritable_metadata_end_to_end(self) -> None:
|
||||
"""langsmith_inheritable_metadata in configure propagates to posted runs."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
# Use langsmith_inheritable_metadata through the config callbacks path
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"env": "prod"},
|
||||
)
|
||||
my_func.invoke(1, {"callbacks": cm})
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
md = posts[0].get("extra", {}).get("metadata", {})
|
||||
assert md["env"] == "prod"
|
||||
|
||||
def test_runnable_config_copies_configurable_values_to_tracing_metadata(
|
||||
self,
|
||||
) -> None:
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
config: RunnableConfig = {
|
||||
"callbacks": [tracer],
|
||||
"metadata": {
|
||||
"something": "else",
|
||||
"checkpoint_ns": "from-metadata",
|
||||
"model": "from-metadata",
|
||||
},
|
||||
"configurable": {
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"checkpoint_ns": "from-configurable",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"model": "from-configurable",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"api_key": "should-not-propagate",
|
||||
"__secret_key": "should-not-propagate",
|
||||
"temperature": 0.5,
|
||||
"streaming": True,
|
||||
"custom_setting": {"nested": True},
|
||||
"none_value": None,
|
||||
},
|
||||
}
|
||||
my_func.invoke(1, config)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 1
|
||||
md = posts[0].get("extra", {}).get("metadata", {})
|
||||
assert {
|
||||
key: md[key]
|
||||
for key in (
|
||||
"something",
|
||||
"thread_id",
|
||||
"checkpoint_id",
|
||||
"task_id",
|
||||
"run_id",
|
||||
"assistant_id",
|
||||
"graph_id",
|
||||
"user_id",
|
||||
"cron_id",
|
||||
"langgraph_auth_user_id",
|
||||
"temperature",
|
||||
"streaming",
|
||||
"model",
|
||||
"checkpoint_ns",
|
||||
)
|
||||
} == {
|
||||
"something": "else",
|
||||
"thread_id": "th-123",
|
||||
"checkpoint_id": "ckpt-1",
|
||||
"task_id": "task-1",
|
||||
"run_id": "run-456",
|
||||
"assistant_id": "asst-789",
|
||||
"graph_id": "graph-0",
|
||||
"user_id": "uid-1",
|
||||
"cron_id": "cron-1",
|
||||
"langgraph_auth_user_id": "user-1",
|
||||
"temperature": 0.5,
|
||||
"streaming": True,
|
||||
"model": "from-metadata",
|
||||
"checkpoint_ns": "from-metadata",
|
||||
}
|
||||
assert "api_key" not in md
|
||||
assert "__secret_key" not in md
|
||||
assert "custom_setting" not in md
|
||||
assert "none_value" not in md
|
||||
|
||||
def test_langsmith_inheritable_metadata_does_not_affect_non_tracer_handlers(
|
||||
self,
|
||||
) -> None:
|
||||
"""langsmith_inheritable_metadata only applies to tracers."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
received_metadata: list[dict[str, Any]] = []
|
||||
|
||||
class MetadataCapture(BaseCallbackHandler):
|
||||
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
|
||||
received_metadata.append(dict(kwargs.get("metadata", {})))
|
||||
|
||||
capture = MetadataCapture()
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer, capture],
|
||||
langsmith_inheritable_metadata={"tracer_only": "yes"},
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(1, {"callbacks": cm})
|
||||
|
||||
# Non-tracer handler should NOT see langsmith_inheritable_metadata
|
||||
assert len(received_metadata) >= 1
|
||||
for md in received_metadata:
|
||||
assert "tracer_only" not in md
|
||||
|
||||
# But the tracer's posted runs SHOULD have it
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) >= 1
|
||||
for post in posts:
|
||||
post_md = post.get("extra", {}).get("metadata", {})
|
||||
assert post_md["tracer_only"] == "yes"
|
||||
|
||||
def test_no_langsmith_inheritable_metadata_is_noop(self) -> None:
|
||||
"""Passing langsmith_inheritable_metadata=None does not alter tracer state."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata=None,
|
||||
)
|
||||
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
|
||||
assert lc_tracer is tracer
|
||||
assert tracer.tracing_metadata is None
|
||||
|
||||
def test_langsmith_inheritable_tags_applied_via_configure(self) -> None:
|
||||
"""langsmith_inheritable_tags flow to a copied tracer."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
tracer.tags = ["existing"]
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_tags=["tenant:alpha", "existing"],
|
||||
)
|
||||
lc_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
|
||||
assert lc_tracer is not tracer
|
||||
assert lc_tracer.tags == ["existing", "tenant:alpha"]
|
||||
assert tracer.tags == ["existing"]
|
||||
|
||||
def test_inheritable_tags_do_not_affect_non_tracer_handlers(self) -> None:
|
||||
"""langsmith_inheritable_tags only apply to tracers."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
|
||||
received_tags: list[list[str]] = []
|
||||
|
||||
class TagCapture(BaseCallbackHandler):
|
||||
def on_chain_start(self, *_args: Any, **kwargs: Any) -> None:
|
||||
received_tags.append(list(kwargs.get("tags", [])))
|
||||
|
||||
capture = TagCapture()
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer, capture],
|
||||
langsmith_inheritable_tags=["tracer-only"],
|
||||
)
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return x
|
||||
|
||||
my_func.invoke(1, {"callbacks": cm})
|
||||
|
||||
assert received_tags
|
||||
assert all("tracer-only" not in tags for tags in received_tags)
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert posts
|
||||
assert all("tracer-only" in post.get("tags", []) for post in posts)
|
||||
|
||||
def test_langsmith_inheritable_metadata_copies_handlers_without_mutating_original(
|
||||
self,
|
||||
) -> None:
|
||||
"""Configured manager copies tracers and leaves the original unchanged."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
cm = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"env": "prod"},
|
||||
)
|
||||
handler_tracer = next(h for h in cm.handlers if isinstance(h, LangChainTracer))
|
||||
inheritable_tracer = next(
|
||||
h for h in cm.inheritable_handlers if isinstance(h, LangChainTracer)
|
||||
)
|
||||
assert handler_tracer is not tracer
|
||||
assert inheritable_tracer is not tracer
|
||||
assert tracer.tracing_metadata is None
|
||||
|
||||
def test_langsmith_inheritable_metadata_configure_isolated_per_manager(
|
||||
self,
|
||||
) -> None:
|
||||
"""Separate configure calls keep tracer-only defaults isolated."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
alpha_manager = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"tenant": "alpha"},
|
||||
)
|
||||
beta_manager = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"tenant": "beta"},
|
||||
)
|
||||
|
||||
alpha_tracer = next(
|
||||
handler
|
||||
for handler in alpha_manager.handlers
|
||||
if isinstance(handler, LangChainTracer)
|
||||
)
|
||||
beta_tracer = next(
|
||||
handler
|
||||
for handler in beta_manager.handlers
|
||||
if isinstance(handler, LangChainTracer)
|
||||
)
|
||||
|
||||
assert tracer.tracing_metadata is None
|
||||
assert alpha_tracer is not tracer
|
||||
assert beta_tracer is not tracer
|
||||
assert alpha_tracer is not beta_tracer
|
||||
assert alpha_tracer.tracing_metadata == {"tenant": "alpha"}
|
||||
assert beta_tracer.tracing_metadata == {"tenant": "beta"}
|
||||
assert alpha_tracer.run_map is tracer.run_map
|
||||
assert beta_tracer.run_map is tracer.run_map
|
||||
assert alpha_tracer.order_map is tracer.order_map
|
||||
assert beta_tracer.order_map is tracer.order_map
|
||||
|
||||
def test_inheritable_metadata_concurrent_invocations_remain_isolated(
|
||||
self,
|
||||
) -> None:
|
||||
"""Parallel invocations through copied tracers keep metadata separated."""
|
||||
tracer = _create_tracer_with_mocked_client()
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
@traceable
|
||||
def traced_leaf(x: int) -> int:
|
||||
barrier.wait()
|
||||
return x
|
||||
|
||||
@RunnableLambda
|
||||
def my_func(x: int) -> int:
|
||||
return traced_leaf(x)
|
||||
|
||||
def invoke_for_tenant(tenant: str, value: int) -> int:
|
||||
callbacks = CallbackManager.configure(
|
||||
inheritable_callbacks=[tracer],
|
||||
langsmith_inheritable_metadata={"tenant": tenant},
|
||||
)
|
||||
return my_func.invoke(value, {"callbacks": callbacks})
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||
list(executor.map(invoke_for_tenant, ["alpha", "beta"], [1, 2]))
|
||||
|
||||
posts = _get_posts(tracer.client)
|
||||
assert len(posts) == 4
|
||||
assert {post["name"] for post in posts} == {"my_func", "traced_leaf"}
|
||||
my_func_posts = [post for post in posts if post["name"] == "my_func"]
|
||||
assert len(my_func_posts) == 2
|
||||
assert {
|
||||
post.get("extra", {}).get("metadata", {}).get("tenant")
|
||||
for post in my_func_posts
|
||||
} == {"alpha", "beta"}
|
||||
assert tracer.run_map == {}
|
||||
assert len(tracer.order_map) == 2
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user