audit - update comments, add frontmatter explanations

This commit is contained in:
Mason Daugherty
2025-09-23 12:58:12 -04:00
parent 9c1285cf5b
commit 6e1a39c7a2
27 changed files with 215 additions and 108 deletions

View File

@@ -1,4 +1,6 @@
# Adapted from https://github.com/tiangolo/fastapi/blob/master/.github/actions/people/action.yml
# TODO: fix this, migrate to new docs repo?
name: "Generate LangChain People"
description: "Generate the data for the LangChain People page"
author: "Jacob Lee <jacob@langchain.dev>"

View File

@@ -1,3 +1,5 @@
# Helper to set up Python and uv with caching
name: uv-install
description: Set up Python and uv with caching
@@ -8,15 +10,15 @@ inputs:
enable-cache:
description: Enable caching for uv dependencies
required: false
default: 'true'
default: "true"
cache-suffix:
description: Custom cache key suffix for cache invalidation
required: false
default: ''
default: ""
working-directory:
description: Working directory for cache glob scoping
required: false
default: '**'
default: "**"
env:
UV_VERSION: "0.5.25"

View File

@@ -1,4 +1,4 @@
# GitHub PR Labeler Configuration for LangChain
# Label PRs (config)
# Automatically applies labels based on changed files and branch patterns
# Core packages

View File

@@ -1,5 +1,7 @@
# PR Title Labeler Configuration
# PR title labeler config
#
# Labels PRs based on conventional commit patterns in titles
#
# Format: type(scope): description or type!: description (breaking)
add-missing-labels: true

View File

@@ -1,3 +1,18 @@
"""Analyze git diffs to determine which directories need to be tested.
Intelligently determines which LangChain packages and directories need to be tested,
linted, or built based on the changes. Handles dependency relationships between
packages, maps file changes to appropriate CI job configurations, and outputs JSON
configurations for GitHub Actions.
- Maps changed files to affected package directories (libs/core, libs/partners/*, etc.)
- Builds dependency graph to include dependent packages when core components change
- Generates test matrix configurations with appropriate Python versions
- Handles special cases for Pydantic version testing and performance benchmarks
Used as part of the check_diffs workflow.
"""
import glob
import json
import os
@@ -17,7 +32,7 @@ LANGCHAIN_DIRS = [
"libs/langchain_v1",
]
# when set to True, we are ignoring core dependents
# When set to True, we are ignoring core dependents
# in order to be able to get CI to pass for each individual
# package that depends on core
# e.g. if you touch core, we don't then add textsplitters/etc to CI
@@ -49,9 +64,9 @@ def all_package_dirs() -> Set[str]:
def dependents_graph() -> dict:
"""
Construct a mapping of package -> dependents, such that we can
run tests on all dependents of a package when a change is made.
"""Construct a mapping of package -> dependents
Done such that we can run tests on all dependents of a package when a change is made.
"""
dependents = defaultdict(set)

View File

@@ -1,3 +1,5 @@
"""Check that no dependencies allow prereleases unless we're releasing a prerelease."""
import sys
import tomllib
@@ -6,15 +8,14 @@ if __name__ == "__main__":
# Get the TOML file path from the command line argument
toml_file = sys.argv[1]
# read toml file
with open(toml_file, "rb") as file:
toml_data = tomllib.load(file)
# see if we're releasing an rc
# See if we're releasing an rc or dev version
version = toml_data["project"]["version"]
releasing_rc = "rc" in version or "dev" in version
# if not, iterate through dependencies and make sure none allow prereleases
# If not, iterate through dependencies and make sure none allow prereleases
if not releasing_rc:
dependencies = toml_data["project"]["dependencies"]
for dep_version in dependencies:

View File

@@ -1,3 +1,5 @@
"""Get minimum versions of dependencies from a pyproject.toml file."""
import sys
from collections import defaultdict
from typing import Optional
@@ -5,7 +7,7 @@ from typing import Optional
if sys.version_info >= (3, 11):
import tomllib
else:
# for python 3.10 and below, which doesnt have stdlib tomllib
# For Python 3.10 and below, which doesnt have stdlib tomllib
import tomli as tomllib
import re
@@ -34,14 +36,13 @@ SKIP_IF_PULL_REQUEST = [
def get_pypi_versions(package_name: str) -> List[str]:
"""
Fetch all available versions for a package from PyPI.
"""Fetch all available versions for a package from PyPI.
Args:
package_name (str): Name of the package
package_name: Name of the package
Returns:
List[str]: List of all available versions
List of all available versions
Raises:
requests.exceptions.RequestException: If PyPI API request fails
@@ -54,24 +55,23 @@ def get_pypi_versions(package_name: str) -> List[str]:
def get_minimum_version(package_name: str, spec_string: str) -> Optional[str]:
"""
Find the minimum published version that satisfies the given constraints.
"""Find the minimum published version that satisfies the given constraints.
Args:
package_name (str): Name of the package
spec_string (str): Version specification string (e.g., ">=0.2.43,<0.4.0,!=0.3.0")
package_name: Name of the package
spec_string: Version specification string (e.g., ">=0.2.43,<0.4.0,!=0.3.0")
Returns:
Optional[str]: Minimum compatible version or None if no compatible version found
Minimum compatible version or None if no compatible version found
"""
# rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
spec_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", spec_string)
# rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1 (can be anywhere in constraint string)
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1 (can be anywhere in constraint string)
for y in range(1, 10):
spec_string = re.sub(
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}", spec_string
)
# rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
for x in range(1, 10):
spec_string = re.sub(
rf"\^{x}\.(\d+)\.(\d+)", rf">={x}.\1.\2,<{x + 1}", spec_string
@@ -154,22 +154,25 @@ def get_min_version_from_toml(
def check_python_version(version_string, constraint_string):
"""
Check if the given Python version matches the given constraints.
"""Check if the given Python version matches the given constraints.
:param version_string: A string representing the Python version (e.g. "3.8.5").
:param constraint_string: A string representing the package's Python version constraints (e.g. ">=3.6, <4.0").
:return: True if the version matches the constraints, False otherwise.
Args:
version_string: A string representing the Python version (e.g. "3.8.5").
constraint_string: A string representing the package's Python version
constraints (e.g. ">=3.6, <4.0").
Returns:
True if the version matches the constraints
"""
# rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
constraint_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", constraint_string)
# rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1.0 (can be anywhere in constraint string)
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1.0 (can be anywhere in constraint string)
for y in range(1, 10):
constraint_string = re.sub(
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}.0", constraint_string
)
# rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
for x in range(1, 10):
constraint_string = re.sub(
rf"\^{x}\.0\.(\d+)", rf">={x}.0.\1,<{x + 1}.0.0", constraint_string

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python
"""Script to sync libraries from various repositories into the main langchain repository."""
"""Sync libraries from various repositories into this monorepo."""
import os
import shutil

View File

@@ -1,3 +1,11 @@
# Validates that a package's integration tests compile without syntax or import errors.
#
# (If an integration test fails to compile, it won't run.)
#
# Called as part of check_diffs.yml workflow
#
# Runs pytest with compile marker to check syntax/imports.
name: '🔗 Compile Integration Tests'
on:

View File

@@ -1,3 +1,10 @@
# Runs `make integration_tests` on the specified package.
#
# Manually triggered via workflow_dispatch for testing with real APIs.
#
# Installs integration test dependencies and executes full test suite.
name: '🚀 Integration Tests'
run-name: 'Test ${{ inputs.working-directory }} on Python ${{ inputs.python-version }}'
@@ -83,7 +90,7 @@ jobs:
run: |
make integration_tests
- name: Ensure the tests did not create any additional files
- name: 'Ensure testing did not create/modify files'
shell: bash
run: |
set -eu

View File

@@ -1,6 +1,11 @@
name: '🧹 Code Linting'
# Runs code quality checks using ruff, mypy, and other linting tools
# Checks both package code and test code for consistency
# Runs linting.
#
# Uses the package's Makefile to run the checks, specifically the
# `lint_package` and `lint_tests` targets.
#
# Called as part of check_diffs.yml workflow.
name: '🧹 Linting'
on:
workflow_call:
@@ -43,14 +48,6 @@ jobs:
working-directory: ${{ inputs.working-directory }}
- name: '📦 Install Lint & Typing Dependencies'
# Also installs dev/lint/test/typing dependencies, to ensure we have
# type hints for as many of our libraries as possible.
# This helps catch errors that require dependencies to be spotted, for example:
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
#
# If you change this configuration, make sure to change the `cache-key`
# in the `poetry_setup` action above to stop using the old cache.
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
uv sync --group lint --group typing
@@ -60,20 +57,13 @@ jobs:
run: |
make lint_package
- name: '📦 Install Unit Test Dependencies'
# Also installs dev/lint/test/typing dependencies, to ensure we have
# type hints for as many of our libraries as possible.
# This helps catch errors that require dependencies to be spotted, for example:
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
#
# If you change this configuration, make sure to change the `cache-key`
# in the `poetry_setup` action above to stop using the old cache.
# It doesn't matter how you change it, any change will cause a cache-bust.
- name: '📦 Install Test Dependencies (non-partners)'
# (For directories NOT starting with libs/partners/)
if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }}
working-directory: ${{ inputs.working-directory }}
run: |
uv sync --inexact --group test
- name: '📦 Install Unit + Integration Test Dependencies'
- name: '📦 Install Test Dependencies'
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
working-directory: ${{ inputs.working-directory }}
run: |

View File

@@ -1,3 +1,9 @@
# Builds and publishes LangChain packages to PyPI.
#
# Manually triggered, though can be used as a reusable workflow (workflow_call).
#
# Handles version bumping, building, and publishing to PyPI with authentication.
name: '🚀 Package Release'
run-name: 'Release ${{ inputs.working-directory }} ${{ inputs.release-version }}'
on:
@@ -52,8 +58,8 @@ jobs:
# We want to keep this build stage *separate* from the release stage,
# so that there's no sharing of permissions between them.
# The release stage has trusted publishing and GitHub repo contents write access,
# and we want to keep the scope of that access limited just to the release job.
# (Release stage has trusted publishing and GitHub repo contents write access,
#
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
# could get access to our GitHub or PyPI credentials.
#
@@ -293,11 +299,14 @@ jobs:
working-directory: ${{ inputs.working-directory }}
- name: Check for prerelease versions
# Block release if any dependencies allow prerelease versions
# (unless this is itself a prerelease version)
working-directory: ${{ inputs.working-directory }}
run: |
uv run python $GITHUB_WORKSPACE/.github/scripts/check_prerelease_dependencies.py pyproject.toml
- name: Get minimum versions
# Find the minimum published versions that satisfies the given constraints
working-directory: ${{ inputs.working-directory }}
id: min-version
run: |
@@ -322,6 +331,7 @@ jobs:
working-directory: ${{ inputs.working-directory }}
- name: Run integration tests
# Uses the Makefile's `integration_tests` target for the specified package
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
@@ -362,7 +372,10 @@ jobs:
working-directory: ${{ inputs.working-directory }}
# Test select published packages against new core
# Done when code changes are made to langchain-core
test-prior-published-packages-against-new-core:
# Installs the new core with old partners: Installs the new unreleased core
# alongside the previously published partner packages and runs integration tests
needs:
- build
- release-notes
@@ -390,6 +403,7 @@ jobs:
# We implement this conditional as Github Actions does not have good support
# for conditionally needing steps. https://github.com/actions/runner/issues/491
# TODO: this seems to be resolved upstream, so we can probably remove this workaround
- name: Check if libs/core
run: |
if [ "${{ startsWith(inputs.working-directory, 'libs/core') }}" != "true" ]; then
@@ -444,6 +458,7 @@ jobs:
make integration_tests
publish:
# Publishes the package to PyPI
needs:
- build
- release-notes
@@ -486,6 +501,7 @@ jobs:
attestations: false
mark-release:
# Marks the GitHub release with the new version tag
needs:
- build
- release-notes
@@ -495,7 +511,7 @@ jobs:
runs-on: ubuntu-latest
permissions:
# This permission is needed by `ncipollo/release-action` to
# create the GitHub release.
# create the GitHub release/tag
contents: write
defaults:

View File

@@ -1,6 +1,7 @@
name: '🧪 Unit Testing'
# Runs unit tests with both current and minimum supported dependency versions
# to ensure compatibility across the supported range
# to ensure compatibility across the supported range.
name: '🧪 Unit Testing'
on:
workflow_call:

View File

@@ -1,3 +1,10 @@
# Validates that all import statements in `.ipynb` notebooks are correct and functional.
#
# Called as part of check_diffs.yml.
#
# Installs test dependencies and LangChain packages in editable mode and
# runs check_imports.py.
name: '📑 Documentation Import Testing'
on:

View File

@@ -1,3 +1,5 @@
# Facilitate unit testing against different Pydantic versions for a provided package.
name: '🐍 Pydantic Version Testing'
on:

View File

@@ -1,11 +1,14 @@
# Build the API reference documentation.
#
# Runs daily. Can also be triggered manually for immediate updates.
name: '📚 API Docs'
run-name: 'Build & Deploy API Reference'
# Runs daily or can be triggered manually for immediate updates
on:
workflow_dispatch:
schedule:
- cron: '0 13 * * *' # Daily at 1PM UTC
- cron: '0 13 * * *' # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
env:
PYTHON_VERSION: "3.11"

View File

@@ -1,9 +1,11 @@
# Runs broken link checker in /docs on a daily schedule.
name: '🔗 Check Broken Links'
on:
workflow_dispatch:
schedule:
- cron: '0 13 * * *'
- cron: '0 13 * * *' # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
permissions:
contents: read

View File

@@ -1,6 +1,8 @@
# Ensures version numbers in pyproject.toml and version.py stay in sync.
#
# (Prevents releases with mismatched version numbers)
name: '🔍 Check `core` Version Equality'
# Ensures version numbers in pyproject.toml and version.py stay in sync
# Prevents releases with mismatched version numbers
on:
pull_request:

View File

@@ -1,3 +1,18 @@
# Primary CI workflow.
#
# Only runs against packages that have changed files.
#
# Runs:
# - Linting (_lint.yml)
# - Unit Tests (_test.yml)
# - Pydantic compatibility tests (_test_pydantic.yml)
# - Documentation import tests (_test_doc_imports.yml)
# - Integration test compilation checks (_compile_integration_test.yml)
# - Extended test suites that require additional dependencies
# - Codspeed benchmarks (if not labeled 'codspeed-ignore')
#
# Reports status to GitHub checks and PR status.
name: '🔧 CI'
on:
@@ -11,8 +26,8 @@ on:
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
# a limited number of job runners to be active at the same time, so it's better to
# cancel pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

View File

@@ -1,3 +1,6 @@
# For integrations, we run check_templates.py to ensure that new docs use the correct
# templates based on their type. See the script for more details.
name: '📑 Integration Docs Lint'
on:

View File

@@ -1,9 +1,11 @@
# Updates the LangChain People data by fetching the latest info from the LangChain Git.
# TODO: broken/not used
name: '👥 LangChain People'
run-name: 'Update People Data'
# This workflow updates the LangChain People data by fetching the latest information from the LangChain Git
on:
schedule:
- cron: "0 14 1 * *"
- cron: "0 14 1 * *" # Runs at 14:00 UTC on the 1st of every month (10AM EDT/7AM PDT)
push:
branches: [jacob/people]
workflow_dispatch:

View File

@@ -1,3 +1,7 @@
# Label PRs based on changed files.
#
# See `.github/pr-file-labeler.yml` to see rules for each label/directory.
name: "🏷️ Pull Request Labeler"
on:
@@ -18,5 +22,5 @@ jobs:
uses: actions/labeler@v6
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/labeler.yml
configuration-path: .github/pr-file-labeler.yml
sync-labels: false

View File

@@ -1,3 +1,7 @@
# Label PRs based on their titles.
#
# See `.github/pr-title-labeler.yml` to see rules for each label/title pattern.
name: "🏷️ PR Title Labeler"
on:

View File

@@ -1,29 +1,30 @@
# -----------------------------------------------------------------------------
# PR Title Lint Workflow
# PR title linting.
#
# Purpose:
# Enforces Conventional Commits format for pull request titles to maintain a
# clear, consistent, and machine-readable change history across our repository.
# This helps with automated changelog generation and semantic versioning.
# FORMAT (Conventional Commits 1.0.0):
#
# Enforced Commit Message Format (Conventional Commits 1.0.0):
# <type>[optional scope]: <description>
# [optional body]
# [optional footer(s)]
#
# Examples:
# feat(core): add multitenant support
# fix(cli): resolve flag parsing error
# docs: update API usage examples
# docs(openai): update API usage examples
#
# Allowed Types:
# feat — a new feature (MINOR bump)
# fix — a bug fix (PATCH bump)
# docs — documentation only changes
# style — formatting, missing semi-colons, etc.; no code change
# refactor — code change that neither fixes a bug nor adds a feature
# perf — code change that improves performance
# test — adding missing tests or correcting existing tests
# build — changes that affect the build system or external dependencies
# ci — continuous integration/configuration changes
# chore — other changes that don't modify src or test files
# revert — reverts a previous commit
# release — prepare a new release
# * feat — a new feature (MINOR)
# * fix — a bug fix (PATCH)
# * docs — documentation only changes (either in /docs or code comments)
# * style — formatting, linting, etc.; no code change or typing refactors
# * refactor — code change that neither fixes a bug nor adds a feature
# * perf — code change that improves performance
# * test — adding tests or correcting existing
# * build — changes that affect the build system/external dependencies
# * ci — continuous integration/configuration changes
# * chore — other changes that don't modify source or test files
# * revert — reverts a previous commit
# * release — prepare a new release
#
# Allowed Scopes (optional):
# core, cli, langchain, langchain_v1, langchain_legacy, standard-tests,
@@ -31,21 +32,12 @@
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
# xai, infra
#
# Rules & Tips for New Committers:
# 1. Subject (type) must start with a lowercase letter and, if possible, be
# followed by a scope wrapped in parenthesis `(scope)`
# 2. Breaking changes:
# Append "!" after type/scope (e.g., feat!: drop Node 12 support)
# Or include a footer "BREAKING CHANGE: <details>"
# 3. Example PR titles:
# feat(core): add multitenant support
# fix(cli): resolve flag parsing error
# docs: update API usage examples
# docs(openai): update API usage examples
# Rules:
# 1. The 'Type' must start with a lowercase letter.
# 2. Breaking changes: append "!" after type/scope (e.g., feat!: drop x support)
#
# Resources:
# • Conventional Commits spec: https://www.conventionalcommits.org/en/v1.0.0/
# -----------------------------------------------------------------------------
# Enforces Conventional Commits format for pull request titles to maintain a clear and
# machine-readable change history.
name: '🏷️ PR Title Lint'
@@ -57,7 +49,7 @@ on:
types: [opened, edited, synchronize]
jobs:
# Validates that PR title follows Conventional Commits specification
# Validates that PR title follows Conventional Commits 1.0.0 specification
lint-pr-title:
name: 'validate format'
runs-on: ubuntu-latest

View File

@@ -1,3 +1,5 @@
# Integration tests for documentation notebooks.
name: '📓 Validate Documentation Notebooks'
run-name: 'Test notebooks in ${{ inputs.working-directory }}'
on:

View File

@@ -1,8 +1,14 @@
# Routine integration tests against partner libraries with live API credentials.
#
# Uses `make integration_tests` for each library in the matrix.
#
# Runs daily. Can also be triggered manually for immediate updates.
name: '⏰ Scheduled Integration Tests'
run-name: "Run Integration Tests - ${{ inputs.working-directory-force || 'all libs' }} (Python ${{ inputs.python-version-force || '3.9, 3.11' }})"
on:
workflow_dispatch: # Allows maintainers to trigger the workflow manually in GitHub UI
workflow_dispatch:
inputs:
working-directory-force:
type: string
@@ -54,7 +60,7 @@ jobs:
echo $matrix
echo "matrix=$matrix" >> $GITHUB_OUTPUT
# Run integration tests against partner libraries with live API credentials
# Tests are run with both Poetry and UV depending on the library's setup
# Tests are run with Poetry or UV depending on the library's setup
build:
if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
name: '🐍 Python ${{ matrix.python-version }}: ${{ matrix.working-directory }}'

View File

@@ -1,4 +1,20 @@
"""This script checks documentation for broken import statements."""
"""Check documentation for broken import statements.
Validates that all import statements in Jupyter notebooks within the documentation
directory are functional and can be successfully imported.
- Scans all `.ipynb` files in `docs/`
- Extracts import statements from code cells
- Tests each import to ensure it works
- Reports any broken imports that would fail for users
Usage:
python docs/scripts/check_imports.py
Exit codes:
0: All imports are valid
1: Found broken imports (ImportError raised)
"""
import importlib
import json