Mirror of https://github.com/hwchase17/langchain.git (synced 2026-04-16 01:47:07 +00:00)

Compare commits: jacob/agen ... harrison/s (60 commits)
| SHA1 |
|---|
| c708652957 |
| b46802ad88 |
| 247950ebaa |
| 7051e83ae0 |
| 76fb19f205 |
| d1a0140659 |
| 9e745c8e91 |
| 0de6e09659 |
| 3e0c5a331c |
| 34709ec34f |
| 279155f02d |
| a2c447938c |
| f541e0235c |
| 89f74bf808 |
| dee5fdfe2b |
| a7cbdccb8a |
| 91dd47b12a |
| af2ad3bb4f |
| 60fc995c3b |
| c3232a8aa3 |
| 243465da3e |
| 32499164ab |
| 9a5a52dd99 |
| deab168da5 |
| d7793a0a66 |
| f0304a8d9e |
| bbc795b752 |
| 6a0a7a7f66 |
| de690b081e |
| cea3d61bf4 |
| 5631e7e397 |
| a66df25a89 |
| 8226b81fb3 |
| 280aec4e1d |
| 12039a0057 |
| 85eae95764 |
| 6b1af2d19c |
| fd2fe4aacf |
| d6403c6696 |
| 11fc0a5004 |
| ce884f801f |
| 75ade6116d |
| 8ccc18fd73 |
| f62331a20c |
| f922f9fca3 |
| c6c9d93283 |
| e08017fcf7 |
| a2129713c3 |
| 960faa6ebe |
| c4f32cb0e9 |
| f56c0359dc |
| 98c4f2a5fe |
| 31c9081246 |
| f5ed74d56f |
| 5535c7851a |
| ee1478b6e1 |
| bf7b59ec44 |
| 4965f9a10e |
| 2d18c65ff7 |
| 88d39708a9 |
.devcontainer/README.md

@@ -5,31 +5,26 @@ This project includes a [dev container](https://containers.dev/), which lets you

You can use the dev container configuration in this folder to build and run the app without needing to install any of its tools locally! You can use it in [GitHub Codespaces](https://github.com/features/codespaces) or the [VS Code Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers).

## GitHub Codespaces

[Open in GitHub Codespaces](https://codespaces.new/langchain-ai/langchain)

You may use the button above, or follow these steps to open this repo in a Codespace:

1. Click the **Code** drop-down menu at the top of <https://github.com/langchain-ai/langchain>.
1. Click on the **Codespaces** tab.
1. Click **Create codespace on master**.

For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace).

## VS Code Dev Containers

[Open in Dev Containers](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)

> [!NOTE]
> If you click the link above you will open the main repo (`langchain-ai/langchain`) and *not* your local cloned repo. This is fine if you only want to run and test the library, but if you want to contribute you can use the link below and replace with your username and cloned repo name:

```txt
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<YOUR_USERNAME>/<YOUR_CLONED_REPO_NAME>
```

Then you will have a local cloned repo where you can contribute and then create pull requests.

If you already have VS Code and Docker installed, you can use the button above to get started. This will cause VS Code to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.

Alternatively, you can follow these steps to open this repo in a container using the VS Code Dev Containers extension:

@@ -45,5 +40,5 @@ You can learn more in the [Dev Containers documentation](https://code.visualstud

## Tips and tricks

- If you are working with the same repository folder in a container and Windows, you'll want consistent line endings (otherwise you may see hundreds of changes in the SCM view). The `.gitattributes` file in the root of this repo will disable line ending conversion and should prevent this. See [tips and tricks](https://code.visualstudio.com/docs/devcontainers/tips-and-tricks#_resolving-git-line-ending-issues-in-containers-resulting-in-many-modified-files) for more info.
- If you'd like to review the contents of the image used in this dev container, you can check it out in the [devcontainers/images](https://github.com/devcontainers/images/tree/main/src/python) repo.
.devcontainer/devcontainer.json

@@ -1,58 +1,36 @@

// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
    // Name for the dev container
    "name": "langchain",
    // Point to a Docker Compose file
    "dockerComposeFile": "./docker-compose.yaml",
    // Required when using Docker Compose. The name of the service to connect to once running
    "service": "langchain",
    // The optional 'workspaceFolder' property is the path VS Code should open by default when
    // connected. This is typically a file mount in .devcontainer/docker-compose.yml
    "workspaceFolder": "/workspaces/langchain",
    "mounts": [
        "source=langchain-workspaces,target=/workspaces/langchain,type=volume"
    ],
    // Prevent the container from shutting down
    "overrideCommand": true,
    // Features to add to the dev container. More info: https://containers.dev/features
    "features": {
        "ghcr.io/devcontainers/features/git:1": {},
        "ghcr.io/devcontainers/features/github-cli:1": {}
    },
    "containerEnv": {
        "UV_LINK_MODE": "copy"
    },
    // Use 'forwardPorts' to make a list of ports inside the container available locally.
    // "forwardPorts": [],
    // Run commands after the container is created
    "postCreateCommand": "cd libs/langchain_v1 && uv sync && echo 'LangChain (Python) dev environment ready!'",
    // Configure tool-specific properties.
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-python.python",
                "ms-python.debugpy",
                "ms-python.mypy-type-checker",
                "ms-python.isort",
                "unifiedjs.vscode-mdx",
                "davidanson.vscode-markdownlint",
                "ms-toolsai.jupyter",
                "GitHub.copilot",
                "GitHub.copilot-chat"
            ],
            "settings": {
                "python.defaultInterpreterPath": "libs/langchain_v1/.venv/bin/python",
                "python.formatting.provider": "none",
                "[python]": {
                    "editor.formatOnSave": true,
                    "editor.codeActionsOnSave": {
                        "source.organizeImports": true
                    }
                }
            }
        }
    }
    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
    // "remoteUser": "root"

    // Name for the dev container
    "name": "langchain",

    // Point to a Docker Compose file
    "dockerComposeFile": "./docker-compose.yaml",

    // Required when using Docker Compose. The name of the service to connect to once running
    "service": "langchain",

    // The optional 'workspaceFolder' property is the path VS Code should open by default when
    // connected. This is typically a file mount in .devcontainer/docker-compose.yml
    "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",

    // Prevent the container from shutting down
    "overrideCommand": true

    // Features to add to the dev container. More info: https://containers.dev/features
    // "features": {
    //     "ghcr.io/devcontainers-contrib/features/poetry:2": {}
    // }

    // Use 'forwardPorts' to make a list of ports inside the container available locally.
    // "forwardPorts": [],

    // Uncomment the next line to run commands after the container is created.
    // "postCreateCommand": "cat /etc/os-release",

    // Configure tool-specific properties.
    // "customizations": {},

    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
    // "remoteUser": "root"
}
.devcontainer/docker-compose.yaml

@@ -4,10 +4,29 @@ services:

    build:
      dockerfile: libs/langchain/dev.Dockerfile
      context: ..

    volumes:
      # Update this to wherever you want VS Code to mount the folder of your project
      - ..:/workspaces:cached

    networks:
      - langchain-network

    # environment:
    #   MONGO_ROOT_USERNAME: root
    #   MONGO_ROOT_PASSWORD: example123
    # depends_on:
    #   - mongo

  # mongo:
  #   image: mongo
  #   restart: unless-stopped
  #   environment:
  #     MONGO_INITDB_ROOT_USERNAME: root
  #     MONGO_INITDB_ROOT_PASSWORD: example123
  #   ports:
  #     - "27017:27017"
  #   networks:
  #     - langchain-network

networks:
  langchain-network:
    driver: bridge
@@ -1,34 +0,0 @@
|
||||
# Git
|
||||
.git
|
||||
.github
|
||||
|
||||
# Python
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv
|
||||
.mypy_cache
|
||||
.pytest_cache
|
||||
.ruff_cache
|
||||
*.egg-info
|
||||
.tox
|
||||
|
||||
# IDE
|
||||
.idea
|
||||
.vscode
|
||||
|
||||
# Worktree
|
||||
worktree
|
||||
|
||||
# Test artifacts
|
||||
.coverage
|
||||
htmlcov
|
||||
coverage.xml
|
||||
|
||||
# Build artifacts
|
||||
dist
|
||||
build
|
||||
|
||||
# Misc
|
||||
*.log
|
||||
.DS_Store
|
||||
@@ -1,52 +0,0 @@
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# All files
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
# Python files
|
||||
[*.py]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
max_line_length = 88
|
||||
|
||||
# JSON files
|
||||
[*.json]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# YAML files
|
||||
[*.{yml,yaml}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# Markdown files
|
||||
[*.md]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
# Configuration files
|
||||
[*.{toml,ini,cfg}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
# Shell scripts
|
||||
[*.sh]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# Makefile
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
|
||||
# Jupyter notebooks
|
||||
[*.ipynb]
|
||||
# Jupyter may include trailing whitespace in cell
|
||||
# outputs that's semantically meaningful
|
||||
trim_trailing_whitespace = false
|
||||
3 .github/CODEOWNERS vendored

@@ -1,3 -0,0 @@

/.github/ @ccurme @eyurtsev @mdrxy
/libs/core/ @eyurtsev
/libs/partners/ @ccurme @mdrxy
132 .github/CODE_OF_CONDUCT.md vendored Normal file

@@ -0,0 +1,132 @@

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
conduct@langchain.dev.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
344 .github/CONTRIBUTING.md vendored Normal file

@@ -0,0 +1,344 @@

# Contributing to LangChain

Hi there! Thank you for even being interested in contributing to LangChain.
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether they involve new features, improved infrastructure, better documentation, or bug fixes.

## 🗺️ Guidelines

### 👩‍💻 Contributing Code

To contribute to this project, please follow the ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
Please do not try to push directly to this repo unless you are a maintainer.

Please follow the checked-in pull request template when opening pull requests. Note related issues and tag relevant maintainers.

Pull requests cannot land without first passing the formatting, linting, and testing checks. See [Testing](#testing) and [Formatting and Linting](#formatting-and-linting) for how to run these checks locally.

It's essential that we maintain great documentation and testing. If you:

- Fix a bug
  - Add a relevant unit or integration test when possible. These live in `tests/unit_tests` and `tests/integration_tests`.
- Make an improvement
  - Update any affected example notebooks and documentation. These live in `docs`.
  - Update unit and integration tests when relevant.
- Add a feature
  - Add a demo notebook in `docs/docs/`.
  - Add unit and integration tests.

We are a small, progress-oriented team. If there's something you'd like to add or change, opening a pull request is the best way to get our attention.

### 🚩 GitHub Issues

Our [issues](https://github.com/langchain-ai/langchain/issues) page is kept up to date with bugs, improvements, and feature requests.

There is a taxonomy of labels to help with sorting and discovery of issues of interest. Please use these to help organize issues.

If you start working on an issue, please assign it to yourself.

If you are adding an issue, please try to keep it focused on a single, modular bug/improvement/feature.
If two issues are related, or blocking, please link them rather than combining them.

We will try to keep these issues as up to date as possible, though with the rapid rate of development in this field some may get out of date.
If you notice this happening, please let us know.

### 🙋 Getting Help

Our goal is to have the simplest developer setup possible. Should you experience any difficulty getting set up, please contact a maintainer! Not only do we want to help get you unblocked, but we also want to make sure that the process is smooth for future contributors.

In a similar vein, we do enforce certain linting, formatting, and documentation standards in the codebase.
If you are finding these difficult (or even just annoying) to work with, feel free to contact a maintainer for help - we do not want these to get in the way of getting good code into the codebase.

## 🚀 Quick Start

This quick start guide explains how to run the repository locally.
For a [development container](https://containers.dev/), see the [.devcontainer folder](https://github.com/langchain-ai/langchain/tree/master/.devcontainer).

### Dependency Management: Poetry and other env/dependency managers

This project uses [Poetry](https://python-poetry.org/) v1.6.1+ as a dependency manager.

❗Note: *Before installing Poetry*, if you use `Conda`, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`).

Install Poetry: **[documentation on how to install it](https://python-poetry.org/docs/#installation)**.

❗Note: If you use `Conda` or `Pyenv` as your environment/package manager, after installing Poetry, tell Poetry to use the virtualenv python environment (`poetry config virtualenvs.prefer-active-python true`).
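For reference, a minimal sketch of that Conda + Poetry setup sequence (the env name `langchain` and the Python version are simply the examples from the notes above):

```bash
# Create and activate a fresh Conda env *before* installing Poetry
conda create -n langchain python=3.9
conda activate langchain

# Install Poetry per the documentation linked above, then
# point Poetry at the active (Conda/Pyenv) environment
poetry config virtualenvs.prefer-active-python true
```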
### Core vs. Experimental

This repository contains three separate projects:

- `langchain`: core langchain code, abstractions, and use cases.
- `langchain_core`: contains interfaces for key abstractions as well as logic for combining them in chains (LCEL).
- `langchain_experimental`: see the [Experimental README](https://github.com/langchain-ai/langchain/tree/master/libs/experimental/README.md) for more information.

Each of these has its own development environment. Docs are run from the top-level makefile, but development is split across separate test & release flows.

For this quickstart, start with langchain core:

```bash
cd libs/langchain
```

### Local Development Dependencies

Install langchain development requirements (for running langchain, running examples, linting, formatting, tests, and coverage):

```bash
poetry install --with test
```

Then verify dependency installation:

```bash
make test
```

If the tests don't pass, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.

If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.6.1+. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.6.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.

### Testing

_Some test dependencies are optional; see the section about optional dependencies._

Unit tests cover modular logic that does not require calls to outside APIs.
If you add new logic, please add a unit test.

To run unit tests:

```bash
make test
```

To run unit tests in Docker:

```bash
make docker_tests
```

There are also [integration tests and code-coverage](https://github.com/langchain-ai/langchain/tree/master/libs/langchain/tests/README.md) available.

### Only develop langchain_core or langchain_experimental

If you are only developing `langchain_core` or `langchain_experimental`, you can simply install the dependencies for the respective project and run tests:

```bash
cd libs/core
poetry install --with test
make test
```

Or:

```bash
cd libs/experimental
poetry install --with test
make test
```

### Formatting and Linting

Run these locally before submitting a PR; the CI system will check them as well.

#### Code Formatting

Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/).

To run formatting for docs, cookbook and templates:

```bash
make format
```

To run formatting for a library, run the same command from the relevant library directory:

```bash
cd libs/{LIBRARY}
make format
```

Additionally, you can run the formatter only on the files that have been modified in your current branch, as compared to the master branch, using the `format_diff` command:

```bash
make format_diff
```

This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase.

#### Linting

Linting for this project is done via a combination of [ruff](https://docs.astral.sh/ruff/rules/) and [mypy](http://mypy-lang.org/).

To run linting for docs, cookbook and templates:

```bash
make lint
```

To run linting for a library, run the same command from the relevant library directory:

```bash
cd libs/{LIBRARY}
make lint
```

In addition, you can run the linter only on the files that have been modified in your current branch, as compared to the master branch, using the `lint_diff` command:

```bash
make lint_diff
```

This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase.

We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.

#### Spellcheck

Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
Note that `codespell` only finds common typos, so it can produce false positives (flagging correctly spelled but rarely used words) and false negatives (missing genuinely misspelled words).

To check spelling for this project:

```bash
make spell_check
```

To fix spelling in place:

```bash
make spell_fix
```

If codespell is incorrectly flagging a word, you can skip the spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file.

```toml
[tool.codespell]
...
# Add here:
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure'
```

## Working with Optional Dependencies

LangChain relies heavily on optional dependencies to keep the LangChain package lightweight.

You only need to add a new dependency if a **unit test** relies on the package.
If your package is only required for **integration tests**, then you can skip these steps and leave all `pyproject.toml` and `poetry.lock` files alone.

If you're adding a new dependency to LangChain, assume that it will be an optional dependency, and that most users won't have it installed.

Users who do not have the dependency installed should be able to **import** your code without any side effects (no warnings, no errors, no exceptions).
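A minimal sketch of one way to satisfy this, deferring the import until the dependency is actually used (`somepackage` is a hypothetical optional dependency used purely for illustration):

```python
from typing import Any


def _import_somepackage() -> Any:
    """Import the optional dependency, raising a helpful error if it is missing."""
    try:
        import somepackage  # hypothetical optional dependency
    except ImportError as e:
        raise ImportError(
            "Could not import somepackage. "
            "Please install it with `pip install somepackage`."
        ) from e
    return somepackage


def do_something() -> None:
    # The ImportError surfaces only when the feature is actually used,
    # so merely importing this module stays side-effect free.
    pkg = _import_somepackage()
    print(pkg.__name__)  # placeholder for real usage
```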
To introduce the dependency to the `pyproject.toml` file correctly, please do the following:

1. Add the dependency to the main group as an optional dependency

   ```bash
   poetry add --optional [package_name]
   ```

2. Open `pyproject.toml` and add the dependency to the `extended_testing` extra
3. Relock the poetry file to update the extra.

   ```bash
   poetry lock --no-update
   ```

4. Add a unit test that at the very least attempts to import the new code. Ideally, the unit test makes use of lightweight fixtures to test the logic of the code.
5. Please use the `@pytest.mark.requires(package_name)` decorator for any tests that require the dependency (see the sketch after this list).
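For instance, a minimal sketch of such a gated unit test (`somepackage` is again a hypothetical package name):

```python
import pytest


@pytest.mark.requires("somepackage")
def test_somepackage_importable() -> None:
    """At the very least, the optional integration should import cleanly."""
    import somepackage  # noqa: F401
```

Marking the test this way lets contributors who don't have the optional dependency installed still run the unit-test suite.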
## Adding a Jupyter Notebook

If you are adding a Jupyter Notebook example, you'll want to install the optional `dev` dependencies.

To install dev dependencies:

```bash
poetry install --with dev
```

Launch a notebook:

```bash
poetry run jupyter notebook
```

When you run `poetry install`, the `langchain` package is installed as editable in the virtualenv, so your new logic can be imported into the notebook.

## Documentation

While the code is split between `langchain` and `langchain.experimental`, the documentation is one holistic thing.
This section covers how to get started contributing to documentation.

From the top level of this repo, install documentation dependencies:

```bash
poetry install
```

### Contribute Documentation

The docs directory contains the Documentation and the API Reference.

Documentation is built using [Docusaurus 2](https://docusaurus.io/).

The API Reference is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
For that reason, we ask that you add good documentation to all classes and methods.

Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.

### Build Documentation Locally

In the following commands, the prefix `api_` indicates that those are operations for the API Reference.

Before building the documentation, it is always a good idea to clean the build directory:

```bash
make docs_clean
make api_docs_clean
```

Next, you can build the documentation as outlined below:

```bash
make docs_build
make api_docs_build
```

Finally, run the link checker to ensure all links are valid:

```bash
make docs_linkcheck
make api_docs_linkcheck
```

### Verify Documentation Changes

After pushing documentation changes to the repository, you can preview and verify that the changes are what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page.
This will take you to a preview of the documentation changes.
This preview is created by [Vercel](https://vercel.com/docs/getting-started-with-vercel).

## 🏭 Release Process

As of now, LangChain has an ad hoc release process: releases are cut with high frequency by a developer and published to [PyPI](https://pypi.org/project/langchain/).

LangChain follows the [semver](https://semver.org/) versioning standard. However, as pre-1.0 software, even patch releases may contain [non-backwards-compatible changes](https://semver.org/#spec-item-4).

### 🌟 Recognition

If your contribution has made its way into a release, we will want to give you credit on Twitter (only if you want, though)!
If you have a Twitter account you would like us to mention, please let us know in the PR or through another means.
215 .github/ISSUE_TEMPLATE/bug-report.yml vendored

@@ -1,153 +1,106 @@

name: "\U0001F41B Bug Report"
description: Report a bug in LangChain. To report a security issue, please instead use the security option (below). For questions, please use the LangChain forum (below).
labels: ["bug"]
type: bug
description: Submit a bug report to help us improve LangChain. To report a security issue, please instead use the security option below.
labels: ["02 Bug Report"]
body:
  - type: markdown
    attributes:
      value: |
        > **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
      value: >
        Thank you for taking the time to file a bug report. Before creating a new
        issue, please make sure to take a few moments to check the issue tracker
        for existing issues about the bug.

        Thank you for taking the time to file a bug report.

        For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).

        Check these before submitting to see if your issue has already been reported, fixed or if there's another way to solve your problem:

        * [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference Documentation](https://reference.langchain.com/python/),
        * [LangChain ChatBot](https://chat.langchain.com/)
        * [GitHub search](https://github.com/langchain-ai/langchain),
        * [LangChain Forum](https://forum.langchain.com/),
  - type: checkboxes
    id: checks
    attributes:
      label: Checked other resources
      description: Please confirm and check all the following options.
      options:
        - label: This is a bug, not a usage question.
          required: true
        - label: I added a clear and descriptive title that summarizes this issue.
          required: true
        - label: I used the GitHub search to find a similar question and didn't find it.
          required: true
        - label: I am sure that this is a bug in LangChain rather than my code.
          required: true
        - label: The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).
          required: true
        - label: This is not related to the langchain-community package.
          required: true
        - label: I posted a self-contained, minimal, reproducible example. A maintainer can copy it and run it AS IS.
          required: true
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Which `langchain` package(s) is this bug related to? Select at least one.

        Note that if the package you are reporting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in [`langchain-ai/langchain-google`](https://github.com/langchain-ai/langchain-google/)).

        Please report issues for other packages to their respective repositories.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-openrouter
        - label: langchain-perplexity
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
  - type: textarea
    id: related
    validations:
      required: false
    id: system-info
    attributes:
      label: Related Issues / PRs
      label: System Info
      description: Please share your system info with us.
      placeholder: LangChain version, platform, python version, ...
    validations:
      required: true
  - type: textarea
    id: who-can-help
    attributes:
      label: Who can help?
      description: |
        If this bug is related to any existing issues or pull requests, please link them here.
      placeholder: |
        * e.g. #123, #456
        Your issue will be replied to more quickly if you can figure out the right person to tag with @
        If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.

        The core maintainers strive to read all issues, but tagging them will help them prioritize.

        Please tag fewer than 3 people.

        @hwchase17 - project lead

        Tracing / Callbacks
        - @agola11

        Async
        - @agola11

        DataLoader Abstractions
        - @eyurtsev

        LLM/Chat Wrappers
        - @hwchase17
        - @agola11

        Tools / Toolkits
        - ...
      placeholder: "@Username ..."
  - type: checkboxes
    id: information-scripts-examples
    attributes:
      label: Information
      description: "The problem arises when using:"
      options:
        - label: "The official example notebooks/scripts"
        - label: "My own modified scripts"
  - type: checkboxes
    id: related-components
    attributes:
      label: Related Components
      description: "Select the components related to the issue (if applicable):"
      options:
        - label: "LLMs/Chat Models"
        - label: "Embedding Models"
        - label: "Prompts / Prompt Templates / Prompt Selectors"
        - label: "Output Parsers"
        - label: "Document Loaders"
        - label: "Vector Stores / Retrievers"
        - label: "Memory"
        - label: "Agents / Agent Executors"
        - label: "Tools / Toolkits"
        - label: "Chains"
        - label: "Callbacks/Tracing"
        - label: "Async"
  - type: textarea
    id: reproduction
    validations:
      required: true
    attributes:
      label: Reproduction Steps / Example Code (Python)
      label: Reproduction
      description: |
        Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case.
        Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
        If you have code snippets, error messages, stack traces please provide them here as well.
        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
        Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.

        If a maintainer can copy it, run it, and see it right away, there's a much higher chance that you'll be able to get help.

        **Important!**

        * Avoid screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
        * Reduce your code to the minimum required to reproduce the issue if possible.

        (This will be automatically formatted into code, so no need for backticks.)
      render: python
      placeholder: |
        from langchain_core.runnables import RunnableLambda
        Steps to reproduce the behavior:

        def bad_code(inputs) -> int:
            raise NotImplementedError('For demo purpose')
        1.
        2.
        3.

        chain = RunnableLambda(bad_code)
        chain.invoke('Hello!')
  - type: textarea
    attributes:
      label: Error Message and Stack Trace (if applicable)
      description: |
        If you are reporting an error, please copy and paste the full error message and
        stack trace.
        (This will be automatically formatted into code, so no need for backticks.)
      render: shell
  - type: textarea
    id: description
    attributes:
      label: Description
      description: |
        What is the problem, question, or error?

        Write a short description telling what you are doing, what you expect to happen, and what is currently happening.
      placeholder: |
        * I'm trying to use the `langchain` library to do X.
        * I expect to see Y.
        * Instead, it does Z.
    id: expected-behavior
    validations:
      required: true
  - type: textarea
    id: system-info
    attributes:
      label: System Info
      description: |
        Please share your system info with us.

        Run the following command in your terminal and paste the output here:

        `python -m langchain_core.sys_info`

        or if you have an existing python interpreter running:

        ```python
        from langchain_core import sys_info
        sys_info.print_sys_info()
        ```
      placeholder: |
        python -m langchain_core.sys_info
    validations:
      required: true
      label: Expected behavior
      description: "A clear and concise description of what you would expect to happen."
17 .github/ISSUE_TEMPLATE/config.yml vendored

@@ -1,15 +1,6 @@

blank_issues_enabled: false
blank_issues_enabled: true
version: 2.1
contact_links:
  - name: 💬 LangChain Forum
    url: https://forum.langchain.com/
    about: General community discussions and support
  - name: 📚 LangChain Documentation
    url: https://docs.langchain.com/oss/python/langchain/overview
    about: View the official LangChain documentation
  - name: 📚 API Reference Documentation
    url: https://reference.langchain.com/python/
    about: View the official LangChain API reference documentation
  - name: 📚 Documentation issue
    url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
    about: Report an issue related to the LangChain documentation
  - name: Discord
    url: https://discord.gg/6adMQxSpJS
    about: General community discussions
19 .github/ISSUE_TEMPLATE/documentation.yml vendored Normal file

@@ -0,0 +1,19 @@

name: Documentation
description: Report an issue related to the LangChain documentation.
title: "DOC: <Please write a comprehensive title after the 'DOC: ' prefix>"
labels: [03 - Documentation]

body:
  - type: textarea
    attributes:
      label: "Issue with current documentation:"
      description: >
        Please make sure to leave a reference to the document/code you're
        referring to.

  - type: textarea
    attributes:
      label: "Idea or request for content:"
      description: >
        Please describe as clearly as possible what topics you think are missing
        from the current documentation.
151 .github/ISSUE_TEMPLATE/feature-request.yml vendored

@@ -1,155 +1,30 @@

name: "✨ Feature Request"
description: Request a new feature or enhancement for LangChain. For questions, please use the LangChain forum (below).
labels: ["feature request"]
type: feature
name: "\U0001F680 Feature request"
description: Submit a proposal/request for a new LangChain feature
labels: ["02 Feature Request"]
body:
  - type: markdown
    attributes:
      value: |
        > **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).

        Thank you for taking the time to request a new feature.

        Use this to request NEW FEATURES or ENHANCEMENTS in LangChain. For bug reports, please use the bug report template. For usage questions and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).

        Relevant links to check before filing a feature request to see if your request has already been made or
        if there's another way to achieve what you want:

        * [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference Documentation](https://reference.langchain.com/python/),
        * [LangChain ChatBot](https://chat.langchain.com/)
        * [GitHub search](https://github.com/langchain-ai/langchain),
        * [LangChain Forum](https://forum.langchain.com/),

        **Note:** Do not begin work on a PR unless explicitly assigned to this issue by a maintainer.
  - type: checkboxes
    id: checks
    attributes:
      label: Checked other resources
      description: Please confirm and check all the following options.
      options:
        - label: This is a feature request, not a bug report or usage question.
          required: true
        - label: I added a clear and descriptive title that summarizes the feature request.
          required: true
        - label: I used the GitHub search to find a similar feature request and didn't find it.
          required: true
        - label: I checked the LangChain documentation and API reference to see if this feature already exists.
          required: true
        - label: This is not related to the langchain-community package.
          required: true
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Which `langchain` package(s) is this request related to? Select at least one.

        Note that if the package you are requesting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in `langchain-ai/langchain-google`).

        Please submit feature requests for other packages to their respective repositories.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-openrouter
        - label: langchain-perplexity
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
  - type: textarea
    id: feature-description
    id: feature-request
    validations:
      required: true
    attributes:
      label: Feature Description
      label: Feature request
      description: |
        Please provide a clear and concise description of the feature you would like to see added to LangChain.
        A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant.

        What specific functionality are you requesting? Be as detailed as possible.
      placeholder: |
        I would like LangChain to support...

        This feature would allow users to...
  - type: textarea
    id: use-case
    id: motivation
    validations:
      required: true
    attributes:
      label: Use Case
      label: Motivation
      description: |
        Describe the specific use case or problem this feature would solve.
        Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.

        Why do you need this feature? What problem does it solve for you or other users?
      placeholder: |
        I'm trying to build an application that...

        Currently, I have to work around this by...

        This feature would help me/users to...
  - type: textarea
    id: proposed-solution
    id: contribution
    validations:
      required: false
      required: true
    attributes:
      label: Proposed Solution
      label: Your contribution
      description: |
        If you have ideas about how this feature could be implemented, please describe them here.

        This is optional but can be helpful for maintainers to understand your vision.
      placeholder: |
        I think this could be implemented by...

        The API could look like...

        ```python
        # Example of how the feature might work
        ```
  - type: textarea
    id: alternatives
    validations:
      required: false
    attributes:
      label: Alternatives Considered
      description: |
        Have you considered any alternative solutions or workarounds?

        What other approaches have you tried or considered?
      placeholder: |
        I've tried using...

        Alternative approaches I considered:
        1. ...
        2. ...

        But these don't work because...
  - type: textarea
    id: additional-context
    validations:
      required: false
    attributes:
      label: Additional Context
      description: |
        Add any other context, screenshots, examples, or references that would help explain your feature request.
      placeholder: |
        Related issues: #...

        Similar features in other libraries:
        - ...

        Additional context or examples:
        - ...
        Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md)
18 .github/ISSUE_TEMPLATE/other.yml vendored Normal file

@@ -0,0 +1,18 @@

name: Other Issue
description: Raise an issue that wouldn't be covered by the other templates.
title: "Issue: <Please write a comprehensive title after the 'Issue: ' prefix>"
labels: [04 - Other]

body:
  - type: textarea
    attributes:
      label: "Issue you'd like to raise."
      description: >
        Please describe the issue you'd like to raise as clearly as possible.
        Make sure to include any relevant links or references.

  - type: textarea
    attributes:
      label: "Suggestion:"
      description: >
        Please outline a suggestion to improve the issue here.
49 .github/ISSUE_TEMPLATE/privileged.yml vendored

@@ -1,49 -0,0 @@

name: 🔒 Privileged
description: You are a LangChain maintainer, or were asked directly by a maintainer to create an issue here. If not, check the other options.
body:
  - type: markdown
    attributes:
      value: |
        If you are not a LangChain maintainer, employee, or were not asked directly by a maintainer to create an issue, then please start the conversation on the [LangChain Forum](https://forum.langchain.com/) instead.
  - type: checkboxes
    id: privileged
    attributes:
      label: Privileged issue
      description: Confirm that you are allowed to create an issue here.
      options:
        - label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create an issue here.
          required: true
  - type: textarea
    id: content
    attributes:
      label: Issue Content
      description: Add the content of the issue here.
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Please select package(s) that this issue is related to.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-openrouter
        - label: langchain-perplexity
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
120 .github/ISSUE_TEMPLATE/task.yml vendored

@@ -1,120 -0,0 @@

name: "📋 Task"
description: Create a task for project management and tracking by LangChain maintainers. If you are not a maintainer, please use other templates or the forum.
labels: ["task"]
type: task
body:
  - type: markdown
    attributes:
      value: |
        Thanks for creating a task to help organize LangChain development.

        This template is for **maintainer tasks** such as project management, development planning, refactoring, documentation updates, and other organizational work.

        If you are not a LangChain maintainer or were not asked directly by a maintainer to create a task, then please start the conversation on the [LangChain Forum](https://forum.langchain.com/) instead or use the appropriate bug report or feature request templates on the previous page.
  - type: checkboxes
    id: maintainer
    attributes:
      label: Maintainer task
      description: Confirm that you are allowed to create a task here.
      options:
        - label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create a task here.
          required: true
  - type: textarea
    id: task-description
    attributes:
      label: Task Description
      description: |
        Provide a clear and detailed description of the task.

        What needs to be done? Be specific about the scope and requirements.
      placeholder: |
        This task involves...

        The goal is to...

        Specific requirements:
        - ...
        - ...
    validations:
      required: true
  - type: textarea
    id: acceptance-criteria
    attributes:
      label: Acceptance Criteria
      description: |
        Define the criteria that must be met for this task to be considered complete.

        What are the specific deliverables or outcomes expected?
      placeholder: |
        This task will be complete when:
        - [ ] ...
        - [ ] ...
        - [ ] ...
    validations:
      required: true
  - type: textarea
    id: context
    attributes:
      label: Context and Background
      description: |
        Provide any relevant context, background information, or links to related issues/PRs.

        Why is this task needed? What problem does it solve?
      placeholder: |
        Background:
        - ...

        Related issues/PRs:
        - #...

        Additional context:
        - ...
    validations:
      required: false
  - type: textarea
    id: dependencies
    attributes:
      label: Dependencies
      description: |
        List any dependencies or blockers for this task.

        Are there other tasks, issues, or external factors that need to be completed first?
      placeholder: |
        This task depends on:
        - [ ] Issue #...
        - [ ] PR #...
        - [ ] External dependency: ...

        Blocked by:
        - ...
    validations:
      required: false
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Please select package(s) that this task is related to.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-openrouter
        - label: langchain-perplexity
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
53  .github/PULL_REQUEST_TEMPLATE.md (vendored)
@@ -1,43 +1,20 @@
|
||||
Fixes #
|
||||
<!-- Thank you for contributing to LangChain!
|
||||
|
||||
<!-- Replace everything above this line with a 1-2 sentence description of your change. Keep the "Fixes #xx" keyword and update the issue number. -->
|
||||
Replace this entire comment with:
|
||||
- **Description:** a description of the change,
|
||||
- **Issue:** the issue # it fixes (if applicable),
|
||||
- **Dependencies:** any dependencies required for this change,
|
||||
- **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below),
|
||||
- **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!
|
||||
|
||||
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
|
||||
Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally.
|
||||
|
||||
> **All contributions must be in English.** See the [language policy](https://docs.langchain.com/oss/python/contributing/overview#language-policy).
|
||||
See contribution guidelines for more information on how to write/run tests, lint, etc:
|
||||
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
|
||||
|
||||
If you paste a large clearly AI generated description here your PR may be IGNORED or CLOSED!
|
||||
If you're adding a new integration, please include:
|
||||
1. a test for the integration, preferably unit tests that do not rely on network access,
|
||||
2. an example notebook showing its use. It lives in `docs/extras` directory.
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
|
||||
|
||||
1. PR title: Should follow the format: TYPE(SCOPE): DESCRIPTION
|
||||
|
||||
- Examples:
|
||||
- fix(anthropic): resolve flag parsing error
|
||||
- feat(core): add multi-tenant support
|
||||
- test(openai): update API usage tests
|
||||
- Allowed TYPE and SCOPE values: https://github.com/langchain-ai/langchain/blob/master/.github/workflows/pr_lint.yml#L15-L33
|
||||
|
||||
2. PR description:
|
||||
|
||||
- Write 1-2 sentences summarizing the change.
|
||||
- The `Fixes #xx` line at the top is **required** for external contributions — update the issue number and keep the keyword. This links your PR to the approved issue and auto-closes it on merge.
|
||||
- If there are any breaking changes, please clearly describe them.
|
||||
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.
|
||||
|
||||
3. Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.
|
||||
|
||||
- We will not consider a PR unless these three are passing in CI.
|
||||
|
||||
4. How did you verify your code works?
|
||||
|
||||
Additional guidelines:
|
||||
|
||||
- All external PRs must link to an issue or discussion where a solution has been approved by a maintainer, and you must be assigned to that issue. PRs without prior approval will be closed.
|
||||
- PRs should not touch more than one package unless absolutely necessary.
|
||||
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so by a maintainer.
|
||||
|
||||
## Social handles (optional)
|
||||
<!-- If you'd like a shoutout on release, add your socials below -->
|
||||
Twitter: @
|
||||
LinkedIn: https://linkedin.com/in/
|
||||
If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17.
|
||||
-->
|
||||
|
||||
91  .github/actions/poetry_setup/action.yml (vendored, new file)
@@ -0,0 +1,91 @@
|
||||
# An action for setting up poetry install with caching.
|
||||
# Using a custom action since the default action does not
|
||||
# take poetry install groups into account.
|
||||
# Action code from:
|
||||
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
|
||||
name: poetry-install-with-caching
|
||||
description: Poetry install with support for caching of dependency groups.
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: Python version, supporting MAJOR.MINOR only
|
||||
required: true
|
||||
|
||||
poetry-version:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory whose poetry.lock file should be cached
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/setup-python@v4
|
||||
name: Setup python ${{ inputs.python-version }}
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-bin-poetry
|
||||
name: Cache Poetry binary - Python ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
/opt/pipx/venvs/poetry
|
||||
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
|
||||
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
|
||||
|
||||
- name: Refresh shell hashtable and fixup softlinks
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
# Refresh the shell hashtable, to ensure correct `which` output.
|
||||
hash -r
|
||||
|
||||
# `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
|
||||
# Delete and recreate the softlinks pipx expects to have.
|
||||
rm /opt/pipx/venvs/poetry/bin/python
|
||||
cd /opt/pipx/venvs/poetry/bin
|
||||
ln -s "$(which "python$PYTHON_VERSION")" python
|
||||
chmod +x python
|
||||
cd /opt/pipx_bin/
|
||||
ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
|
||||
chmod +x poetry
|
||||
|
||||
# Ensure everything got set up correctly.
|
||||
/opt/pipx/venvs/poetry/bin/python --version
|
||||
/opt/pipx_bin/poetry --version
|
||||
|
||||
- name: Install poetry
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: pipx install "poetry==$POETRY_VERSION" --python "python$PYTHON_VERSION" --verbose
|
||||
|
||||
- name: Restore pip and poetry cached dependencies
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
|
||||
39  .github/actions/uv_setup/action.yml (vendored)
@@ -1,39 +0,0 @@
# Helper to set up Python and uv with caching

name: uv-install
description: Set up Python and uv with caching

inputs:
  python-version:
    description: Python version, supporting MAJOR.MINOR only
    required: true
  enable-cache:
    description: Enable caching for uv dependencies
    required: false
    default: "true"
  cache-suffix:
    description: Custom cache key suffix for cache invalidation
    required: false
    default: ""
  working-directory:
    description: Working directory for cache glob scoping
    required: false
    default: "**"

env:
  UV_VERSION: "0.5.25"

runs:
  using: composite
  steps:
    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
      with:
        version: ${{ env.UV_VERSION }}
        python-version: ${{ inputs.python-version }}
        enable-cache: ${{ inputs.enable-cache }}
        cache-dependency-glob: |
          ${{ inputs.working-directory }}/pyproject.toml
          ${{ inputs.working-directory }}/uv.lock
          ${{ inputs.working-directory }}/requirements*.txt
        cache-suffix: ${{ inputs.cache-suffix }}
95  .github/dependabot.yml (vendored)
@@ -1,95 +0,0 @@
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
# and
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
- "/libs/core/"
|
||||
- "/libs/langchain/"
|
||||
- "/libs/langchain_v1/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
- "/libs/partners/anthropic/"
|
||||
- "/libs/partners/chroma/"
|
||||
- "/libs/partners/deepseek/"
|
||||
- "/libs/partners/exa/"
|
||||
- "/libs/partners/fireworks/"
|
||||
- "/libs/partners/groq/"
|
||||
- "/libs/partners/huggingface/"
|
||||
- "/libs/partners/mistralai/"
|
||||
- "/libs/partners/nomic/"
|
||||
- "/libs/partners/ollama/"
|
||||
- "/libs/partners/openai/"
|
||||
- "/libs/partners/openrouter/"
|
||||
- "/libs/partners/perplexity/"
|
||||
- "/libs/partners/qdrant/"
|
||||
- "/libs/partners/xai/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
|
||||
- package-ecosystem: "uv"
|
||||
directories:
|
||||
- "/libs/text-splitters/"
|
||||
- "/libs/standard-tests/"
|
||||
- "/libs/model-profiles/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
minor-and-patch:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
major:
|
||||
patterns:
|
||||
- "*"
|
||||
update-types:
|
||||
- "major"
|
||||
6  .github/images/logo-dark.svg (vendored): LangChain wordmark logo, dark variant, removed (472x100 SVG, 3.9 KiB; path data omitted)
6  .github/images/logo-light.svg (vendored): LangChain wordmark logo, light variant, removed (472x100 SVG, 3.9 KiB; path data omitted)
362  .github/scripts/check_diff.py (vendored)
@@ -1,263 +1,17 @@
|
||||
"""Analyze git diffs to determine which directories need to be tested.
|
||||
|
||||
Intelligently determines which LangChain packages and directories need to be tested,
|
||||
linted, or built based on the changes. Handles dependency relationships between
|
||||
packages, maps file changes to appropriate CI job configurations, and outputs JSON
|
||||
configurations for GitHub Actions.
|
||||
|
||||
- Maps changed files to affected package directories (libs/core, libs/partners/*, etc.)
|
||||
- Builds dependency graph to include dependent packages when core components change
|
||||
- Generates test matrix configurations with appropriate Python versions
|
||||
- Handles special cases for Pydantic version testing and performance benchmarks
|
||||
|
||||
Used as part of the check_diffs workflow.
|
||||
"""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set
|
||||
|
||||
import tomllib
|
||||
from get_min_versions import get_min_version_from_toml
|
||||
from packaging.requirements import Requirement
|
||||
|
||||
LANGCHAIN_DIRS = [
|
||||
ALL_DIRS = {
|
||||
"libs/core",
|
||||
"libs/text-splitters",
|
||||
"libs/langchain",
|
||||
"libs/langchain_v1",
|
||||
"libs/model-profiles",
|
||||
]
|
||||
|
||||
# Packages with VCR cassette-backed integration tests.
|
||||
# These get a playback-only CI check to catch stale cassettes.
|
||||
VCR_PACKAGES = {
|
||||
"libs/experimental",
|
||||
"libs/community",
|
||||
"libs/partners/openai",
|
||||
}
|
||||
|
||||
# When set to True, we are ignoring core dependents
|
||||
# in order to be able to get CI to pass for each individual
|
||||
# package that depends on core
|
||||
# e.g. if you touch core, we don't then add textsplitters/etc to CI
|
||||
IGNORE_CORE_DEPENDENTS = False
|
||||
|
||||
# Ignored partners are removed from dependents but still run if directly edited
|
||||
IGNORED_PARTNERS = [
|
||||
# remove huggingface from dependents because of CI instability
|
||||
# specifically in huggingface jobs
|
||||
"huggingface",
|
||||
]
|
||||
|
||||
|
||||
def all_package_dirs() -> Set[str]:
|
||||
return {
|
||||
"/".join(path.split("/")[:-1]).lstrip("./")
|
||||
for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
|
||||
if "libs/standard-tests" not in path
|
||||
}
|
||||
|
||||
|
||||
def dependents_graph() -> dict:
|
||||
"""Construct a mapping of package -> dependents
|
||||
|
||||
Done such that we can run tests on all dependents of a package when a change is made.
|
||||
"""
|
||||
dependents = defaultdict(set)
|
||||
|
||||
for path in glob.glob("./libs/**/pyproject.toml", recursive=True):
|
||||
if "template" in path:
|
||||
continue
|
||||
|
||||
# load regular and test deps from pyproject.toml
|
||||
with open(path, "rb") as f:
|
||||
pyproject = tomllib.load(f)
|
||||
|
||||
pkg_dir = "libs" + "/".join(path.split("libs")[1].split("/")[:-1])
|
||||
for dep in [
|
||||
*pyproject["project"]["dependencies"],
|
||||
*pyproject["dependency-groups"]["test"],
|
||||
]:
|
||||
requirement = Requirement(dep)
|
||||
package_name = requirement.name
|
||||
if "langchain" in dep:
|
||||
dependents[package_name].add(pkg_dir)
|
||||
continue
|
||||
|
||||
# load extended deps from extended_testing_deps.txt
|
||||
package_path = Path(path).parent
|
||||
extended_requirement_path = package_path / "extended_testing_deps.txt"
|
||||
if extended_requirement_path.exists():
|
||||
with open(extended_requirement_path, "r") as f:
|
||||
extended_deps = f.read().splitlines()
|
||||
for depline in extended_deps:
|
||||
if depline.startswith("-e "):
|
||||
# editable dependency
|
||||
assert depline.startswith("-e ../partners/"), (
|
||||
"Extended test deps should only editable install partner packages"
|
||||
)
|
||||
partner = depline.split("partners/")[1]
|
||||
dep = f"langchain-{partner}"
|
||||
else:
|
||||
dep = depline.split("==")[0]
|
||||
|
||||
if "langchain" in dep:
|
||||
dependents[dep].add(pkg_dir)
|
||||
|
||||
for k in dependents:
|
||||
for partner in IGNORED_PARTNERS:
|
||||
if f"libs/partners/{partner}" in dependents[k]:
|
||||
dependents[k].remove(f"libs/partners/{partner}")
|
||||
return dependents
|
||||
|
||||
|
||||
def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
|
||||
updated = set()
|
||||
for dir_ in dirs_to_eval:
|
||||
# handle core manually because it has so many dependents
|
||||
if "core" in dir_:
|
||||
updated.add(dir_)
|
||||
continue
|
||||
pkg = "langchain-" + dir_.split("/")[-1]
|
||||
updated.update(dependents[pkg])
|
||||
updated.add(dir_)
|
||||
return list(updated)
|
||||
|
||||
|
||||
def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
|
||||
if job == "test-pydantic":
|
||||
return _get_pydantic_test_configs(dir_)
|
||||
|
||||
if job == "codspeed":
|
||||
# CPU simulation (<1% variance, Valgrind-based) is the default.
|
||||
# Partners with heavy SDK inits use walltime instead to keep CI fast.
|
||||
CODSPEED_WALLTIME_DIRS = {
|
||||
"libs/core",
|
||||
"libs/partners/fireworks", # ~328s under simulation
|
||||
"libs/partners/openai", # 6 benchmarks, ~6 min under simulation
|
||||
}
|
||||
mode = "walltime" if dir_ in CODSPEED_WALLTIME_DIRS else "simulation"
|
||||
return [
|
||||
{
|
||||
"working-directory": dir_,
|
||||
"python-version": "3.13",
|
||||
"codspeed-mode": mode,
|
||||
}
|
||||
]
|
||||
if dir_ == "libs/core":
|
||||
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
else:
|
||||
py_versions = ["3.10", "3.14"]
|
||||
|
||||
return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions]
|
||||
|
||||
|
||||
def _get_pydantic_test_configs(
|
||||
dir_: str, *, python_version: str = "3.12"
|
||||
) -> List[Dict[str, str]]:
|
||||
with open("./libs/core/uv.lock", "rb") as f:
|
||||
core_uv_lock_data = tomllib.load(f)
|
||||
for package in core_uv_lock_data["package"]:
|
||||
if package["name"] == "pydantic":
|
||||
core_max_pydantic_minor = package["version"].split(".")[1]
|
||||
break
|
||||
|
||||
with open(f"./{dir_}/uv.lock", "rb") as f:
|
||||
dir_uv_lock_data = tomllib.load(f)
|
||||
|
||||
for package in dir_uv_lock_data["package"]:
|
||||
if package["name"] == "pydantic":
|
||||
dir_max_pydantic_minor = package["version"].split(".")[1]
|
||||
break
|
||||
|
||||
core_min_pydantic_version = get_min_version_from_toml(
|
||||
"./libs/core/pyproject.toml", "release", python_version, include=["pydantic"]
|
||||
)["pydantic"]
|
||||
core_min_pydantic_minor = (
|
||||
core_min_pydantic_version.split(".")[1]
|
||||
if "." in core_min_pydantic_version
|
||||
else "0"
|
||||
)
|
||||
dir_min_pydantic_version = get_min_version_from_toml(
|
||||
f"./{dir_}/pyproject.toml", "release", python_version, include=["pydantic"]
|
||||
).get("pydantic", "0.0.0")
|
||||
dir_min_pydantic_minor = (
|
||||
dir_min_pydantic_version.split(".")[1]
|
||||
if "." in dir_min_pydantic_version
|
||||
else "0"
|
||||
)
|
||||
|
||||
max_pydantic_minor = min(
|
||||
int(dir_max_pydantic_minor),
|
||||
int(core_max_pydantic_minor),
|
||||
)
|
||||
min_pydantic_minor = max(
|
||||
int(dir_min_pydantic_minor),
|
||||
int(core_min_pydantic_minor),
|
||||
)
|
||||
|
||||
configs = [
|
||||
{
|
||||
"working-directory": dir_,
|
||||
"pydantic-version": f"2.{v}.0",
|
||||
"python-version": python_version,
|
||||
}
|
||||
for v in range(min_pydantic_minor, max_pydantic_minor + 1)
|
||||
]
|
||||
return configs
|
||||
|
||||
|
||||
def _get_configs_for_multi_dirs(
|
||||
job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict
|
||||
) -> List[Dict[str, str]]:
|
||||
if job == "lint":
|
||||
dirs = add_dependents(
|
||||
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
|
||||
dependents,
|
||||
)
|
||||
elif job in ["test", "compile-integration-tests", "dependencies", "test-pydantic"]:
|
||||
dirs = add_dependents(
|
||||
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
|
||||
)
|
||||
elif job == "extended-tests":
|
||||
dirs = list(dirs_to_run["extended-test"])
|
||||
elif job == "codspeed":
|
||||
dirs = list(dirs_to_run["codspeed"])
|
||||
elif job == "vcr-tests":
|
||||
# Only run VCR tests for packages that have cassettes and are affected
|
||||
all_affected = set(
|
||||
add_dependents(
|
||||
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
|
||||
)
|
||||
)
|
||||
dirs = [d for d in VCR_PACKAGES if d in all_affected]
|
||||
else:
|
||||
raise ValueError(f"Unknown job: {job}")
|
||||
|
||||
return [
|
||||
config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_)
|
||||
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
files = sys.argv[1:]
|
||||
|
||||
dirs_to_run: Dict[str, set] = {
|
||||
"lint": set(),
|
||||
"test": set(),
|
||||
"extended-test": set(),
|
||||
"codspeed": set(),
|
||||
}
|
||||
docs_edited = False
|
||||
|
||||
if len(files) >= 300:
|
||||
# max diff length is 300 files - there are likely files missing
|
||||
dirs_to_run["lint"] = all_package_dirs()
|
||||
dirs_to_run["test"] = all_package_dirs()
|
||||
dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS)
|
||||
dirs_to_run = set()
|
||||
|
||||
for file in files:
|
||||
if any(
|
||||
@@ -266,94 +20,28 @@ if __name__ == "__main__":
|
||||
".github/workflows",
|
||||
".github/tools",
|
||||
".github/actions",
|
||||
"libs/core",
|
||||
".github/scripts/check_diff.py",
|
||||
)
|
||||
):
|
||||
# Infrastructure changes (workflows, actions, CI scripts) trigger tests on
|
||||
# all core packages as a safety measure. This ensures that changes to CI/CD
|
||||
# infrastructure don't inadvertently break package testing, even if the change
|
||||
# appears unrelated (e.g., documentation build workflows). This is intentionally
|
||||
# conservative to catch unexpected side effects from workflow modifications.
|
||||
#
|
||||
# Example: A PR modifying .github/workflows/api_doc_build.yml will trigger
|
||||
# lint/test jobs for libs/core, libs/text-splitters, libs/langchain, and
|
||||
# libs/langchain_v1, even though the workflow may only affect documentation.
|
||||
dirs_to_run["extended-test"].update(LANGCHAIN_DIRS)
|
||||
|
||||
if file.startswith("libs/core"):
|
||||
dirs_to_run["codspeed"].add("libs/core")
|
||||
if file.startswith("libs/langchain_v1"):
|
||||
dirs_to_run["codspeed"].add("libs/langchain_v1")
|
||||
if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
|
||||
# add that dir and all dirs after in LANGCHAIN_DIRS
|
||||
# for extended testing
|
||||
|
||||
found = False
|
||||
for dir_ in LANGCHAIN_DIRS:
|
||||
if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
|
||||
dirs_to_run["extended-test"].add(dir_)
|
||||
continue
|
||||
if file.startswith(dir_):
|
||||
found = True
|
||||
if found:
|
||||
dirs_to_run["extended-test"].add(dir_)
|
||||
elif file.startswith("libs/standard-tests"):
|
||||
# TODO: update to include all packages that rely on standard-tests (all partner packages)
|
||||
# Note: won't run on external repo partners
|
||||
dirs_to_run["lint"].add("libs/standard-tests")
|
||||
dirs_to_run["test"].add("libs/standard-tests")
|
||||
dirs_to_run["test"].add("libs/partners/mistralai")
|
||||
dirs_to_run["test"].add("libs/partners/openai")
|
||||
dirs_to_run["test"].add("libs/partners/anthropic")
|
||||
dirs_to_run["test"].add("libs/partners/fireworks")
|
||||
dirs_to_run["test"].add("libs/partners/groq")
|
||||
|
||||
elif file.startswith("libs/partners"):
|
||||
partner_dir = file.split("/")[2]
|
||||
if os.path.isdir(f"libs/partners/{partner_dir}") and [
|
||||
filename
|
||||
for filename in os.listdir(f"libs/partners/{partner_dir}")
|
||||
if not filename.startswith(".")
|
||||
] != ["README.md"]:
|
||||
dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
|
||||
# Skip codspeed for partners without benchmarks or in IGNORED_PARTNERS
|
||||
if partner_dir not in IGNORED_PARTNERS:
|
||||
dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}")
|
||||
# Skip if the directory was deleted or is just a tombstone readme
|
||||
elif file.startswith("libs/"):
|
||||
# Check if this is a root-level file in libs/ (e.g., libs/README.md)
|
||||
file_parts = file.split("/")
|
||||
if len(file_parts) == 2:
|
||||
# Root-level file in libs/, skip it (no tests needed)
|
||||
continue
|
||||
raise ValueError(
|
||||
f"Unknown lib: {file}. check_diff.py likely needs "
|
||||
"an update for this new library!"
|
||||
dirs_to_run = ALL_DIRS
|
||||
break
|
||||
elif "libs/community" in file:
|
||||
dirs_to_run.update(
|
||||
("libs/community", "libs/langchain", "libs/experimental")
|
||||
)
|
||||
elif file in [
|
||||
"pyproject.toml",
|
||||
"uv.lock",
|
||||
]: # root uv files
|
||||
docs_edited = True
|
||||
|
||||
dependents = dependents_graph()
|
||||
|
||||
# we now have dirs_by_job
|
||||
# todo: clean this up
|
||||
map_job_to_configs = {
|
||||
job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents)
|
||||
for job in [
|
||||
"lint",
|
||||
"test",
|
||||
"extended-tests",
|
||||
"compile-integration-tests",
|
||||
"dependencies",
|
||||
"test-pydantic",
|
||||
"codspeed",
|
||||
"vcr-tests",
|
||||
]
|
||||
}
|
||||
|
||||
for key, value in map_job_to_configs.items():
|
||||
json_output = json.dumps(value)
|
||||
print(f"{key}={json_output}")
|
||||
elif "libs/partners" in file:
|
||||
partner_dir = file.split("/")[2]
|
||||
dirs_to_run.update(
|
||||
(f"libs/partners/{partner_dir}", "libs/langchain", "libs/experimental")
|
||||
)
|
||||
elif "libs/langchain" in file:
|
||||
dirs_to_run.update(("libs/langchain", "libs/experimental"))
|
||||
elif "libs/experimental" in file:
|
||||
dirs_to_run.add("libs/experimental")
|
||||
elif file.startswith("libs/"):
|
||||
dirs_to_run = ALL_DIRS
|
||||
break
|
||||
else:
|
||||
pass
|
||||
print(json.dumps(list(dirs_to_run)))
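The diff above contains two output shapes for check_diff.py: the old script printed one `<job>=<json>` line per CI job, while the simplified script ends by printing a single JSON list of directories. A hedged sketch of both formats, not part of the repository; the directory and job values below are illustrative only.

```python
# Hedged sketch: the two output shapes visible in the check_diff.py diff above.
# Old style: one "<job>=<json>" line per job (typically appended to $GITHUB_OUTPUT
# and read back with fromJSON()). New style: a single JSON list of directories.
import json

old_style = 'lint=[{"working-directory": "libs/core", "python-version": "3.13"}]'
job, _, payload = old_style.partition("=")
assert job == "lint"
assert json.loads(payload)[0]["working-directory"] == "libs/core"

new_style = '["libs/langchain", "libs/experimental"]'
assert "libs/experimental" in json.loads(new_style)
```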
36  .github/scripts/check_prerelease_dependencies.py (vendored)
@@ -1,36 +0,0 @@
"""Check that no dependencies allow prereleases unless we're releasing a prerelease."""

import sys

import tomllib

if __name__ == "__main__":
    # Get the TOML file path from the command line argument
    toml_file = sys.argv[1]

    with open(toml_file, "rb") as file:
        toml_data = tomllib.load(file)

    # See if we're releasing an rc or dev version
    version = toml_data["project"]["version"]
    releasing_rc = "rc" in version or "dev" in version

    # If not, iterate through dependencies and make sure none allow prereleases
    if not releasing_rc:
        dependencies = toml_data["project"]["dependencies"]
        for dep_version in dependencies:
            dep_version_string = (
                dep_version["version"] if isinstance(dep_version, dict) else dep_version
            )

            if "rc" in dep_version_string:
                raise ValueError(
                    f"Dependency {dep_version} has a prerelease version. Please remove this."
                )

            if isinstance(dep_version, dict) and dep_version.get(
                "allow-prereleases", False
            ):
                raise ValueError(
                    f"Dependency {dep_version} has allow-prereleases set to true. Please remove this."
                )
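A quick, hedged illustration of the rule the script above enforces; the dependency strings are made up for the example.

```python
# Illustrative only: the kind of dependency entries check_prerelease_dependencies.py
# accepts or rejects when the package itself is not an rc/dev release.
ok = "langchain-core>=1.0.0,<2.0.0"        # accepted
bad = "langchain-core>=1.0.0rc1,<2.0.0"    # contains "rc" -> would raise ValueError
assert "rc" not in ok
assert "rc" in bad
```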
199  .github/scripts/get_min_versions.py (vendored)
@@ -1,199 +0,0 @@
|
||||
"""Get minimum versions of dependencies from a pyproject.toml file."""
|
||||
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
import tomllib
|
||||
else:
|
||||
# For Python 3.10 and below, which doesnt have stdlib tomllib
|
||||
import tomli as tomllib
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
from packaging.requirements import Requirement
|
||||
from packaging.specifiers import SpecifierSet
|
||||
from packaging.version import Version, parse
|
||||
|
||||
MIN_VERSION_LIBS = [
|
||||
"langchain-core",
|
||||
"langchain",
|
||||
"langchain-text-splitters",
|
||||
"numpy",
|
||||
"SQLAlchemy",
|
||||
]
|
||||
|
||||
# some libs only get checked on release because of simultaneous changes in
|
||||
# multiple libs
|
||||
SKIP_IF_PULL_REQUEST = [
|
||||
"langchain-core",
|
||||
"langchain-text-splitters",
|
||||
"langchain",
|
||||
]
|
||||
|
||||
|
||||
def get_pypi_versions(package_name: str) -> List[str]:
|
||||
"""Fetch all available versions for a package from PyPI.
|
||||
|
||||
Args:
|
||||
package_name: Name of the package
|
||||
|
||||
Returns:
|
||||
List of all available versions
|
||||
|
||||
Raises:
|
||||
requests.exceptions.RequestException: If PyPI API request fails
|
||||
KeyError: If package not found or response format unexpected
|
||||
"""
|
||||
pypi_url = f"https://pypi.org/pypi/{package_name}/json"
|
||||
response = requests.get(pypi_url, timeout=10.0)
|
||||
response.raise_for_status()
|
||||
return list(response.json()["releases"].keys())
|
||||
|
||||
|
||||
def get_minimum_version(package_name: str, spec_string: str) -> str | None:
|
||||
"""Find the minimum published version that satisfies the given constraints.
|
||||
|
||||
Args:
|
||||
package_name: Name of the package
|
||||
spec_string: Version specification string (e.g., ">=0.2.43,<0.4.0,!=0.3.0")
|
||||
|
||||
Returns:
|
||||
Minimum compatible version or None if no compatible version found
|
||||
"""
|
||||
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
|
||||
spec_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", spec_string)
|
||||
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1 (can be anywhere in constraint string)
|
||||
for y in range(1, 10):
|
||||
spec_string = re.sub(
|
||||
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}", spec_string
|
||||
)
|
||||
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
|
||||
for x in range(1, 10):
|
||||
spec_string = re.sub(
|
||||
rf"\^{x}\.(\d+)\.(\d+)", rf">={x}.\1.\2,<{x + 1}", spec_string
|
||||
)
|
||||
|
||||
spec_set = SpecifierSet(spec_string)
|
||||
all_versions = get_pypi_versions(package_name)
|
||||
|
||||
valid_versions = []
|
||||
for version_str in all_versions:
|
||||
try:
|
||||
version = parse(version_str)
|
||||
if spec_set.contains(version):
|
||||
valid_versions.append(version)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
return str(min(valid_versions)) if valid_versions else None
|
||||
|
||||
|
||||
def _check_python_version_from_requirement(
|
||||
requirement: Requirement, python_version: str
|
||||
) -> bool:
|
||||
if not requirement.marker:
|
||||
return True
|
||||
else:
|
||||
marker_str = str(requirement.marker)
|
||||
if "python_version" in marker_str or "python_full_version" in marker_str:
|
||||
python_version_str = "".join(
|
||||
char
|
||||
for char in marker_str
|
||||
if char.isdigit() or char in (".", "<", ">", "=", ",")
|
||||
)
|
||||
return check_python_version(python_version, python_version_str)
|
||||
return True
|
||||
|
||||
|
||||
def get_min_version_from_toml(
|
||||
toml_path: str,
|
||||
versions_for: str,
|
||||
python_version: str,
|
||||
*,
|
||||
include: list | None = None,
|
||||
):
|
||||
# Parse the TOML file
|
||||
with open(toml_path, "rb") as file:
|
||||
toml_data = tomllib.load(file)
|
||||
|
||||
dependencies = defaultdict(list)
|
||||
for dep in toml_data["project"]["dependencies"]:
|
||||
requirement = Requirement(dep)
|
||||
dependencies[requirement.name].append(requirement)
|
||||
|
||||
# Initialize a dictionary to store the minimum versions
|
||||
min_versions = {}
|
||||
|
||||
# Iterate over the libs in MIN_VERSION_LIBS
|
||||
for lib in set(MIN_VERSION_LIBS + (include or [])):
|
||||
if versions_for == "pull_request" and lib in SKIP_IF_PULL_REQUEST:
|
||||
# some libs only get checked on release because of simultaneous
|
||||
# changes in multiple libs
|
||||
continue
|
||||
# Check if the lib is present in the dependencies
|
||||
if lib in dependencies:
|
||||
if include and lib not in include:
|
||||
continue
|
||||
requirements = dependencies[lib]
|
||||
for requirement in requirements:
|
||||
if _check_python_version_from_requirement(requirement, python_version):
|
||||
version_string = str(requirement.specifier)
|
||||
break
|
||||
|
||||
# Use parse_version to get the minimum supported version from version_string
|
||||
min_version = get_minimum_version(lib, version_string)
|
||||
|
||||
# Store the minimum version in the min_versions dictionary
|
||||
min_versions[lib] = min_version
|
||||
|
||||
return min_versions
|
||||
|
||||
|
||||
def check_python_version(version_string, constraint_string):
|
||||
"""Check if the given Python version matches the given constraints.
|
||||
|
||||
Args:
|
||||
version_string: A string representing the Python version (e.g. "3.8.5").
|
||||
constraint_string: A string representing the package's Python version
|
||||
constraints (e.g. ">=3.6, <4.0").
|
||||
|
||||
Returns:
|
||||
True if the version matches the constraints
|
||||
"""
|
||||
|
||||
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
|
||||
constraint_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", constraint_string)
|
||||
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1.0 (can be anywhere in constraint string)
|
||||
for y in range(1, 10):
|
||||
constraint_string = re.sub(
|
||||
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}.0", constraint_string
|
||||
)
|
||||
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
|
||||
for x in range(1, 10):
|
||||
constraint_string = re.sub(
|
||||
rf"\^{x}\.0\.(\d+)", rf">={x}.0.\1,<{x + 1}.0.0", constraint_string
|
||||
)
|
||||
|
||||
try:
|
||||
version = Version(version_string)
|
||||
constraints = SpecifierSet(constraint_string)
|
||||
return version in constraints
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Get the TOML file path from the command line argument
|
||||
toml_file = sys.argv[1]
|
||||
versions_for = sys.argv[2]
|
||||
python_version = sys.argv[3]
|
||||
assert versions_for in ["release", "pull_request"]
|
||||
|
||||
# Call the function to get the minimum versions
|
||||
min_versions = get_min_version_from_toml(toml_file, versions_for, python_version)
|
||||
|
||||
print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))
|
||||
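The caret-constraint rewriting in get_minimum_version above can be sanity-checked in isolation. A small sketch, assuming the regexes are copied faithfully from the script; the input spec is just an example.

```python
# Small sketch of the caret-constraint rewriting used by get_minimum_version above.
# The substitutions mirror the script; "^0.2.43" becomes ">=0.2.43,<0.3".
import re

spec = "^0.2.43"
spec = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", spec)
for y in range(1, 10):
    spec = re.sub(rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}", spec)
for x in range(1, 10):
    spec = re.sub(rf"\^{x}\.(\d+)\.(\d+)", rf">={x}.\1.\2,<{x + 1}", spec)

assert spec == ">=0.2.43,<0.3"
```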
84  .github/scripts/pr-labeler-config.json (vendored)
@@ -1,84 +0,0 @@
|
||||
{
|
||||
"trustedThreshold": 5,
|
||||
"labelColor": "b76e79",
|
||||
"sizeThresholds": [
|
||||
{ "label": "size: XS", "max": 50 },
|
||||
{ "label": "size: S", "max": 200 },
|
||||
{ "label": "size: M", "max": 500 },
|
||||
{ "label": "size: L", "max": 1000 },
|
||||
{ "label": "size: XL" }
|
||||
],
|
||||
"excludedFiles": ["uv.lock"],
|
||||
"excludedPaths": ["docs/"],
|
||||
"typeToLabel": {
|
||||
"feat": "feature",
|
||||
"fix": "fix",
|
||||
"docs": "documentation",
|
||||
"style": "linting",
|
||||
"refactor": "refactor",
|
||||
"perf": "performance",
|
||||
"test": "tests",
|
||||
"build": "infra",
|
||||
"ci": "infra",
|
||||
"chore": "infra",
|
||||
"revert": "revert",
|
||||
"release": "release",
|
||||
"hotfix": "hotfix",
|
||||
"breaking": "breaking"
|
||||
},
|
||||
"scopeToLabel": {
|
||||
"core": "core",
|
||||
"langchain": "langchain",
|
||||
"langchain-classic": "langchain-classic",
|
||||
"model-profiles": "model-profiles",
|
||||
"standard-tests": "standard-tests",
|
||||
"text-splitters": "text-splitters",
|
||||
"anthropic": "anthropic",
|
||||
"chroma": "chroma",
|
||||
"deepseek": "deepseek",
|
||||
"exa": "exa",
|
||||
"fireworks": "fireworks",
|
||||
"groq": "groq",
|
||||
"huggingface": "huggingface",
|
||||
"mistralai": "mistralai",
|
||||
"nomic": "nomic",
|
||||
"ollama": "ollama",
|
||||
"openai": "openai",
|
||||
"openrouter": "openrouter",
|
||||
"perplexity": "perplexity",
|
||||
"qdrant": "qdrant",
|
||||
"xai": "xai",
|
||||
"deps": "dependencies",
|
||||
"docs": "documentation",
|
||||
"infra": "infra"
|
||||
},
|
||||
"fileRules": [
|
||||
{ "label": "core", "prefix": "libs/core/", "skipExcludedFiles": true },
|
||||
{ "label": "langchain-classic", "prefix": "libs/langchain/", "skipExcludedFiles": true },
|
||||
{ "label": "langchain", "prefix": "libs/langchain_v1/", "skipExcludedFiles": true },
|
||||
{ "label": "standard-tests", "prefix": "libs/standard-tests/", "skipExcludedFiles": true },
|
||||
{ "label": "model-profiles", "prefix": "libs/model-profiles/", "skipExcludedFiles": true },
|
||||
{ "label": "text-splitters", "prefix": "libs/text-splitters/", "skipExcludedFiles": true },
|
||||
{ "label": "integration", "prefix": "libs/partners/", "skipExcludedFiles": true },
|
||||
{ "label": "anthropic", "prefix": "libs/partners/anthropic/", "skipExcludedFiles": true },
|
||||
{ "label": "chroma", "prefix": "libs/partners/chroma/", "skipExcludedFiles": true },
|
||||
{ "label": "deepseek", "prefix": "libs/partners/deepseek/", "skipExcludedFiles": true },
|
||||
{ "label": "exa", "prefix": "libs/partners/exa/", "skipExcludedFiles": true },
|
||||
{ "label": "fireworks", "prefix": "libs/partners/fireworks/", "skipExcludedFiles": true },
|
||||
{ "label": "groq", "prefix": "libs/partners/groq/", "skipExcludedFiles": true },
|
||||
{ "label": "huggingface", "prefix": "libs/partners/huggingface/", "skipExcludedFiles": true },
|
||||
{ "label": "mistralai", "prefix": "libs/partners/mistralai/", "skipExcludedFiles": true },
|
||||
{ "label": "nomic", "prefix": "libs/partners/nomic/", "skipExcludedFiles": true },
|
||||
{ "label": "ollama", "prefix": "libs/partners/ollama/", "skipExcludedFiles": true },
|
||||
{ "label": "openai", "prefix": "libs/partners/openai/", "skipExcludedFiles": true },
|
||||
{ "label": "openrouter", "prefix": "libs/partners/openrouter/", "skipExcludedFiles": true },
|
||||
{ "label": "perplexity", "prefix": "libs/partners/perplexity/", "skipExcludedFiles": true },
|
||||
{ "label": "qdrant", "prefix": "libs/partners/qdrant/", "skipExcludedFiles": true },
|
||||
{ "label": "xai", "prefix": "libs/partners/xai/", "skipExcludedFiles": true },
|
||||
{ "label": "github_actions", "prefix": ".github/workflows/" },
|
||||
{ "label": "github_actions", "prefix": ".github/actions/" },
|
||||
{ "label": "dependencies", "suffix": "pyproject.toml" },
|
||||
{ "label": "dependencies", "exact": "uv.lock" },
|
||||
{ "label": "dependencies", "pattern": "(?:^|/)requirements[^/]*\\.txt$" }
|
||||
]
|
||||
}
|
||||
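To make the sizeThresholds block above concrete, here is a hedged Python re-statement of the lookup that pr-labeler.js performs with it. The threshold values are copied from the config; the function name is illustrative and not part of the repo.

```python
# Hedged sketch: how sizeThresholds map a changed-line count to a size label
# (mirrors getSizeLabel in pr-labeler.js). Values copied from the config above.
thresholds = [
    {"label": "size: XS", "max": 50},
    {"label": "size: S", "max": 200},
    {"label": "size: M", "max": 500},
    {"label": "size: L", "max": 1000},
    {"label": "size: XL"},  # last entry has no max: catch-all
]

def size_label(total_changed: int) -> str:
    for t in thresholds:
        if "max" in t and total_changed < t["max"]:
            return t["label"]
    return thresholds[-1]["label"]

assert size_label(10) == "size: XS"
assert size_label(350) == "size: M"
assert size_label(1200) == "size: XL"
```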
278  .github/scripts/pr-labeler.js (vendored)
@@ -1,278 +0,0 @@
|
||||
// Shared helpers for pr_labeler.yml and tag-external-issues.yml.
|
||||
//
|
||||
// Usage from actions/github-script (requires actions/checkout first):
|
||||
// const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
function loadConfig() {
|
||||
const configPath = path.join(__dirname, 'pr-labeler-config.json');
|
||||
let raw;
|
||||
try {
|
||||
raw = fs.readFileSync(configPath, 'utf8');
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to read ${configPath}: ${e.message}`);
|
||||
}
|
||||
let config;
|
||||
try {
|
||||
config = JSON.parse(raw);
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to parse pr-labeler-config.json: ${e.message}`);
|
||||
}
|
||||
const required = [
|
||||
'labelColor', 'sizeThresholds', 'fileRules',
|
||||
'typeToLabel', 'scopeToLabel', 'trustedThreshold',
|
||||
'excludedFiles', 'excludedPaths',
|
||||
];
|
||||
const missing = required.filter(k => !(k in config));
|
||||
if (missing.length > 0) {
|
||||
throw new Error(`pr-labeler-config.json missing required keys: ${missing.join(', ')}`);
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
function init(github, owner, repo, config, core) {
|
||||
if (!core) {
|
||||
throw new Error('init() requires a `core` parameter (e.g., from actions/github-script)');
|
||||
}
|
||||
const {
|
||||
trustedThreshold,
|
||||
labelColor,
|
||||
sizeThresholds,
|
||||
scopeToLabel,
|
||||
typeToLabel,
|
||||
fileRules: fileRulesDef,
|
||||
excludedFiles,
|
||||
excludedPaths,
|
||||
} = config;
|
||||
|
||||
const sizeLabels = sizeThresholds.map(t => t.label);
|
||||
const allTypeLabels = [...new Set(Object.values(typeToLabel))];
|
||||
const tierLabels = ['new-contributor', 'trusted-contributor'];
|
||||
|
||||
// ── Label management ──────────────────────────────────────────────
|
||||
|
||||
async function ensureLabel(name, color = labelColor) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({ owner, repo, name });
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
try {
|
||||
await github.rest.issues.createLabel({ owner, repo, name, color });
|
||||
} catch (createErr) {
|
||||
// 422 = label created by a concurrent run between our get and create
|
||||
if (createErr.status !== 422) throw createErr;
|
||||
core.info(`Label "${name}" creation returned 422 (likely already exists)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Size calculation ──────────────────────────────────────────────
|
||||
|
||||
function getSizeLabel(totalChanged) {
|
||||
for (const t of sizeThresholds) {
|
||||
if (t.max != null && totalChanged < t.max) return t.label;
|
||||
}
|
||||
// Last entry has no max — it's the catch-all
|
||||
return sizeThresholds[sizeThresholds.length - 1].label;
|
||||
}
|
||||
|
||||
function computeSize(files) {
|
||||
const excluded = new Set(excludedFiles);
|
||||
const totalChanged = files.reduce((sum, f) => {
|
||||
const p = f.filename ?? '';
|
||||
const base = p.split('/').pop();
|
||||
if (excluded.has(base)) return sum;
|
||||
for (const prefix of excludedPaths) {
|
||||
if (p.startsWith(prefix)) return sum;
|
||||
}
|
||||
return sum + (f.additions ?? 0) + (f.deletions ?? 0);
|
||||
}, 0);
|
||||
return { totalChanged, sizeLabel: getSizeLabel(totalChanged) };
|
||||
}
|
||||
|
||||
// ── File-based labels ─────────────────────────────────────────────
|
||||
|
||||
function buildFileRules() {
|
||||
return fileRulesDef.map((rule, i) => {
|
||||
let test;
|
||||
if (rule.prefix) test = p => p.startsWith(rule.prefix);
|
||||
else if (rule.suffix) test = p => p.endsWith(rule.suffix);
|
||||
else if (rule.exact) test = p => p === rule.exact;
|
||||
else if (rule.pattern) {
|
||||
const re = new RegExp(rule.pattern);
|
||||
test = p => re.test(p);
|
||||
} else {
|
||||
throw new Error(
|
||||
`fileRules[${i}] (label: "${rule.label}") has no recognized matcher ` +
|
||||
`(expected one of: prefix, suffix, exact, pattern)`
|
||||
);
|
||||
}
|
||||
return { label: rule.label, test, skipExcluded: !!rule.skipExcludedFiles };
|
||||
});
|
||||
}
|
||||
|
||||
function matchFileLabels(files, fileRules) {
  const rules = fileRules || buildFileRules();
  const excluded = new Set(excludedFiles);
  const labels = new Set();
  for (const rule of rules) {
    // skipExcluded: ignore files whose basename is in the top-level
    // "excludedFiles" list (e.g. uv.lock) so lockfile-only changes
    // don't trigger package labels.
    const candidates = rule.skipExcluded
      ? files.filter(f => !excluded.has((f.filename ?? '').split('/').pop()))
      : files;
    if (candidates.some(f => rule.test(f.filename ?? ''))) {
      labels.add(rule.label);
    }
  }
  return labels;
}

// ── Title-based labels ────────────────────────────────────────────

function matchTitleLabels(title) {
  const labels = new Set();
  const m = (title ?? '').match(/^(\w+)(?:\(([^)]+)\))?(!)?:/);
  if (!m) return { labels, type: null, typeLabel: null, scopes: [], breaking: false };

  const type = m[1].toLowerCase();
  const scopeStr = m[2] ?? '';
  const breaking = !!m[3];

  const typeLabel = typeToLabel[type] || null;
  if (typeLabel) labels.add(typeLabel);
  if (breaking) labels.add('breaking');

  const scopes = scopeStr.split(',').map(s => s.trim()).filter(Boolean);
  for (const scope of scopes) {
    const sl = scopeToLabel[scope];
    if (sl) labels.add(sl);
  }

  return { labels, type, typeLabel, scopes, breaking };
}

// ── Org membership ────────────────────────────────────────────────

async function checkMembership(author, userType) {
  if (userType === 'Bot') {
    console.log(`${author} is a Bot — treating as internal`);
    return { isExternal: false };
  }

  try {
    const membership = await github.rest.orgs.getMembershipForUser({
      org: 'langchain-ai',
      username: author,
    });
    const isExternal = membership.data.state !== 'active';
    console.log(
      isExternal
        ? `${author} has pending membership — treating as external`
        : `${author} is an active member of langchain-ai`,
    );
    return { isExternal };
  } catch (e) {
    if (e.status === 404) {
      console.log(`${author} is not a member of langchain-ai`);
      return { isExternal: true };
    }
    // Non-404 errors (rate limit, auth failure, server error) must not
    // silently default to external — rethrow to fail the step.
    throw new Error(
      `Membership check failed for ${author} (${e.status}): ${e.message}`,
    );
  }
}

// ── Contributor analysis ──────────────────────────────────────────

async function getContributorInfo(contributorCache, author, userType) {
  if (contributorCache.has(author)) return contributorCache.get(author);

  const { isExternal } = await checkMembership(author, userType);

  let mergedCount = null;
  if (isExternal) {
    try {
      const result = await github.rest.search.issuesAndPullRequests({
        q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
        per_page: 1,
      });
      mergedCount = result?.data?.total_count ?? null;
    } catch (e) {
      if (e?.status !== 422) throw e;
      core.warning(`Search failed for ${author}; skipping tier.`);
    }
  }

  const info = { isExternal, mergedCount };
  contributorCache.set(author, info);
  return info;
}

// ── Tier label resolution ───────────────────────────────────────────

async function applyTierLabel(issueNumber, author, { skipNewContributor = false } = {}) {
  let mergedCount;
  try {
    const result = await github.rest.search.issuesAndPullRequests({
      q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
      per_page: 1,
    });
    mergedCount = result?.data?.total_count;
  } catch (error) {
    if (error?.status !== 422) throw error;
    core.warning(`Search failed for ${author}; skipping tier label.`);
    return;
  }

  if (mergedCount == null) {
    core.warning(`Search response missing total_count for ${author}; skipping tier label.`);
    return;
  }

  let tierLabel = null;
  if (mergedCount >= trustedThreshold) tierLabel = 'trusted-contributor';
  else if (mergedCount === 0 && !skipNewContributor) tierLabel = 'new-contributor';

  if (tierLabel) {
    await ensureLabel(tierLabel);
    await github.rest.issues.addLabels({
      owner, repo, issue_number: issueNumber, labels: [tierLabel],
    });
    console.log(`Applied '${tierLabel}' to #${issueNumber} (${mergedCount} merged PRs)`);
  } else {
    console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
  }

  return tierLabel;
}

return {
  ensureLabel,
  getSizeLabel,
  computeSize,
  buildFileRules,
  matchFileLabels,
  matchTitleLabels,
  allTypeLabels,
  checkMembership,
  getContributorInfo,
  applyTierLabel,
  sizeLabels,
  tierLabels,
  trustedThreshold,
  labelColor,
};
}

function loadAndInit(github, owner, repo, core) {
  const config = loadConfig();
  return { config, h: init(github, owner, repo, config, core) };
}

module.exports = { loadConfig, init, loadAndInit };
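For a concrete sense of what `matchTitleLabels` pulls out of a conventional-commit PR title, here is a minimal Python sketch that applies the same regular expression; the example title and printed result are illustrative only, and the module's type/scope label maps are omitted:

```python
import re

# Same pattern as matchTitleLabels: type, optional "(scope[,scope])", optional "!" for breaking.
TITLE_RE = re.compile(r"^(\w+)(?:\(([^)]+)\))?(!)?:")

def parse_title(title: str) -> dict:
    m = TITLE_RE.match(title or "")
    if not m:
        return {"type": None, "scopes": [], "breaking": False}
    scopes = [s.strip() for s in (m.group(2) or "").split(",") if s.strip()]
    return {"type": m.group(1).lower(), "scopes": scopes, "breaking": bool(m.group(3))}

print(parse_title("feat(core,openai)!: support streaming tool calls"))
# -> {'type': 'feat', 'scopes': ['core', 'openai'], 'breaking': True}
```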
.github/scripts/test_release_options.py (vendored, 48 lines changed)
@@ -1,48 +0,0 @@
"""Verify _release.yml dropdown options match actual package directories."""

from pathlib import Path

import yaml

REPO_ROOT = Path(__file__).resolve().parents[2]


def _get_release_options() -> list[str]:
    workflow = REPO_ROOT / ".github" / "workflows" / "_release.yml"
    with open(workflow) as f:
        data = yaml.safe_load(f)
    try:
        # PyYAML (YAML 1.1) parses the bare key `on` as boolean True
        return data[True]["workflow_dispatch"]["inputs"]["working-directory"]["options"]
    except (KeyError, TypeError) as e:
        msg = f"Could not find workflow_dispatch options in {workflow}: {e}"
        raise AssertionError(msg) from e


def _get_package_dirs() -> set[str]:
    libs = REPO_ROOT / "libs"
    dirs: set[str] = set()
    # Top-level packages (libs/core, libs/langchain, etc.)
    for p in libs.iterdir():
        if p.is_dir() and (p / "pyproject.toml").exists():
            dirs.add(f"libs/{p.name}")
    # Partner packages (libs/partners/*)
    partners = libs / "partners"
    if partners.exists():
        for p in partners.iterdir():
            if p.is_dir() and (p / "pyproject.toml").exists():
                dirs.add(f"libs/partners/{p.name}")
    return dirs


def test_release_options_match_packages() -> None:
    options = set(_get_release_options())
    packages = _get_package_dirs()
    missing_from_dropdown = packages - options
    extra_in_dropdown = options - packages
    assert not missing_from_dropdown, (
        f"Packages on disk missing from _release.yml dropdown: {missing_from_dropdown}"
    )
    assert not extra_in_dropdown, (
        f"Dropdown options with no matching package directory: {extra_in_dropdown}"
    )
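The `data[True]` lookup above relies on a PyYAML behavior that is easy to trip over: under YAML 1.1 rules an unquoted `on` key is loaded as the boolean `True`, so a workflow file parsed with `yaml.safe_load` has no `"on"` string key. A quick check:

```python
import yaml

doc = yaml.safe_load("on:\n  workflow_dispatch: {}\n")
print(doc)           # {True: {'workflow_dispatch': {}}}
print("on" in doc)   # False
print(True in doc)   # True
```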
.github/tools/git-restore-mtime (vendored, 416 lines changed)
@@ -81,93 +81,56 @@ import time
|
||||
__version__ = "2022.12+dev"
|
||||
|
||||
# Update symlinks only if the platform supports not following them
|
||||
UPDATE_SYMLINKS = bool(os.utime in getattr(os, "supports_follow_symlinks", []))
|
||||
UPDATE_SYMLINKS = bool(os.utime in getattr(os, 'supports_follow_symlinks', []))
|
||||
|
||||
# Call os.path.normpath() only if not in a POSIX platform (Windows)
|
||||
NORMALIZE_PATHS = os.path.sep != "/"
|
||||
NORMALIZE_PATHS = (os.path.sep != '/')
|
||||
|
||||
# How many files to process in each batch when re-trying merge commits
|
||||
STEPMISSING = 100
|
||||
|
||||
# (Extra) keywords for the os.utime() call performed by touch()
|
||||
UTIME_KWS = {} if not UPDATE_SYMLINKS else {"follow_symlinks": False}
|
||||
UTIME_KWS = {} if not UPDATE_SYMLINKS else {'follow_symlinks': False}
|
||||
|
||||
|
||||
# Command-line interface ######################################################
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description=__doc__.split("\n---")[0])
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__.split('\n---')[0])
|
||||
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
"--quiet",
|
||||
"-q",
|
||||
dest="loglevel",
|
||||
action="store_const",
|
||||
const=logging.WARNING,
|
||||
default=logging.INFO,
|
||||
help="Suppress informative messages and summary statistics.",
|
||||
)
|
||||
group.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="count",
|
||||
help="""
|
||||
group.add_argument('--quiet', '-q', dest='loglevel',
|
||||
action="store_const", const=logging.WARNING, default=logging.INFO,
|
||||
help="Suppress informative messages and summary statistics.")
|
||||
group.add_argument('--verbose', '-v', action="count", help="""
|
||||
Print additional information for each processed file.
|
||||
Specify twice to further increase verbosity.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--cwd",
|
||||
"-C",
|
||||
metavar="DIRECTORY",
|
||||
help="""
|
||||
parser.add_argument('--cwd', '-C', metavar="DIRECTORY", help="""
|
||||
Run as if %(prog)s was started in directory %(metavar)s.
|
||||
This affects how --work-tree, --git-dir and PATHSPEC arguments are handled.
|
||||
See 'man 1 git' or 'git --help' for more information.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--git-dir",
|
||||
dest="gitdir",
|
||||
metavar="GITDIR",
|
||||
help="""
|
||||
parser.add_argument('--git-dir', dest='gitdir', metavar="GITDIR", help="""
|
||||
Path to the git repository, by default auto-discovered by searching
|
||||
the current directory and its parents for a .git/ subdirectory.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--work-tree",
|
||||
dest="workdir",
|
||||
metavar="WORKTREE",
|
||||
help="""
|
||||
parser.add_argument('--work-tree', dest='workdir', metavar="WORKTREE", help="""
|
||||
Path to the work tree root, by default the parent of GITDIR if it's
|
||||
automatically discovered, or the current directory if GITDIR is set.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--force",
|
||||
"-f",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--force', '-f', default=False, action="store_true", help="""
|
||||
Force updating files with uncommitted modifications.
|
||||
Untracked files and uncommitted deletions, renames and additions are
|
||||
always ignored.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--merge",
|
||||
"-m",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--merge', '-m', default=False, action="store_true", help="""
|
||||
Include merge commits.
|
||||
Leads to more recent times and more files per commit, thus with the same
|
||||
time, which may or may not be what you want.
|
||||
@@ -175,130 +138,71 @@ def parse_args():
|
||||
are found sooner, which can improve performance, sometimes substantially.
|
||||
But as merge commits are usually huge, processing them may also take longer.
|
||||
By default, merge commits are only used for files missing from regular commits.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--first-parent",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--first-parent', default=False, action="store_true", help="""
|
||||
Consider only the first parent, the "main branch", when evaluating merge commits.
|
||||
Only effective when merge commits are processed, either when --merge is
|
||||
used or when finding missing files after the first regular log search.
|
||||
See --skip-missing.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--skip-missing",
|
||||
"-s",
|
||||
dest="missing",
|
||||
default=True,
|
||||
action="store_false",
|
||||
help="""
|
||||
parser.add_argument('--skip-missing', '-s', dest="missing", default=True,
|
||||
action="store_false", help="""
|
||||
Do not try to find missing files.
|
||||
If merge commits were not evaluated with --merge and some files were
|
||||
not found in regular commits, by default %(prog)s searches for these
|
||||
files again in the merge commits.
|
||||
This option disables this retry, so files found only in merge commits
|
||||
will not have their timestamp updated.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--no-directories",
|
||||
"-D",
|
||||
dest="dirs",
|
||||
default=True,
|
||||
action="store_false",
|
||||
help="""
|
||||
parser.add_argument('--no-directories', '-D', dest='dirs', default=True,
|
||||
action="store_false", help="""
|
||||
Do not update directory timestamps.
|
||||
By default, use the time of its most recently created, renamed or deleted file.
|
||||
Note that just modifying a file will NOT update its directory time.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
"-t",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Test run: do not actually update any file timestamp.",
|
||||
)
|
||||
parser.add_argument('--test', '-t', default=False, action="store_true",
|
||||
help="Test run: do not actually update any file timestamp.")
|
||||
|
||||
parser.add_argument(
|
||||
"--commit-time",
|
||||
"-c",
|
||||
dest="commit_time",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Use commit time instead of author time.",
|
||||
)
|
||||
parser.add_argument('--commit-time', '-c', dest='commit_time', default=False,
|
||||
action='store_true', help="Use commit time instead of author time.")
|
||||
|
||||
parser.add_argument(
|
||||
"--oldest-time",
|
||||
"-o",
|
||||
dest="reverse_order",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--oldest-time', '-o', dest='reverse_order', default=False,
|
||||
action='store_true', help="""
|
||||
Update times based on the oldest, instead of the most recent commit of a file.
|
||||
This reverses the order in which the git log is processed to emulate a
|
||||
file "creation" date. Note this will be inaccurate for files deleted and
|
||||
re-created at later dates.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--skip-older-than",
|
||||
metavar="SECONDS",
|
||||
type=int,
|
||||
help="""
|
||||
parser.add_argument('--skip-older-than', metavar='SECONDS', type=int, help="""
|
||||
Ignore files that are currently older than %(metavar)s.
|
||||
Useful in workflows that assume such files already have a correct timestamp,
|
||||
as it may improve performance by processing fewer files.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--skip-older-than-commit",
|
||||
"-N",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--skip-older-than-commit', '-N', default=False,
|
||||
action='store_true', help="""
|
||||
Ignore files older than the timestamp it would be updated to.
|
||||
Such files may be considered "original", likely in the author's repository.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--unique-times",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="""
|
||||
parser.add_argument('--unique-times', default=False, action="store_true", help="""
|
||||
Set the microseconds to a unique value per commit.
|
||||
Allows telling apart changes that would otherwise have identical timestamps,
|
||||
as git's time accuracy is in seconds.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"pathspec",
|
||||
nargs="*",
|
||||
metavar="PATHSPEC",
|
||||
help="""
|
||||
parser.add_argument('pathspec', nargs='*', metavar='PATHSPEC', help="""
|
||||
Only modify paths matching %(metavar)s, relative to current directory.
|
||||
By default, update all but untracked files and submodules.
|
||||
""",
|
||||
)
|
||||
""")
|
||||
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
"-V",
|
||||
action="version",
|
||||
version="%(prog)s version {version}".format(version=get_version()),
|
||||
)
|
||||
parser.add_argument('--version', '-V', action='version',
|
||||
version='%(prog)s version {version}'.format(version=get_version()))
|
||||
|
||||
args_ = parser.parse_args()
|
||||
if args_.verbose:
|
||||
@@ -308,18 +212,17 @@ def parse_args():
|
||||
|
||||
|
||||
def get_version(version=__version__):
|
||||
if not version.endswith("+dev"):
|
||||
if not version.endswith('+dev'):
|
||||
return version
|
||||
try:
|
||||
cwd = os.path.dirname(os.path.realpath(__file__))
|
||||
return Git(cwd=cwd, errors=False).describe().lstrip("v")
|
||||
return Git(cwd=cwd, errors=False).describe().lstrip('v')
|
||||
except Git.Error:
|
||||
return "-".join((version, "unknown"))
|
||||
return '-'.join((version, "unknown"))
|
||||
|
||||
|
||||
# Helper functions ############################################################
|
||||
|
||||
|
||||
def setup_logging():
|
||||
"""Add TRACE logging level and corresponding method, return the root logger"""
|
||||
logging.TRACE = TRACE = logging.DEBUG // 2
|
||||
@@ -352,13 +255,11 @@ def normalize(path):
|
||||
if path and path[0] == '"':
|
||||
# Python 2: path = path[1:-1].decode("string-escape")
|
||||
# Python 3: https://stackoverflow.com/a/46650050/624066
|
||||
path = (
|
||||
path[1:-1] # Remove enclosing double quotes
|
||||
.encode("latin1") # Convert to bytes, required by 'unicode-escape'
|
||||
.decode("unicode-escape") # Perform the actual octal-escaping decode
|
||||
.encode("latin1") # 1:1 mapping to bytes, UTF-8 encoded
|
||||
.decode("utf8", "surrogateescape")
|
||||
) # Decode from UTF-8
|
||||
path = (path[1:-1] # Remove enclosing double quotes
|
||||
.encode('latin1') # Convert to bytes, required by 'unicode-escape'
|
||||
.decode('unicode-escape') # Perform the actual octal-escaping decode
|
||||
.encode('latin1') # 1:1 mapping to bytes, UTF-8 encoded
|
||||
.decode('utf8', 'surrogateescape')) # Decode from UTF-8
|
||||
if NORMALIZE_PATHS:
|
||||
# Make sure the slash matches the OS; for Windows we need a backslash
|
||||
path = os.path.normpath(path)
|
||||
@@ -381,12 +282,12 @@ def touch_ns(path, mtime_ns):
|
||||
|
||||
def isodate(secs: int):
|
||||
# time.localtime() accepts floats, but discards fractional part
|
||||
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(secs))
|
||||
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(secs))
|
||||
|
||||
|
||||
def isodate_ns(ns: int):
|
||||
# for integers fromtimestamp() is equivalent and ~16% slower than isodate()
|
||||
return datetime.datetime.fromtimestamp(ns / 1000000000).isoformat(sep=" ")
|
||||
return datetime.datetime.fromtimestamp(ns / 1000000000).isoformat(sep=' ')
|
||||
|
||||
|
||||
def get_mtime_ns(secs: int, idx: int):
|
||||
@@ -404,49 +305,35 @@ def get_mtime_path(path):
|
||||
|
||||
# Git class and parse_log(), the heart of the script ##########################
|
||||
|
||||
|
||||
class Git:
|
||||
def __init__(self, workdir=None, gitdir=None, cwd=None, errors=True):
|
||||
self.gitcmd = ["git"]
|
||||
self.gitcmd = ['git']
|
||||
self.errors = errors
|
||||
self._proc = None
|
||||
if workdir:
|
||||
self.gitcmd.extend(("--work-tree", workdir))
|
||||
if gitdir:
|
||||
self.gitcmd.extend(("--git-dir", gitdir))
|
||||
if cwd:
|
||||
self.gitcmd.extend(("-C", cwd))
|
||||
if workdir: self.gitcmd.extend(('--work-tree', workdir))
|
||||
if gitdir: self.gitcmd.extend(('--git-dir', gitdir))
|
||||
if cwd: self.gitcmd.extend(('-C', cwd))
|
||||
self.workdir, self.gitdir = self._get_repo_dirs()
|
||||
|
||||
def ls_files(self, paths: list = None):
|
||||
return (normalize(_) for _ in self._run("ls-files --full-name", paths))
|
||||
return (normalize(_) for _ in self._run('ls-files --full-name', paths))
|
||||
|
||||
def ls_dirty(self, force=False):
|
||||
return (
|
||||
normalize(_[3:].split(" -> ", 1)[-1])
|
||||
for _ in self._run("status --porcelain")
|
||||
if _[:2] != "??" and (not force or (_[0] in ("R", "A") or _[1] == "D"))
|
||||
)
|
||||
return (normalize(_[3:].split(' -> ', 1)[-1])
|
||||
for _ in self._run('status --porcelain')
|
||||
if _[:2] != '??' and (not force or (_[0] in ('R', 'A')
|
||||
or _[1] == 'D')))
|
||||
|
||||
def log(
|
||||
self,
|
||||
merge=False,
|
||||
first_parent=False,
|
||||
commit_time=False,
|
||||
reverse_order=False,
|
||||
paths: list = None,
|
||||
):
|
||||
cmd = "whatchanged --pretty={}".format("%ct" if commit_time else "%at")
|
||||
if merge:
|
||||
cmd += " -m"
|
||||
if first_parent:
|
||||
cmd += " --first-parent"
|
||||
if reverse_order:
|
||||
cmd += " --reverse"
|
||||
def log(self, merge=False, first_parent=False, commit_time=False,
|
||||
reverse_order=False, paths: list = None):
|
||||
cmd = 'whatchanged --pretty={}'.format('%ct' if commit_time else '%at')
|
||||
if merge: cmd += ' -m'
|
||||
if first_parent: cmd += ' --first-parent'
|
||||
if reverse_order: cmd += ' --reverse'
|
||||
return self._run(cmd, paths)
|
||||
|
||||
def describe(self):
|
||||
return self._run("describe --tags", check=True)[0]
|
||||
return self._run('describe --tags', check=True)[0]
|
||||
|
||||
def terminate(self):
|
||||
if self._proc is None:
|
||||
@@ -458,22 +345,18 @@ class Git:
|
||||
pass
|
||||
|
||||
def _get_repo_dirs(self):
|
||||
return (
|
||||
os.path.normpath(_)
|
||||
for _ in self._run(
|
||||
"rev-parse --show-toplevel --absolute-git-dir", check=True
|
||||
)
|
||||
)
|
||||
return (os.path.normpath(_) for _ in
|
||||
self._run('rev-parse --show-toplevel --absolute-git-dir', check=True))
|
||||
|
||||
def _run(self, cmdstr: str, paths: list = None, output=True, check=False):
|
||||
cmdlist = self.gitcmd + shlex.split(cmdstr)
|
||||
if paths:
|
||||
cmdlist.append("--")
|
||||
cmdlist.append('--')
|
||||
cmdlist.extend(paths)
|
||||
popen_args = dict(universal_newlines=True, encoding="utf8")
|
||||
popen_args = dict(universal_newlines=True, encoding='utf8')
|
||||
if not self.errors:
|
||||
popen_args["stderr"] = subprocess.DEVNULL
|
||||
log.trace("Executing: %s", " ".join(cmdlist))
|
||||
popen_args['stderr'] = subprocess.DEVNULL
|
||||
log.trace("Executing: %s", ' '.join(cmdlist))
|
||||
if not output:
|
||||
return subprocess.call(cmdlist, **popen_args)
|
||||
if check:
|
||||
@@ -496,26 +379,30 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
|
||||
mtime = 0
|
||||
datestr = isodate(0)
|
||||
for line in git.log(
|
||||
merge, args.first_parent, args.commit_time, args.reverse_order, filterlist
|
||||
merge,
|
||||
args.first_parent,
|
||||
args.commit_time,
|
||||
args.reverse_order,
|
||||
filterlist
|
||||
):
|
||||
stats["loglines"] += 1
|
||||
stats['loglines'] += 1
|
||||
|
||||
# Blank line between Date and list of files
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Date line
|
||||
if line[0] != ":": # Faster than `not line.startswith(':')`
|
||||
stats["commits"] += 1
|
||||
if line[0] != ':': # Faster than `not line.startswith(':')`
|
||||
stats['commits'] += 1
|
||||
mtime = int(line)
|
||||
if args.unique_times:
|
||||
mtime = get_mtime_ns(mtime, stats["commits"])
|
||||
mtime = get_mtime_ns(mtime, stats['commits'])
|
||||
if args.debug:
|
||||
datestr = isodate(mtime)
|
||||
continue
|
||||
|
||||
# File line: three tokens if it describes a renaming, otherwise two
|
||||
tokens = line.split("\t")
|
||||
tokens = line.split('\t')
|
||||
|
||||
# Possible statuses:
|
||||
# M: Modified (content changed)
|
||||
@@ -524,7 +411,7 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
|
||||
# T: Type changed: to/from regular file, symlinks, submodules
|
||||
# R099: Renamed (moved), with % of unchanged content. 100 = pure rename
|
||||
# Not possible in log: C=Copied, U=Unmerged, X=Unknown, B=pairing Broken
|
||||
status = tokens[0].split(" ")[-1]
|
||||
status = tokens[0].split(' ')[-1]
|
||||
file = tokens[-1]
|
||||
|
||||
# Handles non-ASCII chars and OS path separator
|
||||
@@ -532,76 +419,56 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
|
||||
|
||||
def do_file():
|
||||
if args.skip_older_than_commit and get_mtime_path(file) <= mtime:
|
||||
stats["skip"] += 1
|
||||
stats['skip'] += 1
|
||||
return
|
||||
if args.debug:
|
||||
log.debug(
|
||||
"%d\t%d\t%d\t%s\t%s",
|
||||
stats["loglines"],
|
||||
stats["commits"],
|
||||
stats["files"],
|
||||
datestr,
|
||||
file,
|
||||
)
|
||||
log.debug("%d\t%d\t%d\t%s\t%s",
|
||||
stats['loglines'], stats['commits'], stats['files'],
|
||||
datestr, file)
|
||||
try:
|
||||
touch(os.path.join(git.workdir, file), mtime)
|
||||
stats["touches"] += 1
|
||||
stats['touches'] += 1
|
||||
except Exception as e:
|
||||
log.error("ERROR: %s: %s", e, file)
|
||||
stats["errors"] += 1
|
||||
stats['errors'] += 1
|
||||
|
||||
def do_dir():
|
||||
if args.debug:
|
||||
log.debug(
|
||||
"%d\t%d\t-\t%s\t%s",
|
||||
stats["loglines"],
|
||||
stats["commits"],
|
||||
datestr,
|
||||
"{}/".format(dirname or "."),
|
||||
)
|
||||
log.debug("%d\t%d\t-\t%s\t%s",
|
||||
stats['loglines'], stats['commits'],
|
||||
datestr, "{}/".format(dirname or '.'))
|
||||
try:
|
||||
touch(os.path.join(git.workdir, dirname), mtime)
|
||||
stats["dirtouches"] += 1
|
||||
stats['dirtouches'] += 1
|
||||
except Exception as e:
|
||||
log.error("ERROR: %s: %s", e, dirname)
|
||||
stats["direrrors"] += 1
|
||||
stats['direrrors'] += 1
|
||||
|
||||
if file in filelist:
|
||||
stats["files"] -= 1
|
||||
stats['files'] -= 1
|
||||
filelist.remove(file)
|
||||
do_file()
|
||||
|
||||
if args.dirs and status in ("A", "D"):
|
||||
if args.dirs and status in ('A', 'D'):
|
||||
dirname = os.path.dirname(file)
|
||||
if dirname in dirlist:
|
||||
dirlist.remove(dirname)
|
||||
do_dir()
|
||||
|
||||
# All files done?
|
||||
if not stats["files"]:
|
||||
if not stats['files']:
|
||||
git.terminate()
|
||||
return
|
||||
|
||||
|
||||
# Main Logic ##################################################################
|
||||
|
||||
|
||||
def main():
|
||||
start = time.time() # yes, Wall time. CPU time is not realistic for users.
|
||||
stats = {
|
||||
_: 0
|
||||
for _ in (
|
||||
"loglines",
|
||||
"commits",
|
||||
"touches",
|
||||
"skip",
|
||||
"errors",
|
||||
"dirtouches",
|
||||
"direrrors",
|
||||
)
|
||||
}
|
||||
stats = {_: 0 for _ in ('loglines', 'commits', 'touches', 'skip', 'errors',
|
||||
'dirtouches', 'direrrors')}
|
||||
|
||||
logging.basicConfig(level=args.loglevel, format="%(message)s")
|
||||
logging.basicConfig(level=args.loglevel, format='%(message)s')
|
||||
log.trace("Arguments: %s", args)
|
||||
|
||||
# First things first: Where and Who are we?
|
||||
@@ -632,16 +499,13 @@ def main():
|
||||
|
||||
# Symlink (to file, to dir or broken - git handles the same way)
|
||||
if not UPDATE_SYMLINKS and os.path.islink(fullpath):
|
||||
log.warning(
|
||||
"WARNING: Skipping symlink, no OS support for updates: %s", path
|
||||
)
|
||||
log.warning("WARNING: Skipping symlink, no OS support for updates: %s",
|
||||
path)
|
||||
continue
|
||||
|
||||
# skip files which are older than given threshold
|
||||
if (
|
||||
args.skip_older_than
|
||||
and start - get_mtime_path(fullpath) > args.skip_older_than
|
||||
):
|
||||
if (args.skip_older_than
|
||||
and start - get_mtime_path(fullpath) > args.skip_older_than):
|
||||
continue
|
||||
|
||||
# Always add files relative to worktree root
|
||||
@@ -655,17 +519,15 @@ def main():
|
||||
else:
|
||||
dirty = set(git.ls_dirty())
|
||||
if dirty:
|
||||
log.warning(
|
||||
"WARNING: Modified files in the working directory were ignored."
|
||||
"\nTo include such files, commit your changes or use --force."
|
||||
)
|
||||
log.warning("WARNING: Modified files in the working directory were ignored."
|
||||
"\nTo include such files, commit your changes or use --force.")
|
||||
filelist -= dirty
|
||||
|
||||
# Build dir list to be processed
|
||||
dirlist = set(os.path.dirname(_) for _ in filelist) if args.dirs else set()
|
||||
|
||||
stats["totalfiles"] = stats["files"] = len(filelist)
|
||||
log.info("{0:,} files to be processed in work dir".format(stats["totalfiles"]))
|
||||
stats['totalfiles'] = stats['files'] = len(filelist)
|
||||
log.info("{0:,} files to be processed in work dir".format(stats['totalfiles']))
|
||||
|
||||
if not filelist:
|
||||
# Nothing to do. Exit silently and without errors, just like git does
|
||||
@@ -682,18 +544,10 @@ def main():
|
||||
if args.missing and not args.merge:
|
||||
filterlist = list(filelist)
|
||||
missing = len(filterlist)
|
||||
log.info(
|
||||
"{0:,} files not found in log, trying merge commits".format(missing)
|
||||
)
|
||||
log.info("{0:,} files not found in log, trying merge commits".format(missing))
|
||||
for i in range(0, missing, STEPMISSING):
|
||||
parse_log(
|
||||
filelist,
|
||||
dirlist,
|
||||
stats,
|
||||
git,
|
||||
merge=True,
|
||||
filterlist=filterlist[i : i + STEPMISSING],
|
||||
)
|
||||
parse_log(filelist, dirlist, stats, git,
|
||||
merge=True, filterlist=filterlist[i:i + STEPMISSING])
|
||||
|
||||
# Still missing some?
|
||||
for file in filelist:
|
||||
@@ -702,33 +556,29 @@ def main():
|
||||
# Final statistics
|
||||
# Suggestion: use git-log --before=mtime to brag about skipped log entries
|
||||
def log_info(msg, *a, width=13):
|
||||
ifmt = "{:%d,}" % (width,) # not using 'n' for consistency with ffmt
|
||||
ffmt = "{:%d,.2f}" % (width,)
|
||||
ifmt = '{:%d,}' % (width,) # not using 'n' for consistency with ffmt
|
||||
ffmt = '{:%d,.2f}' % (width,)
|
||||
# %-formatting lacks a thousand separator, must pre-render with .format()
|
||||
log.info(msg.replace("%d", ifmt).replace("%f", ffmt).format(*a))
|
||||
log.info(msg.replace('%d', ifmt).replace('%f', ffmt).format(*a))
|
||||
|
||||
log_info(
|
||||
"Statistics:\n%f seconds\n%d log lines processed\n%d commits evaluated",
|
||||
time.time() - start,
|
||||
stats["loglines"],
|
||||
stats["commits"],
|
||||
)
|
||||
"Statistics:\n"
|
||||
"%f seconds\n"
|
||||
"%d log lines processed\n"
|
||||
"%d commits evaluated",
|
||||
time.time() - start, stats['loglines'], stats['commits'])
|
||||
|
||||
if args.dirs:
|
||||
if stats["direrrors"]:
|
||||
log_info("%d directory update errors", stats["direrrors"])
|
||||
log_info("%d directories updated", stats["dirtouches"])
|
||||
if stats['direrrors']: log_info("%d directory update errors", stats['direrrors'])
|
||||
log_info("%d directories updated", stats['dirtouches'])
|
||||
|
||||
if stats["touches"] != stats["totalfiles"]:
|
||||
log_info("%d files", stats["totalfiles"])
|
||||
if stats["skip"]:
|
||||
log_info("%d files skipped", stats["skip"])
|
||||
if stats["files"]:
|
||||
log_info("%d files missing", stats["files"])
|
||||
if stats["errors"]:
|
||||
log_info("%d file update errors", stats["errors"])
|
||||
if stats['touches'] != stats['totalfiles']:
|
||||
log_info("%d files", stats['totalfiles'])
|
||||
if stats['skip']: log_info("%d files skipped", stats['skip'])
|
||||
if stats['files']: log_info("%d files missing", stats['files'])
|
||||
if stats['errors']: log_info("%d file update errors", stats['errors'])
|
||||
|
||||
log_info("%d files updated", stats["touches"])
|
||||
log_info("%d files updated", stats['touches'])
|
||||
|
||||
if args.test:
|
||||
log.info("TEST RUN - No files modified!")
|
||||
|
||||
.github/workflows/_all_ci.yml (vendored, new file, 104 lines)
@@ -0,0 +1,104 @@
---
name: langchain CI

on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"
  workflow_dispatch:
    inputs:
      working-directory:
        required: true
        type: choice
        default: 'libs/langchain'
        options:
          - libs/langchain
          - libs/core
          - libs/experimental


# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.working-directory }}
  cancel-in-progress: true

env:
  POETRY_VERSION: "1.6.1"

jobs:
  lint:
    uses: ./.github/workflows/_lint.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  test:
    uses: ./.github/workflows/_test.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  compile-integration-tests:
    uses: ./.github/workflows/_compile_integration_test.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  pydantic-compatibility:
    uses: ./.github/workflows/_pydantic_compatibility.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  extended-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Python ${{ matrix.python-version }} extended tests
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: extended

      - name: Install dependencies
        shell: bash
        run: |
          echo "Running extended tests, installing dependencies with poetry..."
          poetry install -E extended_testing --with test

      - name: Run extended tests
        run: make extended_tests

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
.github/workflows/_compile_integration_test.yml (vendored, 50 lines changed)
@@ -1,12 +1,4 @@
|
||||
# Validates that a package's integration tests compile without syntax or import errors.
|
||||
#
|
||||
# (If an integration test fails to compile, it won't run.)
|
||||
#
|
||||
# Called as part of check_diffs.yml workflow
|
||||
#
|
||||
# Runs pytest with compile marker to check syntax/imports.
|
||||
|
||||
name: "🔗 Compile Integration Tests"
|
||||
name: compile-integration-test
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -15,16 +7,9 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
POETRY_VERSION: "1.6.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -32,27 +17,34 @@ jobs:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: compile-integration-tests-${{ inputs.working-directory }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: compile-integration
|
||||
|
||||
- name: "📦 Install Integration Dependencies"
|
||||
- name: Install integration dependencies
|
||||
shell: bash
|
||||
run: uv sync --group test --group test_integration
|
||||
run: poetry install --with=test_integration,test
|
||||
|
||||
- name: "🔗 Check Integration Tests Compile"
|
||||
- name: Check integration tests compile
|
||||
shell: bash
|
||||
run: uv run pytest -m compile tests/integration_tests
|
||||
run: poetry run pytest -m compile tests/integration_tests
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
- name: Ensure the tests did not create any additional files
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
.github/workflows/_lint.yml (vendored, 128 lines changed)
@@ -1,11 +1,4 @@
|
||||
# Runs linting.
|
||||
#
|
||||
# Uses the package's Makefile to run the checks, specifically the
|
||||
# `lint_package` and `lint_tests` targets.
|
||||
#
|
||||
# Called as part of check_diffs.yml workflow.
|
||||
|
||||
name: "🧹 Linting"
|
||||
name: lint
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -14,68 +7,115 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
langchain-location:
|
||||
required: false
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
|
||||
# This env var allows us to get inline annotations when ruff has complaints.
|
||||
RUFF_OUTPUT_FORMAT: github
|
||||
|
||||
UV_FROZEN: "true"
|
||||
|
||||
jobs:
|
||||
# Linting job - runs quality checks on package and test code
|
||||
build:
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
strategy:
|
||||
matrix:
|
||||
# Only lint on the min and max supported Python versions.
|
||||
# It's extremely unlikely that there's a lint issue on any version in between
|
||||
# that doesn't show up on the min or max versions.
|
||||
#
|
||||
# GitHub rate-limits how many jobs can be running at any one time.
|
||||
# Starting new jobs is also relatively slow,
|
||||
# so linting on fewer versions makes CI faster.
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.11"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: lint-${{ inputs.working-directory }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: lint-with-extras
|
||||
|
||||
# - name: "🔒 Verify Lockfile is Up-to-Date"
|
||||
# working-directory: ${{ inputs.working-directory }}
|
||||
# run: |
|
||||
# unset UV_FROZEN
|
||||
# uv lock --check
|
||||
|
||||
- name: "📦 Install Lint & Typing Dependencies"
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv sync --group lint --group typing
|
||||
poetry check
|
||||
|
||||
- name: "🔍 Analyze Package Code with Linters"
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- name: Install dependencies
|
||||
# Also installs dev/lint/test/typing dependencies, to ensure we have
|
||||
# type hints for as many of our libraries as possible.
|
||||
# This helps catch errors that require dependencies to be spotted, for example:
|
||||
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
|
||||
#
|
||||
# If you change this configuration, make sure to change the `cache-key`
|
||||
# in the `poetry_setup` action above to stop using the old cache.
|
||||
# It doesn't matter how you change it, any change will cause a cache-bust.
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry install --with lint,typing
|
||||
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
if: ${{ inputs.langchain-location }}
|
||||
env:
|
||||
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
|
||||
run: |
|
||||
poetry run pip install -e "$LANGCHAIN_LOCATION"
|
||||
|
||||
- name: Get .mypy_cache to speed up mypy
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.mypy_cache
|
||||
key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
|
||||
- name: Analysing the code with our lint
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
make lint_package
|
||||
|
||||
- name: "📦 Install Test Dependencies (non-partners)"
|
||||
# (For directories NOT starting with libs/partners/)
|
||||
if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }}
|
||||
- name: Install test dependencies
|
||||
# Also installs dev/lint/test/typing dependencies, to ensure we have
|
||||
# type hints for as many of our libraries as possible.
|
||||
# This helps catch errors that require dependencies to be spotted, for example:
|
||||
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
|
||||
#
|
||||
# If you change this configuration, make sure to change the `cache-key`
|
||||
# in the `poetry_setup` action above to stop using the old cache.
|
||||
# It doesn't matter how you change it, any change will cause a cache-bust.
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv sync --inexact --group test
|
||||
- name: "📦 Install Test Dependencies"
|
||||
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
uv sync --inexact --group test --group test_integration
|
||||
poetry install --with test
|
||||
|
||||
- name: "🔍 Analyze Test Code with Linters"
|
||||
- name: Get .mypy_cache_test to speed up mypy
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.mypy_cache_test
|
||||
key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
- name: Analysing the code with our lint
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
make lint_tests
|
||||
|
||||
.github/workflows/_pydantic_compatibility.yml (vendored, new file, 105 lines)
@@ -0,0 +1,105 @@
|
||||
name: pydantic v1/v2 compatibility
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
langchain-location:
|
||||
required: false
|
||||
type: string
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: pydantic-cross-compat
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: poetry install --with test
|
||||
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
if: ${{ inputs.langchain-location }}
|
||||
env:
|
||||
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
|
||||
run: |
|
||||
poetry run pip install -e "$LANGCHAIN_LOCATION"
|
||||
|
||||
- name: Install the opposite major version of pydantic
|
||||
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
|
||||
shell: bash
|
||||
run: |
|
||||
# Determine the major part of pydantic version
|
||||
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
if [[ "$REGULAR_VERSION" == "1" ]]; then
|
||||
PYDANTIC_DEP=">=2.1,<3"
|
||||
TEST_WITH_VERSION="2"
|
||||
elif [[ "$REGULAR_VERSION" == "2" ]]; then
|
||||
PYDANTIC_DEP="<2"
|
||||
TEST_WITH_VERSION="1"
|
||||
else
|
||||
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
|
||||
# which would prevent caching from working: the cache would get saved
|
||||
# to a different key than where it gets loaded from.
|
||||
poetry run pip install "pydantic${PYDANTIC_DEP}"
|
||||
|
||||
# Ensure that the correct pydantic is installed now.
|
||||
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
|
||||
|
||||
# Determine the major part of pydantic version
|
||||
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
# Check that the major part of pydantic version is as expected, if not
|
||||
# raise an error
|
||||
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
|
||||
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
|
||||
- name: Run pydantic compatibility tests
|
||||
shell: bash
|
||||
run: make test
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
.github/workflows/_refresh_model_profiles.yml (vendored, 202 lines changed)
@@ -1,202 +0,0 @@
|
||||
# Reusable workflow: refreshes model profile data for any repo that uses the
|
||||
# `langchain-profiles` CLI. Creates (or updates) a pull request with the
|
||||
# resulting changes.
|
||||
#
|
||||
# Callers MUST set `permissions: { contents: write, pull-requests: write }` —
|
||||
# reusable workflows cannot escalate the caller's token permissions.
|
||||
#
|
||||
# ── Example: external repo (langchain-google) ──────────────────────────
|
||||
#
|
||||
# jobs:
|
||||
# refresh-profiles:
|
||||
# uses: langchain-ai/langchain/.github/workflows/_refresh_model_profiles.yml@master
|
||||
# with:
|
||||
# providers: >-
|
||||
# [
|
||||
# {"provider":"google", "data_dir":"libs/genai/langchain_google_genai/data"},
|
||||
# ]
|
||||
# secrets:
|
||||
# MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
# MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
|
||||
name: "Refresh Model Profiles (reusable)"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
providers:
|
||||
description: >-
|
||||
JSON array of objects, each with `provider` (models.dev provider ID)
|
||||
and `data_dir` (path relative to repo root where `_profiles.py` and
|
||||
`profile_augmentations.toml` live).
|
||||
required: true
|
||||
type: string
|
||||
cli-path:
|
||||
description: >-
|
||||
Path (relative to workspace) to an existing `libs/model-profiles`
|
||||
checkout. When set the workflow skips cloning the langchain repo and
|
||||
uses this directory for the CLI instead. Useful when the caller IS
|
||||
the langchain monorepo.
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
cli-ref:
|
||||
description: >-
|
||||
Git ref of langchain-ai/langchain to checkout for the CLI.
|
||||
Ignored when `cli-path` is set.
|
||||
required: false
|
||||
type: string
|
||||
default: master
|
||||
add-paths:
|
||||
description: "Glob for files to stage in the PR commit."
|
||||
required: false
|
||||
type: string
|
||||
default: "**/_profiles.py"
|
||||
pr-branch:
|
||||
description: "Branch name for the auto-created PR."
|
||||
required: false
|
||||
type: string
|
||||
default: bot/refresh-model-profiles
|
||||
pr-title:
|
||||
description: "PR / commit title."
|
||||
required: false
|
||||
type: string
|
||||
default: "chore(model-profiles): refresh model profile data"
|
||||
pr-body:
|
||||
description: "PR body."
|
||||
required: false
|
||||
type: string
|
||||
default: |
|
||||
Automated refresh of model profile data via `langchain-profiles refresh`.
|
||||
|
||||
🤖 Generated by the `refresh_model_profiles` workflow.
|
||||
pr-labels:
|
||||
description: "Comma-separated labels to apply to the PR."
|
||||
required: false
|
||||
type: string
|
||||
default: bot
|
||||
secrets:
|
||||
MODEL_PROFILE_BOT_APP_ID:
|
||||
required: true
|
||||
MODEL_PROFILE_BOT_PRIVATE_KEY:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
refresh-profiles:
|
||||
name: refresh model profiles
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "📋 Checkout"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "📋 Checkout langchain-profiles CLI"
|
||||
if: inputs.cli-path == ''
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
ref: ${{ inputs.cli-ref }}
|
||||
sparse-checkout: libs/model-profiles
|
||||
path: _langchain-cli
|
||||
|
||||
- name: "🔧 Resolve CLI directory"
|
||||
id: cli
|
||||
env:
|
||||
CLI_PATH: ${{ inputs.cli-path }}
|
||||
run: |
|
||||
if [ -n "${CLI_PATH}" ]; then
|
||||
resolved="${GITHUB_WORKSPACE}/${CLI_PATH}"
|
||||
if [ ! -d "${resolved}" ]; then
|
||||
echo "::error::cli-path '${CLI_PATH}' does not exist at ${resolved}"
|
||||
exit 1
|
||||
fi
|
||||
echo "dir=${CLI_PATH}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "dir=_langchain-cli/libs/model-profiles" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: "🐍 Set up Python + uv"
|
||||
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
|
||||
with:
|
||||
version: "0.5.25"
|
||||
python-version: "3.12"
|
||||
enable-cache: true
|
||||
cache-dependency-glob: "**/model-profiles/uv.lock"
|
||||
|
||||
- name: "📦 Install langchain-profiles CLI"
|
||||
working-directory: ${{ steps.cli.outputs.dir }}
|
||||
run: uv sync --frozen --no-group test --no-group dev --no-group lint
|
||||
|
||||
- name: "✅ Validate providers input"
|
||||
env:
|
||||
PROVIDERS_JSON: ${{ inputs.providers }}
|
||||
run: |
|
||||
echo "${PROVIDERS_JSON}" | jq -e 'type == "array" and length > 0' > /dev/null || {
|
||||
echo "::error::providers input must be a non-empty JSON array"
|
||||
exit 1
|
||||
}
|
||||
echo "${PROVIDERS_JSON}" | jq -e 'all(has("provider") and has("data_dir"))' > /dev/null || {
|
||||
echo "::error::every entry in providers must have 'provider' and 'data_dir' keys"
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: "🔄 Refresh profiles"
|
||||
env:
|
||||
PROVIDERS_JSON: ${{ inputs.providers }}
|
||||
run: |
|
||||
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
|
||||
failed=""
|
||||
mapfile -t rows < <(echo "${PROVIDERS_JSON}" | jq -c '.[]')
|
||||
for row in "${rows[@]}"; do
|
||||
provider=$(echo "${row}" | jq -r '.provider')
|
||||
data_dir=$(echo "${row}" | jq -r '.data_dir')
|
||||
echo "--- Refreshing ${provider} -> ${data_dir} ---"
|
||||
if ! echo y | uv run --frozen --project "${cli_dir}" \
|
||||
langchain-profiles refresh \
|
||||
--provider "${provider}" \
|
||||
--data-dir "${GITHUB_WORKSPACE}/${data_dir}"; then
|
||||
echo "::error::Failed to refresh provider: ${provider}"
|
||||
failed="${failed} ${provider}"
|
||||
fi
|
||||
done
|
||||
if [ -n "${failed}" ]; then
|
||||
echo "::error::The following providers failed:${failed}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: "🔑 Generate GitHub App token"
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
|
||||
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
|
||||
|
||||
- name: "🔀 Create pull request"
|
||||
id: create-pr
|
||||
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
branch: ${{ inputs.pr-branch }}
|
||||
commit-message: ${{ inputs.pr-title }}
|
||||
title: ${{ inputs.pr-title }}
|
||||
body: ${{ inputs.pr-body }}
|
||||
labels: ${{ inputs.pr-labels }}
|
||||
add-paths: ${{ inputs.add-paths }}
|
||||
|
||||
- name: "📝 Summary"
|
||||
if: always()
|
||||
env:
|
||||
PR_OP: ${{ steps.create-pr.outputs.pull-request-operation }}
|
||||
PR_URL: ${{ steps.create-pr.outputs.pull-request-url }}
|
||||
JOB_STATUS: ${{ job.status }}
|
||||
run: |
|
||||
if [ "${PR_OP}" = "created" ] || [ "${PR_OP}" = "updated" ]; then
|
||||
echo "### ✅ PR ${PR_OP}: ${PR_URL}" >> "$GITHUB_STEP_SUMMARY"
|
||||
elif [ -z "${PR_OP}" ] && [ "${JOB_STATUS}" = "success" ]; then
|
||||
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
|
||||
elif [ "${JOB_STATUS}" = "failure" ]; then
|
||||
echo "### ❌ Job failed — check step logs for details" >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
.github/workflows/_release.yml (vendored, 588 lines changed)
@@ -1,11 +1,5 @@
|
||||
# Builds and publishes LangChain packages to PyPI.
|
||||
#
|
||||
# Manually triggered, though can be used as a reusable workflow (workflow_call).
|
||||
#
|
||||
# Handles version bumping, building, and publishing to PyPI with authentication.
|
||||
name: release
|
||||
|
||||
name: "🚀 Package Release"
|
||||
run-name: "Release ${{ inputs.working-directory-override || inputs.working-directory }} ${{ inputs.release-version }}"
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@@ -13,86 +7,35 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: choice
|
||||
description: "From which folder this pipeline executes"
|
||||
default: "libs/langchain_v1"
|
||||
options:
|
||||
- libs/core
|
||||
- libs/langchain
|
||||
- libs/langchain_v1
|
||||
- libs/text-splitters
|
||||
- libs/standard-tests
|
||||
- libs/model-profiles
|
||||
- libs/partners/anthropic
|
||||
- libs/partners/chroma
|
||||
- libs/partners/deepseek
|
||||
- libs/partners/exa
|
||||
- libs/partners/fireworks
|
||||
- libs/partners/groq
|
||||
- libs/partners/huggingface
|
||||
- libs/partners/mistralai
|
||||
- libs/partners/nomic
|
||||
- libs/partners/ollama
|
||||
- libs/partners/openai
|
||||
- libs/partners/openrouter
|
||||
- libs/partners/perplexity
|
||||
- libs/partners/qdrant
|
||||
- libs/partners/xai
|
||||
working-directory-override:
|
||||
required: false
|
||||
type: string
|
||||
description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
|
||||
release-version:
|
||||
required: true
|
||||
type: string
|
||||
default: "0.1.0"
|
||||
description: "New version of package being released"
|
||||
dangerous-nonmaster-release:
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
description: "Release from a non-master branch (danger!) - Only use for hotfixes"
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
EFFECTIVE_WORKING_DIR: ${{ inputs.working-directory-override || inputs.working-directory }}
|
||||
|
||||
permissions:
|
||||
contents: read # Job-level overrides grant write only where needed (mark-release)
|
||||
PYTHON_VERSION: "3.10"
|
||||
POETRY_VERSION: "1.6.1"
|
||||
|
||||
jobs:
|
||||
# Build the distribution package and extract version info
|
||||
# Runs in isolated environment with minimal permissions for security
|
||||
build:
|
||||
if: github.ref == 'refs/heads/master' || inputs.dangerous-nonmaster-release
|
||||
environment: Scheduled testing
|
||||
if: github.ref == 'refs/heads/master'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
outputs:
|
||||
pkg-name: ${{ steps.check-version.outputs.pkg-name }}
|
||||
version: ${{ steps.check-version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: release
|
||||
|
||||
# We want to keep this build stage *separate* from the release stage,
|
||||
# so that there's no sharing of permissions between them.
|
||||
# (Release stage has trusted publishing and GitHub repo contents write access,
|
||||
# which the build stage must not have access to.)
|
||||
#
|
||||
# The release stage has trusted publishing and GitHub repo contents write access,
|
||||
# and we want to keep the scope of that access limited just to the release job.
|
||||
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
|
||||
# could get access to our GitHub or PyPI credentials.
|
||||
#
|
||||
@@ -101,176 +44,38 @@ jobs:
|
||||
# > from the publish job.
|
||||
# https://github.com/pypa/gh-action-pypi-publish#non-goals
|
||||
- name: Build project for distribution
|
||||
run: uv build
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
run: poetry build
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Check version
|
||||
- name: Check Version
|
||||
id: check-version
|
||||
shell: python
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
run: |
|
||||
import os
|
||||
import tomllib
|
||||
with open("pyproject.toml", "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
pkg_name = data["project"]["name"]
|
||||
version = data["project"]["version"]
|
||||
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
|
||||
f.write(f"pkg-name={pkg_name}\n")
|
||||
f.write(f"version={version}\n")
|
||||
release-notes:
|
||||
# release-notes must run before publishing because its check-tags step
|
||||
# validates version/tag state — do not remove this dependency.
|
||||
needs:
|
||||
- build
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
outputs:
|
||||
release-body: ${{ steps.generate-release-body.outputs.release-body }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
path: langchain
|
||||
sparse-checkout: | # this only grabs files for relevant dir
|
||||
${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
ref: ${{ github.ref }} # this scopes to just ref'd branch
|
||||
fetch-depth: 0 # this fetches entire commit history
|
||||
- name: Check tags
|
||||
id: check-tags
|
||||
shell: bash
|
||||
working-directory: langchain/${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
VERSION: ${{ needs.build.outputs.version }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
# Handle regular versions and pre-release versions differently
|
||||
if [[ "$VERSION" == *"-"* ]]; then
|
||||
# This is a pre-release version (contains a hyphen)
|
||||
# Extract the base version without the pre-release suffix
|
||||
BASE_VERSION=${VERSION%%-*}
|
||||
# Look for the latest release of the same base version
|
||||
REGEX="^$PKG_NAME==$BASE_VERSION\$"
|
||||
PREV_TAG=$(git tag --sort=-creatordate | (grep -P "$REGEX" || true) | head -1)
|
||||
|
||||
# If no exact base version match, look for the latest release of any kind
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
|
||||
PREV_TAG=$(git tag --sort=-creatordate | (grep -P "$REGEX" || true) | head -1)
|
||||
fi
|
||||
else
|
||||
# Regular version handling
|
||||
PREV_TAG="$PKG_NAME==${VERSION%.*}.$(( ${VERSION##*.} - 1 ))"; [[ "${VERSION##*.}" -eq 0 ]] && PREV_TAG=""
|
||||
|
||||
# backup case if releasing e.g. 0.3.0, looks up last release
|
||||
# note if last release (chronologically) was e.g. 0.1.47 it will get
|
||||
# that instead of the last 0.2 release
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
|
||||
echo $REGEX
|
||||
PREV_TAG=$(git tag --sort=-creatordate | (grep -P $REGEX || true) | head -1)
|
||||
fi
|
||||
fi
|
||||
|
||||
# if PREV_TAG is empty or came out to 0.0.0, let it be empty
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
echo "No previous tag found - first release"
|
||||
else
|
||||
# confirm prev-tag actually exists in git repo with git tag
|
||||
GIT_TAG_RESULT=$(git tag -l "$PREV_TAG")
|
||||
if [ -z "$GIT_TAG_RESULT" ]; then
|
||||
echo "Previous tag $PREV_TAG not found in git repo"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
TAG="${PKG_NAME}==${VERSION}"
|
||||
if [ "$TAG" == "$PREV_TAG" ]; then
|
||||
echo "No new version to release"
|
||||
exit 1
|
||||
fi
|
||||
echo tag="$TAG" >> $GITHUB_OUTPUT
|
||||
echo prev-tag="$PREV_TAG" >> $GITHUB_OUTPUT
|
||||
- name: Generate release body
|
||||
id: generate-release-body
|
||||
working-directory: langchain
|
||||
env:
|
||||
WORKING_DIR: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
TAG: ${{ steps.check-tags.outputs.tag }}
|
||||
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
|
||||
run: |
|
||||
PREAMBLE="Changes since $PREV_TAG"
|
||||
# if PREV_TAG is empty or 0.0.0, then we are releasing the first version
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
PREAMBLE="Initial release"
|
||||
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
|
||||
fi
|
||||
{
|
||||
echo 'release-body<<EOF'
|
||||
echo $PREAMBLE
|
||||
echo
|
||||
git log --format="%s" "$PREV_TAG"..HEAD -- $WORKING_DIR
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
|
||||
echo version="$(poetry version --short)" >> $GITHUB_OUTPUT
|
||||
|
||||
test-pypi-publish:
|
||||
# release-notes must run before publishing because its check-tags step
|
||||
# validates version/tag state — do not remove this dependency.
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# This permission is used for trusted publishing:
|
||||
# https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
|
||||
#
|
||||
# Trusted publishing has to also be configured on PyPI for each package:
|
||||
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
|
||||
- name: Publish to test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
|
||||
with:
|
||||
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
verbose: true
|
||||
print-hash: true
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
# We overwrite any existing distributions with the same name and version.
|
||||
# This is *only for CI use* and is *extremely dangerous* otherwise!
|
||||
# https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates
|
||||
skip-existing: true
|
||||
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
|
||||
attestations: false
|
||||
uses:
|
||||
./.github/workflows/_test_release.yml
|
||||
with:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
secrets: inherit
|
||||
|
||||
pre-release-checks:
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
- test-pypi-publish
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
# We explicitly *don't* set up caching here. This ensures our tests are
|
||||
# maximally sensitive to catching breakage.
|
||||
#
|
||||
@@ -283,295 +88,40 @@ jobs:
|
||||
# - Tests pass, because the dependency is present even though it wasn't specified.
|
||||
# - The package is published, and it breaks on the missing dependency when
|
||||
# used in the real world.
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
id: setup-python
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
|
||||
- name: Import dist package
|
||||
- name: Test published package
|
||||
shell: bash
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
VERSION: ${{ needs.build.outputs.version }}
|
||||
# Install directly from the locally-built wheel (no index resolution needed)
|
||||
# Here we use:
|
||||
# - The default regular PyPI index as the *primary* index, meaning
|
||||
# that it takes priority (https://pypi.org/simple)
|
||||
# - The test PyPI index as an extra index, so that any dependencies that
|
||||
# are not found on test PyPI can be resolved and installed anyway.
|
||||
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
|
||||
# package because VERSION will not have been uploaded to regular PyPI yet.
|
||||
#
|
||||
# TODO: add more in-depth pre-publish tests after testing that importing works
|
||||
run: |
|
||||
uv venv
|
||||
VIRTUAL_ENV=.venv uv pip install dist/*.whl
|
||||
pip install \
|
||||
--extra-index-url https://test.pypi.org/simple/ \
|
||||
"$PKG_NAME==$VERSION"
|
||||
|
||||
# Replace all dashes in the package name with underscores,
|
||||
# since that's how Python imports packages with dashes in the name.
|
||||
# also remove _official suffix
|
||||
IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g | sed s/_official//g)"
|
||||
IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g)"
|
||||
|
||||
uv run python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"
|
||||
|
||||
- name: Import test dependencies
|
||||
run: uv sync --group test
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
|
||||
# Overwrite the local version of the package with the built version
|
||||
- name: Import published package (again)
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
shell: bash
|
||||
env:
|
||||
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
|
||||
VERSION: ${{ needs.build.outputs.version }}
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install dist/*.whl
|
||||
|
||||
- name: Check for prerelease versions
|
||||
# Block release if any dependencies allow prerelease versions
|
||||
# (unless this is itself a prerelease version)
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
run: |
|
||||
uv run python $GITHUB_WORKSPACE/.github/scripts/check_prerelease_dependencies.py pyproject.toml
|
||||
|
||||
- name: Run unit tests
|
||||
run: make tests
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
|
||||
- name: Get minimum versions
|
||||
# Find the minimum published versions that satisfies the given constraints
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
id: min-version
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install packaging requests
|
||||
python_version="$(uv run python --version | awk '{print $2}')"
|
||||
min_versions="$(uv run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml release $python_version)"
|
||||
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
|
||||
echo "min-versions=$min_versions"
|
||||
|
||||
- name: Run unit tests with minimum dependency versions
|
||||
if: ${{ steps.min-version.outputs.min-versions != '' }}
|
||||
env:
|
||||
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install --force-reinstall --editable .
|
||||
VIRTUAL_ENV=.venv uv pip install --force-reinstall $MIN_VERSIONS
|
||||
make tests
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
|
||||
- name: Import integration test dependencies
|
||||
run: uv sync --group test --group test_integration
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
|
||||
- name: Run integration tests
|
||||
# Uses the Makefile's `integration_tests` target for the specified package
|
||||
if: ${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/partners/') }}
|
||||
env:
|
||||
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
|
||||
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
|
||||
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
|
||||
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
|
||||
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
|
||||
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
|
||||
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
|
||||
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
|
||||
ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
|
||||
ES_URL: ${{ secrets.ES_URL }}
|
||||
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
|
||||
ES_API_KEY: ${{ secrets.ES_API_KEY }}
|
||||
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
|
||||
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
|
||||
OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: make integration_tests
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
|
||||
# Test select published packages against new core
|
||||
# Done when code changes are made to langchain-core
|
||||
test-prior-published-packages-against-new-core:
|
||||
# Installs the new core with old partners: Installs the new unreleased core
|
||||
# alongside the previously published partner packages and runs integration tests
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
- test-pypi-publish
|
||||
- pre-release-checks
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
if: false # temporarily skip
|
||||
strategy:
|
||||
matrix:
|
||||
partner: [anthropic]
|
||||
fail-fast: false # Continue testing other partners if one fails
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
|
||||
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
|
||||
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
|
||||
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
# We implement this conditional as Github Actions does not have good support
|
||||
# for conditionally needing steps. https://github.com/actions/runner/issues/491
|
||||
# TODO: this seems to be resolved upstream, so we can probably remove this workaround
|
||||
- name: Check if libs/core
|
||||
run: |
|
||||
if [ "${{ startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core') }}" != "true" ]; then
|
||||
echo "Not in libs/core. Exiting successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Set up Python + uv
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
|
||||
- name: Test against ${{ matrix.partner }}
|
||||
if: startsWith(env.EFFECTIVE_WORKING_DIR, 'libs/core')
|
||||
run: |
|
||||
# Identify latest tag, excluding pre-releases
|
||||
LATEST_PACKAGE_TAG="$(
|
||||
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
|
||||
| awk '{print $2}' \
|
||||
| sed 's|refs/tags/||' \
|
||||
| grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
|
||||
| sort -Vr \
|
||||
| head -n 1
|
||||
)"
|
||||
echo "Latest package tag: $LATEST_PACKAGE_TAG"
|
||||
|
||||
# Shallow-fetch just that single tag
|
||||
git fetch --depth=1 origin tag "$LATEST_PACKAGE_TAG"
|
||||
|
||||
# Checkout the latest package files
|
||||
rm -rf $GITHUB_WORKSPACE/libs/partners/${{ matrix.partner }}/*
|
||||
rm -rf $GITHUB_WORKSPACE/libs/standard-tests/*
|
||||
cd $GITHUB_WORKSPACE/libs/
|
||||
git checkout "$LATEST_PACKAGE_TAG" -- standard-tests/
|
||||
git checkout "$LATEST_PACKAGE_TAG" -- partners/${{ matrix.partner }}/
|
||||
cd partners/${{ matrix.partner }}
|
||||
|
||||
# Print as a sanity check
|
||||
echo "Version number from pyproject.toml: "
|
||||
cat pyproject.toml | grep "version = "
|
||||
|
||||
# Run tests
|
||||
uv sync --group test --group test_integration
|
||||
uv pip install ../../core/dist/*.whl
|
||||
make integration_tests
|
||||
|
||||
# Test external packages that depend on langchain-core/langchain against the new release
|
||||
# Only runs for core and langchain_v1 releases to catch breaking changes before publish
|
||||
test-dependents:
|
||||
name: "🐍 Python ${{ matrix.python-version }}: ${{ matrix.package.path }}"
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
- test-pypi-publish
|
||||
- pre-release-checks
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
# Only run for core or langchain_v1 releases.
|
||||
# Job-level 'if' does not support env context; must use inputs directly.
|
||||
if: >-
|
||||
startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/core')
|
||||
|| startsWith(inputs.working-directory-override || inputs.working-directory, 'libs/langchain_v1')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.11", "3.13"]
|
||||
package:
|
||||
- name: deepagents
|
||||
repo: langchain-ai/deepagents
|
||||
path: libs/deepagents
|
||||
# No API keys needed for now - deepagents `make test` only runs unit tests
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
repository: ${{ matrix.package.repo }}
|
||||
path: ${{ matrix.package.name }}
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./langchain/.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
|
||||
- name: Install ${{ matrix.package.name }} with local packages
|
||||
# External dependents don't have [tool.uv.sources] pointing to this repo,
|
||||
# so we install the package normally then override with the built wheel.
|
||||
run: |
|
||||
cd ${{ matrix.package.name }}/${{ matrix.package.path }}
|
||||
|
||||
# Install the package with test dependencies
|
||||
uv sync --group test
|
||||
|
||||
# Override with the built wheel from this release
|
||||
uv pip install $GITHUB_WORKSPACE/dist/*.whl
|
||||
|
||||
- name: Run ${{ matrix.package.name }} tests
|
||||
run: |
|
||||
cd ${{ matrix.package.name }}/${{ matrix.package.path }}
|
||||
make test
|
||||
python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"
|
||||
|
||||
publish:
|
||||
# Publishes the package to PyPI
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
- test-pypi-publish
|
||||
- pre-release-checks
|
||||
- test-dependents
|
||||
# - test-prior-published-packages-against-new-core
|
||||
# Run if all needed jobs succeeded or were skipped (test-dependents only runs for core/langchain_v1)
|
||||
if: ${{ !cancelled() && !failure() }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# This permission is used for trusted publishing:
|
||||
@@ -583,70 +133,70 @@ jobs:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: release
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Publish package distributions to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
packages-dir: ${{ inputs.working-directory }}/dist/
|
||||
verbose: true
|
||||
print-hash: true
|
||||
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
|
||||
attestations: false
|
||||
|
||||
mark-release:
|
||||
# Marks the GitHub release with the new version tag
|
||||
needs:
|
||||
- build
|
||||
- release-notes
|
||||
- test-pypi-publish
|
||||
- pre-release-checks
|
||||
- publish
|
||||
# Run if all needed jobs succeeded or were skipped
|
||||
if: ${{ !cancelled() && !failure() }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# This permission is needed by `ncipollo/release-action` to
|
||||
# create the GitHub release/tag
|
||||
# create the GitHub release.
|
||||
contents: write
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.EFFECTIVE_WORKING_DIR }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: release
|
||||
|
||||
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
|
||||
- uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ env.EFFECTIVE_WORKING_DIR }}/dist/
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Create Tag
|
||||
uses: ncipollo/release-action@339a81892b84b4eeb0f6e744e4574d79d0d9b8dd # v1
|
||||
- name: Create Release
|
||||
uses: ncipollo/release-action@v1
|
||||
if: ${{ inputs.working-directory == 'libs/langchain' }}
|
||||
with:
|
||||
artifacts: "dist/*"
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
generateReleaseNotes: false
|
||||
tag: ${{needs.build.outputs.pkg-name}}==${{ needs.build.outputs.version }}
|
||||
body: ${{ needs.release-notes.outputs.release-body }}
|
||||
commit: ${{ github.sha }}
|
||||
makeLatest: ${{ needs.build.outputs.pkg-name == 'langchain-core'}}
|
||||
draft: false
|
||||
generateReleaseNotes: true
|
||||
tag: v${{ needs.build.outputs.version }}
|
||||
commit: master
|
||||
|
||||
62
.github/workflows/_release_docker.yml
vendored
Normal file
62
.github/workflows/_release_docker.yml
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
name: release_docker
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
dockerfile:
|
||||
required: true
|
||||
type: string
|
||||
description: "Path to the Dockerfile to build"
|
||||
image:
|
||||
required: true
|
||||
type: string
|
||||
description: "Name of the image to build"
|
||||
|
||||
env:
|
||||
TEST_TAG: ${{ inputs.image }}:test
|
||||
LATEST_TAG: ${{ inputs.image }}:latest
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Get git tag
|
||||
uses: actions-ecosystem/action-get-latest-tag@v1
|
||||
id: get-latest-tag
|
||||
- name: Set docker tag
|
||||
env:
|
||||
VERSION: ${{ steps.get-latest-tag.outputs.tag }}
|
||||
run: |
|
||||
echo "VERSION_TAG=${{ inputs.image }}:${VERSION#v}" >> $GITHUB_ENV
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Build for Test
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: ${{ inputs.dockerfile }}
|
||||
load: true
|
||||
tags: ${{ env.TEST_TAG }}
|
||||
- name: Test
|
||||
run: |
|
||||
docker run --rm ${{ env.TEST_TAG }} python -c "import langchain"
|
||||
- name: Build and Push to Docker Hub
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: ${{ inputs.dockerfile }}
|
||||
# We can only build for the intersection of platforms supported by
|
||||
# QEMU and base python image, for now build only for
|
||||
# linux/amd64 and linux/arm64
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ env.LATEST_TAG }},${{ env.VERSION_TAG }}
|
||||
push: true
|
||||
77
.github/workflows/_test.yml
vendored
77
.github/workflows/_test.yml
vendored
@@ -1,7 +1,4 @@
|
||||
# Runs unit tests with both current and minimum supported dependency versions
|
||||
# to ensure compatibility across the supported range.
|
||||
|
||||
name: "🧪 Unit Testing"
|
||||
name: test
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -10,69 +7,57 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
langchain-location:
|
||||
required: false
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
POETRY_VERSION: "1.6.1"
|
||||
|
||||
jobs:
|
||||
# Main test job - runs unit tests with current deps, then retests with minimum versions
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
id: setup-python
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: test-${{ inputs.working-directory }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: core
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: uv sync --group test --dev
|
||||
run: poetry install --with test
|
||||
|
||||
- name: "🧪 Run Core Unit Tests"
|
||||
shell: bash
|
||||
run: |
|
||||
make test PYTEST_EXTRA=-q
|
||||
|
||||
- name: "🔍 Calculate Minimum Dependency Versions"
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
id: min-version
|
||||
shell: bash
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install packaging tomli requests
|
||||
python_version="$(uv run python --version | awk '{print $2}')"
|
||||
min_versions="$(uv run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml pull_request $python_version)"
|
||||
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
|
||||
echo "min-versions=$min_versions"
|
||||
|
||||
- name: "🧪 Run Tests with Minimum Dependencies"
|
||||
if: ${{ steps.min-version.outputs.min-versions != '' }}
|
||||
if: ${{ inputs.langchain-location }}
|
||||
env:
|
||||
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
|
||||
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
|
||||
run: |
|
||||
VIRTUAL_ENV=.venv uv pip install $MIN_VERSIONS
|
||||
make tests PYTEST_EXTRA=-q
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
poetry run pip install -e "$LANGCHAIN_LOCATION"
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
- name: Run core tests
|
||||
shell: bash
|
||||
run: |
|
||||
make test
|
||||
|
||||
- name: Ensure the tests did not create any additional files
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
73
.github/workflows/_test_pydantic.yml
vendored
73
.github/workflows/_test_pydantic.yml
vendored
@@ -1,73 +0,0 @@
|
||||
# Facilitate unit testing against different Pydantic versions for a provided package.
|
||||
|
||||
name: "🐍 Pydantic Version Testing"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: false
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
default: "3.12"
|
||||
pydantic-version:
|
||||
required: true
|
||||
type: string
|
||||
description: "Pydantic version to test."
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Pydantic ~=${{ inputs.pydantic-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: test-pydantic-${{ inputs.working-directory }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: "🔄 Install Specific Pydantic Version"
|
||||
shell: bash
|
||||
env:
|
||||
PYDANTIC_VERSION: ${{ inputs.pydantic-version }}
|
||||
run: VIRTUAL_ENV=.venv uv pip install "pydantic~=$PYDANTIC_VERSION"
|
||||
|
||||
- name: "🧪 Run Core Tests"
|
||||
shell: bash
|
||||
run: |
|
||||
make test
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
95
.github/workflows/_test_release.yml
vendored
Normal file
95
.github/workflows/_test_release.yml
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
name: test-release
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
PYTHON_VERSION: "3.10"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: github.ref == 'refs/heads/master'
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
outputs:
|
||||
pkg-name: ${{ steps.check-version.outputs.pkg-name }}
|
||||
version: ${{ steps.check-version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: release
|
||||
|
||||
# We want to keep this build stage *separate* from the release stage,
|
||||
# so that there's no sharing of permissions between them.
|
||||
# The release stage has trusted publishing and GitHub repo contents write access,
|
||||
# and we want to keep the scope of that access limited just to the release job.
|
||||
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
|
||||
# could get access to our GitHub or PyPI credentials.
|
||||
#
|
||||
# Per the trusted publishing GitHub Action:
|
||||
# > It is strongly advised to separate jobs for building [...]
|
||||
# > from the publish job.
|
||||
# https://github.com/pypa/gh-action-pypi-publish#non-goals
|
||||
- name: Build project for distribution
|
||||
run: poetry build
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: test-dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Check Version
|
||||
id: check-version
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
|
||||
echo version="$(poetry version --short)" >> $GITHUB_OUTPUT
|
||||
|
||||
publish:
|
||||
needs:
|
||||
- build
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# This permission is used for trusted publishing:
|
||||
# https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
|
||||
#
|
||||
# Trusted publishing has to also be configured on PyPI for each package:
|
||||
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: test-dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
- name: Publish to test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: ${{ inputs.working-directory }}/dist/
|
||||
verbose: true
|
||||
print-hash: true
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
|
||||
# We overwrite any existing distributions with the same name and version.
|
||||
# This is *only for CI use* and is *extremely dangerous* otherwise!
|
||||
# https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates
|
||||
skip-existing: true
|
||||
66
.github/workflows/_test_vcr.yml
vendored
66
.github/workflows/_test_vcr.yml
vendored
@@ -1,66 +0,0 @@
|
||||
# Runs VCR cassette-backed integration tests in playback-only mode.
|
||||
#
|
||||
# No API keys needed — catches stale cassettes caused by test input
|
||||
# changes without re-recording.
|
||||
#
|
||||
# Called as part of check_diffs.yml workflow.
|
||||
|
||||
name: "📼 VCR Cassette Tests"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: test-vcr-${{ inputs.working-directory }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: "📼 Run VCR Cassette Tests (playback-only)"
|
||||
shell: bash
|
||||
env:
|
||||
OPENAI_API_KEY: sk-fake
|
||||
run: make test_vcr
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
115
.github/workflows/auto-label-by-package.yml
vendored
115
.github/workflows/auto-label-by-package.yml
vendored
@@ -1,115 +0,0 @@
|
||||
name: Auto Label Issues by Package
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
label-by-package:
|
||||
permissions:
|
||||
issues: write
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Sync package labels
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.issue.body || "";
|
||||
|
||||
// Extract text under "## Package" or "### Package" (handles " (Required)" suffix and being last section)
|
||||
const match = body.match(/#{2,3} Package[^\n]*\n([\s\S]*?)(?:\n#{2,3} |$)/i);
|
||||
if (!match) {
|
||||
core.setFailed(
|
||||
`Could not find "## Package" section in issue #${context.issue.number} body. ` +
|
||||
`The issue template may have changed — update the regex in this workflow.`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const packageSection = match[1].trim();
|
||||
|
||||
// Mapping table for package names to labels
|
||||
const mapping = {
|
||||
"langchain": "langchain",
|
||||
"langchain-openai": "openai",
|
||||
"langchain-anthropic": "anthropic",
|
||||
"langchain-classic": "langchain-classic",
|
||||
"langchain-core": "core",
|
||||
"langchain-model-profiles": "model-profiles",
|
||||
"langchain-tests": "standard-tests",
|
||||
"langchain-text-splitters": "text-splitters",
|
||||
"langchain-chroma": "chroma",
|
||||
"langchain-deepseek": "deepseek",
|
||||
"langchain-exa": "exa",
|
||||
"langchain-fireworks": "fireworks",
|
||||
"langchain-groq": "groq",
|
||||
"langchain-huggingface": "huggingface",
|
||||
"langchain-mistralai": "mistralai",
|
||||
"langchain-nomic": "nomic",
|
||||
"langchain-ollama": "ollama",
|
||||
"langchain-openrouter": "openrouter",
|
||||
"langchain-perplexity": "perplexity",
|
||||
"langchain-qdrant": "qdrant",
|
||||
"langchain-xai": "xai",
|
||||
};
|
||||
|
||||
// All possible package labels we manage
|
||||
const allPackageLabels = Object.values(mapping);
|
||||
const selectedLabels = [];
|
||||
|
||||
// Check if this is checkbox format (multiple selection)
|
||||
const checkboxMatches = packageSection.match(/- \[x\]\s+([^\n\r]+)/gi);
|
||||
if (checkboxMatches) {
|
||||
// Handle checkbox format
|
||||
for (const match of checkboxMatches) {
|
||||
const packageName = match.replace(/- \[x\]\s+/i, '').trim();
|
||||
const label = mapping[packageName];
|
||||
if (label && !selectedLabels.includes(label)) {
|
||||
selectedLabels.push(label);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Handle dropdown format (single selection)
|
||||
const label = mapping[packageSection];
|
||||
if (label) {
|
||||
selectedLabels.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
// Get current issue labels
|
||||
const issue = await github.rest.issues.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number
|
||||
});
|
||||
|
||||
const currentLabels = issue.data.labels.map(label => label.name);
|
||||
const currentPackageLabels = currentLabels.filter(label => allPackageLabels.includes(label));
|
||||
|
||||
// Determine labels to add and remove
|
||||
const labelsToAdd = selectedLabels.filter(label => !currentPackageLabels.includes(label));
|
||||
const labelsToRemove = currentPackageLabels.filter(label => !selectedLabels.includes(label));
|
||||
|
||||
// Add new labels
|
||||
if (labelsToAdd.length > 0) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
labels: labelsToAdd
|
||||
});
|
||||
}
|
||||
|
||||
// Remove old labels
|
||||
for (const label of labelsToRemove) {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
name: label
|
||||
});
|
||||
}
|
||||
42
.github/workflows/check_agents_sync.yml
vendored
42
.github/workflows/check_agents_sync.yml
vendored
@@ -1,42 +0,0 @@
|
||||
# Ensures CLAUDE.md and AGENTS.md stay synchronized.
|
||||
#
|
||||
# These files contain the same development guidelines but are named differently
|
||||
# for compatibility with different AI coding assistants (Claude Code uses CLAUDE.md,
|
||||
# other tools may use AGENTS.md).
|
||||
|
||||
name: "🔄 Check CLAUDE.md / AGENTS.md Sync"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
paths:
|
||||
- "CLAUDE.md"
|
||||
- "AGENTS.md"
|
||||
pull_request:
|
||||
paths:
|
||||
- "CLAUDE.md"
|
||||
- "AGENTS.md"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-sync:
|
||||
name: "verify files are identical"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "🔍 Check CLAUDE.md and AGENTS.md are in sync"
|
||||
run: |
|
||||
if ! diff -q CLAUDE.md AGENTS.md > /dev/null 2>&1; then
|
||||
echo "❌ CLAUDE.md and AGENTS.md are out of sync!"
|
||||
echo ""
|
||||
echo "These files must contain identical content."
|
||||
echo "Differences:"
|
||||
echo ""
|
||||
diff --color=always CLAUDE.md AGENTS.md || true
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ CLAUDE.md and AGENTS.md are in sync"
|
||||
67
.github/workflows/check_core_versions.yml
vendored
67
.github/workflows/check_core_versions.yml
vendored
@@ -1,67 +0,0 @@
|
||||
# Ensures version numbers in pyproject.toml and version.py stay in sync.
|
||||
#
|
||||
# (Prevents releases with mismatched version numbers)
|
||||
|
||||
name: "🔍 Check Version Equality"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "libs/core/pyproject.toml"
|
||||
- "libs/core/langchain_core/version.py"
|
||||
- "libs/partners/anthropic/pyproject.toml"
|
||||
- "libs/partners/anthropic/langchain_anthropic/_version.py"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check_version_equality:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "✅ Verify pyproject.toml & version.py Match"
|
||||
run: |
|
||||
# Check core versions
|
||||
CORE_PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/core/pyproject.toml)
|
||||
CORE_VERSION_PY_VERSION=$(grep -Po '(?<=^VERSION = ")[^"]*' libs/core/langchain_core/version.py)
|
||||
|
||||
# Compare core versions
|
||||
if [ "$CORE_PYPROJECT_VERSION" != "$CORE_VERSION_PY_VERSION" ]; then
|
||||
echo "langchain-core versions in pyproject.toml and version.py do not match!"
|
||||
echo "pyproject.toml version: $CORE_PYPROJECT_VERSION"
|
||||
echo "version.py version: $CORE_VERSION_PY_VERSION"
|
||||
exit 1
|
||||
else
|
||||
echo "Core versions match: $CORE_PYPROJECT_VERSION"
|
||||
fi
|
||||
|
||||
# Check langchain_v1 versions
|
||||
LANGCHAIN_PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/langchain_v1/pyproject.toml)
|
||||
LANGCHAIN_INIT_PY_VERSION=$(grep -Po '(?<=^__version__ = ")[^"]*' libs/langchain_v1/langchain/__init__.py)
|
||||
|
||||
# Compare langchain_v1 versions
|
||||
if [ "$LANGCHAIN_PYPROJECT_VERSION" != "$LANGCHAIN_INIT_PY_VERSION" ]; then
|
||||
echo "langchain_v1 versions in pyproject.toml and __init__.py do not match!"
|
||||
echo "pyproject.toml version: $LANGCHAIN_PYPROJECT_VERSION"
|
||||
echo "version.py version: $LANGCHAIN_INIT_PY_VERSION"
|
||||
exit 1
|
||||
else
|
||||
echo "Langchain v1 versions match: $LANGCHAIN_PYPROJECT_VERSION"
|
||||
fi
|
||||
|
||||
# Check langchain-anthropic versions
|
||||
ANTHROPIC_PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/partners/anthropic/pyproject.toml)
|
||||
ANTHROPIC_VERSION_PY_VERSION=$(grep -Po '(?<=^__version__ = ")[^"]*' libs/partners/anthropic/langchain_anthropic/_version.py)
|
||||
|
||||
# Compare langchain-anthropic versions
|
||||
if [ "$ANTHROPIC_PYPROJECT_VERSION" != "$ANTHROPIC_VERSION_PY_VERSION" ]; then
|
||||
echo "langchain-anthropic versions in pyproject.toml and _version.py do not match!"
|
||||
echo "pyproject.toml version: $ANTHROPIC_PYPROJECT_VERSION"
|
||||
echo "_version.py version: $ANTHROPIC_VERSION_PY_VERSION"
|
||||
exit 1
|
||||
else
|
||||
echo "Langchain-anthropic versions match: $ANTHROPIC_PYPROJECT_VERSION"
|
||||
fi
|
||||
227
.github/workflows/check_diffs.yml
vendored
227
.github/workflows/check_diffs.yml
vendored
@@ -1,230 +1,47 @@
|
||||
# Primary CI workflow.
|
||||
#
|
||||
# Only runs against packages that have changed files.
|
||||
#
|
||||
# Runs:
|
||||
# - Linting (_lint.yml)
|
||||
# - Unit Tests (_test.yml)
|
||||
# - Pydantic compatibility tests (_test_pydantic.yml)
|
||||
# - Integration test compilation checks (_compile_integration_test.yml)
|
||||
# - Extended test suites that require additional dependencies
|
||||
#
|
||||
# Reports status to GitHub checks and PR status.
|
||||
|
||||
name: "🔧 CI"
|
||||
---
|
||||
name: Check library diffs
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
merge_group:
|
||||
paths:
|
||||
- ".github/actions/**"
|
||||
- ".github/tools/**"
|
||||
- ".github/workflows/**"
|
||||
- "libs/**"
|
||||
|
||||
# Optimizes CI performance by canceling redundant workflow runs
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
# There's no point in testing an outdated version of the code. GitHub only allows
|
||||
# a limited number of job runners to be active at the same time, so it's better to
|
||||
# cancel pointless jobs early so that more useful jobs can run sooner.
|
||||
# a limited number of job runners to be active at the same time, so it's better to cancel
|
||||
# pointless jobs early so that more useful jobs can run sooner.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
UV_NO_SYNC: "true"
|
||||
|
||||
jobs:
|
||||
# This job analyzes which files changed and creates a dynamic test matrix
|
||||
# to only run tests/lints for the affected packages, improving CI efficiency
|
||||
build:
|
||||
name: "Detect Changes & Set Matrix"
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: "📂 Get Changed Files"
|
||||
id: files
|
||||
uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
|
||||
- name: "🔍 Analyze Changed Files & Generate Build Matrix"
|
||||
id: set-matrix
|
||||
run: |
|
||||
python -m pip install packaging requests
|
||||
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
|
||||
python-version: '3.10'
|
||||
- id: files
|
||||
uses: Ana06/get-changed-files@v2.2.0
|
||||
- id: set-matrix
|
||||
run: echo "dirs-to-run=$(python .github/scripts/check_diff.py ${{ steps.files.outputs.all }})" >> $GITHUB_OUTPUT
|
||||
outputs:
|
||||
lint: ${{ steps.set-matrix.outputs.lint }}
|
||||
test: ${{ steps.set-matrix.outputs.test }}
|
||||
extended-tests: ${{ steps.set-matrix.outputs.extended-tests }}
|
||||
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
|
||||
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
||||
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
||||
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
|
||||
# Run linting only on packages that have changed files
|
||||
lint:
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.lint != '[]' }}
|
||||
dirs-to-run: ${{ steps.set-matrix.outputs.dirs-to-run }}
|
||||
ci:
|
||||
needs: [ build ]
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.lint) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_lint.yml
|
||||
working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
|
||||
uses: ./.github/workflows/_all_ci.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
working-directory: ${{ matrix.working-directory }}
|
||||
|
||||
# Run unit tests only on packages that have changed files
|
||||
test:
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.test != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.test) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Test compatibility with different Pydantic versions for affected packages
|
||||
test-pydantic:
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.test-pydantic != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.test-pydantic) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test_pydantic.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
pydantic-version: ${{ matrix.job-configs.pydantic-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Verify integration tests compile without actually running them (faster feedback)
|
||||
compile-integration-tests:
|
||||
name: "Compile Integration Tests"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.compile-integration-tests != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.compile-integration-tests) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_compile_integration_test.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run VCR cassette-backed integration tests in playback-only mode (no API keys)
|
||||
vcr-tests:
|
||||
name: "VCR Cassette Tests"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.vcr-tests != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test_vcr.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run extended test suites that require additional dependencies
|
||||
extended-tests:
|
||||
name: "Extended Tests"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.extended-tests != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
# note different variable for extended test dirs
|
||||
job-configs: ${{ fromJson(needs.build.outputs.extended-tests) }}
|
||||
fail-fast: false
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
cache-suffix: extended-tests-${{ matrix.job-configs.working-directory }}
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
|
||||
- name: "📦 Install Dependencies & Run Extended Tests"
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running extended tests, installing dependencies with uv..."
|
||||
uv venv
|
||||
uv sync --group test
|
||||
VIRTUAL_ENV=.venv uv pip install -r extended_testing_deps.txt
|
||||
VIRTUAL_ENV=.venv make extended_tests
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
|
||||
# Verify _release.yml dropdown options stay in sync with package directories
|
||||
check-release-options:
|
||||
name: "Validate Release Options"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: "📦 Install Dependencies"
|
||||
run: python -m pip install pyyaml pytest
|
||||
- name: "🔍 Check release dropdown matches packages"
|
||||
run: python -m pytest .github/scripts/test_release_options.py -v
|
||||
|
||||
# Final status check - ensures all required jobs passed before allowing merge
|
||||
ci_success:
|
||||
name: "✅ CI Success"
|
||||
needs:
|
||||
[
|
||||
build,
|
||||
lint,
|
||||
test,
|
||||
compile-integration-tests,
|
||||
vcr-tests,
|
||||
extended-tests,
|
||||
test-pydantic,
|
||||
check-release-options,
|
||||
]
|
||||
if: |
|
||||
always()
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
JOBS_JSON: ${{ toJSON(needs) }}
|
||||
RESULTS_JSON: ${{ toJSON(needs.*.result) }}
|
||||
EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}}
|
||||
steps:
|
||||
- name: "🎉 All Checks Passed"
|
||||
run: |
|
||||
echo $JOBS_JSON
|
||||
echo $RESULTS_JSON
|
||||
echo "Exiting with $EXIT_CODE"
|
||||
exit $EXIT_CODE
|
||||
|
||||

106  .github/workflows/close_unchecked_issues.yml  vendored
@@ -1,106 +0,0 @@
# Auto-close issues that bypass or ignore the issue template checkboxes.
#
# GitHub issue forms enforce `required: true` checkboxes in the web UI,
# but the API bypasses form validation entirely — bots/scripts can open
# issues with every box unchecked or skip the template altogether.
#
# Rules:
#   1. Checkboxes present, none checked → close
#   2. No checkboxes at all → close unless author is an org member or bot
#
# Org membership check reuses the shared helper from pr-labeler.js and
# the same GitHub App used by tag-external-issues.yml.

name: Close Unchecked Issues

on:
  issues:
    types: [opened]

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.issue.number }}
  cancel-in-progress: true

jobs:
  check-boxes:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      issues: write

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
        with:
          app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
          private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}

      - name: Validate issue checkboxes
        if: steps.app-token.outcome == 'success'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          github-token: ${{ steps.app-token.outputs.token }}
          script: |
            const body = context.payload.issue.body ?? '';
            const checked = (body.match(/- \[x\]/gi) || []).length;

            if (checked > 0) {
              console.log(`Found ${checked} checked checkbox(es) — OK`);
              return;
            }

            const unchecked = (body.match(/- \[ \]/g) || []).length;

            // No checkboxes at all — allow org members and bots, close everyone else
            if (unchecked === 0) {
              const { owner, repo } = context.repo;
              const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

              const author = context.payload.sender.login;
              const { isExternal } = await h.checkMembership(
                author, context.payload.sender.type,
              );

              if (!isExternal) {
                console.log(`No checkboxes, but ${author} is internal — OK`);
                return;
              }
              console.log(`No checkboxes and ${author} is external — closing`);
            } else {
              console.log(`Found 0 checked and ${unchecked} unchecked checkbox(es) — closing`);
            }

            const { owner, repo } = context.repo;
            const issue_number = context.payload.issue.number;

            const reason = unchecked > 0
              ? 'none of the required checkboxes were checked'
              : 'no issue template was used';

            // Close before commenting — a closed issue without a comment is
            // less confusing than an open issue with a false "auto-closed" message
            // if the second API call fails.
            await github.rest.issues.update({
              owner,
              repo,
              issue_number,
              state: 'closed',
              state_reason: 'not_planned',
            });

            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number,
              body: [
                `This issue was automatically closed because ${reason}.`,
                '',
                `Please use one of the [issue templates](https://github.com/${owner}/${repo}/issues/new/choose) and complete the checklist.`,
              ].join('\n'),
            });
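
The two regexes in the github-script step fully determine the outcome. A rough Python restaging of the classification, assuming GitHub issue-form checkboxes render as `- [ ]` / `- [x]` markdown in the issue body:

```python
import re

def classify(body: str) -> str:
    checked = len(re.findall(r"- \[x\]", body, re.IGNORECASE))
    unchecked = len(re.findall(r"- \[ \]", body))
    if checked > 0:
        return "ok"  # at least one box ticked: nothing to do
    return "no-template" if unchecked == 0 else "all-unchecked"

assert classify("- [X] I searched existing issues") == "ok"
assert classify("- [ ] a\n- [ ] b") == "all-unchecked"      # rule 1: close
assert classify("free-form text, no template") == "no-template"  # rule 2: close unless internal/bot
```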

36  .github/workflows/codespell.yml  vendored  Normal file
@@ -0,0 +1,36 @@
---
name: Codespell

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Dependencies
        run: |
          pip install toml

      - name: Extract Ignore Words List
        run: |
          # Use a Python script to extract the ignore words list from pyproject.toml
          python .github/workflows/extract_ignored_words_list.py
        id: extract_ignore_words

      - name: Codespell
        uses: codespell-project/actions-codespell@v2
        with:
          skip: guide_imports.json
          ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}

83  .github/workflows/codspeed.yml  vendored
@@ -1,83 +0,0 @@
# CodSpeed performance benchmarks.
#
# Runs benchmarks on changed packages and uploads results to CodSpeed.
# Separated from the main CI workflow so that push-to-master baseline runs
# are never cancelled by subsequent merges (cancel-in-progress is only
# enabled for pull_request events).

name: "⚡ CodSpeed"

on:
  push:
    branches: [master]
  pull_request:

# On PRs, cancel stale runs when new commits are pushed.
# On push-to-master, never cancel — these runs populate CodSpeed baselines.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'push' && github.sha || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

permissions:
  contents: read

env:
  UV_FROZEN: "true"
  UV_NO_SYNC: "true"

jobs:
  build:
    name: "Detect Changes"
    runs-on: ubuntu-latest
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
    steps:
      - name: "📋 Checkout Code"
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: "🐍 Setup Python 3.11"
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: "3.11"
      - name: "📂 Get Changed Files"
        id: files
        uses: Ana06/get-changed-files@25f79e676e7ea1868813e21465014798211fad8c # v2.3.0
      - name: "🔍 Analyze Changed Files"
        id: set-matrix
        run: |
          python -m pip install packaging requests
          python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
    outputs:
      codspeed: ${{ steps.set-matrix.outputs.codspeed }}

  benchmarks:
    name: "⚡ CodSpeed Benchmarks"
    needs: [build]
    if: ${{ needs.build.outputs.codspeed != '[]' }}
    runs-on: codspeed-macro
    strategy:
      matrix:
        job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
      fail-fast: false
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: "📦 Install UV Package Manager"
        uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
        with:
          # Pinned to 3.13.11 to work around CodSpeed walltime segfault on 3.13.12+
          # See: https://github.com/CodSpeedHQ/pytest-codspeed/issues/106
          python-version: "3.13.11"

      - name: "📦 Install Test Dependencies"
        run: uv sync --group test
        working-directory: ${{ matrix.job-configs.working-directory }}

      - name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
        uses: CodSpeedHQ/action@a50965600eafa04edcd6717761f55b77e52aafbd # v4
        with:
          token: ${{ secrets.CODSPEED_TOKEN }}
          run: |
            cd ${{ matrix.job-configs.working-directory }}
            if [ "${{ matrix.job-configs.working-directory }}" = "libs/core" ] || [ "${{ matrix.job-configs.working-directory }}" = "libs/langchain_v1" ]; then
              uv run --no-sync pytest ./tests/benchmarks --codspeed
            fi
          mode: ${{ matrix.job-configs.codspeed-mode }}
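
The concurrency group expression is what keeps master baselines safe: pushes key on the immutable commit SHA, so every baseline run gets its own group and is never cancelled, while PRs key on the branch ref, so a newer push replaces the stale run. A toy Python model of the group key (illustrative, not part of the workflow):

```python
def concurrency_group(workflow: str, event: str, sha: str, ref: str) -> str:
    # push-to-master: unique per commit; pull_request: shared per branch ref
    return f"{workflow}-{sha if event == 'push' else ref}"

assert concurrency_group("CodSpeed", "push", "abc123", "refs/heads/master") == "CodSpeed-abc123"
assert concurrency_group("CodSpeed", "pull_request", "abc123", "refs/pull/7/merge") \
    == "CodSpeed-refs/pull/7/merge"
```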

35  .github/workflows/doc_lint.yml  vendored  Normal file
@@ -0,0 +1,35 @@
---
name: Docs, templates, cookbook lint

on:
  push:
    branches: [ master ]
  pull_request:
    paths:
      - 'docs/**'
      - 'templates/**'
      - 'cookbook/**'
      - '.github/workflows/_lint.yml'
      - '.github/workflows/doc_lint.yml'
  workflow_dispatch:

jobs:
  check:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run import check
        run: |
          # We should not encourage imports directly from the main init file
          # Except for hub
          git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0

  lint:
    uses:
      ./.github/workflows/_lint.yml
    with:
      working-directory: "."
    secrets: inherit
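
The import check leans on shell semantics: `grep -vE` filters out the allowlisted `hub` import, and the trailing `&& exit 1 || exit 0` inverts the pipeline's status so that any surviving match fails the step. A Python model of the same decision (a sketch, not part of the workflow):

```python
import re

ALLOWED = re.compile(r"from langchain import (hub)")

def should_fail(lines: list[str]) -> bool:
    # Any `from langchain import ...` line not covered by the allowlist fails the check.
    hits = [line for line in lines if "from langchain import" in line]
    return any(not ALLOWED.search(line) for line in hits)

assert not should_fail(["from langchain import hub"])
assert should_fail(["from langchain import OpenAI"])
```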

10  .github/workflows/extract_ignored_words_list.py  vendored  Normal file
@@ -0,0 +1,10 @@
import toml

pyproject_toml = toml.load("pyproject.toml")

# Extract the ignore words list (adjust the key as per your TOML structure)
ignore_words_list = (
    pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list")
)

print(f"::set-output name=ignore_words_list::{ignore_words_list}")
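
One caveat worth noting: the script's final line uses the `::set-output` workflow command, which GitHub Actions has deprecated in favor of appending to the file named by `$GITHUB_OUTPUT`. A drop-in sketch of the modern equivalent for that last line:

```python
import os

def set_output(name: str, value: str) -> None:
    # Replacement for `print(f"::set-output name={name}::{value}")` on
    # runners where the deprecated workflow command is disabled.
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"{name}={value}\n")

# set_output("ignore_words_list", ignore_words_list)
```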

301  .github/workflows/integration_tests.yml  vendored
@@ -1,301 +0,0 @@
# Routine integration tests against partner libraries with live API credentials.
#
# Uses `make integration_tests` within each library being tested.
#
# Runs daily with the option to trigger manually.

name: "⏰ Integration Tests"
run-name: "Run Integration Tests - ${{ inputs.working-directory-override || (inputs.working-directory != 'all' && inputs.working-directory) || 'all libs' }} (Python ${{ inputs.python-version-override || '3.10, 3.13' }})"

on:
  workflow_dispatch:
    inputs:
      working-directory:
        type: choice
        description: "Library to test (select from dropdown)"
        default: "all"
        options:
          - "all"
          - "libs/core"
          - "libs/langchain"
          - "libs/langchain_v1"
          - "libs/text-splitters"
          - "libs/standard-tests"
          - "libs/model-profiles"
          - "libs/partners/anthropic"
          - "libs/partners/chroma"
          - "libs/partners/deepseek"
          - "libs/partners/exa"
          - "libs/partners/fireworks"
          - "libs/partners/groq"
          - "libs/partners/huggingface"
          - "libs/partners/mistralai"
          - "libs/partners/nomic"
          - "libs/partners/ollama"
          - "libs/partners/openai"
          - "libs/partners/openrouter"
          - "libs/partners/perplexity"
          - "libs/partners/qdrant"
          - "libs/partners/xai"
      working-directory-override:
        type: string
        description: "Manual override — takes precedence over dropdown (e.g. libs/partners/partner-xyz)"
      python-version-override:
        type: string
        description: "Python version override — defaults to 3.10 and 3.13 in matrix (e.g. 3.11)"
  schedule:
    - cron: "0 13 * * *" # Runs daily at 1PM UTC (9AM EDT/6AM PDT)

permissions:
  contents: read

env:
  UV_FROZEN: "true"
  DEFAULT_LIBS: >-
    ["libs/partners/openai",
    "libs/partners/anthropic",
    "libs/partners/fireworks",
    "libs/partners/groq",
    "libs/partners/mistralai",
    "libs/partners/xai",
    "libs/partners/google-vertexai",
    "libs/partners/google-genai",
    "libs/partners/aws"]

jobs:
  # Generate dynamic test matrix based on input parameters or defaults
  # Only runs on the main repo (for scheduled runs) or when manually triggered
  compute-matrix:
    # Defend against forks running scheduled jobs, but allow manual runs from forks
    if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'

    runs-on: ubuntu-latest
    name: "📋 Compute Test Matrix"
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      python-version-min-3-11: ${{ steps.set-matrix.outputs.python-version-min-3-11 }}
    steps:
      - name: "🔢 Generate Python & Library Matrix"
        id: set-matrix
        env:
          DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
          WORKING_DIRECTORY_OVERRIDE: ${{ github.event.inputs.working-directory-override || '' }}
          WORKING_DIRECTORY_CHOICE: ${{ github.event.inputs.working-directory || 'all' }}
          PYTHON_VERSION_OVERRIDE: ${{ github.event.inputs.python-version-override || '' }}
        run: |
          # echo "matrix=..." where matrix is a json formatted str with keys python-version and working-directory
          # python-version defaults to 3.10 and 3.13, overridden to [PYTHON_VERSION_OVERRIDE] if set
          # working-directory priority: override string > dropdown choice > DEFAULT_LIBS
          python_version='["3.10", "3.13"]'
          python_version_min_3_11='["3.11", "3.13"]'
          working_directory="$DEFAULT_LIBS"
          if [ -n "$PYTHON_VERSION_OVERRIDE" ]; then
            python_version="[\"$PYTHON_VERSION_OVERRIDE\"]"
            # Bound override version to >= 3.11 for packages requiring it
            if [ "$(echo "$PYTHON_VERSION_OVERRIDE >= 3.11" | bc -l)" -eq 1 ]; then
              python_version_min_3_11="[\"$PYTHON_VERSION_OVERRIDE\"]"
            else
              python_version_min_3_11='["3.11"]'
            fi
          fi
          if [ -n "$WORKING_DIRECTORY_OVERRIDE" ]; then
            working_directory="[\"$WORKING_DIRECTORY_OVERRIDE\"]"
          elif [ "$WORKING_DIRECTORY_CHOICE" != "all" ]; then
            working_directory="[\"$WORKING_DIRECTORY_CHOICE\"]"
          fi
          matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
          echo "$matrix"
          echo "matrix=$matrix" >> $GITHUB_OUTPUT
          echo "python-version-min-3-11=$python_version_min_3_11" >> $GITHUB_OUTPUT

  # Run integration tests against partner libraries with live API credentials
  integration-tests:
    if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
    name: "🐍 Python ${{ matrix.python-version }}: ${{ matrix.working-directory }}"
    runs-on: ubuntu-latest
    needs: [compute-matrix]
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        python-version: ${{ fromJSON(needs.compute-matrix.outputs.matrix).python-version }}
        working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          path: langchain

      # These libraries exist outside of the monorepo and need to be checked out separately
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          repository: langchain-ai/langchain-google
          path: langchain-google
      - name: "🔐 Authenticate to Google Cloud"
        id: "auth"
        uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
        with:
          credentials_json: "${{ secrets.GOOGLE_CREDENTIALS }}"
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          repository: langchain-ai/langchain-aws
          path: langchain-aws
      - name: "🔐 Configure AWS Credentials"
        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}
      - name: "📦 Organize External Libraries"
        run: |
          rm -rf \
            langchain/libs/partners/google-genai \
            langchain/libs/partners/google-vertexai
          mv langchain-google/libs/genai langchain/libs/partners/google-genai
          mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
          mv langchain-aws/libs/aws langchain/libs/partners/aws

      - name: "🐍 Set up Python ${{ matrix.python-version }} + UV"
        uses: "./langchain/.github/actions/uv_setup"
        with:
          python-version: ${{ matrix.python-version }}

      - name: "📦 Install Dependencies"
        # Partner packages use [tool.uv.sources] in their pyproject.toml to resolve
        # langchain-core/langchain to local editable installs, so `uv sync` automatically
        # tests against the versions from the current branch (not published releases).

        # TODO: external google/aws don't have local resolution since they live in
        # separate repos, so they pull `core`/`langchain_v1` from PyPI. We should update
        # their dev groups to use git source dependencies pointing to the current
        # branch's latest commit SHA to fully test against local langchain changes.
        run: |
          echo "Running scheduled tests, installing dependencies with uv..."
          cd langchain/${{ matrix.working-directory }}
          uv sync --group test --group test_integration

      - name: "🚀 Run Integration Tests"
        # WARNING: All secrets below are available to every matrix job regardless of
        # which package is being tested. This is intentional for simplicity, but means
        # any test file could technically access any key. Only use for trusted code.
        env:
          LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}

          AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
          ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
          ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
          AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
          AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
          AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
          AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
          AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
          AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
          DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
          ES_URL: ${{ secrets.ES_URL }}
          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
          ES_API_KEY: ${{ secrets.ES_API_KEY }}
          EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
          NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
          OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
          UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
          WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
        run: |
          cd langchain/${{ matrix.working-directory }}
          make integration_tests

      - name: "🧹 Clean up External Libraries"
        # Clean up external libraries to avoid affecting the following git status check
        run: |
          rm -rf \
            langchain/libs/partners/google-genai \
            langchain/libs/partners/google-vertexai \
            langchain/libs/partners/aws

      - name: "🧹 Verify Clean Working Directory"
        working-directory: langchain
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'

  # Test dependent packages against local packages to catch breaking changes
  test-dependents:
    # Defend against forks running scheduled jobs, but allow manual runs from forks
    if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'

    name: "🐍 Python ${{ matrix.python-version }}: ${{ matrix.package.path }}"
    runs-on: ubuntu-latest
    needs: [compute-matrix]
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        # deepagents requires Python >= 3.11, use bounded version from compute-matrix
        python-version: ${{ fromJSON(needs.compute-matrix.outputs.python-version-min-3-11) }}
        package:
          - name: deepagents
            repo: langchain-ai/deepagents
            path: libs/deepagents

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          path: langchain

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          repository: ${{ matrix.package.repo }}
          path: ${{ matrix.package.name }}

      - name: "🐍 Set up Python ${{ matrix.python-version }} + UV"
        uses: "./langchain/.github/actions/uv_setup"
        with:
          python-version: ${{ matrix.python-version }}

      - name: "📦 Install ${{ matrix.package.name }} with Local"
        # Unlike partner packages (which use [tool.uv.sources] for local resolution),
        # external dependents live in separate repos and need explicit overrides to
        # test against the langchain versions from the current branch, as their
        # pyproject.toml files point to released versions.
        run: |
          cd ${{ matrix.package.name }}/${{ matrix.package.path }}

          # Install the package with test dependencies
          uv sync --group test

          # Override langchain packages with local versions
          uv pip install \
            -e $GITHUB_WORKSPACE/langchain/libs/core \
            -e $GITHUB_WORKSPACE/langchain/libs/langchain_v1

      # No API keys needed for now - deepagents `make test` only runs unit tests
      - name: "🚀 Run ${{ matrix.package.name }} Tests"
        run: |
          cd ${{ matrix.package.name }}/${{ matrix.package.path }}
          make test
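
One subtlety in the compute-matrix step: `bc -l` compares the override as a plain decimal, so a hypothetical input of `3.9` would evaluate as 3.90 and wrongly satisfy `>= 3.11`. It happens to be safe for the versions currently offered (3.10 through 3.13 all compare correctly), but a component-wise comparison avoids the trap entirely; a hedged Python sketch:

```python
def version_at_least(version: str, minimum: str) -> bool:
    # Compare "major.minor[.patch]" strings component by component,
    # not as decimal numbers.
    parse = lambda v: tuple(int(p) for p in v.split("."))
    return parse(version) >= parse(minimum)

assert version_at_least("3.11", "3.11")
assert version_at_least("3.13", "3.11")
assert not version_at_least("3.9", "3.11")  # `bc -l` would get this wrong: 3.90 >= 3.11
```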

13  .github/workflows/langchain_cli_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: libs/cli Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/cli
    secrets: inherit

13  .github/workflows/langchain_community_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: libs/community Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/community
    secrets: inherit

13  .github/workflows/langchain_core_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: libs/core Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/core
    secrets: inherit

13  .github/workflows/langchain_experimental_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: libs/experimental Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/experimental
    secrets: inherit

13  .github/workflows/langchain_experimental_test_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: Experimental Test Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_test_release.yml
    with:
      working-directory: libs/experimental
    secrets: inherit

13  .github/workflows/langchain_openai_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: libs/partners/openai Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/partners/openai
    secrets: inherit

27  .github/workflows/langchain_release.yml  vendored  Normal file
@@ -0,0 +1,27 @@
---
name: libs/langchain Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_release.yml
    with:
      working-directory: libs/langchain
    secrets: inherit

  # N.B.: It's possible that PyPI doesn't make the new release visible / available
  # immediately after publishing. If that happens, the docker build might not
  # create a new docker image for the new release, since it won't see it.
  #
  # If this ends up being a problem, add a check to the end of the `_release.yml`
  # workflow that prevents the workflow from finishing until the new release
  # is visible and installable on PyPI.
  release-docker:
    needs:
      - release
    uses:
      ./.github/workflows/langchain_release_docker.yml
    secrets: inherit

14  .github/workflows/langchain_release_docker.yml  vendored  Normal file
@@ -0,0 +1,14 @@
---
name: docker/langchain/langchain Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
  workflow_call: # Allows triggering from another workflow

jobs:
  release:
    uses: ./.github/workflows/_release_docker.yml
    with:
      dockerfile: docker/Dockerfile.base
      image: langchain/langchain
    secrets: inherit

13  .github/workflows/langchain_test_release.yml  vendored  Normal file
@@ -0,0 +1,13 @@
---
name: Test Release

on:
  workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI

jobs:
  release:
    uses:
      ./.github/workflows/_test_release.yml
    with:
      working-directory: libs/langchain
    secrets: inherit

213  .github/workflows/pr_labeler.yml  vendored
@@ -1,213 +0,0 @@
# Unified PR labeler — applies size, file-based, title-based, and
# contributor classification labels in a single sequential workflow.
#
# Consolidates pr_labeler_file.yml, pr_labeler_title.yml,
# pr_size_labeler.yml, and PR-handling from tag-external-contributions.yml
# into one workflow to eliminate race conditions from concurrent label
# mutations. tag-external-issues.yml remains active for issue-only
# labeling. Backfill lives in pr_labeler_backfill.yml.
#
# Config and shared logic live in .github/scripts/pr-labeler-config.json
# and .github/scripts/pr-labeler.js — update those when adding partners.
#
# Setup Requirements:
#   1. Create a GitHub App with permissions:
#      - Repository: Pull requests (write)
#      - Repository: Issues (write)
#      - Organization: Members (read)
#   2. Install the app on your organization and this repository
#   3. Add these repository secrets:
#      - ORG_MEMBERSHIP_APP_ID: Your app's ID
#      - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
#
# The GitHub App token is required to check private organization membership
# and to propagate label events to downstream workflows.

name: "🏷️ PR Labeler"

on:
  # Safe since we're not checking out or running the PR's code.
  # NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
  # Doing so would allow attackers to execute arbitrary code in the context of your repository.
  pull_request_target:
    types: [opened, synchronize, reopened, edited]

permissions:
  contents: read

concurrency:
  # Separate opened events so external/tier labels are never lost to cancellation
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'opened' && 'opened' || 'update' }}
  cancel-in-progress: ${{ github.event.action != 'opened' }}

jobs:
  label:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      # Checks out the BASE branch (safe for pull_request_target — never
      # the PR head). Needed to load .github/scripts/pr-labeler*.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Generate GitHub App token
        if: github.event.action == 'opened'
        id: app-token
        uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
        with:
          app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
          private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}

      - name: Verify App token
        if: github.event.action == 'opened'
        run: |
          if [ -z "${{ steps.app-token.outputs.token }}" ]; then
            echo "::error::GitHub App token generation failed — cannot classify contributor"
            exit 1
          fi

      - name: Check org membership
        if: github.event.action == 'opened'
        id: check-membership
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          github-token: ${{ steps.app-token.outputs.token }}
          script: |
            const { owner, repo } = context.repo;
            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

            const author = context.payload.sender.login;
            const { isExternal } = await h.checkMembership(
              author, context.payload.sender.type,
            );
            core.setOutput('is-external', isExternal ? 'true' : 'false');

      - name: Apply PR labels
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        env:
          IS_EXTERNAL: ${{ steps.check-membership.outputs.is-external }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { owner, repo } = context.repo;
            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

            const pr = context.payload.pull_request;
            if (!pr) return;
            const prNumber = pr.number;
            const action = context.payload.action;

            const toAdd = new Set();
            const toRemove = new Set();

            const currentLabels = (await github.paginate(
              github.rest.issues.listLabelsOnIssue,
              { owner, repo, issue_number: prNumber, per_page: 100 },
            )).map(l => l.name ?? '');

            // ── Size + file labels (skip on 'edited' — files unchanged) ──
            if (action !== 'edited') {
              for (const sl of h.sizeLabels) await h.ensureLabel(sl);

              const files = await github.paginate(github.rest.pulls.listFiles, {
                owner, repo, pull_number: prNumber, per_page: 100,
              });

              const { totalChanged, sizeLabel } = h.computeSize(files);
              toAdd.add(sizeLabel);
              for (const sl of h.sizeLabels) {
                if (currentLabels.includes(sl) && sl !== sizeLabel) toRemove.add(sl);
              }
              console.log(`Size: ${totalChanged} changed lines → ${sizeLabel}`);

              for (const label of h.matchFileLabels(files)) {
                toAdd.add(label);
              }
            }

            // ── Title-based labels ──
            const { labels: titleLabels, typeLabel } = h.matchTitleLabels(pr.title || '');
            for (const label of titleLabels) toAdd.add(label);

            // Remove stale type labels only when a type was detected
            if (typeLabel) {
              for (const tl of h.allTypeLabels) {
                if (currentLabels.includes(tl) && !titleLabels.has(tl)) toRemove.add(tl);
              }
            }

            // ── Internal label (only on open, non-external contributors) ──
            // IS_EXTERNAL is empty string on non-opened events (step didn't
            // run), so this guard is only true for opened + internal.
            if (action === 'opened' && process.env.IS_EXTERNAL === 'false') {
              toAdd.add('internal');
            }

            // ── Apply changes ──
            // Ensure all labels we're about to add exist (addLabels returns
            // 422 if any label in the batch is missing, which would prevent
            // ALL labels from being applied).
            for (const name of toAdd) {
              await h.ensureLabel(name);
            }

            for (const name of toRemove) {
              if (toAdd.has(name)) continue;
              try {
                await github.rest.issues.removeLabel({
                  owner, repo, issue_number: prNumber, name,
                });
              } catch (e) {
                if (e.status !== 404) throw e;
              }
            }

            const addList = [...toAdd];
            if (addList.length > 0) {
              await github.rest.issues.addLabels({
                owner, repo, issue_number: prNumber, labels: addList,
              });
            }

            const removed = [...toRemove].filter(r => !toAdd.has(r));
            console.log(`PR #${prNumber}: +[${addList.join(', ')}] -[${removed.join(', ')}]`);

      # Apply tier label BEFORE the external label so that
      # "trusted-contributor" is already present when the "external" labeled
      # event fires and triggers require_issue_link.yml.
      - name: Apply contributor tier label
        if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          github-token: ${{ steps.app-token.outputs.token }}
          script: |
            const { owner, repo } = context.repo;
            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

            const pr = context.payload.pull_request;
            await h.applyTierLabel(pr.number, pr.user.login);

      - name: Add external label
        if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          # Use App token so the "labeled" event propagates to downstream
          # workflows (e.g. require_issue_link.yml). Events created by the
          # default GITHUB_TOKEN do not trigger additional workflow runs.
          github-token: ${{ steps.app-token.outputs.token }}
          script: |
            const { owner, repo } = context.repo;
            const prNumber = context.payload.pull_request.number;

            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

            await h.ensureLabel('external');
            await github.rest.issues.addLabels({
              owner, repo,
              issue_number: prNumber,
              labels: ['external'],
            });
            console.log(`Added 'external' label to PR #${prNumber}`);
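
The add/remove bookkeeping in the "Apply PR labels" script follows one precedence rule: anything queued in `toAdd` survives, and `toRemove` only strips labels that are currently set and not being re-added. A minimal Python model of that reconciliation (the `size:*` names below are hypothetical; the real ones come from `h.sizeLabels`):

```python
def reconcile(current: set[str], to_add: set[str], to_remove: set[str]) -> set[str]:
    # Adds win over removes, and removes only touch labels actually present.
    removed = {label for label in to_remove if label in current and label not in to_add}
    return (current - removed) | to_add

# A PR whose size changed from L to M: the stale size label goes, the new one stays.
assert reconcile({"size:L", "core"}, {"size:M"}, {"size:L", "size:M"}) == {"size:M", "core"}
```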

130  .github/workflows/pr_labeler_backfill.yml  vendored
@@ -1,130 +0,0 @@
# Backfill PR labels on all open PRs.
#
# Manual-only workflow that applies the same labels as pr_labeler.yml
# (size, file, title, contributor classification) to existing open PRs.
# Reuses shared logic from .github/scripts/pr-labeler.js.

name: "🏷️ PR Labeler Backfill"

on:
  workflow_dispatch:
    inputs:
      max_items:
        description: "Maximum number of open PRs to process"
        default: "100"
        type: string

permissions:
  contents: read

jobs:
  backfill:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
        with:
          app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
          private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}

      - name: Backfill labels on open PRs
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          github-token: ${{ steps.app-token.outputs.token }}
          script: |
            const { owner, repo } = context.repo;
            const rawMax = '${{ inputs.max_items }}';
            const maxItems = parseInt(rawMax, 10);
            if (isNaN(maxItems) || maxItems <= 0) {
              core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
              return;
            }

            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);

            for (const name of [...h.sizeLabels, ...h.tierLabels]) {
              await h.ensureLabel(name);
            }

            const contributorCache = new Map();
            const fileRules = h.buildFileRules();

            const prs = await github.paginate(github.rest.pulls.list, {
              owner, repo, state: 'open', per_page: 100,
            });

            let processed = 0;
            let failures = 0;
            for (const pr of prs) {
              if (processed >= maxItems) break;
              try {
                const author = pr.user.login;
                const info = await h.getContributorInfo(contributorCache, author, pr.user.type);
                const labels = new Set();

                labels.add(info.isExternal ? 'external' : 'internal');
                if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
                  labels.add('trusted-contributor');
                } else if (info.isExternal && info.mergedCount === 0) {
                  labels.add('new-contributor');
                }

                // Size + file labels
                const files = await github.paginate(github.rest.pulls.listFiles, {
                  owner, repo, pull_number: pr.number, per_page: 100,
                });
                const { sizeLabel } = h.computeSize(files);
                labels.add(sizeLabel);

                for (const label of h.matchFileLabels(files, fileRules)) {
                  labels.add(label);
                }

                // Title labels
                const { labels: titleLabels } = h.matchTitleLabels(pr.title ?? '');
                for (const tl of titleLabels) labels.add(tl);

                // Ensure all labels exist before batch add
                for (const name of labels) {
                  await h.ensureLabel(name);
                }

                // Remove stale managed labels
                const currentLabels = (await github.paginate(
                  github.rest.issues.listLabelsOnIssue,
                  { owner, repo, issue_number: pr.number, per_page: 100 },
                )).map(l => l.name ?? '');

                const managed = [...h.sizeLabels, ...h.tierLabels, ...h.allTypeLabels];
                for (const name of currentLabels) {
                  if (managed.includes(name) && !labels.has(name)) {
                    try {
                      await github.rest.issues.removeLabel({
                        owner, repo, issue_number: pr.number, name,
                      });
                    } catch (e) {
                      if (e.status !== 404) throw e;
                    }
                  }
                }

                await github.rest.issues.addLabels({
                  owner, repo, issue_number: pr.number, labels: [...labels],
                });
                console.log(`PR #${pr.number} (${author}): ${[...labels].join(', ')}`);
                processed++;
              } catch (e) {
                failures++;
                core.warning(`Failed to process PR #${pr.number}: ${e.message}`);
              }
            }

            console.log(`\nBackfill complete. Processed ${processed} PRs, ${failures} failures. ${contributorCache.size} unique authors.`);
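
The backfill's contributor-tier branch can be summarized as a small pure function. A sketch follows; note that the actual threshold lives in `h.trustedThreshold`, loaded from `.github/scripts/pr-labeler-config.json`, which is not part of this diff, so the value 5 below is only a placeholder:

```python
def tier_labels(is_external: bool, merged_count: int | None, threshold: int) -> set[str]:
    # threshold is a placeholder for h.trustedThreshold (config not shown in this diff)
    labels = {"external" if is_external else "internal"}
    if is_external and merged_count is not None and merged_count >= threshold:
        labels.add("trusted-contributor")
    elif is_external and merged_count == 0:
        labels.add("new-contributor")
    return labels

assert tier_labels(False, None, 5) == {"internal"}
assert tier_labels(True, 7, 5) == {"external", "trusted-contributor"}
assert tier_labels(True, 0, 5) == {"external", "new-contributor"}
```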

128  .github/workflows/pr_lint.yml  vendored
@@ -1,128 +0,0 @@
# PR title linting.
#
# FORMAT (Conventional Commits 1.0.0):
#
#   <type>[optional scope]: <description>
#   [optional body]
#   [optional footer(s)]
#
# Examples:
#   feat(core): add multi-tenant support
#   fix(langchain): resolve error
#   docs: update API usage examples
#   docs(openai): update API usage examples
#
# Allowed Types:
#   * feat — a new feature (MINOR)
#   * fix — a bug fix (PATCH)
#   * docs — documentation only changes
#   * style — formatting, linting, etc.; no code change or typing refactors
#   * refactor — code change that neither fixes a bug nor adds a feature
#   * perf — code change that improves performance
#   * test — adding tests or correcting existing
#   * build — changes that affect the build system/external dependencies
#   * ci — continuous integration/configuration changes
#   * chore — other changes that don't modify source or test files
#   * revert — reverts a previous commit
#   * release — prepare a new release
#   * hotfix — urgent fix
#
# Allowed Scope(s) (optional):
#   core, langchain, langchain-classic, model-profiles,
#   standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
#   fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
#   perplexity, qdrant, xai, infra, deps, partners
#
# Multiple scopes can be used by separating them with a comma. For example:
#
#   feat(core,langchain): add multi-tenant support to core and langchain
#
# Note: PRs touching the langchain package should use the 'langchain' scope. It is not
# acceptable to omit the scope for changes to the langchain package, despite it being
# the main package & name of the repo.
#
# Rules:
#   1. The 'Type' must start with a lowercase letter.
#   2. Breaking changes: append "!" after type/scope (e.g., feat!: drop x support)
#   3. When releasing (updating the pyproject.toml and uv.lock), the commit message
#      should be: `release(scope): x.y.z` (e.g., `release(core): 1.2.0` with no
#      body, footer, or preceding/following text).
#
# Enforces Conventional Commits format for pull request titles to maintain a clear and
# machine-readable change history.

name: "🏷️ PR Title Lint"

permissions:
  pull-requests: read

on:
  pull_request:
    types: [opened, edited, synchronize]

jobs:
  # Validates that PR title follows Conventional Commits 1.0.0 specification
  lint-pr-title:
    name: "validate format"
    runs-on: ubuntu-latest
    steps:
      - name: "🚫 Reject empty scope"
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          if [[ "$PR_TITLE" =~ ^[a-z]+\(\)[!]?: ]]; then
            echo "::error::PR title has empty scope parentheses: '$PR_TITLE'"
            echo "Either remove the parentheses or provide a scope (e.g., 'fix(core): ...')."
            exit 1
          fi
      - name: "✅ Validate Conventional Commits Format"
        uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          types: |
            feat
            fix
            docs
            style
            refactor
            perf
            test
            build
            ci
            chore
            revert
            release
            hotfix
          scopes: |
            core
            langchain
            langchain-classic
            model-profiles
            standard-tests
            text-splitters
            docs
            anthropic
            chroma
            deepseek
            exa
            fireworks
            groq
            huggingface
            mistralai
            nomic
            ollama
            openai
            openrouter
            perplexity
            qdrant
            xai
            infra
            deps
            partners
          requireScope: false
          disallowScopes: |
            release
            [A-Z]+
          ignoreLabels: |
            ignore-lint-pr-title
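
The empty-scope guard is a plain regex on the title, applied before the semantic-pull-request action runs. A Python restaging of the same check, for quick local testing:

```python
import re

# Same pattern as the bash `[[ "$PR_TITLE" =~ ^[a-z]+\(\)[!]?: ]]` guard.
EMPTY_SCOPE = re.compile(r"^[a-z]+\(\)!?:")

assert EMPTY_SCOPE.match("fix(): resolve error")          # rejected
assert EMPTY_SCOPE.match("feat()!: drop x support")       # rejected
assert not EMPTY_SCOPE.match("fix(core): resolve error")  # allowed
assert not EMPTY_SCOPE.match("docs: update examples")     # allowed (scope omitted)
```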

45  .github/workflows/refresh_model_profiles.yml  vendored
@@ -1,45 +0,0 @@
# Refreshes model profile data for all in-monorepo partner integrations by
# pulling the latest metadata from models.dev via the `langchain-profiles` CLI.
#
# Creates a pull request with any changes. Runs daily and can be triggered
# manually from the Actions UI. Uses a fixed branch so each run supersedes
# any stale PR from a previous run.

name: "🔄 Refresh Model Profiles"

on:
  schedule:
    - cron: "0 8 * * *" # daily at 08:00 UTC
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

jobs:
  refresh-profiles:
    uses: ./.github/workflows/_refresh_model_profiles.yml
    with:
      providers: >-
        [
          {"provider":"anthropic", "data_dir":"libs/partners/anthropic/langchain_anthropic/data"},
          {"provider":"deepseek", "data_dir":"libs/partners/deepseek/langchain_deepseek/data"},
          {"provider":"fireworks-ai", "data_dir":"libs/partners/fireworks/langchain_fireworks/data"},
          {"provider":"groq", "data_dir":"libs/partners/groq/langchain_groq/data"},
          {"provider":"huggingface", "data_dir":"libs/partners/huggingface/langchain_huggingface/data"},
          {"provider":"mistral", "data_dir":"libs/partners/mistralai/langchain_mistralai/data"},
          {"provider":"openai", "data_dir":"libs/partners/openai/langchain_openai/data"},
          {"provider":"openrouter", "data_dir":"libs/partners/openrouter/langchain_openrouter/data"},
          {"provider":"perplexity", "data_dir":"libs/partners/perplexity/langchain_perplexity/data"},
          {"provider":"xai", "data_dir":"libs/partners/xai/langchain_xai/data"}
        ]
      cli-path: libs/model-profiles
      add-paths: libs/partners/**/data/_profiles.py
      pr-body: |
        Automated refresh of model profile data for all in-monorepo partner
        integrations via `langchain-profiles refresh`.

        🤖 Generated by the `refresh_model_profiles` workflow.
    secrets:
      MODEL_PROFILE_BOT_APP_ID: ${{ secrets.MODEL_PROFILE_BOT_APP_ID }}
      MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}

195  .github/workflows/reopen_on_assignment.yml  vendored
@@ -1,195 +0,0 @@
# Reopen PRs that were auto-closed by require_issue_link.yml when the
# contributor was not assigned to the linked issue. When a maintainer
# assigns the contributor to the issue, this workflow finds matching
# closed PRs, verifies the issue link, and reopens them.
#
# Uses the default GITHUB_TOKEN (not a PAT or app token) so that the
# reopen and label-removal events do NOT re-trigger other workflows.
# GitHub suppresses events created by the default GITHUB_TOKEN within
# workflow runs to prevent infinite loops.

name: Reopen PR on Issue Assignment

on:
  issues:
    types: [assigned]

permissions:
  contents: read

jobs:
  reopen-linked-prs:
    runs-on: ubuntu-latest
    permissions:
      actions: write
      pull-requests: write

    steps:
      - name: Find and reopen matching PRs
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          script: |
            const { owner, repo } = context.repo;
            const issueNumber = context.payload.issue.number;
            const assignee = context.payload.assignee.login;

            console.log(
              `Issue #${issueNumber} assigned to ${assignee} — searching for closed PRs to reopen`,
            );

            const q = [
              `is:pr`,
              `is:closed`,
              `author:${assignee}`,
              `label:missing-issue-link`,
              `repo:${owner}/${repo}`,
            ].join(' ');

            let data;
            try {
              ({ data } = await github.rest.search.issuesAndPullRequests({
                q,
                per_page: 30,
              }));
            } catch (e) {
              throw new Error(
                `Failed to search for closed PRs to reopen after assigning ${assignee} ` +
                `to #${issueNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`,
              );
            }

            if (data.total_count === 0) {
              console.log('No matching closed PRs found');
              return;
            }

            console.log(`Found ${data.total_count} candidate PR(s)`);

            // Must stay in sync with the identical pattern in require_issue_link.yml
            const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;

            for (const item of data.items) {
              const prNumber = item.number;
              const body = item.body || '';
              const matches = [...body.matchAll(pattern)];
              const referencedIssues = matches.map(m => parseInt(m[1], 10));

              if (!referencedIssues.includes(issueNumber)) {
                console.log(`PR #${prNumber} does not reference #${issueNumber} — skipping`);
                continue;
              }

              // Skip if already bypassed
              const labels = item.labels.map(l => l.name);
              if (labels.includes('bypass-issue-check')) {
                console.log(`PR #${prNumber} already has bypass-issue-check — skipping`);
                continue;
              }

              // Reopen first, remove label second — a closed PR that still has
              // missing-issue-link is recoverable; a closed PR with the label
              // stripped is invisible to both workflows.
              try {
                await github.rest.pulls.update({
                  owner,
                  repo,
                  pull_number: prNumber,
                  state: 'open',
                });
                console.log(`Reopened PR #${prNumber}`);
              } catch (e) {
                if (e.status === 422) {
                  // Head branch deleted — PR is unrecoverable. Notify the
                  // contributor so they know to open a new PR.
                  core.warning(`Cannot reopen PR #${prNumber}: head branch was likely deleted`);
                  try {
                    await github.rest.issues.createComment({
                      owner,
                      repo,
                      issue_number: prNumber,
                      body:
                        `You have been assigned to #${issueNumber}, but this PR could not be ` +
                        `reopened because the head branch has been deleted. Please open a new ` +
                        `PR referencing the issue.`,
                    });
                  } catch (commentErr) {
                    core.warning(
                      `Also failed to post comment on PR #${prNumber}: ${commentErr.message}`,
                    );
                  }
                  continue;
                }
                // Transient errors (rate limit, 5xx) should fail the job so
                // the label is NOT removed and the run can be retried.
                throw e;
              }

              // Remove missing-issue-link label only after successful reopen
              try {
                await github.rest.issues.removeLabel({
                  owner,
                  repo,
                  issue_number: prNumber,
                  name: 'missing-issue-link',
                });
                console.log(`Removed missing-issue-link from PR #${prNumber}`);
              } catch (e) {
                if (e.status !== 404) throw e;
              }

              // Minimize stale enforcement comment (best-effort;
              // sync w/ require_issue_link.yml minimize blocks)
              try {
                const marker = '<!-- require-issue-link -->';
                const comments = await github.paginate(
                  github.rest.issues.listComments,
                  { owner, repo, issue_number: prNumber, per_page: 100 },
                );
                const stale = comments.find(c => c.body && c.body.includes(marker));
                if (stale) {
                  await github.graphql(`
                    mutation($id: ID!) {
                      minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
                        minimizedComment { isMinimized }
                      }
                    }
                  `, { id: stale.node_id });
                  console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
                }
              } catch (e) {
                core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
              }

              // Re-run the failed require_issue_link check so it picks up the
              // new assignment. The re-run uses the original event payload but
              // fetches live issue data, so the assignment check will pass.
              //
              // Limitation: we look up runs by the PR's current head SHA. If the
              // contributor pushed new commits while the PR was closed, head.sha
              // won't match the SHA of the original failed run and the query will
              // return 0 results. This is acceptable because any push after reopen
              // triggers a fresh require_issue_link run against the new SHA.
              try {
                const { data: pr } = await github.rest.pulls.get({
                  owner, repo, pull_number: prNumber,
                });
                const { data: runs } = await github.rest.actions.listWorkflowRuns({
                  owner, repo,
                  workflow_id: 'require_issue_link.yml',
                  head_sha: pr.head.sha,
                  status: 'failure',
                  per_page: 1,
                });
                if (runs.workflow_runs.length > 0) {
                  await github.rest.actions.reRunWorkflowFailedJobs({
                    owner, repo,
                    run_id: runs.workflow_runs[0].id,
                  });
                  console.log(`Re-ran failed require_issue_link run ${runs.workflow_runs[0].id} for PR #${prNumber}`);
                } else {
                  console.log(`No failed require_issue_link runs found for PR #${prNumber} — skipping re-run`);
                }
              } catch (e) {
                core.warning(`Could not re-run require_issue_link check for PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}`);
              }
            }
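
The closing-keyword pattern is duplicated verbatim in require_issue_link.yml below, which is why the comment insists the two copies stay in sync. A Python equivalent of the same regex, useful for testing issue references locally:

```python
import re

# Mirrors /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi from the workflows.
PATTERN = re.compile(r"(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)", re.IGNORECASE)

body = "Fixes #123; also closes #456 but merely mentions #789"
assert [int(n) for n in PATTERN.findall(body)] == [123, 456]
```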

467  .github/workflows/require_issue_link.yml  vendored
@@ -1,467 +0,0 @@
# Require external PRs to reference an approved issue (e.g. Fixes #NNN) and
# the PR author to be assigned to that issue. On failure the PR is
# labeled "missing-issue-link", commented on, and closed.
#
# Maintainer override: an org member can reopen the PR or remove
# "missing-issue-link" — both add "bypass-issue-check" and reopen.
#
# Dependency: pr_labeler.yml must apply the "external" label first. This
# workflow does NOT trigger on "opened" (new PRs have no labels yet, so the
# gate would always skip).

name: Require Issue Link

on:
  pull_request_target:
    # NEVER CHECK OUT UNTRUSTED CODE FROM A PR's HEAD IN A pull_request_target JOB.
    # Doing so would allow attackers to execute arbitrary code in the context of your repository.
    types: [edited, reopened, labeled, unlabeled]

# ──────────────────────────────────────────────────────────────────────────────
# Enforcement gate: set to 'true' to activate the issue link requirement.
# When 'false', the workflow still runs the check logic (useful for dry-run
# visibility) but will NOT label, comment, close, or fail PRs.
# ──────────────────────────────────────────────────────────────────────────────
env:
  ENFORCE_ISSUE_LINK: "true"

permissions:
  contents: read

jobs:
  check-issue-link:
    # Run when the "external" label is added, on edit/reopen if already labeled,
    # or when "missing-issue-link" is removed (triggers maintainer override check).
    # Skip entirely when the PR already carries "trusted-contributor" or
    # "bypass-issue-check".
    if: >-
      !contains(github.event.pull_request.labels.*.name, 'trusted-contributor') &&
      !contains(github.event.pull_request.labels.*.name, 'bypass-issue-check') &&
      (
        (github.event.action == 'labeled' && github.event.label.name == 'external') ||
        (github.event.action == 'unlabeled' && github.event.label.name == 'missing-issue-link' && contains(github.event.pull_request.labels.*.name, 'external')) ||
        (github.event.action != 'labeled' && github.event.action != 'unlabeled' && contains(github.event.pull_request.labels.*.name, 'external'))
      )
    runs-on: ubuntu-latest
    permissions:
      actions: write
      pull-requests: write

    steps:
      - name: Check for issue link and assignee
        id: check-link
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          script: |
            const { owner, repo } = context.repo;
            const prNumber = context.payload.pull_request.number;
            const action = context.payload.action;

            // ── Helper: ensure a label exists, then add it to the PR ────────
            async function ensureAndAddLabel(labelName, color) {
              try {
                await github.rest.issues.getLabel({ owner, repo, name: labelName });
              } catch (e) {
                if (e.status !== 404) throw e;
                try {
                  await github.rest.issues.createLabel({ owner, repo, name: labelName, color });
                } catch (createErr) {
                  // 422 = label was created by a concurrent run between our
                  // GET and POST — safe to ignore.
                  if (createErr.status !== 422) throw createErr;
                }
              }
              await github.rest.issues.addLabels({
                owner, repo, issue_number: prNumber, labels: [labelName],
              });
            }

            // ── Helper: check if the user who triggered this event (reopened
            //    the PR / removed the label) has write+ access on the repo ───
            // Uses the repo collaborator permission endpoint instead of the
            // org membership endpoint. The org endpoint requires the caller
            // to be an org member, which GITHUB_TOKEN (an app installation
            // token) never is — so it always returns 403.
            async function senderIsOrgMember() {
              const sender = context.payload.sender?.login;
              if (!sender) {
                throw new Error('Event has no sender — cannot check permissions');
              }
              try {
                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner, repo, username: sender,
                });
                const perm = data.permission;
                if (['admin', 'maintain', 'write'].includes(perm)) {
                  console.log(`${sender} has ${perm} permission — treating as maintainer`);
                  return { isMember: true, login: sender };
                }
                console.log(`${sender} has ${perm} permission — not a maintainer`);
                return { isMember: false, login: sender };
|
||||
} catch (e) {
|
||||
if (e.status === 404) {
|
||||
console.log(`Cannot check permissions for ${sender} — treating as non-maintainer`);
|
||||
return { isMember: false, login: sender };
|
||||
}
|
||||
const status = e.status ?? 'unknown';
|
||||
throw new Error(
|
||||
`Permission check failed for ${sender} (HTTP ${status}): ${e.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helper: apply maintainer bypass (shared by both override paths) ──
|
||||
async function applyMaintainerBypass(reason) {
|
||||
console.log(reason);
|
||||
|
||||
// Remove missing-issue-link if present
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
|
||||
// Reopen before adding bypass label — a failed reopen is more
|
||||
// actionable than a closed PR with a bypass label stuck on it.
|
||||
if (context.payload.pull_request.state === 'closed') {
|
||||
try {
|
||||
await github.rest.pulls.update({
|
||||
owner, repo, pull_number: prNumber, state: 'open',
|
||||
});
|
||||
console.log(`Reopened PR #${prNumber}`);
|
||||
} catch (e) {
|
||||
// 422 if head branch deleted; 403 if permissions insufficient.
|
||||
// Bypass labels still apply — maintainer can reopen manually.
|
||||
core.warning(
|
||||
`Could not reopen PR #${prNumber} (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
|
||||
`Bypass labels were applied — a maintainer may need to reopen manually.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add bypass-issue-check so future triggers skip enforcement
|
||||
await ensureAndAddLabel('bypass-issue-check', '0e8a16');
|
||||
|
||||
// Minimize stale enforcement comment (best-effort; must not
|
||||
// abort bypass — sync w/ reopen_on_assignment.yml & step below)
|
||||
try {
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const stale = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (stale) {
|
||||
await github.graphql(`
|
||||
mutation($id: ID!) {
|
||||
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
|
||||
minimizedComment { isMinimized }
|
||||
}
|
||||
}
|
||||
`, { id: stale.node_id });
|
||||
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
|
||||
}
|
||||
|
||||
core.setOutput('has-link', 'true');
|
||||
core.setOutput('is-assigned', 'true');
|
||||
}
|
||||
|
||||
// ── Maintainer override: removed "missing-issue-link" label ─────
|
||||
if (action === 'unlabeled') {
|
||||
const { isMember, login } = await senderIsOrgMember();
|
||||
if (isMember) {
|
||||
await applyMaintainerBypass(
|
||||
`Maintainer ${login} removed missing-issue-link from PR #${prNumber} — bypassing enforcement`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
// Non-member removed the label — re-add it defensively and
|
||||
// set failure outputs so downstream steps (comment, close) fire.
|
||||
// NOTE: addLabels fires a "labeled" event, but the job-level gate
|
||||
// only matches labeled events for "external", so no re-trigger.
|
||||
console.log(`Non-member ${login} removed missing-issue-link — re-adding`);
|
||||
try {
|
||||
await ensureAndAddLabel('missing-issue-link', 'b76e79');
|
||||
} catch (e) {
|
||||
core.warning(
|
||||
`Failed to re-add missing-issue-link (HTTP ${e.status ?? 'unknown'}): ${e.message}. ` +
|
||||
`Downstream step will retry.`,
|
||||
);
|
||||
}
|
||||
core.setOutput('has-link', 'false');
|
||||
core.setOutput('is-assigned', 'false');
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Maintainer override: reopened PR with "missing-issue-link" ──
|
||||
const prLabels = context.payload.pull_request.labels.map(l => l.name);
|
||||
if (action === 'reopened' && prLabels.includes('missing-issue-link')) {
|
||||
const { isMember, login } = await senderIsOrgMember();
|
||||
if (isMember) {
|
||||
await applyMaintainerBypass(
|
||||
`Maintainer ${login} reopened PR #${prNumber} — bypassing enforcement`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
console.log(`Non-member ${login} reopened PR — proceeding with check`);
|
||||
}
|
||||
|
||||
// ── Fetch live labels (race guard) ──────────────────────────────
|
||||
const { data: liveLabels } = await github.rest.issues.listLabelsOnIssue({
|
||||
owner, repo, issue_number: prNumber,
|
||||
});
|
||||
const liveNames = liveLabels.map(l => l.name);
|
||||
if (liveNames.includes('trusted-contributor') || liveNames.includes('bypass-issue-check')) {
|
||||
console.log('PR has trusted-contributor or bypass-issue-check label — bypassing');
|
||||
core.setOutput('has-link', 'true');
|
||||
core.setOutput('is-assigned', 'true');
|
||||
return;
|
||||
}
|
||||
|
||||
const body = context.payload.pull_request.body || '';
|
||||
const pattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*#(\d+)/gi;
|
||||
const matches = [...body.matchAll(pattern)];
|
||||
|
||||
if (matches.length === 0) {
|
||||
console.log('No issue link found in PR body');
|
||||
core.setOutput('has-link', 'false');
|
||||
core.setOutput('is-assigned', 'false');
|
||||
return;
|
||||
}
|
||||
|
||||
const issues = matches.map(m => `#${m[1]}`).join(', ');
|
||||
console.log(`Found issue link(s): ${issues}`);
|
||||
core.setOutput('has-link', 'true');
|
||||
|
||||
// Check whether the PR author is assigned to at least one linked issue
|
||||
const prAuthor = context.payload.pull_request.user.login;
|
||||
const MAX_ISSUES = 5;
|
||||
const allIssueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
|
||||
const issueNumbers = allIssueNumbers.slice(0, MAX_ISSUES);
|
||||
if (allIssueNumbers.length > MAX_ISSUES) {
|
||||
core.warning(
|
||||
`PR references ${allIssueNumbers.length} issues — only checking the first ${MAX_ISSUES}`,
|
||||
);
|
||||
}
|
||||
|
||||
let assignedToAny = false;
|
||||
for (const num of issueNumbers) {
|
||||
try {
|
||||
const { data: issue } = await github.rest.issues.get({
|
||||
owner, repo, issue_number: num,
|
||||
});
|
||||
const assignees = issue.assignees.map(a => a.login.toLowerCase());
|
||||
if (assignees.includes(prAuthor.toLowerCase())) {
|
||||
console.log(`PR author "${prAuthor}" is assigned to #${num}`);
|
||||
assignedToAny = true;
|
||||
break;
|
||||
} else {
|
||||
console.log(`PR author "${prAuthor}" is NOT assigned to #${num} (assignees: ${assignees.join(', ') || 'none'})`);
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.status === 404) {
|
||||
console.log(`Issue #${num} not found — skipping`);
|
||||
} else {
|
||||
// Non-404 errors (rate limit, server error) must not be
|
||||
// silently skipped — they could cause false enforcement
|
||||
// (closing a legitimate PR whose assignment can't be verified).
|
||||
throw new Error(
|
||||
`Cannot verify assignee for issue #${num} (${error.status}): ${error.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
core.setOutput('is-assigned', assignedToAny ? 'true' : 'false');
|
||||
|
||||
- name: Add missing-issue-link label
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const labelName = 'missing-issue-link';
|
||||
|
||||
// Ensure the label exists (no checkout/shared helper available)
|
||||
try {
|
||||
await github.rest.issues.getLabel({ owner, repo, name: labelName });
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
try {
|
||||
await github.rest.issues.createLabel({
|
||||
owner, repo, name: labelName, color: 'b76e79',
|
||||
});
|
||||
} catch (createErr) {
|
||||
if (createErr.status !== 422) throw createErr;
|
||||
}
|
||||
}
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: prNumber, labels: [labelName],
|
||||
});
|
||||
|
||||
- name: Remove missing-issue-link label and reopen PR
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
steps.check-link.outputs.has-link == 'true' && steps.check-link.outputs.is-assigned == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: prNumber, name: 'missing-issue-link',
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.status !== 404) throw error;
|
||||
}
|
||||
|
||||
// Reopen if this workflow previously closed the PR. We check the
|
||||
// event payload labels (not live labels) because we already removed
|
||||
// missing-issue-link above; the payload still reflects pre-step state.
|
||||
const labels = context.payload.pull_request.labels.map(l => l.name);
|
||||
if (context.payload.pull_request.state === 'closed' && labels.includes('missing-issue-link')) {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: 'open',
|
||||
});
|
||||
console.log(`Reopened PR #${prNumber}`);
|
||||
}
|
||||
|
||||
// Minimize stale enforcement comment (best-effort;
|
||||
// sync w/ applyMaintainerBypass above & reopen_on_assignment.yml)
|
||||
try {
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const stale = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (stale) {
|
||||
await github.graphql(`
|
||||
mutation($id: ID!) {
|
||||
minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) {
|
||||
minimizedComment { isMinimized }
|
||||
}
|
||||
}
|
||||
`, { id: stale.node_id });
|
||||
console.log(`Minimized stale enforcement comment ${stale.id} as outdated`);
|
||||
}
|
||||
} catch (e) {
|
||||
core.warning(`Could not minimize stale comment on PR #${prNumber}: ${e.message}`);
|
||||
}
|
||||
|
||||
- name: Post comment, close PR, and fail
|
||||
if: >-
|
||||
env.ENFORCE_ISSUE_LINK == 'true' &&
|
||||
(steps.check-link.outputs.has-link != 'true' || steps.check-link.outputs.is-assigned != 'true')
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
const hasLink = '${{ steps.check-link.outputs.has-link }}' === 'true';
|
||||
const isAssigned = '${{ steps.check-link.outputs.is-assigned }}' === 'true';
|
||||
const marker = '<!-- require-issue-link -->';
|
||||
|
||||
let lines;
|
||||
if (!hasLink) {
|
||||
lines = [
|
||||
marker,
|
||||
'**This PR has been automatically closed** because it does not link to an approved issue.',
|
||||
'',
|
||||
'All external contributions must reference an approved issue or discussion. Please:',
|
||||
'1. Find or [open an issue](https://github.com/' + owner + '/' + repo + '/issues/new/choose) describing the change',
|
||||
'2. Wait for a maintainer to approve and assign you',
|
||||
'3. Add `Fixes #<issue_number>`, `Closes #<issue_number>`, or `Resolves #<issue_number>` to your PR description and the PR will be reopened automatically',
|
||||
'',
|
||||
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
|
||||
];
|
||||
} else {
|
||||
lines = [
|
||||
marker,
|
||||
'**This PR has been automatically closed** because you are not assigned to the linked issue.',
|
||||
'',
|
||||
'External contributors must be assigned to an issue before opening a PR for it. Please:',
|
||||
'1. Comment on the linked issue to request assignment from a maintainer',
|
||||
'2. Once assigned, your PR will be reopened automatically',
|
||||
'',
|
||||
'*Maintainers: reopen this PR or remove the `missing-issue-link` label to bypass this check.*',
|
||||
];
|
||||
}
|
||||
|
||||
const body = lines.join('\n');
|
||||
|
||||
// Deduplicate: check for existing comment with the marker
|
||||
const comments = await github.paginate(
|
||||
github.rest.issues.listComments,
|
||||
{ owner, repo, issue_number: prNumber, per_page: 100 },
|
||||
);
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
|
||||
if (!existing) {
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: prNumber,
|
||||
body,
|
||||
});
|
||||
console.log('Posted requirement comment');
|
||||
} else if (existing.body !== body) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner,
|
||||
repo,
|
||||
comment_id: existing.id,
|
||||
body,
|
||||
});
|
||||
console.log('Updated existing comment with new message');
|
||||
} else {
|
||||
console.log('Comment already exists — skipping');
|
||||
}
|
||||
|
||||
// Close the PR
|
||||
if (context.payload.pull_request.state === 'open') {
|
||||
await github.rest.pulls.update({
|
||||
owner,
|
||||
repo,
|
||||
pull_number: prNumber,
|
||||
state: 'closed',
|
||||
});
|
||||
console.log(`Closed PR #${prNumber}`);
|
||||
}
|
||||
|
||||
// Cancel all other in-progress and queued workflow runs for this PR
|
||||
const headSha = context.payload.pull_request.head.sha;
|
||||
for (const status of ['in_progress', 'queued']) {
|
||||
const runs = await github.paginate(
|
||||
github.rest.actions.listWorkflowRunsForRepo,
|
||||
{ owner, repo, head_sha: headSha, status, per_page: 100 },
|
||||
);
|
||||
for (const run of runs) {
|
||||
if (run.id === context.runId) continue;
|
||||
try {
|
||||
await github.rest.actions.cancelWorkflowRun({
|
||||
owner, repo, run_id: run.id,
|
||||
});
|
||||
console.log(`Cancelled ${status} run ${run.id} (${run.name})`);
|
||||
} catch (err) {
|
||||
console.log(`Could not cancel run ${run.id}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const reason = !hasLink
|
||||
? 'PR must reference an issue using auto-close keywords (e.g., "Fixes #123").'
|
||||
: 'PR author must be assigned to the linked issue.';
|
||||
core.setFailed(reason);
|
||||
.github/workflows/scheduled_test.yml
@@ -0,0 +1,86 @@
name: Scheduled tests

on:
  workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
  schedule:
    - cron: '0 13 * * *'

env:
  POETRY_VERSION: "1.6.1"

jobs:
  build:
    defaults:
      run:
        working-directory: libs/langchain
    runs-on: ubuntu-latest
    environment: Scheduled testing
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: libs/langchain
          cache-key: scheduled

      - name: 'Authenticate to Google Cloud'
        id: 'auth'
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: Install dependencies
        working-directory: libs/langchain
        shell: bash
        run: |
          echo "Running scheduled tests, installing dependencies with poetry..."
          poetry install --with=test_integration
          poetry run pip install google-cloud-aiplatform
          poetry run pip install "boto3>=1.28.57"
          if [[ ${{ matrix.python-version }} != "3.8" ]]
          then
            poetry run pip install fireworks-ai
          fi

      - name: Run tests
        shell: bash
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
          AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
          AZURE_OPENAI_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_DEPLOYMENT_NAME }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
        run: |
          make scheduled_tests

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
.github/workflows/tag-external-issues.yml
@@ -1,205 +0,0 @@
|
||||
# Automatically tag issues as "external" or "internal" based on whether
|
||||
# the author is a member of the langchain-ai GitHub organization, and
|
||||
# apply contributor tier labels to external contributors based on their
|
||||
# merged PR history.
|
||||
#
|
||||
# NOTE: PR labeling (including external/internal, tier, size, file, and
|
||||
# title labels) is handled by pr_labeler.yml. This workflow handles
|
||||
# issues only.
|
||||
#
|
||||
# Config (trustedThreshold, labelColor) is read from
|
||||
# .github/scripts/pr-labeler-config.json to stay in sync with
|
||||
# pr_labeler.yml.
|
||||
#
|
||||
# Setup Requirements:
|
||||
# 1. Create a GitHub App with permissions:
|
||||
# - Repository: Issues (write)
|
||||
# - Organization: Members (read)
|
||||
# 2. Install the app on your organization and this repository
|
||||
# 3. Add these repository secrets:
|
||||
# - ORG_MEMBERSHIP_APP_ID: Your app's ID
|
||||
# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
|
||||
#
|
||||
# The GitHub App token is required to check private organization membership.
|
||||
# Without it, the workflow will fail.
|
||||
|
||||
name: Tag External Issues
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
max_items:
|
||||
description: "Maximum number of open issues to process"
|
||||
default: "100"
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.issue.number || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tag-external:
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Check if contributor is external
|
||||
if: steps.app-token.outcome == 'success'
|
||||
id: check-membership
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const author = context.payload.sender.login;
|
||||
const { isExternal } = await h.checkMembership(
|
||||
author, context.payload.sender.type,
|
||||
);
|
||||
core.setOutput('is-external', isExternal ? 'true' : 'false');
|
||||
|
||||
- name: Apply contributor tier label
|
||||
if: steps.check-membership.outputs.is-external == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
# GITHUB_TOKEN is fine here — no downstream workflow chains
|
||||
# off tier labels on issues (unlike PRs where App token is
|
||||
# needed for require_issue_link.yml).
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const issue = context.payload.issue;
|
||||
// new-contributor is only meaningful on PRs, not issues
|
||||
await h.applyTierLabel(issue.number, issue.user.login, { skipNewContributor: true });
|
||||
|
||||
- name: Add external/internal label
|
||||
if: steps.check-membership.outputs.is-external != ''
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const issue_number = context.payload.issue.number;
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const label = '${{ steps.check-membership.outputs.is-external }}' === 'true'
|
||||
? 'external' : 'internal';
|
||||
await h.ensureLabel(label);
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number, labels: [label],
|
||||
});
|
||||
console.log(`Added '${label}' label to issue #${issue_number}`);
|
||||
|
||||
backfill:
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
|
||||
with:
|
||||
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
|
||||
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Backfill labels on open issues
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
with:
|
||||
github-token: ${{ steps.app-token.outputs.token }}
|
||||
script: |
|
||||
const { owner, repo } = context.repo;
|
||||
const rawMax = '${{ inputs.max_items }}';
|
||||
const maxItems = parseInt(rawMax, 10);
|
||||
if (isNaN(maxItems) || maxItems <= 0) {
|
||||
core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
|
||||
return;
|
||||
}
|
||||
|
||||
const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
|
||||
|
||||
const tierLabels = ['trusted-contributor'];
|
||||
for (const name of tierLabels) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
const contributorCache = new Map();
|
||||
|
||||
const issues = await github.paginate(github.rest.issues.listForRepo, {
|
||||
owner, repo, state: 'open', per_page: 100,
|
||||
});
|
||||
|
||||
let processed = 0;
|
||||
let failures = 0;
|
||||
for (const issue of issues) {
|
||||
if (processed >= maxItems) break;
|
||||
if (issue.pull_request) continue;
|
||||
|
||||
try {
|
||||
const author = issue.user.login;
|
||||
const info = await h.getContributorInfo(contributorCache, author, issue.user.type);
|
||||
|
||||
const labels = [info.isExternal ? 'external' : 'internal'];
|
||||
if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) {
|
||||
labels.push('trusted-contributor');
|
||||
}
|
||||
|
||||
// Ensure all labels exist before batch add
|
||||
for (const name of labels) {
|
||||
await h.ensureLabel(name);
|
||||
}
|
||||
|
||||
// Remove stale tier labels
|
||||
const currentLabels = (await github.paginate(
|
||||
github.rest.issues.listLabelsOnIssue,
|
||||
{ owner, repo, issue_number: issue.number, per_page: 100 },
|
||||
)).map(l => l.name ?? '');
|
||||
for (const name of currentLabels) {
|
||||
if (tierLabels.includes(name) && !labels.includes(name)) {
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner, repo, issue_number: issue.number, name,
|
||||
});
|
||||
} catch (e) {
|
||||
if (e.status !== 404) throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await github.rest.issues.addLabels({
|
||||
owner, repo, issue_number: issue.number, labels,
|
||||
});
|
||||
console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`);
|
||||
processed++;
|
||||
} catch (e) {
|
||||
failures++;
|
||||
core.warning(`Failed to process issue #${issue.number}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nBackfill complete. Processed ${processed} issues, ${failures} failures. ${contributorCache.size} unique authors.`);
|
||||
.github/workflows/templates_ci.yml
@@ -0,0 +1,36 @@
---
name: templates CI

on:
  push:
    branches: [ master ]
  pull_request:
    paths:
      - '.github/actions/poetry_setup/action.yml'
      - '.github/tools/**'
      - '.github/workflows/_lint.yml'
      - '.github/workflows/templates_ci.yml'
      - 'templates/**'
  workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI

# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  POETRY_VERSION: "1.6.1"
  WORKDIR: "templates"

jobs:
  lint:
    uses:
      ./.github/workflows/_lint.yml
    with:
      working-directory: templates
    secrets: inherit
.github/workflows/v03_api_doc_build.yml
@@ -1,167 +0,0 @@
|
||||
# Build the API reference documentation for v0.3 branch.
|
||||
#
|
||||
# Manual trigger only.
|
||||
#
|
||||
# Built HTML pushed to langchain-ai/langchain-api-docs-html.
|
||||
#
|
||||
# Looks for langchain-ai org repos in packages.yml and checks them out.
|
||||
# Calls prep_api_docs_build.py.
|
||||
|
||||
name: "📚 API Docs (v0.3)"
|
||||
run-name: "Build & Deploy API Reference (v0.3)"
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: "3.11"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: github.repository == 'langchain-ai/langchain' || github.event_name != 'schedule'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
ref: v0.3
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-api-docs-html
|
||||
path: langchain-api-docs-html
|
||||
token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }}
|
||||
|
||||
- name: "📋 Extract Repository List with yq"
|
||||
id: get-unsorted-repos
|
||||
uses: mikefarah/yq@17f66dc6c6a177fafd8b71a6abea6d6340aa1e16 # master
|
||||
with:
|
||||
cmd: |
|
||||
# Extract repos from packages.yml that are in the langchain-ai org
|
||||
# (excluding 'langchain' itself)
|
||||
yq '
|
||||
.packages[]
|
||||
| select(
|
||||
(
|
||||
(.repo | test("^langchain-ai/"))
|
||||
and
|
||||
(.repo != "langchain-ai/langchain")
|
||||
)
|
||||
or
|
||||
(.include_in_api_ref // false)
|
||||
)
|
||||
| .repo
|
||||
' langchain/libs/packages.yml
|
||||
|
||||
- name: "📋 Parse YAML & Checkout Repositories"
|
||||
env:
|
||||
REPOS_UNSORTED: ${{ steps.get-unsorted-repos.outputs.result }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Get unique repositories
|
||||
REPOS=$(echo "$REPOS_UNSORTED" | sort -u)
|
||||
# Checkout each unique repository
|
||||
for repo in $REPOS; do
|
||||
# Validate repository format (allow any org with proper format)
|
||||
if [[ ! "$repo" =~ ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ ]]; then
|
||||
echo "Error: Invalid repository format: $repo"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
REPO_NAME=$(echo $repo | cut -d'/' -f2)
|
||||
|
||||
# Additional validation for repo name
|
||||
if [[ ! "$REPO_NAME" =~ ^[a-zA-Z0-9_.-]+$ ]]; then
|
||||
echo "Error: Invalid repository name: $REPO_NAME"
|
||||
exit 1
|
||||
fi
|
||||
echo "Checking out $repo to $REPO_NAME"
|
||||
|
||||
# Special handling for langchain-tavily: checkout by commit hash
|
||||
if [[ "$REPO_NAME" == "langchain-tavily" ]]; then
|
||||
git clone https://github.com/$repo.git $REPO_NAME
|
||||
cd $REPO_NAME
|
||||
git checkout f3515654724a9e87bdfe2c2f509d6cdde646e563
|
||||
cd ..
|
||||
else
|
||||
git clone --depth 1 --branch v0.3 https://github.com/$repo.git $REPO_NAME
|
||||
fi
|
||||
done
|
||||
|
||||
- name: "🐍 Setup Python ${{ env.PYTHON_VERSION }}"
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: "📦 Install Initial Python Dependencies using uv"
|
||||
working-directory: langchain
|
||||
run: |
|
||||
python -m pip install -U uv
|
||||
python -m uv pip install --upgrade --no-cache-dir pip setuptools pyyaml
|
||||
|
||||
- name: "📦 Organize Library Directories"
|
||||
# Places cloned partner packages into libs/partners structure
|
||||
run: python langchain/.github/scripts/prep_api_docs_build.py
|
||||
|
||||
- name: "🧹 Clear Prior Build"
|
||||
run:
|
||||
# Remove artifacts from prior docs build
|
||||
rm -rf langchain-api-docs-html/api_reference_build/html
|
||||
|
||||
- name: "📦 Install Documentation Dependencies using uv"
|
||||
working-directory: langchain
|
||||
run: |
|
||||
# Install all partner packages in editable mode with overrides
|
||||
python -m uv pip install $(ls ./libs/partners | grep -v azure-ai | xargs -I {} echo "./libs/partners/{}") --overrides ./docs/vercel_overrides.txt --prerelease=allow
|
||||
|
||||
# Install langchain-azure-ai with tools extra
|
||||
python -m uv pip install "./libs/partners/azure-ai[tools]" --overrides ./docs/vercel_overrides.txt --prerelease=allow
|
||||
|
||||
# Install core langchain and other main packages
|
||||
python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental libs/standard-tests
|
||||
|
||||
# Install Sphinx and related packages for building docs
|
||||
python -m uv pip install -r docs/api_reference/requirements.txt
|
||||
|
||||
- name: "🔧 Configure Git Settings"
|
||||
working-directory: langchain
|
||||
run: |
|
||||
git config --local user.email "actions@github.com"
|
||||
git config --local user.name "Github Actions"
|
||||
|
||||
- name: "📚 Build API Documentation"
|
||||
working-directory: langchain
|
||||
run: |
|
||||
# Generate the API reference RST files
|
||||
python docs/api_reference/create_api_rst.py
|
||||
|
||||
# Build the HTML documentation using Sphinx
|
||||
# -T: show full traceback on exception
|
||||
# -E: don't use cached environment (force rebuild, ignore cached doctrees)
|
||||
# -b html: build HTML docs (vs PDS, etc.)
|
||||
# -d: path for the cached environment (parsed document trees / doctrees)
|
||||
# - Separate from output dir for faster incremental builds
|
||||
# -c: path to conf.py
|
||||
# -j auto: parallel build using all available CPU cores
|
||||
python -m sphinx -T -E -b html -d ../langchain-api-docs-html/_build/doctrees -c docs/api_reference docs/api_reference ../langchain-api-docs-html/api_reference_build/html -j auto
|
||||
|
||||
# Post-process the generated HTML
|
||||
python docs/api_reference/scripts/custom_formatter.py ../langchain-api-docs-html/api_reference_build/html
|
||||
|
||||
# Default index page is blank so we copy in the actual home page.
|
||||
cp ../langchain-api-docs-html/api_reference_build/html/{reference,index}.html
|
||||
|
||||
# Removes Sphinx's intermediate build artifacts after the build is complete.
|
||||
rm -rf ../langchain-api-docs-html/_build/
|
||||
|
||||
# Commit and push changes to langchain-api-docs-html repo
|
||||
- uses: EndBug/add-and-commit@290ea2c423ad77ca9c62ae0f5b224379612c0321 # v10.0.0
|
||||
with:
|
||||
cwd: langchain-api-docs-html
|
||||
message: "Update API docs build from v0.3 branch"
|
||||
.gitignore
@@ -1,8 +1,6 @@
.vs/
.claude/
.vscode/
.idea/
#Emacs backup
*~
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -61,7 +59,6 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
.codspeed/

# Translations
*.mo
@@ -80,6 +77,10 @@ instance/
# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
docs/docs/_build/

# PyBuilder
target/

@@ -114,11 +115,13 @@ celerybeat.pid
# Environments
.env
.envrc
.venv*
venv*
.venv
.venvs
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
@@ -132,7 +135,6 @@ env.bak/

# mypy
.mypy_cache/
.mypy_cache_test/
.dmypy.json
dmypy.json

@@ -160,9 +162,19 @@ data_map*
*replit*

node_modules

prof
virtualenv/
scratch/

.langgraph_api/
docs/.yarn/
docs/node_modules/
docs/.docusaurus/
docs/.cache-loader/
docs/_dist
docs/api_reference/*api_reference.rst
docs/api_reference/_build
docs/api_reference/*/
!docs/api_reference/_static/
!docs/api_reference/templates/
!docs/api_reference/themes/
docs/docs/build
docs/docs/node_modules
docs/docs/yarn.lock
_dist
docs/docs/templates
@@ -1,14 +0,0 @@
{
  "MD013": false,
  "MD024": {
    "siblings_only": true
  },
  "MD025": false,
  "MD033": false,
  "MD034": false,
  "MD036": false,
  "MD041": false,
  "MD046": {
    "style": "fenced"
  }
}
.mcp.json
@@ -1,12 +0,0 @@
{
  "mcpServers": {
    "docs-langchain": {
      "type": "http",
      "url": "https://docs.langchain.com/mcp"
    },
    "reference-langchain": {
      "type": "http",
      "url": "https://reference.langchain.com/mcp"
    }
  }
}
@@ -1,125 +0,0 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.3.0
|
||||
hooks:
|
||||
- id: no-commit-to-branch # prevent direct commits to protected branches
|
||||
args: ["--branch", "master"]
|
||||
- id: check-yaml # validate YAML syntax
|
||||
args: ["--unsafe"] # allow custom tags
|
||||
- id: check-toml # validate TOML syntax
|
||||
- id: end-of-file-fixer # ensure files end with a newline
|
||||
- id: trailing-whitespace # remove trailing whitespace from lines
|
||||
exclude: \.ambr$
|
||||
|
||||
# Text normalization hooks for consistent formatting
|
||||
- repo: https://github.com/sirosen/texthooks
|
||||
rev: 0.6.8
|
||||
hooks:
|
||||
- id: fix-smartquotes # replace curly quotes with straight quotes
|
||||
- id: fix-spaces # replace non-standard spaces (e.g., non-breaking) with regular spaces
|
||||
|
||||
# Per-package format and lint hooks for the monorepo
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: core
|
||||
name: format and lint core
|
||||
language: system
|
||||
entry: make -C libs/core format lint
|
||||
files: ^libs/core/
|
||||
pass_filenames: false
|
||||
- id: langchain
|
||||
name: format and lint langchain
|
||||
language: system
|
||||
entry: make -C libs/langchain format lint
|
||||
files: ^libs/langchain/
|
||||
pass_filenames: false
|
||||
- id: standard-tests
|
||||
name: format and lint standard-tests
|
||||
language: system
|
||||
entry: make -C libs/standard-tests format lint
|
||||
files: ^libs/standard-tests/
|
||||
pass_filenames: false
|
||||
- id: text-splitters
|
||||
name: format and lint text-splitters
|
||||
language: system
|
||||
entry: make -C libs/text-splitters format lint
|
||||
files: ^libs/text-splitters/
|
||||
pass_filenames: false
|
||||
- id: anthropic
|
||||
name: format and lint partners/anthropic
|
||||
language: system
|
||||
entry: make -C libs/partners/anthropic format lint
|
||||
files: ^libs/partners/anthropic/
|
||||
pass_filenames: false
|
||||
- id: chroma
|
||||
name: format and lint partners/chroma
|
||||
language: system
|
||||
entry: make -C libs/partners/chroma format lint
|
||||
files: ^libs/partners/chroma/
|
||||
pass_filenames: false
|
||||
- id: exa
|
||||
name: format and lint partners/exa
|
||||
language: system
|
||||
entry: make -C libs/partners/exa format lint
|
||||
files: ^libs/partners/exa/
|
||||
pass_filenames: false
|
||||
- id: fireworks
|
||||
name: format and lint partners/fireworks
|
||||
language: system
|
||||
entry: make -C libs/partners/fireworks format lint
|
||||
files: ^libs/partners/fireworks/
|
||||
pass_filenames: false
|
||||
- id: groq
|
||||
name: format and lint partners/groq
|
||||
language: system
|
||||
entry: make -C libs/partners/groq format lint
|
||||
files: ^libs/partners/groq/
|
||||
pass_filenames: false
|
||||
- id: huggingface
|
||||
name: format and lint partners/huggingface
|
||||
language: system
|
||||
entry: make -C libs/partners/huggingface format lint
|
||||
files: ^libs/partners/huggingface/
|
||||
pass_filenames: false
|
||||
- id: mistralai
|
||||
name: format and lint partners/mistralai
|
||||
language: system
|
||||
entry: make -C libs/partners/mistralai format lint
|
||||
files: ^libs/partners/mistralai/
|
||||
pass_filenames: false
|
||||
- id: nomic
|
||||
name: format and lint partners/nomic
|
||||
language: system
|
||||
entry: make -C libs/partners/nomic format lint
|
||||
files: ^libs/partners/nomic/
|
||||
pass_filenames: false
|
||||
- id: ollama
|
||||
name: format and lint partners/ollama
|
||||
language: system
|
||||
entry: make -C libs/partners/ollama format lint
|
||||
files: ^libs/partners/ollama/
|
||||
pass_filenames: false
|
||||
- id: openai
|
||||
name: format and lint partners/openai
|
||||
language: system
|
||||
entry: make -C libs/partners/openai format lint
|
||||
files: ^libs/partners/openai/
|
||||
pass_filenames: false
|
||||
- id: qdrant
|
||||
name: format and lint partners/qdrant
|
||||
language: system
|
||||
entry: make -C libs/partners/qdrant format lint
|
||||
files: ^libs/partners/qdrant/
|
||||
pass_filenames: false
|
||||
- id: core-version
|
||||
name: check core version consistency
|
||||
language: system
|
||||
entry: make -C libs/core check_version
|
||||
files: ^libs/core/(pyproject\.toml|langchain_core/version\.py)$
|
||||
pass_filenames: false
|
||||
- id: langchain-v1-version
|
||||
name: check langchain version consistency
|
||||
language: system
|
||||
entry: make -C libs/langchain_v1 check_version
|
||||
files: ^libs/langchain_v1/(pyproject\.toml|langchain/__init__\.py)$
|
||||
pass_filenames: false
|
||||
.readthedocs.yaml
@@ -0,0 +1,32 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
  commands:
    - python -mvirtualenv $READTHEDOCS_VIRTUALENV_PATH
    - python -m pip install --upgrade --no-cache-dir pip setuptools
    - python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
    - python -m pip install --exists-action=w --no-cache-dir -r docs/api_reference/requirements.txt
    - python docs/api_reference/create_api_rst.py
    - cat docs/api_reference/conf.py
    - python -m sphinx -T -E -b html -d _build/doctrees -c docs/api_reference docs/api_reference $READTHEDOCS_OUTPUT/html -j auto

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/api_reference/conf.py

# If using Sphinx, optionally build your docs in additional formats such as PDF
# formats:
#   - pdf

# Optionally declare the Python requirements required to build your docs
python:
  install:
    - requirements: docs/api_reference/requirements.txt
@@ -0,0 +1,9 @@
"""Main entrypoint into package."""
from importlib import metadata

try:
    __version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
    # Case where package metadata is not available.
    __version__ = ""
del metadata  # optional, avoids polluting the results of dir(__package__)
@@ -0,0 +1,123 @@
|
||||
"""Agent toolkits contain integrations with various resources and services.
|
||||
|
||||
LangChain has a large ecosystem of integrations with various external resources
|
||||
like local and remote file systems, APIs and databases.
|
||||
|
||||
These integrations allow developers to create versatile applications that combine the
|
||||
power of LLMs with the ability to access, interact with and manipulate external
|
||||
resources.
|
||||
|
||||
When developing an application, developers should inspect the capabilities and
|
||||
permissions of the tools that underlie the given agent toolkit, and determine
|
||||
whether permissions of the given toolkit are appropriate for the application.
|
||||
|
||||
See [Security](https://python.langchain.com/docs/security) for more information.
|
||||
"""
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from langchain_core._api.path import as_import_path
|
||||
|
||||
from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit
|
||||
from langchain_community.agent_toolkits.azure_cognitive_services import (
|
||||
AzureCognitiveServicesToolkit,
|
||||
)
|
||||
from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501
|
||||
create_conversational_retrieval_agent,
|
||||
)
|
||||
from langchain_community.agent_toolkits.file_management.toolkit import (
|
||||
FileManagementToolkit,
|
||||
)
|
||||
from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit
|
||||
from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit
|
||||
from langchain_community.agent_toolkits.json.base import create_json_agent
|
||||
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
|
||||
from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit
|
||||
from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit
|
||||
from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit
|
||||
from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit
|
||||
from langchain_community.agent_toolkits.openapi.base import create_openapi_agent
|
||||
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
|
||||
from langchain_community.agent_toolkits.playwright.toolkit import (
|
||||
PlayWrightBrowserToolkit,
|
||||
)
|
||||
from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent
|
||||
from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
|
||||
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit
|
||||
from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent
|
||||
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
|
||||
from langchain_community.agent_toolkits.sql.base import create_sql_agent
|
||||
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
||||
from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit
|
||||
from langchain_community.agent_toolkits.vectorstore.base import (
|
||||
create_vectorstore_agent,
|
||||
create_vectorstore_router_agent,
|
||||
)
|
||||
from langchain_community.agent_toolkits.vectorstore.toolkit import (
|
||||
VectorStoreInfo,
|
||||
VectorStoreRouterToolkit,
|
||||
VectorStoreToolkit,
|
||||
)
|
||||
from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit
|
||||
from langchain_community.tools.retriever import create_retriever_tool
|
||||
|
||||
DEPRECATED_AGENTS = [
|
||||
"create_csv_agent",
|
||||
"create_pandas_dataframe_agent",
|
||||
"create_xorbits_agent",
|
||||
"create_python_agent",
|
||||
"create_spark_dataframe_agent",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
|
||||
"""Get attr name."""
|
||||
if name in DEPRECATED_AGENTS:
|
||||
relative_path = as_import_path(Path(__file__).parent, suffix=name)
|
||||
old_path = "langchain." + relative_path
|
||||
new_path = "langchain_experimental." + relative_path
|
||||
raise ImportError(
|
||||
f"{name} has been moved to langchain experimental. "
|
||||
"See https://github.com/langchain-ai/langchain/discussions/11680"
|
||||
"for more information.\n"
|
||||
f"Please update your import statement from: `{old_path}` to `{new_path}`."
|
||||
)
|
||||
raise AttributeError(f"{name} does not exist")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AINetworkToolkit",
|
||||
"AmadeusToolkit",
|
||||
"AzureCognitiveServicesToolkit",
|
||||
"FileManagementToolkit",
|
||||
"GmailToolkit",
|
||||
"JiraToolkit",
|
||||
"JsonToolkit",
|
||||
"MultionToolkit",
|
||||
"NasaToolkit",
|
||||
"NLAToolkit",
|
||||
"O365Toolkit",
|
||||
"OpenAPIToolkit",
|
||||
"PlayWrightBrowserToolkit",
|
||||
"PowerBIToolkit",
|
||||
"SlackToolkit",
|
||||
"SteamToolkit",
|
||||
"SQLDatabaseToolkit",
|
||||
"SparkSQLToolkit",
|
||||
"VectorStoreInfo",
|
||||
"VectorStoreRouterToolkit",
|
||||
"VectorStoreToolkit",
|
||||
"ZapierToolkit",
|
||||
"create_json_agent",
|
||||
"create_openapi_agent",
|
||||
"create_pbi_agent",
|
||||
"create_pbi_chat_agent",
|
||||
"create_spark_sql_agent",
|
||||
"create_sql_agent",
|
||||
"create_vectorstore_agent",
|
||||
"create_vectorstore_router_agent",
|
||||
"create_conversational_retrieval_agent",
|
||||
"create_retriever_tool",
|
||||
]
|
||||
@@ -0,0 +1,88 @@
from __future__ import annotations

from typing import Any, List, Optional, TYPE_CHECKING

from langchain_core.language_models import BaseLanguageModel
from langchain_core.memory import BaseMemory
from langchain_core.messages import SystemMessage
from langchain_core.prompts.chat import MessagesPlaceholder
from langchain_core.tools import BaseTool

if TYPE_CHECKING:
    from langchain.agents.agent import AgentExecutor


def _get_default_system_message() -> SystemMessage:
    return SystemMessage(
        content=(
            "Do your best to answer the questions. "
            "Feel free to use any tools available to look up "
            "relevant information, only if necessary"
        )
    )


def create_conversational_retrieval_agent(
    llm: BaseLanguageModel,
    tools: List[BaseTool],
    remember_intermediate_steps: bool = True,
    memory_key: str = "chat_history",
    system_message: Optional[SystemMessage] = None,
    verbose: bool = False,
    max_token_limit: int = 2000,
    **kwargs: Any,
) -> AgentExecutor:
    """A convenience method for creating a conversational retrieval agent.

    Args:
        llm: The language model to use, should be ChatOpenAI
        tools: A list of tools the agent has access to
        remember_intermediate_steps: Whether the agent should remember intermediate
            steps or not. Intermediate steps refer to prior action/observation
            pairs from previous questions. The benefit of remembering these is if
            there is relevant information in there, the agent can use it to answer
            follow up questions. The downside is it will take up more tokens.
        memory_key: The name of the memory key in the prompt.
        system_message: The system message to use. By default, a basic one will
            be used.
        verbose: Whether or not the final AgentExecutor should be verbose or not,
            defaults to False.
        max_token_limit: The max number of tokens to keep around in memory.
            Defaults to 2000.

    Returns:
        An agent executor initialized appropriately
    """
    from langchain.agents.agent import AgentExecutor
    from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
        AgentTokenBufferMemory,
    )
    from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
    from langchain.memory.token_buffer import ConversationTokenBufferMemory

    if remember_intermediate_steps:
        memory: BaseMemory = AgentTokenBufferMemory(
            memory_key=memory_key, llm=llm, max_token_limit=max_token_limit
        )
    else:
        memory = ConversationTokenBufferMemory(
            memory_key=memory_key,
            return_messages=True,
            output_key="output",
            llm=llm,
            max_token_limit=max_token_limit,
        )

    _system_message = system_message or _get_default_system_message()
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=_system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )
    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    return AgentExecutor(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=verbose,
        return_intermediate_steps=remember_intermediate_steps,
        **kwargs,
    )
@@ -0,0 +1,53 @@
"""Json agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING

from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel

from langchain_community.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit

if TYPE_CHECKING:
    from langchain.agents.agent import AgentExecutor


def create_json_agent(
    llm: BaseLanguageModel,
    toolkit: JsonToolkit,
    callback_manager: Optional[BaseCallbackManager] = None,
    prefix: str = JSON_PREFIX,
    suffix: str = JSON_SUFFIX,
    format_instructions: Optional[str] = None,
    input_variables: Optional[List[str]] = None,
    verbose: bool = False,
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> AgentExecutor:
    """Construct a json agent from an LLM and tools."""
    from langchain.agents.agent import AgentExecutor
    from langchain.agents.mrkl.base import ZeroShotAgent
    from langchain.chains.llm import LLMChain
    tools = toolkit.get_tools()
    prompt_params = (
        {"format_instructions": format_instructions}
        if format_instructions is not None
        else {}
    )
    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=input_variables,
        **prompt_params,
    )
    llm_chain = LLMChain(
        llm=llm,
        prompt=prompt,
        callback_manager=callback_manager,
    )
    tool_names = [tool.name for tool in tools]
    agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        callback_manager=callback_manager,
        verbose=verbose,
        **(agent_executor_kwargs or {}),
    )
@@ -0,0 +1,57 @@
|
||||
"""Tool for interacting with a single API with natural language definition."""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Any, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.tools import Tool
|
||||
|
||||
from langchain_community.tools.openapi.utils.api_models import APIOperation
|
||||
from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec
|
||||
from langchain_community.utilities.requests import Requests
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.chains.api.openapi.chain import OpenAPIEndpointChain
|
||||
|
||||
|
||||
class NLATool(Tool):
|
||||
"""Natural Language API Tool."""
|
||||
|
||||
@classmethod
|
||||
def from_open_api_endpoint_chain(
|
||||
cls, chain: OpenAPIEndpointChain, api_title: str
|
||||
) -> "NLATool":
|
||||
"""Convert an endpoint chain to an API endpoint tool."""
|
||||
expanded_name = (
|
||||
f'{api_title.replace(" ", "_")}.{chain.api_operation.operation_id}'
|
||||
)
|
||||
description = (
|
||||
f"I'm an AI from {api_title}. Instruct what you want,"
|
||||
" and I'll assist via an API with description:"
|
||||
f" {chain.api_operation.description}"
|
||||
)
|
||||
return cls(name=expanded_name, func=chain.run, description=description)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_method(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
path: str,
|
||||
method: str,
|
||||
spec: OpenAPISpec,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
return_intermediate_steps: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> "NLATool":
|
||||
"""Instantiate the tool from the specified path and method."""
|
||||
from langchain.chains.api.openapi.chain import OpenAPIEndpointChain

api_operation = APIOperation.from_openapi_spec(spec, path, method)
|
||||
chain = OpenAPIEndpointChain.from_api_operation(
|
||||
api_operation,
|
||||
llm,
|
||||
requests=requests,
|
||||
verbose=verbose,
|
||||
return_intermediate_steps=return_intermediate_steps,
|
||||
**kwargs,
|
||||
)
|
||||
return cls.from_open_api_endpoint_chain(chain, spec.info.title)
|
||||
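A rough, hypothetical sketch of `NLATool.from_llm_and_method`; the import path, spec URL, endpoint path, and method are assumptions:

```python
# Hypothetical usage sketch; the NLATool import path, spec URL, path and method are assumptions.
from langchain_openai import OpenAI

from langchain_community.agent_toolkits.nla.tool import NLATool  # inferred path
from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec

llm = OpenAI(temperature=0)
spec = OpenAPISpec.from_url("https://example.com/openapi.json")
tool = NLATool.from_llm_and_method(
    llm=llm, path="/v1/items", method="get", spec=spec, verbose=True
)
print(tool.run("List the first five items"))
```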
@@ -0,0 +1,77 @@
|
||||
"""OpenAPI spec agent."""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
|
||||
from langchain_community.agent_toolkits.openapi.prompt import (
|
||||
OPENAPI_PREFIX,
|
||||
OPENAPI_SUFFIX,
|
||||
)
|
||||
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
|
||||
|
||||
def create_openapi_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: OpenAPIToolkit,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = OPENAPI_PREFIX,
|
||||
suffix: str = OPENAPI_SUFFIX,
|
||||
format_instructions: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
max_iterations: Optional[int] = 15,
|
||||
max_execution_time: Optional[float] = None,
|
||||
early_stopping_method: str = "force",
|
||||
verbose: bool = False,
|
||||
return_intermediate_steps: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct an OpenAPI agent from an LLM and tools.
|
||||
|
||||
*Security Note*: When creating an OpenAPI agent, check the permissions
|
||||
and capabilities of the underlying toolkit.
|
||||
|
||||
For example, the default implementation of OpenAPIToolkit
uses the RequestsToolkit, which contains tools to make arbitrary
network requests against any URL (e.g., GET, POST, PATCH, PUT, DELETE).

Control access to who can use this toolkit and
what network access it has.
|
||||
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
"""
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.chains.llm import LLMChain
|
||||
tools = toolkit.get_tools()
|
||||
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
**prompt_params
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
return_intermediate_steps=return_intermediate_steps,
|
||||
max_iterations=max_iterations,
|
||||
max_execution_time=max_execution_time,
|
||||
early_stopping_method=early_stopping_method,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
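A hedged usage sketch for this toolkit-based `create_openapi_agent`; the spec file, model, and module paths are inferred from the imports above:

```python
# Hypothetical usage sketch; the spec file, model, and module paths are assumptions.
import yaml
from langchain_openai import OpenAI

from langchain_community.agent_toolkits.openapi.base import create_openapi_agent
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain_community.tools.json.tool import JsonSpec
from langchain_community.utilities.requests import TextRequestsWrapper

with open("openapi.yml") as f:
    raw_spec = yaml.safe_load(f)

llm = OpenAI(temperature=0)
toolkit = OpenAPIToolkit.from_llm(
    llm,
    JsonSpec(dict_=raw_spec, max_value_length=4000),
    TextRequestsWrapper(),
    verbose=True,
)
executor = create_openapi_agent(llm=llm, toolkit=toolkit, verbose=True)
executor.run("Make a GET request to /pets and summarize the response")
```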
@@ -0,0 +1,366 @@
|
||||
"""Agent that interacts with OpenAPI APIs via a hierarchical planning approach."""
|
||||
from __future__ import annotations

import json
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import yaml
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.prompts import BasePromptTemplate, PromptTemplate
|
||||
from langchain_core.pydantic_v1 import Field
|
||||
from langchain_core.tools import BaseTool, Tool
|
||||
from langchain_openai.llms import OpenAI
|
||||
|
||||
from langchain_community.agent_toolkits.openapi.planner_prompt import (
|
||||
API_CONTROLLER_PROMPT,
|
||||
API_CONTROLLER_TOOL_DESCRIPTION,
|
||||
API_CONTROLLER_TOOL_NAME,
|
||||
API_ORCHESTRATOR_PROMPT,
|
||||
API_PLANNER_PROMPT,
|
||||
API_PLANNER_TOOL_DESCRIPTION,
|
||||
API_PLANNER_TOOL_NAME,
|
||||
PARSING_DELETE_PROMPT,
|
||||
PARSING_GET_PROMPT,
|
||||
PARSING_PATCH_PROMPT,
|
||||
PARSING_POST_PROMPT,
|
||||
PARSING_PUT_PROMPT,
|
||||
REQUESTS_DELETE_TOOL_DESCRIPTION,
|
||||
REQUESTS_GET_TOOL_DESCRIPTION,
|
||||
REQUESTS_PATCH_TOOL_DESCRIPTION,
|
||||
REQUESTS_POST_TOOL_DESCRIPTION,
|
||||
REQUESTS_PUT_TOOL_DESCRIPTION,
|
||||
)
|
||||
from langchain_community.agent_toolkits.openapi.spec import ReducedOpenAPISpec
|
||||
from langchain_community.output_parsers.json import parse_json_markdown
|
||||
from langchain_community.tools.requests.tool import BaseRequestsTool
|
||||
from langchain_community.utilities.requests import RequestsWrapper
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.memory import ReadOnlySharedMemory
|
||||
|
||||
#
|
||||
# Requests tools with LLM-instructed extraction of truncated responses.
|
||||
#
|
||||
# Of course, truncating so bluntly may lose a lot of valuable
|
||||
# information in the response.
|
||||
# However, the goal for now is to have only a single inference step.
|
||||
MAX_RESPONSE_LENGTH = 5000
|
||||
"""Maximum length of the response to be returned."""
|
||||
|
||||
|
||||
def _get_default_llm_chain(prompt: BasePromptTemplate) -> LLMChain:
|
||||
from langchain.chains.llm import LLMChain
|
||||
return LLMChain(
|
||||
llm=OpenAI(),
|
||||
prompt=prompt,
|
||||
)
|
||||
|
||||
|
||||
def _get_default_llm_chain_factory(
|
||||
prompt: BasePromptTemplate,
|
||||
) -> Callable[[], LLMChain]:
|
||||
"""Returns a default LLMChain factory."""
|
||||
return partial(_get_default_llm_chain, prompt)
|
||||
|
||||
|
||||
class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
"""Requests GET tool with LLM-instructed extraction of truncated responses."""
|
||||
|
||||
name: str = "requests_get"
|
||||
"""Tool name."""
|
||||
description = REQUESTS_GET_TOOL_DESCRIPTION
|
||||
"""Tool description."""
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
"""Maximum length of the response to be returned."""
|
||||
llm_chain: Any = Field(
|
||||
default_factory=_get_default_llm_chain_factory(PARSING_GET_PROMPT)
|
||||
)
|
||||
"""LLMChain used to extract the response."""
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = parse_json_markdown(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
data_params = data.get("params")
|
||||
response = self.requests_wrapper.get(data["url"], params=data_params)
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
"""Requests POST tool with LLM-instructed extraction of truncated responses."""
|
||||
|
||||
name: str = "requests_post"
|
||||
"""Tool name."""
|
||||
description = REQUESTS_POST_TOOL_DESCRIPTION
|
||||
"""Tool description."""
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
"""Maximum length of the response to be returned."""
|
||||
llm_chain: Any = Field(
|
||||
default_factory=_get_default_llm_chain_factory(PARSING_POST_PROMPT)
|
||||
)
|
||||
"""LLMChain used to extract the response."""
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = parse_json_markdown(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.post(data["url"], data["data"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
"""Requests PATCH tool with LLM-instructed extraction of truncated responses."""
|
||||
|
||||
name: str = "requests_patch"
|
||||
"""Tool name."""
|
||||
description = REQUESTS_PATCH_TOOL_DESCRIPTION
|
||||
"""Tool description."""
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
"""Maximum length of the response to be returned."""
|
||||
llm_chain: Any = Field(
|
||||
default_factory=_get_default_llm_chain_factory(PARSING_PATCH_PROMPT)
|
||||
)
|
||||
"""LLMChain used to extract the response."""
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = parse_json_markdown(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.patch(data["url"], data["data"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
"""Requests PUT tool with LLM-instructed extraction of truncated responses."""
|
||||
|
||||
name: str = "requests_put"
|
||||
"""Tool name."""
|
||||
description = REQUESTS_PUT_TOOL_DESCRIPTION
|
||||
"""Tool description."""
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
"""Maximum length of the response to be returned."""
|
||||
llm_chain: Any = Field(
|
||||
default_factory=_get_default_llm_chain_factory(PARSING_PUT_PROMPT)
|
||||
)
|
||||
"""LLMChain used to extract the response."""
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = parse_json_markdown(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.put(data["url"], data["data"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
"""A tool that sends a DELETE request and parses the response."""
|
||||
|
||||
name: str = "requests_delete"
|
||||
"""The name of the tool."""
|
||||
description = REQUESTS_DELETE_TOOL_DESCRIPTION
|
||||
"""The description of the tool."""
|
||||
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
"""The maximum length of the response."""
|
||||
llm_chain: Any = Field(
|
||||
default_factory=_get_default_llm_chain_factory(PARSING_DELETE_PROMPT)
|
||||
)
|
||||
"""The LLM chain used to parse the response."""
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = parse_json_markdown(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.delete(data["url"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
#
|
||||
# Orchestrator, planner, controller.
|
||||
#
|
||||
def _create_api_planner_tool(
|
||||
api_spec: ReducedOpenAPISpec, llm: BaseLanguageModel
|
||||
) -> Tool:
|
||||
from langchain.chains.llm import LLMChain
|
||||
endpoint_descriptions = [
|
||||
f"{name} {description}" for name, description, _ in api_spec.endpoints
|
||||
]
|
||||
prompt = PromptTemplate(
|
||||
template=API_PLANNER_PROMPT,
|
||||
input_variables=["query"],
|
||||
partial_variables={"endpoints": "- " + "- ".join(endpoint_descriptions)},
|
||||
)
|
||||
chain = LLMChain(llm=llm, prompt=prompt)
|
||||
tool = Tool(
|
||||
name=API_PLANNER_TOOL_NAME,
|
||||
description=API_PLANNER_TOOL_DESCRIPTION,
|
||||
func=chain.run,
|
||||
)
|
||||
return tool
|
||||
|
||||
|
||||
def _create_api_controller_agent(
|
||||
api_url: str,
|
||||
api_docs: str,
|
||||
requests_wrapper: RequestsWrapper,
|
||||
llm: BaseLanguageModel,
|
||||
) -> AgentExecutor:
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.chains.llm import LLMChain
|
||||
get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT)
|
||||
post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT)
|
||||
tools: List[BaseTool] = [
|
||||
RequestsGetToolWithParsing(
|
||||
requests_wrapper=requests_wrapper, llm_chain=get_llm_chain
|
||||
),
|
||||
RequestsPostToolWithParsing(
|
||||
requests_wrapper=requests_wrapper, llm_chain=post_llm_chain
|
||||
),
|
||||
]
|
||||
prompt = PromptTemplate(
|
||||
template=API_CONTROLLER_PROMPT,
|
||||
input_variables=["input", "agent_scratchpad"],
|
||||
partial_variables={
|
||||
"api_url": api_url,
|
||||
"api_docs": api_docs,
|
||||
"tool_names": ", ".join([tool.name for tool in tools]),
|
||||
"tool_descriptions": "\n".join(
|
||||
[f"{tool.name}: {tool.description}" for tool in tools]
|
||||
),
|
||||
},
|
||||
)
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(llm=llm, prompt=prompt),
|
||||
allowed_tools=[tool.name for tool in tools],
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
|
||||
|
||||
|
||||
def _create_api_controller_tool(
|
||||
api_spec: ReducedOpenAPISpec,
|
||||
requests_wrapper: RequestsWrapper,
|
||||
llm: BaseLanguageModel,
|
||||
) -> Tool:
|
||||
"""Expose controller as a tool.
|
||||
|
||||
The tool is invoked with a plan from the planner, and dynamically
|
||||
creates a controller agent with relevant documentation only to
|
||||
constrain the context.
|
||||
"""
|
||||
|
||||
base_url = api_spec.servers[0]["url"] # TODO: do better.
|
||||
|
||||
def _create_and_run_api_controller_agent(plan_str: str) -> str:
|
||||
pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
|
||||
matches = re.findall(pattern, plan_str)
|
||||
endpoint_names = [
|
||||
"{method} {route}".format(method=method, route=route.split("?")[0])
|
||||
for method, route in matches
|
||||
]
|
||||
docs_str = ""
|
||||
for endpoint_name in endpoint_names:
|
||||
found_match = False
|
||||
for name, _, docs in api_spec.endpoints:
|
||||
regex_name = re.compile(re.sub(r"\{.*?\}", ".*", name))
|
||||
if regex_name.match(endpoint_name):
|
||||
found_match = True
|
||||
docs_str += f"== Docs for {endpoint_name} == \n{yaml.dump(docs)}\n"
|
||||
if not found_match:
|
||||
raise ValueError(f"{endpoint_name} endpoint does not exist.")
|
||||
|
||||
agent = _create_api_controller_agent(base_url, docs_str, requests_wrapper, llm)
|
||||
return agent.run(plan_str)
|
||||
|
||||
return Tool(
|
||||
name=API_CONTROLLER_TOOL_NAME,
|
||||
func=_create_and_run_api_controller_agent,
|
||||
description=API_CONTROLLER_TOOL_DESCRIPTION,
|
||||
)
|
||||
|
||||
|
||||
def create_openapi_agent(
|
||||
api_spec: ReducedOpenAPISpec,
|
||||
requests_wrapper: RequestsWrapper,
|
||||
llm: BaseLanguageModel,
|
||||
shared_memory: Optional[ReadOnlySharedMemory] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
verbose: bool = True,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Instantiate OpenAI API planner and controller for a given spec.
|
||||
|
||||
Inject credentials via requests_wrapper.
|
||||
|
||||
We use a top-level "orchestrator" agent to invoke the planner and controller,
|
||||
rather than a top-level planner
|
||||
that invokes a controller with its plan. This is to keep the planner simple.
|
||||
"""
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.chains.llm import LLMChain
|
||||
tools = [
|
||||
_create_api_planner_tool(api_spec, llm),
|
||||
_create_api_controller_tool(api_spec, requests_wrapper, llm),
|
||||
]
|
||||
prompt = PromptTemplate(
|
||||
template=API_ORCHESTRATOR_PROMPT,
|
||||
input_variables=["input", "agent_scratchpad"],
|
||||
partial_variables={
|
||||
"tool_names": ", ".join([tool.name for tool in tools]),
|
||||
"tool_descriptions": "\n".join(
|
||||
[f"{tool.name}: {tool.description}" for tool in tools]
|
||||
),
|
||||
},
|
||||
)
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(llm=llm, prompt=prompt, memory=shared_memory),
|
||||
allowed_tools=[tool.name for tool in tools],
|
||||
**kwargs,
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
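A hypothetical sketch of the hierarchical planner agent defined above; the spec file, auth header value, and the `planner` import path are assumptions:

```python
# Hypothetical usage sketch; spec file, auth token, and module path are assumptions.
import yaml
from langchain_openai import OpenAI

from langchain_community.agent_toolkits.openapi import planner  # inferred path
from langchain_community.agent_toolkits.openapi.spec import reduce_openapi_spec
from langchain_community.utilities.requests import RequestsWrapper

with open("openapi.yml") as f:
    raw_spec = yaml.safe_load(f)

# Reduce the spec so only endpoint names, descriptions, and docs reach the planner.
reduced = reduce_openapi_spec(raw_spec)
requests_wrapper = RequestsWrapper(headers={"Authorization": "Bearer <token>"})

agent = planner.create_openapi_agent(reduced, requests_wrapper, OpenAI(temperature=0))
agent.run("What endpoints are available, and fetch the first page of results from one of them")
```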
@@ -0,0 +1,90 @@
|
||||
"""Requests toolkit."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, List
|
||||
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.tools import Tool
|
||||
|
||||
from langchain_community.agent_toolkits.base import BaseToolkit
|
||||
from langchain_community.agent_toolkits.json.base import create_json_agent
|
||||
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
|
||||
from langchain_community.agent_toolkits.openapi.prompt import DESCRIPTION
|
||||
from langchain_community.tools import BaseTool
|
||||
from langchain_community.tools.json.tool import JsonSpec
|
||||
from langchain_community.tools.requests.tool import (
|
||||
RequestsDeleteTool,
|
||||
RequestsGetTool,
|
||||
RequestsPatchTool,
|
||||
RequestsPostTool,
|
||||
RequestsPutTool,
|
||||
)
|
||||
from langchain_community.utilities.requests import TextRequestsWrapper
|
||||
|
||||
|
||||
class RequestsToolkit(BaseToolkit):
|
||||
"""Toolkit for making REST requests.
|
||||
|
||||
*Security Note*: This toolkit contains tools to make GET, POST, PATCH, PUT,
|
||||
and DELETE requests to an API.
|
||||
|
||||
Exercise care in who is allowed to use this toolkit. If exposing
|
||||
to end users, consider that users will be able to make arbitrary
|
||||
requests on behalf of the server hosting the code. For example,
|
||||
users could ask the server to make a request to a private API
|
||||
that is only accessible from the server.
|
||||
|
||||
Control access to who can submit issue requests using this toolkit and
|
||||
what network access it has.
|
||||
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
"""
|
||||
|
||||
requests_wrapper: TextRequestsWrapper
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Return a list of tools."""
|
||||
return [
|
||||
RequestsGetTool(requests_wrapper=self.requests_wrapper),
|
||||
RequestsPostTool(requests_wrapper=self.requests_wrapper),
|
||||
RequestsPatchTool(requests_wrapper=self.requests_wrapper),
|
||||
RequestsPutTool(requests_wrapper=self.requests_wrapper),
|
||||
RequestsDeleteTool(requests_wrapper=self.requests_wrapper),
|
||||
]
|
||||
|
||||
|
||||
class OpenAPIToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with an OpenAPI API.
|
||||
|
||||
*Security Note*: This toolkit contains tools that can read and modify
|
||||
the state of a service; e.g., by creating, deleting, or updating,
|
||||
reading underlying data.
|
||||
|
||||
For example, this toolkit can be used to delete data exposed via
|
||||
an OpenAPI compliant API.
|
||||
"""
|
||||
|
||||
json_agent: Any
|
||||
requests_wrapper: TextRequestsWrapper
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
json_agent_tool = Tool(
|
||||
name="json_explorer",
|
||||
func=self.json_agent.run,
|
||||
description=DESCRIPTION,
|
||||
)
|
||||
request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper)
|
||||
return [*request_toolkit.get_tools(), json_agent_tool]
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
json_spec: JsonSpec,
|
||||
requests_wrapper: TextRequestsWrapper,
|
||||
**kwargs: Any,
|
||||
) -> OpenAPIToolkit:
|
||||
"""Create json agent from llm, then initialize."""
|
||||
json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs)
|
||||
return cls(json_agent=json_agent, requests_wrapper=requests_wrapper)
|
||||
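A small sketch showing how `RequestsToolkit` exposes its tools; the auth header value is a placeholder:

```python
# Minimal sketch; the Authorization header value is a placeholder.
from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
from langchain_community.utilities.requests import TextRequestsWrapper

toolkit = RequestsToolkit(
    requests_wrapper=TextRequestsWrapper(headers={"Authorization": "Bearer <token>"})
)
for tool in toolkit.get_tools():
    # One tool per HTTP verb: GET, POST, PATCH, PUT, DELETE.
    print(tool.name, "-", tool.description[:60])
```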
@@ -0,0 +1,68 @@
|
||||
"""Power BI agent."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
|
||||
from langchain_community.agent_toolkits.powerbi.prompt import (
|
||||
POWERBI_PREFIX,
|
||||
POWERBI_SUFFIX,
|
||||
)
|
||||
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain_community.utilities.powerbi import PowerBIDataset
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents import AgentExecutor
|
||||
|
||||
|
||||
def create_pbi_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: Optional[PowerBIToolkit] = None,
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = POWERBI_PREFIX,
|
||||
suffix: str = POWERBI_SUFFIX,
|
||||
format_instructions: Optional[str] = None,
|
||||
examples: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct a Power BI agent from an LLM and tools."""
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.chains.llm import LLMChain
|
||||
if toolkit is None:
|
||||
if powerbi is None:
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
|
||||
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(
|
||||
llm=llm,
|
||||
prompt=ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix.format(top_k=top_k).format(tables=tables),
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
**prompt_params,
|
||||
),
|
||||
callback_manager=callback_manager, # type: ignore
|
||||
verbose=verbose,
|
||||
),
|
||||
allowed_tools=[tool.name for tool in tools],
|
||||
**kwargs,
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
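A hypothetical usage sketch for `create_pbi_agent`; it assumes the optional `azure-identity` dependency, and the dataset ID and table name are placeholders:

```python
# Hypothetical usage sketch; dataset ID, table name, and credential are placeholders.
from azure.identity import DefaultAzureCredential
from langchain_openai import ChatOpenAI

from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent
from langchain_community.utilities.powerbi import PowerBIDataset

dataset = PowerBIDataset(
    dataset_id="<dataset_id>",
    table_names=["Sales"],
    credential=DefaultAzureCredential(),
)
# Passing only `powerbi` lets the factory build the toolkit with the same LLM.
executor = create_pbi_agent(llm=ChatOpenAI(temperature=0), powerbi=dataset, verbose=True)
executor.run("How many rows are in the Sales table?")
```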
@@ -0,0 +1,69 @@
|
||||
"""Power BI agent."""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
|
||||
from langchain_community.agent_toolkits.powerbi.prompt import (
|
||||
POWERBI_CHAT_PREFIX,
|
||||
POWERBI_CHAT_SUFFIX,
|
||||
)
|
||||
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain_community.utilities.powerbi import PowerBIDataset
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.agent import AgentOutputParser
|
||||
from langchain.memory.chat_memory import BaseChatMemory
|
||||
|
||||
|
||||
def create_pbi_chat_agent(
|
||||
llm: BaseChatModel,
|
||||
toolkit: Optional[PowerBIToolkit] = None,
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
output_parser: Optional[AgentOutputParser] = None,
|
||||
prefix: str = POWERBI_CHAT_PREFIX,
|
||||
suffix: str = POWERBI_CHAT_SUFFIX,
|
||||
examples: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
memory: Optional[BaseChatMemory] = None,
|
||||
top_k: int = 10,
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct a Power BI agent from a Chat LLM and tools.
|
||||
|
||||
If you supply only a toolkit and no Power BI dataset, the same LLM is used for both the agent and the toolkit's query chain.
|
||||
"""
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.conversational_chat.base import ConversationalChatAgent
|
||||
from langchain.memory import ConversationBufferMemory
|
||||
if toolkit is None:
|
||||
if powerbi is None:
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
|
||||
agent = ConversationalChatAgent.from_llm_and_tools(
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
system_message=prefix.format(top_k=top_k).format(tables=tables),
|
||||
human_message=suffix,
|
||||
input_variables=input_variables,
|
||||
callback_manager=callback_manager,
|
||||
output_parser=output_parser,
|
||||
verbose=verbose,
|
||||
**kwargs,
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
memory=memory
|
||||
or ConversationBufferMemory(memory_key="chat_history", return_messages=True),
|
||||
verbose=verbose,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Toolkit for interacting with a Power BI dataset."""
|
||||
from __future__ import annotations
|
||||
from typing import List, Optional, Union, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from langchain_core.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain_core.pydantic_v1 import Field
|
||||
|
||||
from langchain_community.agent_toolkits.base import BaseToolkit
|
||||
from langchain_community.tools import BaseTool
|
||||
from langchain_community.tools.powerbi.prompt import (
|
||||
QUESTION_TO_QUERY_BASE,
|
||||
SINGLE_QUESTION_TO_QUERY,
|
||||
USER_INPUT,
|
||||
)
|
||||
from langchain_community.tools.powerbi.tool import (
|
||||
InfoPowerBITool,
|
||||
ListPowerBITool,
|
||||
QueryPowerBITool,
|
||||
)
|
||||
from langchain_community.utilities.powerbi import PowerBIDataset
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.chains.llm import LLMChain
|
||||
|
||||
|
||||
class PowerBIToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with Power BI dataset.
|
||||
|
||||
*Security Note*: This toolkit interacts with an external service.
|
||||
|
||||
Control access to who can use this toolkit.
|
||||
|
||||
Make sure that the capabilities given by this toolkit to the calling
|
||||
code are appropriately scoped to the application.
|
||||
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
"""
|
||||
|
||||
powerbi: PowerBIDataset = Field(exclude=True)
|
||||
llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True)
|
||||
examples: Optional[str] = None
|
||||
max_iterations: int = 5
|
||||
callback_manager: Optional[BaseCallbackManager] = None
|
||||
output_token_limit: Optional[int] = None
|
||||
tiktoken_model_name: Optional[str] = None
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
return [
|
||||
QueryPowerBITool(
|
||||
llm_chain=self._get_chain(),
|
||||
powerbi=self.powerbi,
|
||||
examples=self.examples,
|
||||
max_iterations=self.max_iterations,
|
||||
output_token_limit=self.output_token_limit,
|
||||
tiktoken_model_name=self.tiktoken_model_name,
|
||||
),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
ListPowerBITool(powerbi=self.powerbi),
|
||||
]
|
||||
|
||||
def _get_chain(self) -> LLMChain:
|
||||
"""Construct the chain based on the callback manager and model type."""
|
||||
from langchain.chains.llm import LLMChain
|
||||
if isinstance(self.llm, BaseLanguageModel):
|
||||
return LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager
|
||||
if self.callback_manager
|
||||
else None,
|
||||
prompt=PromptTemplate(
|
||||
template=SINGLE_QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
)
|
||||
|
||||
system_prompt = SystemMessagePromptTemplate(
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY_BASE,
|
||||
input_variables=["tables", "schemas", "examples"],
|
||||
)
|
||||
)
|
||||
human_prompt = HumanMessagePromptTemplate(
|
||||
prompt=PromptTemplate(
|
||||
template=USER_INPUT,
|
||||
input_variables=["tool_input"],
|
||||
)
|
||||
)
|
||||
return LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager if self.callback_manager else None,
|
||||
prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]),
|
||||
)
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Spark SQL agent."""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager, Callbacks
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
|
||||
from langchain_community.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX
|
||||
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
|
||||
|
||||
def create_spark_sql_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: SparkSQLToolkit,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
callbacks: Callbacks = None,
|
||||
prefix: str = SQL_PREFIX,
|
||||
suffix: str = SQL_SUFFIX,
|
||||
format_instructions: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
max_iterations: Optional[int] = 15,
|
||||
max_execution_time: Optional[float] = None,
|
||||
early_stopping_method: str = "force",
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct a Spark SQL agent from an LLM and tools."""
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.chains.llm import LLMChain
|
||||
tools = toolkit.get_tools()
|
||||
prefix = prefix.format(top_k=top_k)
|
||||
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
**prompt_params,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
callbacks=callbacks,
|
||||
verbose=verbose,
|
||||
max_iterations=max_iterations,
|
||||
max_execution_time=max_execution_time,
|
||||
early_stopping_method=early_stopping_method,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
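A hypothetical usage sketch for `create_spark_sql_agent`, assuming a local Spark session with a populated schema:

```python
# Hypothetical usage sketch; assumes pyspark is installed and the schema exists locally.
from langchain_openai import ChatOpenAI

from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
from langchain_community.utilities.spark_sql import SparkSQL

spark_sql = SparkSQL(schema="langchain_example")
llm = ChatOpenAI(temperature=0)
toolkit = SparkSQLToolkit(db=spark_sql, llm=llm)
executor = create_spark_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
executor.run("Describe the titanic table")
```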
@@ -0,0 +1,102 @@
|
||||
"""SQL agent."""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional, Sequence, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.messages import AIMessage, SystemMessage
|
||||
from langchain_core.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
)
|
||||
|
||||
from langchain_community.agent_toolkits.sql.prompt import (
|
||||
SQL_FUNCTIONS_SUFFIX,
|
||||
SQL_PREFIX,
|
||||
SQL_SUFFIX,
|
||||
)
|
||||
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
||||
from langchain_community.tools import BaseTool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.agent_types import AgentType
|
||||
|
||||
|
||||
def create_sql_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: SQLDatabaseToolkit,
|
||||
agent_type: Optional[AgentType] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = SQL_PREFIX,
|
||||
suffix: Optional[str] = None,
|
||||
format_instructions: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
max_iterations: Optional[int] = 15,
|
||||
max_execution_time: Optional[float] = None,
|
||||
early_stopping_method: str = "force",
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
extra_tools: Sequence[BaseTool] = (),
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct an SQL agent from an LLM and tools."""
|
||||
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
|
||||
from langchain.agents.agent_types import AgentType
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
||||
from langchain.chains.llm import LLMChain
|
||||
agent_type = agent_type or AgentType.ZERO_SHOT_REACT_DESCRIPTION
|
||||
tools = toolkit.get_tools() + list(extra_tools)
|
||||
prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k)
|
||||
agent: BaseSingleActionAgent
|
||||
|
||||
if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
|
||||
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix or SQL_SUFFIX,
|
||||
input_variables=input_variables,
|
||||
**prompt_params,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
|
||||
elif agent_type == AgentType.OPENAI_FUNCTIONS:
|
||||
messages = [
|
||||
SystemMessage(content=prefix),
|
||||
HumanMessagePromptTemplate.from_template("{input}"),
|
||||
AIMessage(content=suffix or SQL_FUNCTIONS_SUFFIX),
|
||||
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
||||
]
|
||||
input_variables = ["input", "agent_scratchpad"]
|
||||
_prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages)
|
||||
|
||||
agent = OpenAIFunctionsAgent(
|
||||
llm=llm,
|
||||
prompt=_prompt,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
**kwargs,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Agent type {agent_type} not supported at the moment.")
|
||||
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
max_iterations=max_iterations,
|
||||
max_execution_time=max_execution_time,
|
||||
early_stopping_method=early_stopping_method,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
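A minimal usage sketch for `create_sql_agent`, assuming a local SQLite database file (placeholder path):

```python
# Hypothetical usage sketch; the SQLite file path is a placeholder.
from langchain.agents.agent_types import AgentType
from langchain_openai import ChatOpenAI

from langchain_community.agent_toolkits.sql.base import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.utilities.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
llm = ChatOpenAI(temperature=0)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
executor.run("List the five best-selling artists")
```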
@@ -0,0 +1,103 @@
|
||||
"""VectorStore agent."""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.callbacks import BaseCallbackManager
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
|
||||
from langchain_community.agent_toolkits.vectorstore.prompt import PREFIX, ROUTER_PREFIX
|
||||
from langchain_community.agent_toolkits.vectorstore.toolkit import (
|
||||
VectorStoreRouterToolkit,
|
||||
VectorStoreToolkit,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
|
||||
|
||||
def create_vectorstore_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: VectorStoreToolkit,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = PREFIX,
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct a VectorStore agent from an LLM and tools.
|
||||
|
||||
Args:
|
||||
llm (BaseLanguageModel): LLM that will be used by the agent
|
||||
toolkit (VectorStoreToolkit): Set of tools for the agent
|
||||
callback_manager (Optional[BaseCallbackManager], optional): Callback manager for the agent. Defaults to None.
prefix (str, optional): The prefix prompt for the agent. Defaults to PREFIX.
verbose (bool, optional): Whether to print the agent's intermediate steps (the scratchpad). Defaults to False.
agent_executor_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments to pass to the AgentExecutor. Defaults to None.
**kwargs: Additional named parameters to pass to the ZeroShotAgent.

Returns:
AgentExecutor: A callable AgentExecutor object. Call it directly or use its run method with a query to get a response.
|
||||
""" # noqa: E501
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.chains.llm import LLMChain
|
||||
tools = toolkit.get_tools()
|
||||
prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
|
||||
|
||||
def create_vectorstore_router_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: VectorStoreRouterToolkit,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = ROUTER_PREFIX,
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> AgentExecutor:
|
||||
"""Construct a VectorStore router agent from an LLM and tools.
|
||||
|
||||
Args:
|
||||
llm (BaseLanguageModel): LLM that will be used by the agent
|
||||
toolkit (VectorStoreRouterToolkit): Set of tools for the agent, with routing capability across multiple vector stores
callback_manager (Optional[BaseCallbackManager], optional): Callback manager for the agent. Defaults to None.
prefix (str, optional): The prefix prompt for the router agent. Defaults to ROUTER_PREFIX.
verbose (bool, optional): Whether to print the agent's intermediate steps (the scratchpad). Defaults to False.
agent_executor_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments to pass to the AgentExecutor. Defaults to None.
**kwargs: Additional named parameters to pass to the ZeroShotAgent.

Returns:
AgentExecutor: A callable AgentExecutor object. Call it directly or use its run method with a query to get a response.
|
||||
""" # noqa: E501
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.chains.llm import LLMChain
|
||||
tools = toolkit.get_tools()
|
||||
prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
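A hypothetical end-to-end sketch for `create_vectorstore_agent`; the FAISS store, embedding model, and sample text are assumptions (and `faiss-cpu` is an optional dependency):

```python
# Hypothetical usage sketch; the FAISS store, embeddings, and sample text are assumptions.
from langchain_openai import OpenAI, OpenAIEmbeddings

from langchain_community.agent_toolkits.vectorstore.base import create_vectorstore_agent
from langchain_community.agent_toolkits.vectorstore.toolkit import (
    VectorStoreInfo,
    VectorStoreToolkit,
)
from langchain_community.vectorstores.faiss import FAISS

vectorstore = FAISS.from_texts(["LangChain supports agents."], OpenAIEmbeddings())
llm = OpenAI(temperature=0)
toolkit = VectorStoreToolkit(
    vectorstore_info=VectorStoreInfo(
        name="notes", description="internal notes", vectorstore=vectorstore
    ),
    llm=llm,
)
executor = create_vectorstore_agent(llm=llm, toolkit=toolkit, verbose=True)
executor.run("What does LangChain support?")
```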
@@ -0,0 +1,83 @@
|
||||
"""**Callback handlers** allow listening to events in LangChain.
|
||||
|
||||
**Class hierarchy:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
BaseCallbackHandler --> <name>CallbackHandler # Example: AimCallbackHandler
|
||||
"""
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
StdOutCallbackHandler,
|
||||
StreamingStdOutCallbackHandler,
|
||||
)
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
|
||||
from langchain_community.callbacks.aim_callback import AimCallbackHandler
|
||||
from langchain_community.callbacks.argilla_callback import ArgillaCallbackHandler
|
||||
from langchain_community.callbacks.arize_callback import ArizeCallbackHandler
|
||||
from langchain_community.callbacks.arthur_callback import ArthurCallbackHandler
|
||||
from langchain_community.callbacks.clearml_callback import ClearMLCallbackHandler
|
||||
from langchain_community.callbacks.comet_ml_callback import CometCallbackHandler
|
||||
from langchain_community.callbacks.context_callback import ContextCallbackHandler
|
||||
from langchain_community.callbacks.file import FileCallbackHandler
|
||||
from langchain_community.callbacks.flyte_callback import FlyteCallbackHandler
|
||||
from langchain_community.callbacks.human import HumanApprovalCallbackHandler
|
||||
from langchain_community.callbacks.infino_callback import InfinoCallbackHandler
|
||||
from langchain_community.callbacks.labelstudio_callback import (
|
||||
LabelStudioCallbackHandler,
|
||||
)
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
from langchain_community.callbacks.manager import (
|
||||
get_openai_callback,
|
||||
wandb_tracing_enabled,
|
||||
)
|
||||
from langchain_community.callbacks.mlflow_callback import MlflowCallbackHandler
|
||||
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
|
||||
from langchain_community.callbacks.promptlayer_callback import (
|
||||
PromptLayerCallbackHandler,
|
||||
)
|
||||
from langchain_community.callbacks.sagemaker_callback import SageMakerCallbackHandler
|
||||
from langchain_community.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
|
||||
from langchain_community.callbacks.streaming_stdout_final_only import (
|
||||
FinalStreamingStdOutCallbackHandler,
|
||||
)
|
||||
from langchain_community.callbacks.streamlit import (
|
||||
LLMThoughtLabeler,
|
||||
StreamlitCallbackHandler,
|
||||
)
|
||||
from langchain_community.callbacks.trubrics_callback import TrubricsCallbackHandler
|
||||
from langchain_community.callbacks.wandb_callback import WandbCallbackHandler
|
||||
from langchain_community.callbacks.whylabs_callback import WhyLabsCallbackHandler
|
||||
|
||||
__all__ = [
|
||||
"AimCallbackHandler",
|
||||
"ArgillaCallbackHandler",
|
||||
"ArizeCallbackHandler",
|
||||
"PromptLayerCallbackHandler",
|
||||
"ArthurCallbackHandler",
|
||||
"ClearMLCallbackHandler",
|
||||
"CometCallbackHandler",
|
||||
"ContextCallbackHandler",
|
||||
"FileCallbackHandler",
|
||||
"HumanApprovalCallbackHandler",
|
||||
"InfinoCallbackHandler",
|
||||
"MlflowCallbackHandler",
|
||||
"LLMonitorCallbackHandler",
|
||||
"OpenAICallbackHandler",
|
||||
"StdOutCallbackHandler",
|
||||
"AsyncIteratorCallbackHandler",
|
||||
"StreamingStdOutCallbackHandler",
|
||||
"FinalStreamingStdOutCallbackHandler",
|
||||
"LLMThoughtLabeler",
|
||||
"LangChainTracer",
|
||||
"StreamlitCallbackHandler",
|
||||
"WandbCallbackHandler",
|
||||
"WhyLabsCallbackHandler",
|
||||
"get_openai_callback",
|
||||
"wandb_tracing_enabled",
|
||||
"FlyteCallbackHandler",
|
||||
"SageMakerCallbackHandler",
|
||||
"LabelStudioCallbackHandler",
|
||||
"TrubricsCallbackHandler",
|
||||
]
|
||||
@@ -0,0 +1,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar
|
||||
from typing import (
|
||||
Generator,
|
||||
Optional,
|
||||
)
|
||||
|
||||
from langchain_core.tracers.context import register_configure_hook
|
||||
|
||||
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
|
||||
from langchain_community.callbacks.tracers.wandb import WandbTracer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
openai_callback_var: ContextVar[Optional[OpenAICallbackHandler]] = ContextVar(
|
||||
"openai_callback", default=None
|
||||
)
|
||||
wandb_tracing_callback_var: ContextVar[Optional[WandbTracer]] = ContextVar( # noqa: E501
|
||||
"tracing_wandb_callback", default=None
|
||||
)
|
||||
|
||||
register_configure_hook(openai_callback_var, True)
|
||||
register_configure_hook(
|
||||
wandb_tracing_callback_var, True, WandbTracer, "LANGCHAIN_WANDB_TRACING"
|
||||
)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_openai_callback() -> Generator[OpenAICallbackHandler, None, None]:
|
||||
Get the OpenAI callback handler in a context manager,
which conveniently exposes token and cost information.
|
||||
|
||||
Returns:
|
||||
OpenAICallbackHandler: The OpenAI callback handler.
|
||||
|
||||
Example:
|
||||
>>> with get_openai_callback() as cb:
|
||||
... # Use the OpenAI callback handler
|
||||
"""
|
||||
cb = OpenAICallbackHandler()
|
||||
openai_callback_var.set(cb)
|
||||
yield cb
|
||||
openai_callback_var.set(None)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def wandb_tracing_enabled(
|
||||
session_name: str = "default",
|
||||
) -> Generator[None, None, None]:
|
||||
"""Get the WandbTracer in a context manager.
|
||||
|
||||
Args:
|
||||
session_name (str, optional): The name of the session.
|
||||
Defaults to "default".
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Example:
|
||||
>>> with wandb_tracing_enabled() as session:
|
||||
... # Use the WandbTracer session
|
||||
"""
|
||||
cb = WandbTracer()
|
||||
wandb_tracing_callback_var.set(cb)
|
||||
yield None
|
||||
wandb_tracing_callback_var.set(None)
|
||||
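A short usage sketch for `get_openai_callback`; the model name is an assumption:

```python
# Minimal sketch; the model name is an assumption.
from langchain_openai import ChatOpenAI

from langchain_community.callbacks.manager import get_openai_callback

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
with get_openai_callback() as cb:
    llm.invoke("Tell me a one-line joke")
    # The handler accumulates token usage and an estimated cost for calls made in this block.
    print(cb.total_tokens, cb.prompt_tokens, cb.completion_tokens, cb.total_cost)
```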
@@ -0,0 +1,20 @@
|
||||
"""Tracers that record execution of LangChain runs."""
|
||||
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
from langchain_core.tracers.langchain_v1 import LangChainTracerV1
|
||||
from langchain_core.tracers.stdout import (
|
||||
ConsoleCallbackHandler,
|
||||
FunctionCallbackHandler,
|
||||
)
|
||||
|
||||
from langchain_community.callbacks.tracers.logging import LoggingCallbackHandler
|
||||
from langchain_community.callbacks.tracers.wandb import WandbTracer
|
||||
|
||||
__all__ = [
|
||||
"ConsoleCallbackHandler",
|
||||
"FunctionCallbackHandler",
|
||||
"LoggingCallbackHandler",
|
||||
"LangChainTracer",
|
||||
"LangChainTracerV1",
|
||||
"WandbTracer",
|
||||
]
|
||||
@@ -0,0 +1,78 @@
|
||||
"""**Chat Models** are a variation on language models.
|
||||
|
||||
While Chat Models use language models under the hood, the interface they expose
|
||||
is a bit different. Rather than expose a "text in, text out" API, they expose
|
||||
an interface where "chat messages" are the inputs and outputs.
|
||||
|
||||
**Class hierarchy:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
BaseLanguageModel --> BaseChatModel --> <name> # Examples: ChatOpenAI, ChatGooglePalm
|
||||
|
||||
**Main helpers:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
AIMessage, BaseMessage, HumanMessage
|
||||
""" # noqa: E501
|
||||
|
||||
from langchain_community.chat_models.anthropic import ChatAnthropic
|
||||
from langchain_community.chat_models.anyscale import ChatAnyscale
|
||||
from langchain_community.chat_models.baichuan import ChatBaichuan
|
||||
from langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
|
||||
from langchain_community.chat_models.bedrock import BedrockChat
|
||||
from langchain_community.chat_models.cohere import ChatCohere
|
||||
from langchain_community.chat_models.databricks import ChatDatabricks
|
||||
from langchain_community.chat_models.ernie import ErnieBotChat
|
||||
from langchain_community.chat_models.everlyai import ChatEverlyAI
|
||||
from langchain_community.chat_models.fake import FakeListChatModel
|
||||
from langchain_community.chat_models.fireworks import ChatFireworks
|
||||
from langchain_community.chat_models.gigachat import GigaChat
|
||||
from langchain_community.chat_models.google_palm import ChatGooglePalm
|
||||
from langchain_community.chat_models.human import HumanInputChatModel
|
||||
from langchain_community.chat_models.hunyuan import ChatHunyuan
|
||||
from langchain_community.chat_models.javelin_ai_gateway import ChatJavelinAIGateway
|
||||
from langchain_community.chat_models.jinachat import JinaChat
|
||||
from langchain_community.chat_models.konko import ChatKonko
|
||||
from langchain_community.chat_models.litellm import ChatLiteLLM
|
||||
from langchain_community.chat_models.minimax import MiniMaxChat
|
||||
from langchain_community.chat_models.mlflow import ChatMlflow
|
||||
from langchain_community.chat_models.mlflow_ai_gateway import ChatMLflowAIGateway
|
||||
from langchain_community.chat_models.ollama import ChatOllama
|
||||
from langchain_community.chat_models.pai_eas_endpoint import PaiEasChatEndpoint
|
||||
from langchain_community.chat_models.promptlayer_openai import PromptLayerChatOpenAI
|
||||
from langchain_community.chat_models.vertexai import ChatVertexAI
|
||||
from langchain_community.chat_models.volcengine_maas import VolcEngineMaasChat
|
||||
from langchain_community.chat_models.yandex import ChatYandexGPT
|
||||
|
||||
__all__ = [
|
||||
"BedrockChat",
|
||||
"FakeListChatModel",
|
||||
"PromptLayerChatOpenAI",
|
||||
"ChatDatabricks",
|
||||
"ChatEverlyAI",
|
||||
"ChatAnthropic",
|
||||
"ChatCohere",
|
||||
"ChatGooglePalm",
|
||||
"ChatMlflow",
|
||||
"ChatMLflowAIGateway",
|
||||
"ChatOllama",
|
||||
"ChatVertexAI",
|
||||
"JinaChat",
|
||||
"HumanInputChatModel",
|
||||
"MiniMaxChat",
|
||||
"ChatAnyscale",
|
||||
"ChatLiteLLM",
|
||||
"ErnieBotChat",
|
||||
"ChatJavelinAIGateway",
|
||||
"ChatKonko",
|
||||
"PaiEasChatEndpoint",
|
||||
"QianfanChatEndpoint",
|
||||
"ChatFireworks",
|
||||
"ChatYandexGPT",
|
||||
"ChatBaichuan",
|
||||
"ChatHunyuan",
|
||||
"GigaChat",
|
||||
"VolcEngineMaasChat",
|
||||
]
|
||||
@@ -0,0 +1,101 @@
|
||||
"""Abstract interface for document loader implementations."""
|
||||
from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Iterator, List, Optional, TYPE_CHECKING
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.blob_loaders import Blob
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.text_splitter import TextSplitter
|
||||
|
||||
|
||||
class BaseLoader(ABC):
|
||||
"""Interface for Document Loader.
|
||||
|
||||
Implementations should implement the lazy-loading method using generators
|
||||
to avoid loading all Documents into memory at once.
|
||||
|
||||
The `load` method will remain as is for backwards compatibility, but its
|
||||
implementation should be just `list(self.lazy_load())`.
|
||||
"""
|
||||
|
||||
# Sub-classes should implement this method
|
||||
# as return list(self.lazy_load()).
|
||||
# This method returns a List which is materialized in memory.
|
||||
@abstractmethod
|
||||
def load(self) -> List[Document]:
|
||||
"""Load data into Document objects."""
|
||||
|
||||
def load_and_split(
|
||||
self, text_splitter: Optional[TextSplitter] = None
|
||||
) -> List[Document]:
|
||||
"""Load Documents and split into chunks. Chunks are returned as Documents.
|
||||
|
||||
Args:
|
||||
text_splitter: TextSplitter instance to use for splitting documents.
|
||||
Defaults to RecursiveCharacterTextSplitter.
|
||||
|
||||
Returns:
|
||||
List of Documents.
|
||||
"""
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
if text_splitter is None:
|
||||
_text_splitter: TextSplitter = RecursiveCharacterTextSplitter()
|
||||
else:
|
||||
_text_splitter = text_splitter
|
||||
docs = self.load()
|
||||
return _text_splitter.split_documents(docs)
|
||||
|
||||
# Attention: This method will be upgraded into an abstractmethod once it's
|
||||
# implemented in all the existing subclasses.
|
||||
def lazy_load(
|
||||
self,
|
||||
) -> Iterator[Document]:
|
||||
"""A lazy loader for Documents."""
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__.__name__} does not implement lazy_load()"
|
||||
)
|
||||
|
||||
|
||||
class BaseBlobParser(ABC):
|
||||
"""Abstract interface for blob parsers.
|
||||
|
||||
A blob parser provides a way to parse raw data stored in a blob into one
|
||||
or more documents.
|
||||
|
||||
The parser can be composed with blob loaders, making it easy to reuse
|
||||
a parser independent of how the blob was originally loaded.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||
"""Lazy parsing interface.
|
||||
|
||||
Subclasses are required to implement this method.
|
||||
|
||||
Args:
|
||||
blob: Blob instance
|
||||
|
||||
Returns:
|
||||
Generator of documents
|
||||
"""
|
||||
|
||||
def parse(self, blob: Blob) -> List[Document]:
|
||||
"""Eagerly parse the blob into a document or documents.
|
||||
|
||||
This is a convenience method for interactive development environments.
|
||||
|
||||
Production applications should favor the lazy_parse method instead.
|
||||
|
||||
Subclasses should generally not override this parse method.
|
||||
|
||||
Args:
|
||||
blob: Blob instance
|
||||
|
||||
Returns:
|
||||
List of documents
|
||||
"""
|
||||
return list(self.lazy_parse(blob))
|
||||
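A hypothetical example of implementing the lazy-loading contract described above in a custom loader:

```python
# Hypothetical sketch of a custom loader built on the BaseLoader interface above.
from typing import Iterator, List

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class LineLoader(BaseLoader):
    """Load each line of a text file as a separate Document."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        # Yield documents one at a time so large files never sit fully in memory.
        with open(self.path, encoding="utf-8") as f:
            for i, line in enumerate(f):
                yield Document(
                    page_content=line.rstrip("\n"),
                    metadata={"line": i, "source": self.path},
                )

    def load(self) -> List[Document]:
        # Eager variant kept for backwards compatibility, as recommended above.
        return list(self.lazy_load())
```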
@@ -0,0 +1,147 @@
"""Use to load blobs from the local file system."""
from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union

from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader

T = TypeVar("T")


def _make_iterator(
    length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
    """Create a function that optionally wraps an iterable in tqdm."""
    if show_progress:
        try:
            from tqdm.auto import tqdm
        except ImportError:
            raise ImportError(
                "You must install tqdm to use show_progress=True."
                "You can install tqdm with `pip install tqdm`."
            )

        # Make sure to provide `total` here so that tqdm can show
        # a progress bar that takes into account the total number of files.
        def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
            """Wrap an iterable in a tqdm progress bar."""
            return tqdm(iterable, total=length_func())

        iterator = _with_tqdm
    else:
        iterator = iter  # type: ignore

    return iterator


# PUBLIC API


class FileSystemBlobLoader(BlobLoader):
    """Load blobs in the local file system.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
        loader = FileSystemBlobLoader("/path/to/directory")
        for blob in loader.yield_blobs():
            print(blob)
    """  # noqa: E501

    def __init__(
        self,
        path: Union[str, Path],
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
    ) -> None:
        """Initialize with a path to directory and how to glob over it.

        Args:
            path: Path to directory to load from or path to file to load.
                If a path to a file is provided, glob/exclude/suffixes are ignored.
            glob: Glob pattern relative to the specified path
                by default set to pick up all non-hidden files
            exclude: patterns to exclude from results, use glob syntax
            suffixes: Provide to keep only files with these suffixes
                Useful when wanting to keep files with different suffixes
                Suffixes must include the dot, e.g. ".txt"
            show_progress: If true, will show a progress bar as the files are loaded.
                This forces an iteration through all matching files
                to count them prior to loading them.

        Examples:

        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

            # Load a single file.
            loader = FileSystemBlobLoader("/path/to/file.txt")

            # Recursively load all text files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")

            # Recursively load all non-hidden files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")

            # Load all files in a directory without recursion.
            loader = FileSystemBlobLoader("/path/to/directory", glob="*")

            # Recursively load all files in a directory, except for py or pyc files.
            loader = FileSystemBlobLoader(
                "/path/to/directory",
                glob="**/*.txt",
                exclude=["**/*.py", "**/*.pyc"]
            )
        """  # noqa: E501
        if isinstance(path, Path):
            _path = path
        elif isinstance(path, str):
            _path = Path(path)
        else:
            raise TypeError(f"Expected str or Path, got {type(path)}")

        self.path = _path.expanduser()  # Expand user to handle ~
        self.glob = glob
        self.suffixes = set(suffixes or [])
        self.show_progress = show_progress
        self.exclude = exclude

    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """Yield blobs that match the requested pattern."""
        iterator = _make_iterator(
            length_func=self.count_matching_files, show_progress=self.show_progress
        )

        for path in iterator(self._yield_paths()):
            yield Blob.from_path(path)

    def _yield_paths(self) -> Iterable[Path]:
        """Yield paths that match the requested pattern."""
        if self.path.is_file():
            yield self.path
            return

        paths = self.path.glob(self.glob)
        for path in paths:
            if self.exclude:
                if any(path.match(glob) for glob in self.exclude):
                    continue
            if path.is_file():
                if self.suffixes and path.suffix not in self.suffixes:
                    continue
                yield path

    def count_matching_files(self) -> int:
        """Count files that match the pattern without loading them."""
        # Carry out a full iteration to count the files without
        # materializing anything expensive in memory.
        num = 0
        for _ in self._yield_paths():
            num += 1
        return num
@@ -0,0 +1,190 @@
from __future__ import annotations

from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Any,
    Iterator,
    List,
    Literal,
    Optional,
    Sequence,
    Union,
)

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_community.document_loaders.blob_loaders import (
    BlobLoader,
    FileSystemBlobLoader,
)
from langchain_community.document_loaders.parsers.registry import get_parser

if TYPE_CHECKING:
    from langchain.text_splitter import TextSplitter

_PathLike = Union[str, Path]

DEFAULT = Literal["default"]


class GenericLoader(BaseLoader):
    """Generic Document Loader.

    A generic document loader that allows combining an arbitrary blob loader with
    a blob parser.

    Examples:

        Parse a specific PDF file:

        .. code-block:: python

            from langchain_community.document_loaders import GenericLoader
            from langchain_community.document_loaders.parsers.pdf import PyPDFParser

            # Recursively load all text files in a directory.
            loader = GenericLoader.from_filesystem(
                "my_lovely_pdf.pdf",
                parser=PyPDFParser()
            )

        .. code-block:: python

            from langchain_community.document_loaders import GenericLoader
            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader


            loader = GenericLoader.from_filesystem(
                path="path/to/directory",
                glob="**/[!.]*",
                suffixes=[".pdf"],
                show_progress=True,
            )

            docs = loader.lazy_load()
            next(docs)

    Example instantiations to change which files are loaded:

    .. code-block:: python

        # Recursively load all text files in a directory.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/*.txt")

        # Recursively load all non-hidden files in a directory.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/[!.]*")

        # Load all files in a directory without recursion.
        loader = GenericLoader.from_filesystem("/path/to/dir", glob="*")

    Example instantiations to change which parser is used:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.pdf import PyPDFParser

        # Recursively load all text files in a directory.
        loader = GenericLoader.from_filesystem(
            "/path/to/dir",
            glob="**/*.pdf",
            parser=PyPDFParser()
        )

    """  # noqa: E501

    def __init__(
        self,
        blob_loader: BlobLoader,
        blob_parser: BaseBlobParser,
    ) -> None:
        """A generic document loader.

        Args:
            blob_loader: A blob loader which knows how to yield blobs
            blob_parser: A blob parser which knows how to parse blobs into documents
        """
        self.blob_loader = blob_loader
        self.blob_parser = blob_parser

    def lazy_load(
        self,
    ) -> Iterator[Document]:
        """Load documents lazily. Use this when working at a large scale."""
        for blob in self.blob_loader.yield_blobs():
            yield from self.blob_parser.lazy_parse(blob)

    def load(self) -> List[Document]:
        """Load all documents."""
        return list(self.lazy_load())

    def load_and_split(
        self, text_splitter: Optional[TextSplitter] = None
    ) -> List[Document]:
        """Load all documents and split them into sentences."""
        raise NotImplementedError(
            "Loading and splitting is not yet implemented for generic loaders. "
            "When they will be implemented they will be added via the initializer. "
            "This method should not be used going forward."
        )

    @classmethod
    def from_filesystem(
        cls,
        path: _PathLike,
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
        parser: Union[DEFAULT, BaseBlobParser] = "default",
        parser_kwargs: Optional[dict] = None,
    ) -> GenericLoader:
        """Create a generic document loader using a filesystem blob loader.

        Args:
            path: The path to the directory to load documents from OR the path to a
                single file to load. If this is a file, glob, exclude, suffixes
                will be ignored.
            glob: The glob pattern to use to find documents.
            suffixes: The suffixes to use to filter documents. If None, all files
                matching the glob will be loaded.
            exclude: A list of patterns to exclude from the loader.
            show_progress: Whether to show a progress bar or not (requires tqdm).
                Proxies to the file system loader.
            parser: A blob parser which knows how to parse blobs into documents,
                will instantiate a default parser if not provided.
                The default can be overridden by either passing a parser or
                setting the class attribute `blob_parser` (the latter
                should be used with inheritance).
            parser_kwargs: Keyword arguments to pass to the parser.

        Returns:
            A generic document loader.
        """
        blob_loader = FileSystemBlobLoader(
            path,
            glob=glob,
            exclude=exclude,
            suffixes=suffixes,
            show_progress=show_progress,
        )
        if isinstance(parser, str):
            if parser == "default":
                try:
                    # If there is an implementation of get_parser on the class, use it.
                    blob_parser = cls.get_parser(**(parser_kwargs or {}))
                except NotImplementedError:
                    # if not then use the global registry.
                    blob_parser = get_parser(parser)
            else:
                blob_parser = get_parser(parser)
        else:
            blob_parser = parser
        return cls(blob_loader, blob_parser)

    @staticmethod
    def get_parser(**kwargs: Any) -> BaseBlobParser:
        """Override this method to associate a default parser with the class."""
        raise NotImplementedError()
@@ -0,0 +1,70 @@
"""Code for generic / auxiliary parsers.

This module contains some logic to help assemble more sophisticated parsers.
"""
from typing import Iterator, Mapping, Optional

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders.schema import Blob


class MimeTypeBasedParser(BaseBlobParser):
    """Parser that uses `mime`-types to parse a blob.

    This parser is useful for simple pipelines where the mime-type is sufficient
    to determine how to parse a blob.

    To use, configure handlers based on mime-types and pass them to the initializer.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser

        parser = MimeTypeBasedParser(
            handlers={
                "application/pdf": ...,
            },
            fallback_parser=...,
        )
    """  # noqa: E501

    def __init__(
        self,
        handlers: Mapping[str, BaseBlobParser],
        *,
        fallback_parser: Optional[BaseBlobParser] = None,
    ) -> None:
        """Define a parser that uses mime-types to determine how to parse a blob.

        Args:
            handlers: A mapping from mime-types to functions that take a blob, parse it
                and return a document.
            fallback_parser: A fallback_parser parser to use if the mime-type is not
                found in the handlers. If provided, this parser will be
                used to parse blobs with all mime-types not found in
                the handlers.
                If not provided, a ValueError will be raised if the
                mime-type is not found in the handlers.
        """
        self.handlers = handlers
        self.fallback_parser = fallback_parser

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Load documents from a blob."""
        mimetype = blob.mimetype

        if mimetype is None:
            raise ValueError(f"{blob} does not have a mimetype.")

        if mimetype in self.handlers:
            handler = self.handlers[mimetype]
            yield from handler.lazy_parse(blob)
        else:
            if self.fallback_parser is not None:
                yield from self.fallback_parser.lazy_parse(blob)
            else:
                raise ValueError(f"Unsupported mime type: {mimetype}")
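A hedged sketch (not in the diff) of composing this parser with the loaders above. PyPDFParser and TextParser are assumed to be available under langchain_community.document_loaders.parsers, and the directory path is a placeholder.

from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
from langchain_community.document_loaders.parsers.pdf import PyPDFParser  # assumed import path
from langchain_community.document_loaders.parsers.txt import TextParser  # assumed import path

# Route PDFs to the PDF parser; every other mime-type falls back to plain-text parsing.
parser = MimeTypeBasedParser(
    handlers={"application/pdf": PyPDFParser()},
    fallback_parser=TextParser(),
)
loader = GenericLoader(
    blob_loader=FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*"),
    blob_parser=parser,
)
for doc in loader.lazy_load():
    print(doc.metadata.get("source"))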
@@ -0,0 +1,157 @@
from __future__ import annotations

from typing import Any, Dict, Iterator, Optional, TYPE_CHECKING

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.language.cobol import CobolSegmenter
from langchain_community.document_loaders.parsers.language.javascript import (
    JavaScriptSegmenter,
)
from langchain_community.document_loaders.parsers.language.python import PythonSegmenter

if TYPE_CHECKING:
    from langchain.text_splitter import Language

try:
    from langchain.text_splitter import Language

    LANGUAGE_EXTENSIONS: Dict[str, str] = {
        "py": Language.PYTHON,
        "js": Language.JS,
        "cobol": Language.COBOL,
    }

    LANGUAGE_SEGMENTERS: Dict[str, Any] = {
        Language.PYTHON: PythonSegmenter,
        Language.JS: JavaScriptSegmenter,
        Language.COBOL: CobolSegmenter,
    }
except ImportError:
    LANGUAGE_EXTENSIONS = {}
    LANGUAGE_SEGMENTERS = {}


class LanguageParser(BaseBlobParser):
    """Parse using the respective programming language syntax.

    Each top-level function and class in the code is loaded into separate documents.
    Furthermore, an extra document is generated, containing the remaining top-level code
    that excludes the already segmented functions and classes.

    This approach can potentially improve the accuracy of QA models over source code.

    Currently, the supported languages for code parsing are Python and JavaScript.

    The language used for parsing can be configured, along with the minimum number of
    lines required to activate the splitting based on syntax.

    Examples:

        .. code-block:: python

            from langchain.text_splitter import Language
            from langchain_community.document_loaders.generic import GenericLoader
            from langchain_community.document_loaders.parsers import LanguageParser

            loader = GenericLoader.from_filesystem(
                "./code",
                glob="**/*",
                suffixes=[".py", ".js"],
                parser=LanguageParser()
            )
            docs = loader.load()

        Example instantiations to manually select the language:

        .. code-block:: python

            from langchain.text_splitter import Language

            loader = GenericLoader.from_filesystem(
                "./code",
                glob="**/*",
                suffixes=[".py"],
                parser=LanguageParser(language=Language.PYTHON)
            )

        Example instantiations to set number of lines threshold:

        .. code-block:: python

            loader = GenericLoader.from_filesystem(
                "./code",
                glob="**/*",
                suffixes=[".py"],
                parser=LanguageParser(parser_threshold=200)
            )
    """

    def __init__(self, language: Optional[Language] = None, parser_threshold: int = 0):
        """
        Language parser that splits code using the respective language syntax.

        Args:
            language: If None (default), it will try to infer language from source.
            parser_threshold: Minimum lines needed to activate parsing (0 by default).
        """
        self.language = language
        self.parser_threshold = parser_threshold

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        code = blob.as_string()

        language = self.language or (
            LANGUAGE_EXTENSIONS.get(blob.source.rsplit(".", 1)[-1])
            if isinstance(blob.source, str)
            else None
        )

        if language is None:
            yield Document(
                page_content=code,
                metadata={
                    "source": blob.source,
                },
            )
            return

        if self.parser_threshold >= len(code.splitlines()):
            yield Document(
                page_content=code,
                metadata={
                    "source": blob.source,
                    "language": language,
                },
            )
            return

        self.Segmenter = LANGUAGE_SEGMENTERS[language]
        segmenter = self.Segmenter(blob.as_string())
        if not segmenter.is_valid():
            yield Document(
                page_content=code,
                metadata={
                    "source": blob.source,
                },
            )
            return

        for functions_classes in segmenter.extract_functions_classes():
            yield Document(
                page_content=functions_classes,
                metadata={
                    "source": blob.source,
                    "content_type": "functions_classes",
                    "language": language,
                },
            )
        yield Document(
            page_content=segmenter.simplify_code(),
            metadata={
                "source": blob.source,
                "content_type": "simplified_code",
                "language": language,
            },
        )
@@ -0,0 +1,262 @@
from __future__ import annotations

import asyncio
import json
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Union

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    import pandas as pd
    from telethon.hints import EntityLike


def concatenate_rows(row: dict) -> str:
    """Combine message information in a readable format ready to be used."""
    date = row["date"]
    sender = row["from"]
    text = row["text"]
    return f"{sender} on {date}: {text}\n\n"


class TelegramChatFileLoader(BaseLoader):
    """Load from `Telegram chat` dump."""

    def __init__(self, path: str):
        """Initialize with a path."""
        self.file_path = path

    def load(self) -> List[Document]:
        """Load documents."""
        p = Path(self.file_path)

        with open(p, encoding="utf8") as f:
            d = json.load(f)

        text = "".join(
            concatenate_rows(message)
            for message in d["messages"]
            if message["type"] == "message" and isinstance(message["text"], str)
        )
        metadata = {"source": str(p)}

        return [Document(page_content=text, metadata=metadata)]


def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
    """Convert a string or list of strings to a list of Documents with metadata."""
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    if isinstance(text, str):
        # Take a single string as one page
        text = [text]
    page_docs = [Document(page_content=page) for page in text]

    # Add page numbers as metadata
    for i, doc in enumerate(page_docs):
        doc.metadata["page"] = i + 1

    # Split pages into chunks
    doc_chunks = []

    for doc in page_docs:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
            chunk_overlap=20,
        )
        chunks = text_splitter.split_text(doc.page_content)
        for i, chunk in enumerate(chunks):
            doc = Document(
                page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i}
            )
            # Add sources as metadata
            doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}"
            doc_chunks.append(doc)
    return doc_chunks


class TelegramChatApiLoader(BaseLoader):
    """Load `Telegram` chat json directory dump."""

    def __init__(
        self,
        chat_entity: Optional[EntityLike] = None,
        api_id: Optional[int] = None,
        api_hash: Optional[str] = None,
        username: Optional[str] = None,
        file_path: str = "telegram_data.json",
    ):
        """Initialize with API parameters.

        Args:
            chat_entity: The chat entity to fetch data from.
            api_id: The API ID.
            api_hash: The API hash.
            username: The username.
            file_path: The file path to save the data to. Defaults to
                "telegram_data.json".
        """
        self.chat_entity = chat_entity
        self.api_id = api_id
        self.api_hash = api_hash
        self.username = username
        self.file_path = file_path

    async def fetch_data_from_telegram(self) -> None:
        """Fetch data from Telegram API and save it as a JSON file."""
        from telethon.sync import TelegramClient

        data = []
        async with TelegramClient(self.username, self.api_id, self.api_hash) as client:
            async for message in client.iter_messages(self.chat_entity):
                is_reply = message.reply_to is not None
                reply_to_id = message.reply_to.reply_to_msg_id if is_reply else None
                data.append(
                    {
                        "sender_id": message.sender_id,
                        "text": message.text,
                        "date": message.date.isoformat(),
                        "message.id": message.id,
                        "is_reply": is_reply,
                        "reply_to_id": reply_to_id,
                    }
                )

        with open(self.file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

    def _get_message_threads(self, data: pd.DataFrame) -> dict:
        """Create a dictionary of message threads from the given data.

        Args:
            data (pd.DataFrame): A DataFrame containing the conversation \
                data with columns:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            dict: A dictionary where the key is the parent message ID and \
                the value is a list of message IDs in ascending order.
        """

        def find_replies(parent_id: int, reply_data: pd.DataFrame) -> List[int]:
            """
            Recursively find all replies to a given parent message ID.

            Args:
                parent_id (int): The parent message ID.
                reply_data (pd.DataFrame): A DataFrame containing reply messages.

            Returns:
                list: A list of message IDs that are replies to the parent message ID.
            """
            # Find direct replies to the parent message ID
            direct_replies = reply_data[reply_data["reply_to_id"] == parent_id][
                "message.id"
            ].tolist()

            # Recursively find replies to the direct replies
            all_replies = []
            for reply_id in direct_replies:
                all_replies += [reply_id] + find_replies(reply_id, reply_data)

            return all_replies

        # Filter out parent messages
        parent_messages = data[~data["is_reply"]]

        # Filter out reply messages and drop rows with NaN in 'reply_to_id'
        reply_messages = data[data["is_reply"]].dropna(subset=["reply_to_id"])

        # Convert 'reply_to_id' to integer
        reply_messages["reply_to_id"] = reply_messages["reply_to_id"].astype(int)

        # Create a dictionary of message threads with parent message IDs as keys and
        # lists of reply message IDs as values
        message_threads = {
            parent_id: [parent_id] + find_replies(parent_id, reply_messages)
            for parent_id in parent_messages["message.id"]
        }

        return message_threads

    def _combine_message_texts(
        self, message_threads: Dict[int, List[int]], data: pd.DataFrame
    ) -> str:
        """
        Combine the message texts for each parent message ID based \
            on the list of message threads.

        Args:
            message_threads (dict): A dictionary where the key is the parent message \
                ID and the value is a list of message IDs in ascending order.
            data (pd.DataFrame): A DataFrame containing the conversation data:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            str: A combined string of message texts sorted by date.
        """
        combined_text = ""

        # Iterate through sorted parent message IDs
        for parent_id, message_ids in message_threads.items():
            # Get the message texts for the message IDs and sort them by date
            message_texts = (
                data[data["message.id"].isin(message_ids)]
                .sort_values(by="date")["text"]
                .tolist()
            )
            message_texts = [str(elem) for elem in message_texts]

            # Combine the message texts
            combined_text += " ".join(message_texts) + ".\n"

        return combined_text.strip()

    def load(self) -> List[Document]:
        """Load documents."""

        if self.chat_entity is not None:
            try:
                import nest_asyncio

                nest_asyncio.apply()
                asyncio.run(self.fetch_data_from_telegram())
            except ImportError:
                raise ImportError(
                    """`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    """
                )

        p = Path(self.file_path)

        with open(p, encoding="utf8") as f:
            d = json.load(f)
        try:
            import pandas as pd
        except ImportError:
            raise ImportError(
                """`pandas` package not found.
                please install with `pip install pandas`
                """
            )
        normalized_messages = pd.json_normalize(d)
        df = pd.DataFrame(normalized_messages)

        message_threads = self._get_message_threads(df)
        combined_texts = self._combine_message_texts(message_threads, df)

        return text_to_docs(combined_texts)
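A short usage sketch (not in the diff) for the file-based loader, assuming the module path follows the pattern of the other files in this diff and that result.json is a chat export produced by Telegram Desktop:

from langchain_community.document_loaders.telegram import TelegramChatFileLoader  # assumed module path

# "result.json" is assumed to be a Telegram Desktop "Export chat history" JSON file.
loader = TelegramChatFileLoader("result.json")
docs = loader.load()
print(docs[0].page_content[:200])  # concatenated "<sender> on <date>: <text>" lines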
@@ -0,0 +1,149 @@
from typing import Any, Iterator, List, Sequence, cast

from langchain_core.documents import BaseDocumentTransformer, Document


class BeautifulSoupTransformer(BaseDocumentTransformer):
    """Transform HTML content by extracting specific tags and removing unwanted ones.

    Example:
        .. code-block:: python

            from langchain_community.document_transformers import BeautifulSoupTransformer

            bs4_transformer = BeautifulSoupTransformer()
            docs_transformed = bs4_transformer.transform_documents(docs)
    """  # noqa: E501

    def __init__(self) -> None:
        """
        Initialize the transformer.

        This checks if the BeautifulSoup4 package is installed.
        If not, it raises an ImportError.
        """
        try:
            import bs4  # noqa:F401
        except ImportError:
            raise ImportError(
                "BeautifulSoup4 is required for BeautifulSoupTransformer. "
                "Please install it with `pip install beautifulsoup4`."
            )

    def transform_documents(
        self,
        documents: Sequence[Document],
        unwanted_tags: List[str] = ["script", "style"],
        tags_to_extract: List[str] = ["p", "li", "div", "a"],
        remove_lines: bool = True,
        **kwargs: Any,
    ) -> Sequence[Document]:
        """
        Transform a list of Document objects by cleaning their HTML content.

        Args:
            documents: A sequence of Document objects containing HTML content.
            unwanted_tags: A list of tags to be removed from the HTML.
            tags_to_extract: A list of tags whose content will be extracted.
            remove_lines: If set to True, unnecessary lines will be
                removed from the HTML content.

        Returns:
            A sequence of Document objects with transformed content.
        """
        for doc in documents:
            cleaned_content = doc.page_content

            cleaned_content = self.remove_unwanted_tags(cleaned_content, unwanted_tags)

            cleaned_content = self.extract_tags(cleaned_content, tags_to_extract)

            if remove_lines:
                cleaned_content = self.remove_unnecessary_lines(cleaned_content)

            doc.page_content = cleaned_content

        return documents

    @staticmethod
    def remove_unwanted_tags(html_content: str, unwanted_tags: List[str]) -> str:
        """
        Remove unwanted tags from a given HTML content.

        Args:
            html_content: The original HTML content string.
            unwanted_tags: A list of tags to be removed from the HTML.

        Returns:
            A cleaned HTML string with unwanted tags removed.
        """
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html_content, "html.parser")
        for tag in unwanted_tags:
            for element in soup.find_all(tag):
                element.decompose()
        return str(soup)

    @staticmethod
    def extract_tags(html_content: str, tags: List[str]) -> str:
        """
        Extract specific tags from a given HTML content.

        Args:
            html_content: The original HTML content string.
            tags: A list of tags to be extracted from the HTML.

        Returns:
            A string combining the content of the extracted tags.
        """
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html_content, "html.parser")
        text_parts: List[str] = []
        for element in soup.find_all():
            if element.name in tags:
                # Extract all navigable strings recursively from this element.
                text_parts += get_navigable_strings(element)

                # To avoid duplicate text, remove all descendants from the soup.
                element.decompose()

        return " ".join(text_parts)

    @staticmethod
    def remove_unnecessary_lines(content: str) -> str:
        """
        Clean up the content by removing unnecessary lines.

        Args:
            content: A string, which may contain unnecessary lines or spaces.

        Returns:
            A cleaned string with unnecessary lines removed.
        """
        lines = content.split("\n")
        stripped_lines = [line.strip() for line in lines]
        non_empty_lines = [line for line in stripped_lines if line]
        cleaned_content = " ".join(non_empty_lines)
        return cleaned_content

    async def atransform_documents(
        self,
        documents: Sequence[Document],
        **kwargs: Any,
    ) -> Sequence[Document]:
        raise NotImplementedError


def get_navigable_strings(element: Any) -> Iterator[str]:
    from bs4 import NavigableString, Tag

    for child in cast(Tag, element).children:
        if isinstance(child, Tag):
            yield from get_navigable_strings(child)
        elif isinstance(child, NavigableString):
            if (element.name == "a") and (href := element.get("href")):
                yield f"{child.strip()} ({href})"
            else:
                yield child.strip()
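A small end-to-end sketch (not in the diff) of the transformer on an in-memory HTML document; the sample HTML and the expected output shown in the comment are illustrative.

from langchain_core.documents import Document

from langchain_community.document_transformers import BeautifulSoupTransformer

html = (
    "<html><body><p>Hello</p>"
    "<script>var x = 1;</script>"
    "<a href='https://example.com'>link</a></body></html>"
)
docs = [Document(page_content=html)]

bs4_transformer = BeautifulSoupTransformer()
# <script>/<style> are dropped; text is pulled from <p>, <li>, <div> and <a>,
# with anchor text rendered as "text (href)".
docs_transformed = bs4_transformer.transform_documents(docs)
print(docs_transformed[0].page_content)  # roughly: "Hello link (https://example.com)"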
@@ -0,0 +1,140 @@
"""Document transformers that use OpenAI Functions models"""
from typing import Any, Dict, Optional, Sequence, Type, Union

from langchain_core.documents import BaseDocumentTransformer, Document
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel


class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
    """Extract metadata tags from document contents using OpenAI functions.

    Example:
        .. code-block:: python

            from langchain_community.chat_models import ChatOpenAI
            from langchain_community.document_transformers import OpenAIMetadataTagger
            from langchain_core.documents import Document

            schema = {
                "properties": {
                    "movie_title": { "type": "string" },
                    "critic": { "type": "string" },
                    "tone": {
                        "type": "string",
                        "enum": ["positive", "negative"]
                    },
                    "rating": {
                        "type": "integer",
                        "description": "The number of stars the critic rated the movie"
                    }
                },
                "required": ["movie_title", "critic", "tone"]
            }

            # Must be an OpenAI model that supports functions
            llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
            tagging_chain = create_tagging_chain(schema, llm)
            document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
            original_documents = [
                Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
                Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
            ]

            enhanced_documents = document_transformer.transform_documents(original_documents)
    """  # noqa: E501

    tagging_chain: Any
    """The chain used to extract metadata from each document."""

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Automatically extract and populate metadata
        for each document according to the provided schema."""

        new_documents = []

        for document in documents:
            extracted_metadata: Dict = self.tagging_chain.run(document.page_content)  # type: ignore[assignment]  # noqa: E501
            new_document = Document(
                page_content=document.page_content,
                metadata={**extracted_metadata, **document.metadata},
            )
            new_documents.append(new_document)
        return new_documents

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        raise NotImplementedError


def create_metadata_tagger(
    metadata_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: BaseLanguageModel,
    prompt: Optional[ChatPromptTemplate] = None,
    *,
    tagging_chain_kwargs: Optional[Dict] = None,
) -> OpenAIMetadataTagger:
    """Create a DocumentTransformer that uses an OpenAI function chain to automatically
    tag documents with metadata based on their content and an input schema.

    Args:
        metadata_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
            is passed in, it's assumed to already be a valid JsonSchema.
            For best results, pydantic.BaseModels should have docstrings describing what
            the schema represents and descriptions for the parameters.
        llm: Language model to use, assumed to support the OpenAI function-calling API.
            Defaults to use "gpt-3.5-turbo-0613"
        prompt: BasePromptTemplate to pass to the model.

    Returns:
        An OpenAIMetadataTagger document transformer that tags documents using the
        given schema.

    Example:
        .. code-block:: python

            from langchain_community.chat_models import ChatOpenAI
            from langchain_community.document_transformers import create_metadata_tagger
            from langchain_core.documents import Document

            schema = {
                "properties": {
                    "movie_title": { "type": "string" },
                    "critic": { "type": "string" },
                    "tone": {
                        "type": "string",
                        "enum": ["positive", "negative"]
                    },
                    "rating": {
                        "type": "integer",
                        "description": "The number of stars the critic rated the movie"
                    }
                },
                "required": ["movie_title", "critic", "tone"]
            }

            # Must be an OpenAI model that supports functions
            llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

            document_transformer = create_metadata_tagger(schema, llm)
            original_documents = [
                Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
                Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
            ]

            enhanced_documents = document_transformer.transform_documents(original_documents)
    """  # noqa: E501
    from langchain.chains.openai_functions import create_tagging_chain

    metadata_schema = (
        metadata_schema
        if isinstance(metadata_schema, dict)
        else metadata_schema.schema()
    )
    _tagging_chain_kwargs = tagging_chain_kwargs or {}
    tagging_chain = create_tagging_chain(
        metadata_schema, llm, prompt=prompt, **_tagging_chain_kwargs
    )
    return OpenAIMetadataTagger(tagging_chain=tagging_chain)
@@ -0,0 +1,160 @@
"""**Embedding models** are wrappers around embedding models
from different APIs and services.

**Embedding models** can be LLMs or not.

**Class hierarchy:**

.. code-block::

    Embeddings --> <name>Embeddings  # Examples: CohereEmbeddings, HuggingFaceEmbeddings
"""


import logging
from typing import Any

from langchain_community.embeddings.aleph_alpha import (
    AlephAlphaAsymmetricSemanticEmbedding,
    AlephAlphaSymmetricSemanticEmbedding,
)
from langchain_community.embeddings.awa import AwaEmbeddings
from langchain_community.embeddings.baidu_qianfan_endpoint import (
    QianfanEmbeddingsEndpoint,
)
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from langchain_community.embeddings.bookend import BookendEmbeddings
from langchain_community.embeddings.cache import CacheBackedEmbeddings
from langchain_community.embeddings.clarifai import ClarifaiEmbeddings
from langchain_community.embeddings.cohere import CohereEmbeddings
from langchain_community.embeddings.dashscope import DashScopeEmbeddings
from langchain_community.embeddings.databricks import DatabricksEmbeddings
from langchain_community.embeddings.deepinfra import DeepInfraEmbeddings
from langchain_community.embeddings.edenai import EdenAiEmbeddings
from langchain_community.embeddings.elasticsearch import ElasticsearchEmbeddings
from langchain_community.embeddings.embaas import EmbaasEmbeddings
from langchain_community.embeddings.ernie import ErnieEmbeddings
from langchain_community.embeddings.fake import (
    DeterministicFakeEmbedding,
    FakeEmbeddings,
)
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.embeddings.google_palm import GooglePalmEmbeddings
from langchain_community.embeddings.gpt4all import GPT4AllEmbeddings
from langchain_community.embeddings.gradient_ai import GradientEmbeddings
from langchain_community.embeddings.huggingface import (
    HuggingFaceBgeEmbeddings,
    HuggingFaceEmbeddings,
    HuggingFaceInferenceAPIEmbeddings,
    HuggingFaceInstructEmbeddings,
)
from langchain_community.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
from langchain_community.embeddings.infinity import InfinityEmbeddings
from langchain_community.embeddings.javelin_ai_gateway import JavelinAIGatewayEmbeddings
from langchain_community.embeddings.jina import JinaEmbeddings
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings
from langchain_community.embeddings.localai import LocalAIEmbeddings
from langchain_community.embeddings.minimax import MiniMaxEmbeddings
from langchain_community.embeddings.mlflow import MlflowEmbeddings
from langchain_community.embeddings.mlflow_gateway import MlflowAIGatewayEmbeddings
from langchain_community.embeddings.modelscope_hub import ModelScopeEmbeddings
from langchain_community.embeddings.mosaicml import MosaicMLInstructorEmbeddings
from langchain_community.embeddings.nlpcloud import NLPCloudEmbeddings
from langchain_community.embeddings.octoai_embeddings import OctoAIEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.embeddings.sagemaker_endpoint import (
    SagemakerEndpointEmbeddings,
)
from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings
from langchain_community.embeddings.self_hosted_hugging_face import (
    SelfHostedHuggingFaceEmbeddings,
    SelfHostedHuggingFaceInstructEmbeddings,
)
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings
from langchain_community.embeddings.vertexai import VertexAIEmbeddings
from langchain_community.embeddings.voyageai import VoyageEmbeddings
from langchain_community.embeddings.xinference import XinferenceEmbeddings

logger = logging.getLogger(__name__)

__all__ = [
    "CacheBackedEmbeddings",
    "ClarifaiEmbeddings",
    "CohereEmbeddings",
    "DatabricksEmbeddings",
    "ElasticsearchEmbeddings",
    "FastEmbedEmbeddings",
    "HuggingFaceEmbeddings",
    "HuggingFaceInferenceAPIEmbeddings",
    "InfinityEmbeddings",
    "GradientEmbeddings",
    "JinaEmbeddings",
    "LlamaCppEmbeddings",
    "HuggingFaceHubEmbeddings",
    "MlflowEmbeddings",
    "MlflowAIGatewayEmbeddings",
    "ModelScopeEmbeddings",
    "TensorflowHubEmbeddings",
    "SagemakerEndpointEmbeddings",
    "HuggingFaceInstructEmbeddings",
    "MosaicMLInstructorEmbeddings",
    "SelfHostedEmbeddings",
    "SelfHostedHuggingFaceEmbeddings",
    "SelfHostedHuggingFaceInstructEmbeddings",
    "FakeEmbeddings",
    "DeterministicFakeEmbedding",
    "AlephAlphaAsymmetricSemanticEmbedding",
    "AlephAlphaSymmetricSemanticEmbedding",
    "SentenceTransformerEmbeddings",
    "GooglePalmEmbeddings",
    "MiniMaxEmbeddings",
    "VertexAIEmbeddings",
    "BedrockEmbeddings",
    "DeepInfraEmbeddings",
    "EdenAiEmbeddings",
    "DashScopeEmbeddings",
    "EmbaasEmbeddings",
    "OctoAIEmbeddings",
    "SpacyEmbeddings",
    "NLPCloudEmbeddings",
    "GPT4AllEmbeddings",
    "XinferenceEmbeddings",
    "LocalAIEmbeddings",
    "AwaEmbeddings",
    "HuggingFaceBgeEmbeddings",
    "ErnieEmbeddings",
    "JavelinAIGatewayEmbeddings",
    "OllamaEmbeddings",
    "QianfanEmbeddingsEndpoint",
    "JohnSnowLabsEmbeddings",
    "VoyageEmbeddings",
    "BookendEmbeddings",
]


# TODO: this is in here to maintain backwards compatibility
class HypotheticalDocumentEmbedder:
    def __init__(self, *args: Any, **kwargs: Any):
        logger.warning(
            "Using a deprecated class. Please use "
            "`from langchain.chains import HypotheticalDocumentEmbedder` instead"
        )
        from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H

        return H(*args, **kwargs)  # type: ignore

    @classmethod
    def from_llm(cls, *args: Any, **kwargs: Any) -> Any:
        logger.warning(
            "Using a deprecated class. Please use "
            "`from langchain.chains import HypotheticalDocumentEmbedder` instead"
        )
        from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H

        return H.from_llm(*args, **kwargs)
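Not part of the diff: a quick smoke-test sketch using the fake embeddings exported above, handy for wiring up pipelines without API keys; the vector size is an arbitrary choice.

from langchain_community.embeddings import FakeEmbeddings

embeddings = FakeEmbeddings(size=256)  # size = dimensionality of the dummy vectors
vector = embeddings.embed_query("hello world")
print(len(vector))  # 256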
Some files were not shown because too many files have changed in this diff.